From 125799a1b06d7223a96ed24f4bddb797c045c949 Mon Sep 17 00:00:00 2001
From: Akash Kothari <akashk4@tyler.cs.illinois.edu>
Date: Thu, 21 Jan 2021 00:50:46 -0600
Subject: [PATCH] Try fixing potential runtime errors when running DNN
 benchmarks

---
 hpvm/CMakeLists.txt                           |   1 -
 hpvm/lib/Transforms/CMakeLists.txt            |   3 +
 .../alexnet/data/profile_info_0.txt           | 333 ++++++
 .../mobilenet/data/profile_info_0.txt         | 973 ++++++++++++++++++
 4 files changed, 1309 insertions(+), 1 deletion(-)
 create mode 100644 hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt
 create mode 100644 hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/profile_info_0.txt

diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt
index d63675b342..be0376ab58 100644
--- a/hpvm/CMakeLists.txt
+++ b/hpvm/CMakeLists.txt
@@ -12,7 +12,6 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
 add_subdirectory(lib)
 add_subdirectory(projects)
 add_subdirectory(tools)
-
 add_subdirectory(test)
 
 # Add a global check rule now that all subdirectories have been traversed
diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt
index b18cd4551b..ce1c152ef3 100644
--- a/hpvm/lib/Transforms/CMakeLists.txt
+++ b/hpvm/lib/Transforms/CMakeLists.txt
@@ -8,3 +8,6 @@ add_subdirectory(DFG2LLVM_WrapperAPI)
 add_subdirectory(DFG2LLVM_CUDNN)
 add_subdirectory(FuseHPVMTensorNodes)
 add_subdirectory(InPlaceDFG)
+
+set(TENSOR_RT_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
+set(TENSOR_RT_LL ${TENSOR_RT_PREFIX}/tensor_runtime.ll)
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt
new file mode 100644
index 0000000000..c22fc1985c
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt
@@ -0,0 +1,333 @@
+Iteration 0
+tensorConvApprox 11.4549 0
+tensorAdd 0.987733 0
+tensorTanh 0.89782 0
+tensorPooling 3.92176 0
+tensorConvApprox 30.4695 0
+tensorAdd 0.756682 0
+tensorTanh 0.659855 0
+tensorPooling 3.87498 0
+tensorConvApprox 12.9027 0
+tensorAdd 0.529304 0
+tensorTanh 0.360859 0
+tensorConvApprox 14.1188 0
+tensorAdd 0.572512 0
+tensorTanh 0.269676 0
+tensorConvApprox 12.5122 0
+tensorAdd 0.37547 0
+tensorTanh 0.260915 0
+tensorPooling 2.68986 0
+tensorGemmGPU 0.215392 0
+tensorAdd 0.120663 0
+tensorSoftmax 0.134412 0
+
+Iteration Compute Time   : 98.0861
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.025326
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 1
+tensorConvApprox 14.367 0
+tensorAdd 1.01869 0
+tensorTanh 0.88308 0
+tensorPooling 3.88747 0
+tensorConvApprox 31.0697 0
+tensorAdd 0.775118 0
+tensorTanh 0.672938 0
+tensorPooling 3.17431 0
+tensorConvApprox 19.3971 0
+tensorAdd 0.528325 0
+tensorTanh 0.361839 0
+tensorConvApprox 14.727 0
+tensorAdd 0.367463 0
+tensorTanh 0.258956 0
+tensorConvApprox 15.0059 0
+tensorAdd 0.361187 0
+tensorTanh 0.258366 0
+tensorPooling 2.70526 0
+tensorGemmGPU 0.18939 0
+tensorAdd 0.068016 0
+tensorSoftmax 0.536969 0
+
+Iteration Compute Time   : 110.614
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.021939
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 2
+tensorConvApprox 10.204 0
+tensorAdd 1.0115 0
+tensorTanh 0.900257 0
+tensorPooling 3.90068 0
+tensorConvApprox 27.5914 0
+tensorAdd 0.822439 0
+tensorTanh 0.849694 0
+tensorPooling 2.97768 0
+tensorConvApprox 17.8808 0
+tensorAdd 0.55192 0
+tensorTanh 0.378285 0
+tensorConvApprox 13.1066 0
+tensorAdd 0.621008 0
+tensorTanh 0.258276 0
+tensorConvApprox 18.631 0
+tensorAdd 0.422421 0
+tensorTanh 0.274397 0
+tensorPooling 2.65083 0
+tensorGemmGPU 0.409805 0
+tensorAdd 0.068461 0
+tensorSoftmax 0.104962 0
+
+Iteration Compute Time   : 103.616
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.152392
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 3
+tensorConvApprox 16.3712 0
+tensorAdd 1.03894 0
+tensorTanh 0.891796 0
+tensorPooling 4.02378 0
+tensorConvApprox 25.7412 0
+tensorAdd 0.820657 0
+tensorTanh 0.702908 0
+tensorPooling 3.12982 0
+tensorConvApprox 17.68 0
+tensorAdd 0.543514 0
+tensorTanh 0.378761 0
+tensorConvApprox 12.42 0
+tensorAdd 0.408593 0
+tensorTanh 0.403677 0
+tensorConvApprox 14.9018 0
+tensorAdd 0.408336 0
+tensorTanh 0.269063 0
+tensorPooling 2.6564 0
+tensorGemmGPU 0.169787 0
+tensorAdd 0.057522 0
+tensorSoftmax 0.083927 0
+
+Iteration Compute Time   : 103.102
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.022148
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 4
+tensorConvApprox 9.93468 0
+tensorAdd 1.02456 0
+tensorTanh 0.899216 0
+tensorPooling 3.89126 0
+tensorConvApprox 25.9977 0
+tensorAdd 1.24468 0
+tensorTanh 0.704036 0
+tensorPooling 3.74854 0
+tensorConvApprox 17.644 0
+tensorAdd 0.55182 0
+tensorTanh 0.37049 0
+tensorConvApprox 12.5094 0
+tensorAdd 0.407149 0
+tensorTanh 0.248609 0
+tensorConvApprox 13.9976 0
+tensorAdd 0.403548 0
+tensorTanh 0.26787 0
+tensorPooling 2.65781 0
+tensorGemmGPU 0.163417 0
+tensorAdd 0.055534 0
+tensorSoftmax 0.079422 0
+
+Iteration Compute Time   : 96.8013
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.021165
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 5
+tensorConvApprox 9.80512 0
+tensorAdd 1.00169 0
+tensorTanh 0.921429 0
+tensorPooling 3.91348 0
+tensorConvApprox 25.9371 0
+tensorAdd 0.815127 0
+tensorTanh 0.673571 0
+tensorPooling 7.36816 0
+tensorConvApprox 16.8525 0
+tensorAdd 0.525325 0
+tensorTanh 0.375484 0
+tensorConvApprox 13.4568 0
+tensorAdd 0.413802 0
+tensorTanh 0.250907 0
+tensorConvApprox 14.0005 0
+tensorAdd 0.408961 0
+tensorTanh 0.248077 0
+tensorPooling 2.66095 0
+tensorGemmGPU 0.165503 0
+tensorAdd 0.056493 0
+tensorSoftmax 0.077052 0
+
+Iteration Compute Time   : 99.9281
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.021977
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 6
+tensorConvApprox 9.80471 0
+tensorAdd 0.999942 0
+tensorTanh 0.915084 0
+tensorPooling 3.91451 0
+tensorConvApprox 25.834 0
+tensorAdd 1.14345 0
+tensorTanh 0.74676 0
+tensorPooling 3.82146 0
+tensorConvApprox 17.6392 0
+tensorAdd 0.556454 0
+tensorTanh 0.370676 0
+tensorConvApprox 12.4922 0
+tensorAdd 0.404391 0
+tensorTanh 0.250653 0
+tensorConvApprox 14.002 0
+tensorAdd 0.403829 0
+tensorTanh 0.25152 0
+tensorPooling 2.66016 0
+tensorGemmGPU 0.161451 0
+tensorAdd 0.059232 0
+tensorSoftmax 0.076951 0
+
+Iteration Compute Time   : 96.5086
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.021092
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 7
+tensorConvApprox 9.79061 0
+tensorAdd 1.00411 0
+tensorTanh 0.923385 0
+tensorPooling 3.89759 0
+tensorConvApprox 25.887 0
+tensorAdd 0.821996 0
+tensorTanh 0.685319 0
+tensorPooling 3.12557 0
+tensorConvApprox 17.6648 0
+tensorAdd 0.555787 0
+tensorTanh 0.385425 0
+tensorConvApprox 12.5025 0
+tensorAdd 0.404708 0
+tensorTanh 0.25099 0
+tensorConvApprox 13.9948 0
+tensorAdd 0.403679 0
+tensorTanh 0.252795 0
+tensorPooling 2.65976 0
+tensorGemmGPU 0.154435 0
+tensorAdd 0.058066 0
+tensorSoftmax 0.102014 0
+
+Iteration Compute Time   : 95.5253
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.03922
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 8
+tensorConvApprox 10.3697 0
+tensorAdd 1.02741 0
+tensorTanh 0.912612 0
+tensorPooling 3.87946 0
+tensorConvApprox 26.2823 0
+tensorAdd 0.816944 0
+tensorTanh 0.686638 0
+tensorPooling 3.12319 0
+tensorConvApprox 17.918 0
+tensorAdd 0.530344 0
+tensorTanh 0.362426 0
+tensorConvApprox 12.8083 0
+tensorAdd 0.360747 0
+tensorTanh 0.257041 0
+tensorConvApprox 14.2165 0
+tensorAdd 0.528494 0
+tensorTanh 0.257368 0
+tensorPooling 2.53903 0
+tensorGemmGPU 0.170898 0
+tensorAdd 0.100506 0
+tensorSoftmax 0.119107 0
+
+Iteration Compute Time   : 97.2669
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.041931
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 9
+tensorConvApprox 10.2616 0
+tensorAdd 0.98462 0
+tensorTanh 0.895782 0
+tensorPooling 3.9356 0
+tensorConvApprox 26.2674 0
+tensorAdd 0.786113 0
+tensorTanh 0.676243 0
+tensorPooling 3.163 0
+tensorConvApprox 23.1817 0
+tensorAdd 0.847734 0
+tensorTanh 0.367746 0
+tensorConvApprox 12.9463 0
+tensorAdd 0.371015 0
+tensorTanh 0.263012 0
+tensorConvApprox 15.1519 0
+tensorAdd 0.428313 0
+tensorTanh 0.28493 0
+tensorPooling 2.61769 0
+tensorGemmGPU 0.167254 0
+tensorAdd 0.056753 0
+tensorSoftmax 0.082013 0
+
+Iteration Compute Time   : 103.737
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.022577
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+
+
+Total Compute Time  : 1005.19
+Total Compute Energy: 6.91275e-310
+
+Total Control Time  : 0.389767
+Total Control Energy: 3.51633e-315
+
+Total Config Time  : 3.51633e-315
+Total Config Energy: 3.51633e-315
+
+Total Time  : 1005.57
+Total Energy: 0
diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/profile_info_0.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/profile_info_0.txt
new file mode 100644
index 0000000000..27063f1379
--- /dev/null
+++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/profile_info_0.txt
@@ -0,0 +1,973 @@
+Iteration 0
+tensorConvApprox 2.23701 0
+tensorBatchNorm 0.529425 0
+tensorRelu 0.479808 0
+tensorConvCutlass 0.957134 0
+tensorBatchNorm 0.429569 0
+tensorRelu 0.498568 0
+tensorConvApprox 3.54345 0
+tensorBatchNorm 0.893891 0
+tensorRelu 0.887381 0
+tensorConvCutlass 2.66411 0
+tensorBatchNorm 0.251158 0
+tensorRelu 0.234899 0
+tensorConvApprox 1.85116 0
+tensorBatchNorm 0.618027 0
+tensorRelu 0.426936 0
+tensorConvCutlass 2.1397 0
+tensorBatchNorm 0.453199 0
+tensorRelu 0.434558 0
+tensorConvApprox 2.51921 0
+tensorBatchNorm 0.493453 0
+tensorRelu 0.454728 0
+tensorConvCutlass 2.09737 0
+tensorBatchNorm 0.165386 0
+tensorRelu 0.14034 0
+tensorConvApprox 1.24949 0
+tensorBatchNorm 0.271424 0
+tensorRelu 0.246688 0
+tensorConvCutlass 1.26528 0
+tensorBatchNorm 0.249295 0
+tensorRelu 0.233471 0
+tensorConvApprox 1.61344 0
+tensorBatchNorm 0.281014 0
+tensorRelu 0.272026 0
+tensorConvCutlass 0.89206 0
+tensorBatchNorm 0.241964 0
+tensorRelu 0.093642 0
+tensorConvApprox 0.95434 0
+tensorBatchNorm 0.425067 0
+tensorRelu 0.133866 0
+tensorConvCutlass 0.78137 0
+tensorBatchNorm 0.397914 0
+tensorRelu 0.138306 0
+tensorConvApprox 1.24994 0
+tensorBatchNorm 0.428037 0
+tensorRelu 0.132677 0
+tensorConvCutlass 0.715246 0
+tensorBatchNorm 0.483229 0
+tensorRelu 0.146812 0
+tensorConvApprox 1.24338 0
+tensorBatchNorm 0.43213 0
+tensorRelu 0.134047 0
+tensorConvCutlass 0.7264 0
+tensorBatchNorm 0.424954 0
+tensorRelu 0.139791 0
+tensorConvApprox 1.23298 0
+tensorBatchNorm 0.416953 0
+tensorRelu 0.134486 0
+tensorConvCutlass 0.679017 0
+tensorBatchNorm 0.409803 0
+tensorRelu 0.132933 0
+tensorConvApprox 1.25983 0
+tensorBatchNorm 0.431743 0
+tensorRelu 0.132827 0
+tensorConvCutlass 0.712734 0
+tensorBatchNorm 0.393237 0
+tensorRelu 0.156427 0
+tensorConvApprox 1.23481 0
+tensorBatchNorm 0.413014 0
+tensorRelu 0.132817 0
+tensorConvCutlass 0.700018 0
+tensorBatchNorm 0.41414 0
+tensorRelu 0.077319 0
+tensorConvApprox 0.851131 0
+tensorBatchNorm 0.764938 0
+tensorRelu 0.088765 0
+tensorConvCutlass 0.420801 0
+tensorBatchNorm 0.740765 0
+tensorRelu 0.09237 0
+tensorConvApprox 1.13472 0
+tensorBatchNorm 0.756702 0
+tensorRelu 0.087387 0
+tensorPooling 0.28436 0
+tensorGemmGPU 0.118249 0
+tensorAdd 0.070506 0
+tensorSoftmax 0.086034 0
+
+Iteration Compute Time   : 55.9596
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.035796
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 1
+tensorConvApprox 1.97561 0
+tensorBatchNorm 0.473865 0
+tensorRelu 0.453925 0
+tensorConvCutlass 1.04306 0
+tensorBatchNorm 0.436546 0
+tensorRelu 0.411711 0
+tensorConvApprox 3.43348 0
+tensorBatchNorm 0.895659 0
+tensorRelu 0.873215 0
+tensorConvCutlass 1.98017 0
+tensorBatchNorm 0.245242 0
+tensorRelu 0.233063 0
+tensorConvApprox 1.84183 0
+tensorBatchNorm 0.478471 0
+tensorRelu 0.443878 0
+tensorConvCutlass 2.15217 0
+tensorBatchNorm 0.422718 0
+tensorRelu 0.414853 0
+tensorConvApprox 2.35653 0
+tensorBatchNorm 0.477226 0
+tensorRelu 0.471601 0
+tensorConvCutlass 2.11536 0
+tensorBatchNorm 0.160139 0
+tensorRelu 0.138821 0
+tensorConvApprox 1.25131 0
+tensorBatchNorm 0.268332 0
+tensorRelu 0.23243 0
+tensorConvCutlass 1.23673 0
+tensorBatchNorm 0.248686 0
+tensorRelu 0.232362 0
+tensorConvApprox 1.6491 0
+tensorBatchNorm 0.255622 0
+tensorRelu 0.250747 0
+tensorConvCutlass 1.00023 0
+tensorBatchNorm 0.234396 0
+tensorRelu 0.092353 0
+tensorConvApprox 0.956705 0
+tensorBatchNorm 0.401337 0
+tensorRelu 0.132566 0
+tensorConvCutlass 0.727656 0
+tensorBatchNorm 0.39494 0
+tensorRelu 0.135475 0
+tensorConvApprox 1.21273 0
+tensorBatchNorm 0.399682 0
+tensorRelu 0.134199 0
+tensorConvCutlass 0.707999 0
+tensorBatchNorm 0.400456 0
+tensorRelu 0.131647 0
+tensorConvApprox 1.20442 0
+tensorBatchNorm 0.408527 0
+tensorRelu 0.133741 0
+tensorConvCutlass 0.710463 0
+tensorBatchNorm 0.400938 0
+tensorRelu 0.147735 0
+tensorConvApprox 1.25009 0
+tensorBatchNorm 0.439884 0
+tensorRelu 0.141364 0
+tensorConvCutlass 0.722939 0
+tensorBatchNorm 0.39408 0
+tensorRelu 0.138576 0
+tensorConvApprox 1.27766 0
+tensorBatchNorm 0.400103 0
+tensorRelu 0.133306 0
+tensorConvCutlass 0.701348 0
+tensorBatchNorm 0.531719 0
+tensorRelu 0.13418 0
+tensorConvApprox 1.20153 0
+tensorBatchNorm 0.406742 0
+tensorRelu 0.132218 0
+tensorConvCutlass 0.680362 0
+tensorBatchNorm 0.396494 0
+tensorRelu 0.06394 0
+tensorConvApprox 0.819766 0
+tensorBatchNorm 0.741952 0
+tensorRelu 0.085646 0
+tensorConvCutlass 0.428946 0
+tensorBatchNorm 0.738589 0
+tensorRelu 0.092674 0
+tensorConvApprox 1.06239 0
+tensorBatchNorm 0.748871 0
+tensorRelu 0.084497 0
+tensorPooling 0.255138 0
+tensorGemmGPU 0.115002 0
+tensorAdd 0.05473 0
+tensorSoftmax 0.072544 0
+
+Iteration Compute Time   : 54.0699
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.019171
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 2
+tensorConvApprox 1.9938 0
+tensorBatchNorm 0.483517 0
+tensorRelu 0.456522 0
+tensorConvCutlass 1.0357 0
+tensorBatchNorm 0.429524 0
+tensorRelu 0.412926 0
+tensorConvApprox 3.42249 0
+tensorBatchNorm 0.893502 0
+tensorRelu 0.865981 0
+tensorConvCutlass 2.60753 0
+tensorBatchNorm 0.245639 0
+tensorRelu 0.23249 0
+tensorConvApprox 1.8282 0
+tensorBatchNorm 0.473188 0
+tensorRelu 0.44711 0
+tensorConvCutlass 2.2097 0
+tensorBatchNorm 0.422328 0
+tensorRelu 0.41563 0
+tensorConvApprox 2.35971 0
+tensorBatchNorm 0.473869 0
+tensorRelu 0.454988 0
+tensorConvCutlass 2.14168 0
+tensorBatchNorm 0.16224 0
+tensorRelu 0.138312 0
+tensorConvApprox 1.2518 0
+tensorBatchNorm 0.268174 0
+tensorRelu 0.236937 0
+tensorConvCutlass 1.33004 0
+tensorBatchNorm 0.314835 0
+tensorRelu 0.264317 0
+tensorConvApprox 1.58082 0
+tensorBatchNorm 0.274042 0
+tensorRelu 0.249993 0
+tensorConvCutlass 1.03923 0
+tensorBatchNorm 0.22875 0
+tensorRelu 0.090939 0
+tensorConvApprox 0.936895 0
+tensorBatchNorm 0.402702 0
+tensorRelu 0.132519 0
+tensorConvCutlass 0.727177 0
+tensorBatchNorm 0.395688 0
+tensorRelu 0.135304 0
+tensorConvApprox 1.22042 0
+tensorBatchNorm 0.401043 0
+tensorRelu 0.133329 0
+tensorConvCutlass 0.734079 0
+tensorBatchNorm 0.400517 0
+tensorRelu 0.131085 0
+tensorConvApprox 1.20347 0
+tensorBatchNorm 0.408406 0
+tensorRelu 0.13312 0
+tensorConvCutlass 0.71914 0
+tensorBatchNorm 0.400224 0
+tensorRelu 0.131462 0
+tensorConvApprox 1.1849 0
+tensorBatchNorm 0.40678 0
+tensorRelu 0.133044 0
+tensorConvCutlass 0.700435 0
+tensorBatchNorm 0.400726 0
+tensorRelu 0.131443 0
+tensorConvApprox 1.2809 0
+tensorBatchNorm 0.408323 0
+tensorRelu 0.130881 0
+tensorConvCutlass 0.713094 0
+tensorBatchNorm 0.400763 0
+tensorRelu 0.134344 0
+tensorConvApprox 1.32531 0
+tensorBatchNorm 0.408491 0
+tensorRelu 0.135343 0
+tensorConvCutlass 0.683423 0
+tensorBatchNorm 0.383472 0
+tensorRelu 0.055856 0
+tensorConvApprox 0.802831 0
+tensorBatchNorm 0.750119 0
+tensorRelu 0.086386 0
+tensorConvCutlass 0.505849 0
+tensorBatchNorm 0.757398 0
+tensorRelu 0.104829 0
+tensorConvApprox 1.0665 0
+tensorBatchNorm 0.758638 0
+tensorRelu 0.108774 0
+tensorPooling 0.256465 0
+tensorGemmGPU 0.107896 0
+tensorAdd 0.053549 0
+tensorSoftmax 0.073444 0
+
+Iteration Compute Time   : 54.9332
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.019819
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 3
+tensorConvApprox 1.96858 0
+tensorBatchNorm 0.481127 0
+tensorRelu 0.453004 0
+tensorConvCutlass 1.06524 0
+tensorBatchNorm 0.438826 0
+tensorRelu 0.413673 0
+tensorConvApprox 3.45964 0
+tensorBatchNorm 1.13057 0
+tensorRelu 0.876152 0
+tensorConvCutlass 3.06743 0
+tensorBatchNorm 0.241919 0
+tensorRelu 0.234321 0
+tensorConvApprox 1.82083 0
+tensorBatchNorm 0.473934 0
+tensorRelu 0.444206 0
+tensorConvCutlass 2.2154 0
+tensorBatchNorm 0.425108 0
+tensorRelu 0.413497 0
+tensorConvApprox 3.79539 0
+tensorBatchNorm 0.485401 0
+tensorRelu 0.456188 0
+tensorConvCutlass 1.67366 0
+tensorBatchNorm 0.159367 0
+tensorRelu 0.138328 0
+tensorConvApprox 1.25087 0
+tensorBatchNorm 0.280366 0
+tensorRelu 0.238291 0
+tensorConvCutlass 1.21768 0
+tensorBatchNorm 0.25647 0
+tensorRelu 0.232797 0
+tensorConvApprox 1.583 0
+tensorBatchNorm 0.274884 0
+tensorRelu 0.250904 0
+tensorConvCutlass 1.01374 0
+tensorBatchNorm 0.232925 0
+tensorRelu 0.092624 0
+tensorConvApprox 0.929718 0
+tensorBatchNorm 0.406899 0
+tensorRelu 0.13354 0
+tensorConvCutlass 0.715119 0
+tensorBatchNorm 0.401388 0
+tensorRelu 0.132678 0
+tensorConvApprox 1.21308 0
+tensorBatchNorm 0.411271 0
+tensorRelu 0.136159 0
+tensorConvCutlass 0.728206 0
+tensorBatchNorm 0.40348 0
+tensorRelu 0.131293 0
+tensorConvApprox 1.2437 0
+tensorBatchNorm 0.415867 0
+tensorRelu 0.13346 0
+tensorConvCutlass 0.728818 0
+tensorBatchNorm 0.404325 0
+tensorRelu 0.133644 0
+tensorConvApprox 1.18933 0
+tensorBatchNorm 0.408998 0
+tensorRelu 0.135374 0
+tensorConvCutlass 0.758913 0
+tensorBatchNorm 0.401487 0
+tensorRelu 0.133675 0
+tensorConvApprox 1.22111 0
+tensorBatchNorm 0.408526 0
+tensorRelu 0.133156 0
+tensorConvCutlass 0.713234 0
+tensorBatchNorm 0.404604 0
+tensorRelu 0.135062 0
+tensorConvApprox 1.19223 0
+tensorBatchNorm 0.422434 0
+tensorRelu 0.151752 0
+tensorConvCutlass 0.674382 0
+tensorBatchNorm 0.399276 0
+tensorRelu 0.055825 0
+tensorConvApprox 0.813887 0
+tensorBatchNorm 0.749868 0
+tensorRelu 0.086545 0
+tensorConvCutlass 0.43128 0
+tensorBatchNorm 0.741291 0
+tensorRelu 0.091888 0
+tensorConvApprox 1.10082 0
+tensorBatchNorm 0.742328 0
+tensorRelu 0.086367 0
+tensorPooling 0.260055 0
+tensorGemmGPU 0.118566 0
+tensorAdd 0.054399 0
+tensorSoftmax 0.081731 0
+
+Iteration Compute Time   : 56.3574
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.019227
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 4
+tensorConvApprox 2.75777 0
+tensorBatchNorm 0.491652 0
+tensorRelu 0.466859 0
+tensorConvCutlass 1.04383 0
+tensorBatchNorm 0.435906 0
+tensorRelu 0.422722 0
+tensorConvApprox 3.65792 0
+tensorBatchNorm 0.893743 0
+tensorRelu 0.87971 0
+tensorConvCutlass 2.67706 0
+tensorBatchNorm 0.246657 0
+tensorRelu 0.239448 0
+tensorConvApprox 2.00886 0
+tensorBatchNorm 0.484758 0
+tensorRelu 0.461588 0
+tensorConvCutlass 2.19886 0
+tensorBatchNorm 0.430631 0
+tensorRelu 0.42929 0
+tensorConvApprox 2.58732 0
+tensorBatchNorm 0.483099 0
+tensorRelu 0.462454 0
+tensorConvCutlass 2.11366 0
+tensorBatchNorm 0.16427 0
+tensorRelu 0.151241 0
+tensorConvApprox 1.39126 0
+tensorBatchNorm 0.276029 0
+tensorRelu 0.244813 0
+tensorConvCutlass 1.29254 0
+tensorBatchNorm 0.263643 0
+tensorRelu 0.241842 0
+tensorConvApprox 1.74431 0
+tensorBatchNorm 0.277158 0
+tensorRelu 0.264752 0
+tensorConvCutlass 0.97804 0
+tensorBatchNorm 0.236649 0
+tensorRelu 0.100587 0
+tensorConvApprox 1.07438 0
+tensorBatchNorm 0.417747 0
+tensorRelu 0.145716 0
+tensorConvCutlass 0.78966 0
+tensorBatchNorm 0.408245 0
+tensorRelu 0.146282 0
+tensorConvApprox 1.59601 0
+tensorBatchNorm 0.417057 0
+tensorRelu 0.148021 0
+tensorConvCutlass 0.76697 0
+tensorBatchNorm 0.410469 0
+tensorRelu 0.143291 0
+tensorConvApprox 1.37314 0
+tensorBatchNorm 0.420813 0
+tensorRelu 0.149074 0
+tensorConvCutlass 0.785019 0
+tensorBatchNorm 0.406105 0
+tensorRelu 0.144829 0
+tensorConvApprox 1.32397 0
+tensorBatchNorm 0.41983 0
+tensorRelu 0.147431 0
+tensorConvCutlass 0.741959 0
+tensorBatchNorm 0.411628 0
+tensorRelu 0.149026 0
+tensorConvApprox 1.35713 0
+tensorBatchNorm 0.417514 0
+tensorRelu 0.142716 0
+tensorConvCutlass 0.78773 0
+tensorBatchNorm 0.40768 0
+tensorRelu 0.149051 0
+tensorConvApprox 1.32946 0
+tensorBatchNorm 0.415265 0
+tensorRelu 0.144227 0
+tensorConvCutlass 0.72498 0
+tensorBatchNorm 0.403585 0
+tensorRelu 0.067153 0
+tensorConvApprox 0.931648 0
+tensorBatchNorm 0.755898 0
+tensorRelu 0.096675 0
+tensorConvCutlass 0.47856 0
+tensorBatchNorm 0.754063 0
+tensorRelu 0.270367 0
+tensorConvApprox 1.19719 0
+tensorBatchNorm 0.760267 0
+tensorRelu 0.09892 0
+tensorPooling 0.305295 0
+tensorGemmGPU 0.148363 0
+tensorAdd 0.07344 0
+tensorSoftmax 0.09613 0
+
+Iteration Compute Time   : 58.7509
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.026229
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 5
+tensorConvApprox 2.1726 0
+tensorBatchNorm 0.488988 0
+tensorRelu 0.465871 0
+tensorConvCutlass 1.04703 0
+tensorBatchNorm 0.440283 0
+tensorRelu 0.429357 0
+tensorConvApprox 3.66603 0
+tensorBatchNorm 0.893586 0
+tensorRelu 0.877117 0
+tensorConvCutlass 2.13282 0
+tensorBatchNorm 0.249723 0
+tensorRelu 0.241791 0
+tensorConvApprox 1.99587 0
+tensorBatchNorm 0.484073 0
+tensorRelu 0.454081 0
+tensorConvCutlass 2.22364 0
+tensorBatchNorm 0.646739 0
+tensorRelu 0.433169 0
+tensorConvApprox 2.64464 0
+tensorBatchNorm 0.47868 0
+tensorRelu 0.468067 0
+tensorConvCutlass 2.16956 0
+tensorBatchNorm 0.167527 0
+tensorRelu 0.149678 0
+tensorConvApprox 1.38195 0
+tensorBatchNorm 0.28112 0
+tensorRelu 0.24312 0
+tensorConvCutlass 1.26873 0
+tensorBatchNorm 0.254428 0
+tensorRelu 0.233827 0
+tensorConvApprox 1.58365 0
+tensorBatchNorm 0.278968 0
+tensorRelu 0.25334 0
+tensorConvCutlass 1.0156 0
+tensorBatchNorm 0.244076 0
+tensorRelu 0.093283 0
+tensorConvApprox 0.965024 0
+tensorBatchNorm 0.404682 0
+tensorRelu 0.133879 0
+tensorConvCutlass 0.747094 0
+tensorBatchNorm 0.402763 0
+tensorRelu 0.142304 0
+tensorConvApprox 1.26048 0
+tensorBatchNorm 0.406361 0
+tensorRelu 0.133341 0
+tensorConvCutlass 0.729584 0
+tensorBatchNorm 0.396997 0
+tensorRelu 0.136837 0
+tensorConvApprox 1.21978 0
+tensorBatchNorm 0.411099 0
+tensorRelu 0.133601 0
+tensorConvCutlass 0.726998 0
+tensorBatchNorm 0.400946 0
+tensorRelu 0.132781 0
+tensorConvApprox 1.20036 0
+tensorBatchNorm 0.409799 0
+tensorRelu 0.136065 0
+tensorConvCutlass 0.696289 0
+tensorBatchNorm 0.406103 0
+tensorRelu 0.132264 0
+tensorConvApprox 1.26211 0
+tensorBatchNorm 0.418643 0
+tensorRelu 0.13414 0
+tensorConvCutlass 0.738303 0
+tensorBatchNorm 0.401346 0
+tensorRelu 0.169646 0
+tensorConvApprox 1.22581 0
+tensorBatchNorm 0.409902 0
+tensorRelu 0.133014 0
+tensorConvCutlass 0.770886 0
+tensorBatchNorm 0.40109 0
+tensorRelu 0.057301 0
+tensorConvApprox 0.820137 0
+tensorBatchNorm 0.751111 0
+tensorRelu 0.086029 0
+tensorConvCutlass 0.428312 0
+tensorBatchNorm 0.740022 0
+tensorRelu 0.092405 0
+tensorConvApprox 1.14936 0
+tensorBatchNorm 0.762909 0
+tensorRelu 0.091007 0
+tensorPooling 0.264327 0
+tensorGemmGPU 0.117489 0
+tensorAdd 0.055461 0
+tensorSoftmax 0.074949 0
+
+Iteration Compute Time   : 55.9742
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.018705
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 6
+tensorConvApprox 1.96907 0
+tensorBatchNorm 0.48141 0
+tensorRelu 0.454234 0
+tensorConvCutlass 1.04364 0
+tensorBatchNorm 0.430053 0
+tensorRelu 0.418357 0
+tensorConvApprox 3.41283 0
+tensorBatchNorm 0.888804 0
+tensorRelu 0.869239 0
+tensorConvCutlass 2.00479 0
+tensorBatchNorm 0.246474 0
+tensorRelu 0.234112 0
+tensorConvApprox 1.8452 0
+tensorBatchNorm 0.472051 0
+tensorRelu 0.445524 0
+tensorConvCutlass 2.24807 0
+tensorBatchNorm 0.429071 0
+tensorRelu 0.413047 0
+tensorConvApprox 2.35288 0
+tensorBatchNorm 0.482299 0
+tensorRelu 0.453052 0
+tensorConvCutlass 2.15299 0
+tensorBatchNorm 0.168091 0
+tensorRelu 0.140115 0
+tensorConvApprox 1.22523 0
+tensorBatchNorm 0.271412 0
+tensorRelu 0.23909 0
+tensorConvCutlass 1.19494 0
+tensorBatchNorm 0.249981 0
+tensorRelu 0.231756 0
+tensorConvApprox 1.5918 0
+tensorBatchNorm 0.270204 0
+tensorRelu 0.250939 0
+tensorConvCutlass 1.01044 0
+tensorBatchNorm 0.229694 0
+tensorRelu 0.091054 0
+tensorConvApprox 0.931967 0
+tensorBatchNorm 0.416154 0
+tensorRelu 0.136451 0
+tensorConvCutlass 0.746695 0
+tensorBatchNorm 0.393873 0
+tensorRelu 0.151156 0
+tensorConvApprox 1.18598 0
+tensorBatchNorm 0.414867 0
+tensorRelu 0.136287 0
+tensorConvCutlass 0.711735 0
+tensorBatchNorm 0.394707 0
+tensorRelu 0.158015 0
+tensorConvApprox 1.24798 0
+tensorBatchNorm 0.402511 0
+tensorRelu 0.131976 0
+tensorConvCutlass 0.89021 0
+tensorBatchNorm 0.3973 0
+tensorRelu 0.140986 0
+tensorConvApprox 1.2066 0
+tensorBatchNorm 0.416513 0
+tensorRelu 0.133314 0
+tensorConvCutlass 0.713295 0
+tensorBatchNorm 0.397544 0
+tensorRelu 0.139411 0
+tensorConvApprox 1.24168 0
+tensorBatchNorm 0.417975 0
+tensorRelu 0.131993 0
+tensorConvCutlass 0.710762 0
+tensorBatchNorm 0.396162 0
+tensorRelu 0.141993 0
+tensorConvApprox 1.34712 0
+tensorBatchNorm 0.405441 0
+tensorRelu 0.13312 0
+tensorConvCutlass 0.674823 0
+tensorBatchNorm 0.395537 0
+tensorRelu 0.057344 0
+tensorConvApprox 0.806353 0
+tensorBatchNorm 0.744342 0
+tensorRelu 0.088901 0
+tensorConvCutlass 0.417066 0
+tensorBatchNorm 0.735117 0
+tensorRelu 0.10534 0
+tensorConvApprox 1.04822 0
+tensorBatchNorm 0.743019 0
+tensorRelu 0.104582 0
+tensorPooling 0.452869 0
+tensorGemmGPU 0.114989 0
+tensorAdd 0.055767 0
+tensorSoftmax 0.07351 0
+
+Iteration Compute Time   : 54.4515
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.018769
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 7
+tensorConvApprox 1.97893 0
+tensorBatchNorm 0.477949 0
+tensorRelu 0.453688 0
+tensorConvCutlass 1.03878 0
+tensorBatchNorm 0.43087 0
+tensorRelu 0.411306 0
+tensorConvApprox 3.41754 0
+tensorBatchNorm 0.887339 0
+tensorRelu 0.869579 0
+tensorConvCutlass 2.85909 0
+tensorBatchNorm 0.244878 0
+tensorRelu 0.231961 0
+tensorConvApprox 1.94304 0
+tensorBatchNorm 0.473929 0
+tensorRelu 0.447546 0
+tensorConvCutlass 2.17425 0
+tensorBatchNorm 0.42527 0
+tensorRelu 0.415782 0
+tensorConvApprox 2.36657 0
+tensorBatchNorm 0.469628 0
+tensorRelu 0.451354 0
+tensorConvCutlass 2.14711 0
+tensorBatchNorm 0.160772 0
+tensorRelu 0.137116 0
+tensorConvApprox 1.23451 0
+tensorBatchNorm 0.270616 0
+tensorRelu 0.239943 0
+tensorConvCutlass 1.19111 0
+tensorBatchNorm 0.25036 0
+tensorRelu 0.233829 0
+tensorConvApprox 1.55993 0
+tensorBatchNorm 0.269845 0
+tensorRelu 0.250934 0
+tensorConvCutlass 1.03172 0
+tensorBatchNorm 0.228142 0
+tensorRelu 0.107492 0
+tensorConvApprox 0.930437 0
+tensorBatchNorm 0.404727 0
+tensorRelu 0.138149 0
+tensorConvCutlass 0.738596 0
+tensorBatchNorm 0.396348 0
+tensorRelu 0.152118 0
+tensorConvApprox 1.19572 0
+tensorBatchNorm 0.399799 0
+tensorRelu 0.136477 0
+tensorConvCutlass 0.72083 0
+tensorBatchNorm 0.395849 0
+tensorRelu 0.134113 0
+tensorConvApprox 1.20781 0
+tensorBatchNorm 0.402487 0
+tensorRelu 0.137157 0
+tensorConvCutlass 0.721853 0
+tensorBatchNorm 0.395229 0
+tensorRelu 0.137036 0
+tensorConvApprox 1.29612 0
+tensorBatchNorm 0.39542 0
+tensorRelu 0.136881 0
+tensorConvCutlass 0.690593 0
+tensorBatchNorm 0.400451 0
+tensorRelu 0.15126 0
+tensorConvApprox 1.24175 0
+tensorBatchNorm 0.403511 0
+tensorRelu 0.134717 0
+tensorConvCutlass 0.703724 0
+tensorBatchNorm 0.397863 0
+tensorRelu 0.140934 0
+tensorConvApprox 1.22323 0
+tensorBatchNorm 0.415987 0
+tensorRelu 0.231116 0
+tensorConvCutlass 0.67374 0
+tensorBatchNorm 0.395974 0
+tensorRelu 0.056334 0
+tensorConvApprox 0.800636 0
+tensorBatchNorm 0.754627 0
+tensorRelu 0.105969 0
+tensorConvCutlass 0.416602 0
+tensorBatchNorm 0.750834 0
+tensorRelu 0.095854 0
+tensorConvApprox 1.05905 0
+tensorBatchNorm 0.744483 0
+tensorRelu 0.103922 0
+tensorPooling 0.254605 0
+tensorGemmGPU 0.113178 0
+tensorAdd 0.056922 0
+tensorSoftmax 0.071876 0
+
+Iteration Compute Time   : 54.9456
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.016209
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 8
+tensorConvApprox 1.97954 0
+tensorBatchNorm 0.480376 0
+tensorRelu 0.454296 0
+tensorConvCutlass 1.02312 0
+tensorBatchNorm 0.431889 0
+tensorRelu 0.412319 0
+tensorConvApprox 3.45825 0
+tensorBatchNorm 0.891789 0
+tensorRelu 0.867071 0
+tensorConvCutlass 2.54057 0
+tensorBatchNorm 0.243556 0
+tensorRelu 0.232223 0
+tensorConvApprox 1.83753 0
+tensorBatchNorm 0.470948 0
+tensorRelu 0.446066 0
+tensorConvCutlass 2.16032 0
+tensorBatchNorm 0.417745 0
+tensorRelu 0.416572 0
+tensorConvApprox 16.7109 0
+tensorBatchNorm 0.553729 0
+tensorRelu 0.511455 0
+tensorConvCutlass 1.95369 0
+tensorBatchNorm 0.342645 0
+tensorRelu 0.150224 0
+tensorConvApprox 1.27591 0
+tensorBatchNorm 0.276026 0
+tensorRelu 0.24126 0
+tensorConvCutlass 1.23113 0
+tensorBatchNorm 0.337165 0
+tensorRelu 0.244851 0
+tensorConvApprox 1.65138 0
+tensorBatchNorm 0.273883 0
+tensorRelu 0.258691 0
+tensorConvCutlass 0.989746 0
+tensorBatchNorm 0.231671 0
+tensorRelu 0.311367 0
+tensorConvApprox 1.65574 0
+tensorBatchNorm 0.481729 0
+tensorRelu 0.193443 0
+tensorConvCutlass 0.993404 0
+tensorBatchNorm 0.505042 0
+tensorRelu 0.312148 0
+tensorConvApprox 1.84112 0
+tensorBatchNorm 0.471851 0
+tensorRelu 0.188116 0
+tensorConvCutlass 1.04833 0
+tensorBatchNorm 0.457728 0
+tensorRelu 0.184556 0
+tensorConvApprox 2.11401 0
+tensorBatchNorm 0.473132 0
+tensorRelu 0.384353 0
+tensorConvCutlass 0.934064 0
+tensorBatchNorm 0.411389 0
+tensorRelu 0.142564 0
+tensorConvApprox 1.33875 0
+tensorBatchNorm 0.414552 0
+tensorRelu 0.150584 0
+tensorConvCutlass 0.75521 0
+tensorBatchNorm 0.410236 0
+tensorRelu 0.146846 0
+tensorConvApprox 1.39203 0
+tensorBatchNorm 0.412432 0
+tensorRelu 0.147136 0
+tensorConvCutlass 0.771719 0
+tensorBatchNorm 0.409913 0
+tensorRelu 0.150244 0
+tensorConvApprox 1.34712 0
+tensorBatchNorm 0.415759 0
+tensorRelu 0.151791 0
+tensorConvCutlass 0.714632 0
+tensorBatchNorm 0.40728 0
+tensorRelu 0.148979 0
+tensorConvApprox 0.930202 0
+tensorBatchNorm 0.759239 0
+tensorRelu 0.09663 0
+tensorConvCutlass 0.484437 0
+tensorBatchNorm 0.749036 0
+tensorRelu 0.102897 0
+tensorConvApprox 1.24439 0
+tensorBatchNorm 0.760227 0
+tensorRelu 0.096713 0
+tensorPooling 0.302764 0
+tensorGemmGPU 0.133992 0
+tensorAdd 0.061821 0
+tensorSoftmax 0.086043 0
+
+Iteration Compute Time   : 74.5962
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.022802
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+Iteration 9
+tensorConvApprox 2.17586 0
+tensorBatchNorm 0.487215 0
+tensorRelu 0.693381 0
+tensorConvCutlass 0.86818 0
+tensorBatchNorm 0.438246 0
+tensorRelu 0.420747 0
+tensorConvApprox 3.64405 0
+tensorBatchNorm 0.888827 0
+tensorRelu 0.877646 0
+tensorConvCutlass 2.86639 0
+tensorBatchNorm 0.246218 0
+tensorRelu 0.241441 0
+tensorConvApprox 1.98201 0
+tensorBatchNorm 0.486885 0
+tensorRelu 0.455583 0
+tensorConvCutlass 2.25638 0
+tensorBatchNorm 0.429762 0
+tensorRelu 0.425944 0
+tensorConvApprox 2.55321 0
+tensorBatchNorm 0.478803 0
+tensorRelu 0.464266 0
+tensorConvCutlass 2.06851 0
+tensorBatchNorm 0.160336 0
+tensorRelu 0.147377 0
+tensorConvApprox 1.38658 0
+tensorBatchNorm 0.276843 0
+tensorRelu 0.245167 0
+tensorConvCutlass 1.25279 0
+tensorBatchNorm 0.261347 0
+tensorRelu 0.242219 0
+tensorConvApprox 1.72877 0
+tensorBatchNorm 0.286292 0
+tensorRelu 0.260727 0
+tensorConvCutlass 0.974662 0
+tensorBatchNorm 0.236358 0
+tensorRelu 0.102523 0
+tensorConvApprox 1.05661 0
+tensorBatchNorm 0.417304 0
+tensorRelu 0.143981 0
+tensorConvCutlass 0.788655 0
+tensorBatchNorm 0.409153 0
+tensorRelu 0.150299 0
+tensorConvApprox 1.30689 0
+tensorBatchNorm 0.418391 0
+tensorRelu 0.15042 0
+tensorConvCutlass 0.78943 0
+tensorBatchNorm 0.404583 0
+tensorRelu 0.14435 0
+tensorConvApprox 1.35444 0
+tensorBatchNorm 0.413114 0
+tensorRelu 0.149313 0
+tensorConvCutlass 0.766193 0
+tensorBatchNorm 0.40833 0
+tensorRelu 0.147593 0
+tensorConvApprox 1.32582 0
+tensorBatchNorm 0.414352 0
+tensorRelu 0.153469 0
+tensorConvCutlass 0.70409 0
+tensorBatchNorm 0.409507 0
+tensorRelu 0.135821 0
+tensorConvApprox 1.26394 0
+tensorBatchNorm 0.403406 0
+tensorRelu 0.146978 0
+tensorConvCutlass 0.712843 0
+tensorBatchNorm 0.526892 0
+tensorRelu 0.138957 0
+tensorConvApprox 1.23422 0
+tensorBatchNorm 0.418712 0
+tensorRelu 0.146598 0
+tensorConvCutlass 0.688029 0
+tensorBatchNorm 0.416884 0
+tensorRelu 0.077336 0
+tensorConvApprox 0.814616 0
+tensorBatchNorm 0.761053 0
+tensorRelu 0.0994 0
+tensorConvCutlass 0.428408 0
+tensorBatchNorm 0.739751 0
+tensorRelu 0.093004 0
+tensorConvApprox 1.0683 0
+tensorBatchNorm 0.765454 0
+tensorRelu 0.099207 0
+tensorPooling 0.263111 0
+tensorGemmGPU 0.11969 0
+tensorAdd 0.054568 0
+tensorSoftmax 0.074868 0
+
+Iteration Compute Time   : 57.1299
+Iteration Compute Energy : 0
+Iteration Control Time   : 0.019048
+Iteration Control Energy : 0
+Iteration Config Time   : 0
+Iteration Config Energy : 0
+Iteration End Frequency : 0
+
+
+
+
+Total Compute Time  : 577.168
+Total Compute Energy: 0
+
+Total Control Time  : 0.215775
+Total Control Energy: 0
+
+Total Config Time  : 3.36941e-315
+Total Config Energy: 0
+
+Total Time  : 577.384
+Total Energy: 0
-- 
GitLab