diff --git a/hpvm/CMakeLists.txt b/hpvm/CMakeLists.txt index d63675b34275c3f83c10ca83005bbfe563777554..be0376ab58334d4d747fb9d7780d1411f5dfc727 100644 --- a/hpvm/CMakeLists.txt +++ b/hpvm/CMakeLists.txt @@ -12,7 +12,6 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR}) add_subdirectory(lib) add_subdirectory(projects) add_subdirectory(tools) - add_subdirectory(test) # Add a global check rule now that all subdirectories have been traversed diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt index b18cd4551ba33e0c315a416164b45e6282098aeb..ce1c152ef3ce98ea34088ef9dc76ae8bac42ac66 100644 --- a/hpvm/lib/Transforms/CMakeLists.txt +++ b/hpvm/lib/Transforms/CMakeLists.txt @@ -8,3 +8,6 @@ add_subdirectory(DFG2LLVM_WrapperAPI) add_subdirectory(DFG2LLVM_CUDNN) add_subdirectory(FuseHPVMTensorNodes) add_subdirectory(InPlaceDFG) + +set(TENSOR_RT_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY}) +set(TENSOR_RT_LL ${TENSOR_RT_PREFIX}/tensor_runtime.ll) diff --git a/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..c22fc1985ccd26b99a56cd8374e282b9ebbcca5e --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/alexnet/data/profile_info_0.txt @@ -0,0 +1,333 @@ +Iteration 0 +tensorConvApprox 11.4549 0 +tensorAdd 0.987733 0 +tensorTanh 0.89782 0 +tensorPooling 3.92176 0 +tensorConvApprox 30.4695 0 +tensorAdd 0.756682 0 +tensorTanh 0.659855 0 +tensorPooling 3.87498 0 +tensorConvApprox 12.9027 0 +tensorAdd 0.529304 0 +tensorTanh 0.360859 0 +tensorConvApprox 14.1188 0 +tensorAdd 0.572512 0 +tensorTanh 0.269676 0 +tensorConvApprox 12.5122 0 +tensorAdd 0.37547 0 +tensorTanh 0.260915 0 +tensorPooling 2.68986 0 +tensorGemmGPU 0.215392 0 +tensorAdd 0.120663 0 +tensorSoftmax 0.134412 0 + +Iteration Compute Time : 98.0861 +Iteration Compute Energy : 0 +Iteration Control Time : 0.025326 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 1 +tensorConvApprox 14.367 0 +tensorAdd 1.01869 0 +tensorTanh 0.88308 0 +tensorPooling 3.88747 0 +tensorConvApprox 31.0697 0 +tensorAdd 0.775118 0 +tensorTanh 0.672938 0 +tensorPooling 3.17431 0 +tensorConvApprox 19.3971 0 +tensorAdd 0.528325 0 +tensorTanh 0.361839 0 +tensorConvApprox 14.727 0 +tensorAdd 0.367463 0 +tensorTanh 0.258956 0 +tensorConvApprox 15.0059 0 +tensorAdd 0.361187 0 +tensorTanh 0.258366 0 +tensorPooling 2.70526 0 +tensorGemmGPU 0.18939 0 +tensorAdd 0.068016 0 +tensorSoftmax 0.536969 0 + +Iteration Compute Time : 110.614 +Iteration Compute Energy : 0 +Iteration Control Time : 0.021939 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 2 +tensorConvApprox 10.204 0 +tensorAdd 1.0115 0 +tensorTanh 0.900257 0 +tensorPooling 3.90068 0 +tensorConvApprox 27.5914 0 +tensorAdd 0.822439 0 +tensorTanh 0.849694 0 +tensorPooling 2.97768 0 +tensorConvApprox 17.8808 0 +tensorAdd 0.55192 0 +tensorTanh 0.378285 0 +tensorConvApprox 13.1066 0 +tensorAdd 0.621008 0 +tensorTanh 0.258276 0 +tensorConvApprox 18.631 0 +tensorAdd 0.422421 0 +tensorTanh 0.274397 0 +tensorPooling 2.65083 0 +tensorGemmGPU 0.409805 0 +tensorAdd 0.068461 0 +tensorSoftmax 0.104962 0 + +Iteration Compute Time : 103.616 +Iteration Compute Energy : 0 +Iteration Control Time : 0.152392 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 3 +tensorConvApprox 16.3712 0 +tensorAdd 1.03894 0 +tensorTanh 0.891796 0 +tensorPooling 4.02378 0 +tensorConvApprox 25.7412 0 +tensorAdd 0.820657 0 +tensorTanh 0.702908 0 +tensorPooling 3.12982 0 +tensorConvApprox 17.68 0 +tensorAdd 0.543514 0 +tensorTanh 0.378761 0 +tensorConvApprox 12.42 0 +tensorAdd 0.408593 0 +tensorTanh 0.403677 0 +tensorConvApprox 14.9018 0 +tensorAdd 0.408336 0 +tensorTanh 0.269063 0 +tensorPooling 2.6564 0 +tensorGemmGPU 0.169787 0 +tensorAdd 0.057522 0 +tensorSoftmax 0.083927 0 + +Iteration Compute Time : 103.102 +Iteration Compute Energy : 0 +Iteration Control Time : 0.022148 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 4 +tensorConvApprox 9.93468 0 +tensorAdd 1.02456 0 +tensorTanh 0.899216 0 +tensorPooling 3.89126 0 +tensorConvApprox 25.9977 0 +tensorAdd 1.24468 0 +tensorTanh 0.704036 0 +tensorPooling 3.74854 0 +tensorConvApprox 17.644 0 +tensorAdd 0.55182 0 +tensorTanh 0.37049 0 +tensorConvApprox 12.5094 0 +tensorAdd 0.407149 0 +tensorTanh 0.248609 0 +tensorConvApprox 13.9976 0 +tensorAdd 0.403548 0 +tensorTanh 0.26787 0 +tensorPooling 2.65781 0 +tensorGemmGPU 0.163417 0 +tensorAdd 0.055534 0 +tensorSoftmax 0.079422 0 + +Iteration Compute Time : 96.8013 +Iteration Compute Energy : 0 +Iteration Control Time : 0.021165 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 5 +tensorConvApprox 9.80512 0 +tensorAdd 1.00169 0 +tensorTanh 0.921429 0 +tensorPooling 3.91348 0 +tensorConvApprox 25.9371 0 +tensorAdd 0.815127 0 +tensorTanh 0.673571 0 +tensorPooling 7.36816 0 +tensorConvApprox 16.8525 0 +tensorAdd 0.525325 0 +tensorTanh 0.375484 0 +tensorConvApprox 13.4568 0 +tensorAdd 0.413802 0 +tensorTanh 0.250907 0 +tensorConvApprox 14.0005 0 +tensorAdd 0.408961 0 +tensorTanh 0.248077 0 +tensorPooling 2.66095 0 +tensorGemmGPU 0.165503 0 +tensorAdd 0.056493 0 +tensorSoftmax 0.077052 0 + +Iteration Compute Time : 99.9281 +Iteration Compute Energy : 0 +Iteration Control Time : 0.021977 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 6 +tensorConvApprox 9.80471 0 +tensorAdd 0.999942 0 +tensorTanh 0.915084 0 +tensorPooling 3.91451 0 +tensorConvApprox 25.834 0 +tensorAdd 1.14345 0 +tensorTanh 0.74676 0 +tensorPooling 3.82146 0 +tensorConvApprox 17.6392 0 +tensorAdd 0.556454 0 +tensorTanh 0.370676 0 +tensorConvApprox 12.4922 0 +tensorAdd 0.404391 0 +tensorTanh 0.250653 0 +tensorConvApprox 14.002 0 +tensorAdd 0.403829 0 +tensorTanh 0.25152 0 +tensorPooling 2.66016 0 +tensorGemmGPU 0.161451 0 +tensorAdd 0.059232 0 +tensorSoftmax 0.076951 0 + +Iteration Compute Time : 96.5086 +Iteration Compute Energy : 0 +Iteration Control Time : 0.021092 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 7 +tensorConvApprox 9.79061 0 +tensorAdd 1.00411 0 +tensorTanh 0.923385 0 +tensorPooling 3.89759 0 +tensorConvApprox 25.887 0 +tensorAdd 0.821996 0 +tensorTanh 0.685319 0 +tensorPooling 3.12557 0 +tensorConvApprox 17.6648 0 +tensorAdd 0.555787 0 +tensorTanh 0.385425 0 +tensorConvApprox 12.5025 0 +tensorAdd 0.404708 0 +tensorTanh 0.25099 0 +tensorConvApprox 13.9948 0 +tensorAdd 0.403679 0 +tensorTanh 0.252795 0 +tensorPooling 2.65976 0 +tensorGemmGPU 0.154435 0 +tensorAdd 0.058066 0 +tensorSoftmax 0.102014 0 + +Iteration Compute Time : 95.5253 +Iteration Compute Energy : 0 +Iteration Control Time : 0.03922 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 8 +tensorConvApprox 10.3697 0 +tensorAdd 1.02741 0 +tensorTanh 0.912612 0 +tensorPooling 3.87946 0 +tensorConvApprox 26.2823 0 +tensorAdd 0.816944 0 +tensorTanh 0.686638 0 +tensorPooling 3.12319 0 +tensorConvApprox 17.918 0 +tensorAdd 0.530344 0 +tensorTanh 0.362426 0 +tensorConvApprox 12.8083 0 +tensorAdd 0.360747 0 +tensorTanh 0.257041 0 +tensorConvApprox 14.2165 0 +tensorAdd 0.528494 0 +tensorTanh 0.257368 0 +tensorPooling 2.53903 0 +tensorGemmGPU 0.170898 0 +tensorAdd 0.100506 0 +tensorSoftmax 0.119107 0 + +Iteration Compute Time : 97.2669 +Iteration Compute Energy : 0 +Iteration Control Time : 0.041931 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 9 +tensorConvApprox 10.2616 0 +tensorAdd 0.98462 0 +tensorTanh 0.895782 0 +tensorPooling 3.9356 0 +tensorConvApprox 26.2674 0 +tensorAdd 0.786113 0 +tensorTanh 0.676243 0 +tensorPooling 3.163 0 +tensorConvApprox 23.1817 0 +tensorAdd 0.847734 0 +tensorTanh 0.367746 0 +tensorConvApprox 12.9463 0 +tensorAdd 0.371015 0 +tensorTanh 0.263012 0 +tensorConvApprox 15.1519 0 +tensorAdd 0.428313 0 +tensorTanh 0.28493 0 +tensorPooling 2.61769 0 +tensorGemmGPU 0.167254 0 +tensorAdd 0.056753 0 +tensorSoftmax 0.082013 0 + +Iteration Compute Time : 103.737 +Iteration Compute Energy : 0 +Iteration Control Time : 0.022577 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + + + +Total Compute Time : 1005.19 +Total Compute Energy: 6.91275e-310 + +Total Control Time : 0.389767 +Total Control Energy: 3.51633e-315 + +Total Config Time : 3.51633e-315 +Total Config Energy: 3.51633e-315 + +Total Time : 1005.57 +Total Energy: 0 diff --git a/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/profile_info_0.txt b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/profile_info_0.txt new file mode 100644 index 0000000000000000000000000000000000000000..27063f1379635ca1a4f27eac5ffe6fa173c9c48d --- /dev/null +++ b/hpvm/test/dnn_benchmarks/benchmarks/mobilenet/data/profile_info_0.txt @@ -0,0 +1,973 @@ +Iteration 0 +tensorConvApprox 2.23701 0 +tensorBatchNorm 0.529425 0 +tensorRelu 0.479808 0 +tensorConvCutlass 0.957134 0 +tensorBatchNorm 0.429569 0 +tensorRelu 0.498568 0 +tensorConvApprox 3.54345 0 +tensorBatchNorm 0.893891 0 +tensorRelu 0.887381 0 +tensorConvCutlass 2.66411 0 +tensorBatchNorm 0.251158 0 +tensorRelu 0.234899 0 +tensorConvApprox 1.85116 0 +tensorBatchNorm 0.618027 0 +tensorRelu 0.426936 0 +tensorConvCutlass 2.1397 0 +tensorBatchNorm 0.453199 0 +tensorRelu 0.434558 0 +tensorConvApprox 2.51921 0 +tensorBatchNorm 0.493453 0 +tensorRelu 0.454728 0 +tensorConvCutlass 2.09737 0 +tensorBatchNorm 0.165386 0 +tensorRelu 0.14034 0 +tensorConvApprox 1.24949 0 +tensorBatchNorm 0.271424 0 +tensorRelu 0.246688 0 +tensorConvCutlass 1.26528 0 +tensorBatchNorm 0.249295 0 +tensorRelu 0.233471 0 +tensorConvApprox 1.61344 0 +tensorBatchNorm 0.281014 0 +tensorRelu 0.272026 0 +tensorConvCutlass 0.89206 0 +tensorBatchNorm 0.241964 0 +tensorRelu 0.093642 0 +tensorConvApprox 0.95434 0 +tensorBatchNorm 0.425067 0 +tensorRelu 0.133866 0 +tensorConvCutlass 0.78137 0 +tensorBatchNorm 0.397914 0 +tensorRelu 0.138306 0 +tensorConvApprox 1.24994 0 +tensorBatchNorm 0.428037 0 +tensorRelu 0.132677 0 +tensorConvCutlass 0.715246 0 +tensorBatchNorm 0.483229 0 +tensorRelu 0.146812 0 +tensorConvApprox 1.24338 0 +tensorBatchNorm 0.43213 0 +tensorRelu 0.134047 0 +tensorConvCutlass 0.7264 0 +tensorBatchNorm 0.424954 0 +tensorRelu 0.139791 0 +tensorConvApprox 1.23298 0 +tensorBatchNorm 0.416953 0 +tensorRelu 0.134486 0 +tensorConvCutlass 0.679017 0 +tensorBatchNorm 0.409803 0 +tensorRelu 0.132933 0 +tensorConvApprox 1.25983 0 +tensorBatchNorm 0.431743 0 +tensorRelu 0.132827 0 +tensorConvCutlass 0.712734 0 +tensorBatchNorm 0.393237 0 +tensorRelu 0.156427 0 +tensorConvApprox 1.23481 0 +tensorBatchNorm 0.413014 0 +tensorRelu 0.132817 0 +tensorConvCutlass 0.700018 0 +tensorBatchNorm 0.41414 0 +tensorRelu 0.077319 0 +tensorConvApprox 0.851131 0 +tensorBatchNorm 0.764938 0 +tensorRelu 0.088765 0 +tensorConvCutlass 0.420801 0 +tensorBatchNorm 0.740765 0 +tensorRelu 0.09237 0 +tensorConvApprox 1.13472 0 +tensorBatchNorm 0.756702 0 +tensorRelu 0.087387 0 +tensorPooling 0.28436 0 +tensorGemmGPU 0.118249 0 +tensorAdd 0.070506 0 +tensorSoftmax 0.086034 0 + +Iteration Compute Time : 55.9596 +Iteration Compute Energy : 0 +Iteration Control Time : 0.035796 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 1 +tensorConvApprox 1.97561 0 +tensorBatchNorm 0.473865 0 +tensorRelu 0.453925 0 +tensorConvCutlass 1.04306 0 +tensorBatchNorm 0.436546 0 +tensorRelu 0.411711 0 +tensorConvApprox 3.43348 0 +tensorBatchNorm 0.895659 0 +tensorRelu 0.873215 0 +tensorConvCutlass 1.98017 0 +tensorBatchNorm 0.245242 0 +tensorRelu 0.233063 0 +tensorConvApprox 1.84183 0 +tensorBatchNorm 0.478471 0 +tensorRelu 0.443878 0 +tensorConvCutlass 2.15217 0 +tensorBatchNorm 0.422718 0 +tensorRelu 0.414853 0 +tensorConvApprox 2.35653 0 +tensorBatchNorm 0.477226 0 +tensorRelu 0.471601 0 +tensorConvCutlass 2.11536 0 +tensorBatchNorm 0.160139 0 +tensorRelu 0.138821 0 +tensorConvApprox 1.25131 0 +tensorBatchNorm 0.268332 0 +tensorRelu 0.23243 0 +tensorConvCutlass 1.23673 0 +tensorBatchNorm 0.248686 0 +tensorRelu 0.232362 0 +tensorConvApprox 1.6491 0 +tensorBatchNorm 0.255622 0 +tensorRelu 0.250747 0 +tensorConvCutlass 1.00023 0 +tensorBatchNorm 0.234396 0 +tensorRelu 0.092353 0 +tensorConvApprox 0.956705 0 +tensorBatchNorm 0.401337 0 +tensorRelu 0.132566 0 +tensorConvCutlass 0.727656 0 +tensorBatchNorm 0.39494 0 +tensorRelu 0.135475 0 +tensorConvApprox 1.21273 0 +tensorBatchNorm 0.399682 0 +tensorRelu 0.134199 0 +tensorConvCutlass 0.707999 0 +tensorBatchNorm 0.400456 0 +tensorRelu 0.131647 0 +tensorConvApprox 1.20442 0 +tensorBatchNorm 0.408527 0 +tensorRelu 0.133741 0 +tensorConvCutlass 0.710463 0 +tensorBatchNorm 0.400938 0 +tensorRelu 0.147735 0 +tensorConvApprox 1.25009 0 +tensorBatchNorm 0.439884 0 +tensorRelu 0.141364 0 +tensorConvCutlass 0.722939 0 +tensorBatchNorm 0.39408 0 +tensorRelu 0.138576 0 +tensorConvApprox 1.27766 0 +tensorBatchNorm 0.400103 0 +tensorRelu 0.133306 0 +tensorConvCutlass 0.701348 0 +tensorBatchNorm 0.531719 0 +tensorRelu 0.13418 0 +tensorConvApprox 1.20153 0 +tensorBatchNorm 0.406742 0 +tensorRelu 0.132218 0 +tensorConvCutlass 0.680362 0 +tensorBatchNorm 0.396494 0 +tensorRelu 0.06394 0 +tensorConvApprox 0.819766 0 +tensorBatchNorm 0.741952 0 +tensorRelu 0.085646 0 +tensorConvCutlass 0.428946 0 +tensorBatchNorm 0.738589 0 +tensorRelu 0.092674 0 +tensorConvApprox 1.06239 0 +tensorBatchNorm 0.748871 0 +tensorRelu 0.084497 0 +tensorPooling 0.255138 0 +tensorGemmGPU 0.115002 0 +tensorAdd 0.05473 0 +tensorSoftmax 0.072544 0 + +Iteration Compute Time : 54.0699 +Iteration Compute Energy : 0 +Iteration Control Time : 0.019171 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 2 +tensorConvApprox 1.9938 0 +tensorBatchNorm 0.483517 0 +tensorRelu 0.456522 0 +tensorConvCutlass 1.0357 0 +tensorBatchNorm 0.429524 0 +tensorRelu 0.412926 0 +tensorConvApprox 3.42249 0 +tensorBatchNorm 0.893502 0 +tensorRelu 0.865981 0 +tensorConvCutlass 2.60753 0 +tensorBatchNorm 0.245639 0 +tensorRelu 0.23249 0 +tensorConvApprox 1.8282 0 +tensorBatchNorm 0.473188 0 +tensorRelu 0.44711 0 +tensorConvCutlass 2.2097 0 +tensorBatchNorm 0.422328 0 +tensorRelu 0.41563 0 +tensorConvApprox 2.35971 0 +tensorBatchNorm 0.473869 0 +tensorRelu 0.454988 0 +tensorConvCutlass 2.14168 0 +tensorBatchNorm 0.16224 0 +tensorRelu 0.138312 0 +tensorConvApprox 1.2518 0 +tensorBatchNorm 0.268174 0 +tensorRelu 0.236937 0 +tensorConvCutlass 1.33004 0 +tensorBatchNorm 0.314835 0 +tensorRelu 0.264317 0 +tensorConvApprox 1.58082 0 +tensorBatchNorm 0.274042 0 +tensorRelu 0.249993 0 +tensorConvCutlass 1.03923 0 +tensorBatchNorm 0.22875 0 +tensorRelu 0.090939 0 +tensorConvApprox 0.936895 0 +tensorBatchNorm 0.402702 0 +tensorRelu 0.132519 0 +tensorConvCutlass 0.727177 0 +tensorBatchNorm 0.395688 0 +tensorRelu 0.135304 0 +tensorConvApprox 1.22042 0 +tensorBatchNorm 0.401043 0 +tensorRelu 0.133329 0 +tensorConvCutlass 0.734079 0 +tensorBatchNorm 0.400517 0 +tensorRelu 0.131085 0 +tensorConvApprox 1.20347 0 +tensorBatchNorm 0.408406 0 +tensorRelu 0.13312 0 +tensorConvCutlass 0.71914 0 +tensorBatchNorm 0.400224 0 +tensorRelu 0.131462 0 +tensorConvApprox 1.1849 0 +tensorBatchNorm 0.40678 0 +tensorRelu 0.133044 0 +tensorConvCutlass 0.700435 0 +tensorBatchNorm 0.400726 0 +tensorRelu 0.131443 0 +tensorConvApprox 1.2809 0 +tensorBatchNorm 0.408323 0 +tensorRelu 0.130881 0 +tensorConvCutlass 0.713094 0 +tensorBatchNorm 0.400763 0 +tensorRelu 0.134344 0 +tensorConvApprox 1.32531 0 +tensorBatchNorm 0.408491 0 +tensorRelu 0.135343 0 +tensorConvCutlass 0.683423 0 +tensorBatchNorm 0.383472 0 +tensorRelu 0.055856 0 +tensorConvApprox 0.802831 0 +tensorBatchNorm 0.750119 0 +tensorRelu 0.086386 0 +tensorConvCutlass 0.505849 0 +tensorBatchNorm 0.757398 0 +tensorRelu 0.104829 0 +tensorConvApprox 1.0665 0 +tensorBatchNorm 0.758638 0 +tensorRelu 0.108774 0 +tensorPooling 0.256465 0 +tensorGemmGPU 0.107896 0 +tensorAdd 0.053549 0 +tensorSoftmax 0.073444 0 + +Iteration Compute Time : 54.9332 +Iteration Compute Energy : 0 +Iteration Control Time : 0.019819 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 3 +tensorConvApprox 1.96858 0 +tensorBatchNorm 0.481127 0 +tensorRelu 0.453004 0 +tensorConvCutlass 1.06524 0 +tensorBatchNorm 0.438826 0 +tensorRelu 0.413673 0 +tensorConvApprox 3.45964 0 +tensorBatchNorm 1.13057 0 +tensorRelu 0.876152 0 +tensorConvCutlass 3.06743 0 +tensorBatchNorm 0.241919 0 +tensorRelu 0.234321 0 +tensorConvApprox 1.82083 0 +tensorBatchNorm 0.473934 0 +tensorRelu 0.444206 0 +tensorConvCutlass 2.2154 0 +tensorBatchNorm 0.425108 0 +tensorRelu 0.413497 0 +tensorConvApprox 3.79539 0 +tensorBatchNorm 0.485401 0 +tensorRelu 0.456188 0 +tensorConvCutlass 1.67366 0 +tensorBatchNorm 0.159367 0 +tensorRelu 0.138328 0 +tensorConvApprox 1.25087 0 +tensorBatchNorm 0.280366 0 +tensorRelu 0.238291 0 +tensorConvCutlass 1.21768 0 +tensorBatchNorm 0.25647 0 +tensorRelu 0.232797 0 +tensorConvApprox 1.583 0 +tensorBatchNorm 0.274884 0 +tensorRelu 0.250904 0 +tensorConvCutlass 1.01374 0 +tensorBatchNorm 0.232925 0 +tensorRelu 0.092624 0 +tensorConvApprox 0.929718 0 +tensorBatchNorm 0.406899 0 +tensorRelu 0.13354 0 +tensorConvCutlass 0.715119 0 +tensorBatchNorm 0.401388 0 +tensorRelu 0.132678 0 +tensorConvApprox 1.21308 0 +tensorBatchNorm 0.411271 0 +tensorRelu 0.136159 0 +tensorConvCutlass 0.728206 0 +tensorBatchNorm 0.40348 0 +tensorRelu 0.131293 0 +tensorConvApprox 1.2437 0 +tensorBatchNorm 0.415867 0 +tensorRelu 0.13346 0 +tensorConvCutlass 0.728818 0 +tensorBatchNorm 0.404325 0 +tensorRelu 0.133644 0 +tensorConvApprox 1.18933 0 +tensorBatchNorm 0.408998 0 +tensorRelu 0.135374 0 +tensorConvCutlass 0.758913 0 +tensorBatchNorm 0.401487 0 +tensorRelu 0.133675 0 +tensorConvApprox 1.22111 0 +tensorBatchNorm 0.408526 0 +tensorRelu 0.133156 0 +tensorConvCutlass 0.713234 0 +tensorBatchNorm 0.404604 0 +tensorRelu 0.135062 0 +tensorConvApprox 1.19223 0 +tensorBatchNorm 0.422434 0 +tensorRelu 0.151752 0 +tensorConvCutlass 0.674382 0 +tensorBatchNorm 0.399276 0 +tensorRelu 0.055825 0 +tensorConvApprox 0.813887 0 +tensorBatchNorm 0.749868 0 +tensorRelu 0.086545 0 +tensorConvCutlass 0.43128 0 +tensorBatchNorm 0.741291 0 +tensorRelu 0.091888 0 +tensorConvApprox 1.10082 0 +tensorBatchNorm 0.742328 0 +tensorRelu 0.086367 0 +tensorPooling 0.260055 0 +tensorGemmGPU 0.118566 0 +tensorAdd 0.054399 0 +tensorSoftmax 0.081731 0 + +Iteration Compute Time : 56.3574 +Iteration Compute Energy : 0 +Iteration Control Time : 0.019227 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 4 +tensorConvApprox 2.75777 0 +tensorBatchNorm 0.491652 0 +tensorRelu 0.466859 0 +tensorConvCutlass 1.04383 0 +tensorBatchNorm 0.435906 0 +tensorRelu 0.422722 0 +tensorConvApprox 3.65792 0 +tensorBatchNorm 0.893743 0 +tensorRelu 0.87971 0 +tensorConvCutlass 2.67706 0 +tensorBatchNorm 0.246657 0 +tensorRelu 0.239448 0 +tensorConvApprox 2.00886 0 +tensorBatchNorm 0.484758 0 +tensorRelu 0.461588 0 +tensorConvCutlass 2.19886 0 +tensorBatchNorm 0.430631 0 +tensorRelu 0.42929 0 +tensorConvApprox 2.58732 0 +tensorBatchNorm 0.483099 0 +tensorRelu 0.462454 0 +tensorConvCutlass 2.11366 0 +tensorBatchNorm 0.16427 0 +tensorRelu 0.151241 0 +tensorConvApprox 1.39126 0 +tensorBatchNorm 0.276029 0 +tensorRelu 0.244813 0 +tensorConvCutlass 1.29254 0 +tensorBatchNorm 0.263643 0 +tensorRelu 0.241842 0 +tensorConvApprox 1.74431 0 +tensorBatchNorm 0.277158 0 +tensorRelu 0.264752 0 +tensorConvCutlass 0.97804 0 +tensorBatchNorm 0.236649 0 +tensorRelu 0.100587 0 +tensorConvApprox 1.07438 0 +tensorBatchNorm 0.417747 0 +tensorRelu 0.145716 0 +tensorConvCutlass 0.78966 0 +tensorBatchNorm 0.408245 0 +tensorRelu 0.146282 0 +tensorConvApprox 1.59601 0 +tensorBatchNorm 0.417057 0 +tensorRelu 0.148021 0 +tensorConvCutlass 0.76697 0 +tensorBatchNorm 0.410469 0 +tensorRelu 0.143291 0 +tensorConvApprox 1.37314 0 +tensorBatchNorm 0.420813 0 +tensorRelu 0.149074 0 +tensorConvCutlass 0.785019 0 +tensorBatchNorm 0.406105 0 +tensorRelu 0.144829 0 +tensorConvApprox 1.32397 0 +tensorBatchNorm 0.41983 0 +tensorRelu 0.147431 0 +tensorConvCutlass 0.741959 0 +tensorBatchNorm 0.411628 0 +tensorRelu 0.149026 0 +tensorConvApprox 1.35713 0 +tensorBatchNorm 0.417514 0 +tensorRelu 0.142716 0 +tensorConvCutlass 0.78773 0 +tensorBatchNorm 0.40768 0 +tensorRelu 0.149051 0 +tensorConvApprox 1.32946 0 +tensorBatchNorm 0.415265 0 +tensorRelu 0.144227 0 +tensorConvCutlass 0.72498 0 +tensorBatchNorm 0.403585 0 +tensorRelu 0.067153 0 +tensorConvApprox 0.931648 0 +tensorBatchNorm 0.755898 0 +tensorRelu 0.096675 0 +tensorConvCutlass 0.47856 0 +tensorBatchNorm 0.754063 0 +tensorRelu 0.270367 0 +tensorConvApprox 1.19719 0 +tensorBatchNorm 0.760267 0 +tensorRelu 0.09892 0 +tensorPooling 0.305295 0 +tensorGemmGPU 0.148363 0 +tensorAdd 0.07344 0 +tensorSoftmax 0.09613 0 + +Iteration Compute Time : 58.7509 +Iteration Compute Energy : 0 +Iteration Control Time : 0.026229 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 5 +tensorConvApprox 2.1726 0 +tensorBatchNorm 0.488988 0 +tensorRelu 0.465871 0 +tensorConvCutlass 1.04703 0 +tensorBatchNorm 0.440283 0 +tensorRelu 0.429357 0 +tensorConvApprox 3.66603 0 +tensorBatchNorm 0.893586 0 +tensorRelu 0.877117 0 +tensorConvCutlass 2.13282 0 +tensorBatchNorm 0.249723 0 +tensorRelu 0.241791 0 +tensorConvApprox 1.99587 0 +tensorBatchNorm 0.484073 0 +tensorRelu 0.454081 0 +tensorConvCutlass 2.22364 0 +tensorBatchNorm 0.646739 0 +tensorRelu 0.433169 0 +tensorConvApprox 2.64464 0 +tensorBatchNorm 0.47868 0 +tensorRelu 0.468067 0 +tensorConvCutlass 2.16956 0 +tensorBatchNorm 0.167527 0 +tensorRelu 0.149678 0 +tensorConvApprox 1.38195 0 +tensorBatchNorm 0.28112 0 +tensorRelu 0.24312 0 +tensorConvCutlass 1.26873 0 +tensorBatchNorm 0.254428 0 +tensorRelu 0.233827 0 +tensorConvApprox 1.58365 0 +tensorBatchNorm 0.278968 0 +tensorRelu 0.25334 0 +tensorConvCutlass 1.0156 0 +tensorBatchNorm 0.244076 0 +tensorRelu 0.093283 0 +tensorConvApprox 0.965024 0 +tensorBatchNorm 0.404682 0 +tensorRelu 0.133879 0 +tensorConvCutlass 0.747094 0 +tensorBatchNorm 0.402763 0 +tensorRelu 0.142304 0 +tensorConvApprox 1.26048 0 +tensorBatchNorm 0.406361 0 +tensorRelu 0.133341 0 +tensorConvCutlass 0.729584 0 +tensorBatchNorm 0.396997 0 +tensorRelu 0.136837 0 +tensorConvApprox 1.21978 0 +tensorBatchNorm 0.411099 0 +tensorRelu 0.133601 0 +tensorConvCutlass 0.726998 0 +tensorBatchNorm 0.400946 0 +tensorRelu 0.132781 0 +tensorConvApprox 1.20036 0 +tensorBatchNorm 0.409799 0 +tensorRelu 0.136065 0 +tensorConvCutlass 0.696289 0 +tensorBatchNorm 0.406103 0 +tensorRelu 0.132264 0 +tensorConvApprox 1.26211 0 +tensorBatchNorm 0.418643 0 +tensorRelu 0.13414 0 +tensorConvCutlass 0.738303 0 +tensorBatchNorm 0.401346 0 +tensorRelu 0.169646 0 +tensorConvApprox 1.22581 0 +tensorBatchNorm 0.409902 0 +tensorRelu 0.133014 0 +tensorConvCutlass 0.770886 0 +tensorBatchNorm 0.40109 0 +tensorRelu 0.057301 0 +tensorConvApprox 0.820137 0 +tensorBatchNorm 0.751111 0 +tensorRelu 0.086029 0 +tensorConvCutlass 0.428312 0 +tensorBatchNorm 0.740022 0 +tensorRelu 0.092405 0 +tensorConvApprox 1.14936 0 +tensorBatchNorm 0.762909 0 +tensorRelu 0.091007 0 +tensorPooling 0.264327 0 +tensorGemmGPU 0.117489 0 +tensorAdd 0.055461 0 +tensorSoftmax 0.074949 0 + +Iteration Compute Time : 55.9742 +Iteration Compute Energy : 0 +Iteration Control Time : 0.018705 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 6 +tensorConvApprox 1.96907 0 +tensorBatchNorm 0.48141 0 +tensorRelu 0.454234 0 +tensorConvCutlass 1.04364 0 +tensorBatchNorm 0.430053 0 +tensorRelu 0.418357 0 +tensorConvApprox 3.41283 0 +tensorBatchNorm 0.888804 0 +tensorRelu 0.869239 0 +tensorConvCutlass 2.00479 0 +tensorBatchNorm 0.246474 0 +tensorRelu 0.234112 0 +tensorConvApprox 1.8452 0 +tensorBatchNorm 0.472051 0 +tensorRelu 0.445524 0 +tensorConvCutlass 2.24807 0 +tensorBatchNorm 0.429071 0 +tensorRelu 0.413047 0 +tensorConvApprox 2.35288 0 +tensorBatchNorm 0.482299 0 +tensorRelu 0.453052 0 +tensorConvCutlass 2.15299 0 +tensorBatchNorm 0.168091 0 +tensorRelu 0.140115 0 +tensorConvApprox 1.22523 0 +tensorBatchNorm 0.271412 0 +tensorRelu 0.23909 0 +tensorConvCutlass 1.19494 0 +tensorBatchNorm 0.249981 0 +tensorRelu 0.231756 0 +tensorConvApprox 1.5918 0 +tensorBatchNorm 0.270204 0 +tensorRelu 0.250939 0 +tensorConvCutlass 1.01044 0 +tensorBatchNorm 0.229694 0 +tensorRelu 0.091054 0 +tensorConvApprox 0.931967 0 +tensorBatchNorm 0.416154 0 +tensorRelu 0.136451 0 +tensorConvCutlass 0.746695 0 +tensorBatchNorm 0.393873 0 +tensorRelu 0.151156 0 +tensorConvApprox 1.18598 0 +tensorBatchNorm 0.414867 0 +tensorRelu 0.136287 0 +tensorConvCutlass 0.711735 0 +tensorBatchNorm 0.394707 0 +tensorRelu 0.158015 0 +tensorConvApprox 1.24798 0 +tensorBatchNorm 0.402511 0 +tensorRelu 0.131976 0 +tensorConvCutlass 0.89021 0 +tensorBatchNorm 0.3973 0 +tensorRelu 0.140986 0 +tensorConvApprox 1.2066 0 +tensorBatchNorm 0.416513 0 +tensorRelu 0.133314 0 +tensorConvCutlass 0.713295 0 +tensorBatchNorm 0.397544 0 +tensorRelu 0.139411 0 +tensorConvApprox 1.24168 0 +tensorBatchNorm 0.417975 0 +tensorRelu 0.131993 0 +tensorConvCutlass 0.710762 0 +tensorBatchNorm 0.396162 0 +tensorRelu 0.141993 0 +tensorConvApprox 1.34712 0 +tensorBatchNorm 0.405441 0 +tensorRelu 0.13312 0 +tensorConvCutlass 0.674823 0 +tensorBatchNorm 0.395537 0 +tensorRelu 0.057344 0 +tensorConvApprox 0.806353 0 +tensorBatchNorm 0.744342 0 +tensorRelu 0.088901 0 +tensorConvCutlass 0.417066 0 +tensorBatchNorm 0.735117 0 +tensorRelu 0.10534 0 +tensorConvApprox 1.04822 0 +tensorBatchNorm 0.743019 0 +tensorRelu 0.104582 0 +tensorPooling 0.452869 0 +tensorGemmGPU 0.114989 0 +tensorAdd 0.055767 0 +tensorSoftmax 0.07351 0 + +Iteration Compute Time : 54.4515 +Iteration Compute Energy : 0 +Iteration Control Time : 0.018769 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 7 +tensorConvApprox 1.97893 0 +tensorBatchNorm 0.477949 0 +tensorRelu 0.453688 0 +tensorConvCutlass 1.03878 0 +tensorBatchNorm 0.43087 0 +tensorRelu 0.411306 0 +tensorConvApprox 3.41754 0 +tensorBatchNorm 0.887339 0 +tensorRelu 0.869579 0 +tensorConvCutlass 2.85909 0 +tensorBatchNorm 0.244878 0 +tensorRelu 0.231961 0 +tensorConvApprox 1.94304 0 +tensorBatchNorm 0.473929 0 +tensorRelu 0.447546 0 +tensorConvCutlass 2.17425 0 +tensorBatchNorm 0.42527 0 +tensorRelu 0.415782 0 +tensorConvApprox 2.36657 0 +tensorBatchNorm 0.469628 0 +tensorRelu 0.451354 0 +tensorConvCutlass 2.14711 0 +tensorBatchNorm 0.160772 0 +tensorRelu 0.137116 0 +tensorConvApprox 1.23451 0 +tensorBatchNorm 0.270616 0 +tensorRelu 0.239943 0 +tensorConvCutlass 1.19111 0 +tensorBatchNorm 0.25036 0 +tensorRelu 0.233829 0 +tensorConvApprox 1.55993 0 +tensorBatchNorm 0.269845 0 +tensorRelu 0.250934 0 +tensorConvCutlass 1.03172 0 +tensorBatchNorm 0.228142 0 +tensorRelu 0.107492 0 +tensorConvApprox 0.930437 0 +tensorBatchNorm 0.404727 0 +tensorRelu 0.138149 0 +tensorConvCutlass 0.738596 0 +tensorBatchNorm 0.396348 0 +tensorRelu 0.152118 0 +tensorConvApprox 1.19572 0 +tensorBatchNorm 0.399799 0 +tensorRelu 0.136477 0 +tensorConvCutlass 0.72083 0 +tensorBatchNorm 0.395849 0 +tensorRelu 0.134113 0 +tensorConvApprox 1.20781 0 +tensorBatchNorm 0.402487 0 +tensorRelu 0.137157 0 +tensorConvCutlass 0.721853 0 +tensorBatchNorm 0.395229 0 +tensorRelu 0.137036 0 +tensorConvApprox 1.29612 0 +tensorBatchNorm 0.39542 0 +tensorRelu 0.136881 0 +tensorConvCutlass 0.690593 0 +tensorBatchNorm 0.400451 0 +tensorRelu 0.15126 0 +tensorConvApprox 1.24175 0 +tensorBatchNorm 0.403511 0 +tensorRelu 0.134717 0 +tensorConvCutlass 0.703724 0 +tensorBatchNorm 0.397863 0 +tensorRelu 0.140934 0 +tensorConvApprox 1.22323 0 +tensorBatchNorm 0.415987 0 +tensorRelu 0.231116 0 +tensorConvCutlass 0.67374 0 +tensorBatchNorm 0.395974 0 +tensorRelu 0.056334 0 +tensorConvApprox 0.800636 0 +tensorBatchNorm 0.754627 0 +tensorRelu 0.105969 0 +tensorConvCutlass 0.416602 0 +tensorBatchNorm 0.750834 0 +tensorRelu 0.095854 0 +tensorConvApprox 1.05905 0 +tensorBatchNorm 0.744483 0 +tensorRelu 0.103922 0 +tensorPooling 0.254605 0 +tensorGemmGPU 0.113178 0 +tensorAdd 0.056922 0 +tensorSoftmax 0.071876 0 + +Iteration Compute Time : 54.9456 +Iteration Compute Energy : 0 +Iteration Control Time : 0.016209 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 8 +tensorConvApprox 1.97954 0 +tensorBatchNorm 0.480376 0 +tensorRelu 0.454296 0 +tensorConvCutlass 1.02312 0 +tensorBatchNorm 0.431889 0 +tensorRelu 0.412319 0 +tensorConvApprox 3.45825 0 +tensorBatchNorm 0.891789 0 +tensorRelu 0.867071 0 +tensorConvCutlass 2.54057 0 +tensorBatchNorm 0.243556 0 +tensorRelu 0.232223 0 +tensorConvApprox 1.83753 0 +tensorBatchNorm 0.470948 0 +tensorRelu 0.446066 0 +tensorConvCutlass 2.16032 0 +tensorBatchNorm 0.417745 0 +tensorRelu 0.416572 0 +tensorConvApprox 16.7109 0 +tensorBatchNorm 0.553729 0 +tensorRelu 0.511455 0 +tensorConvCutlass 1.95369 0 +tensorBatchNorm 0.342645 0 +tensorRelu 0.150224 0 +tensorConvApprox 1.27591 0 +tensorBatchNorm 0.276026 0 +tensorRelu 0.24126 0 +tensorConvCutlass 1.23113 0 +tensorBatchNorm 0.337165 0 +tensorRelu 0.244851 0 +tensorConvApprox 1.65138 0 +tensorBatchNorm 0.273883 0 +tensorRelu 0.258691 0 +tensorConvCutlass 0.989746 0 +tensorBatchNorm 0.231671 0 +tensorRelu 0.311367 0 +tensorConvApprox 1.65574 0 +tensorBatchNorm 0.481729 0 +tensorRelu 0.193443 0 +tensorConvCutlass 0.993404 0 +tensorBatchNorm 0.505042 0 +tensorRelu 0.312148 0 +tensorConvApprox 1.84112 0 +tensorBatchNorm 0.471851 0 +tensorRelu 0.188116 0 +tensorConvCutlass 1.04833 0 +tensorBatchNorm 0.457728 0 +tensorRelu 0.184556 0 +tensorConvApprox 2.11401 0 +tensorBatchNorm 0.473132 0 +tensorRelu 0.384353 0 +tensorConvCutlass 0.934064 0 +tensorBatchNorm 0.411389 0 +tensorRelu 0.142564 0 +tensorConvApprox 1.33875 0 +tensorBatchNorm 0.414552 0 +tensorRelu 0.150584 0 +tensorConvCutlass 0.75521 0 +tensorBatchNorm 0.410236 0 +tensorRelu 0.146846 0 +tensorConvApprox 1.39203 0 +tensorBatchNorm 0.412432 0 +tensorRelu 0.147136 0 +tensorConvCutlass 0.771719 0 +tensorBatchNorm 0.409913 0 +tensorRelu 0.150244 0 +tensorConvApprox 1.34712 0 +tensorBatchNorm 0.415759 0 +tensorRelu 0.151791 0 +tensorConvCutlass 0.714632 0 +tensorBatchNorm 0.40728 0 +tensorRelu 0.148979 0 +tensorConvApprox 0.930202 0 +tensorBatchNorm 0.759239 0 +tensorRelu 0.09663 0 +tensorConvCutlass 0.484437 0 +tensorBatchNorm 0.749036 0 +tensorRelu 0.102897 0 +tensorConvApprox 1.24439 0 +tensorBatchNorm 0.760227 0 +tensorRelu 0.096713 0 +tensorPooling 0.302764 0 +tensorGemmGPU 0.133992 0 +tensorAdd 0.061821 0 +tensorSoftmax 0.086043 0 + +Iteration Compute Time : 74.5962 +Iteration Compute Energy : 0 +Iteration Control Time : 0.022802 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + +Iteration 9 +tensorConvApprox 2.17586 0 +tensorBatchNorm 0.487215 0 +tensorRelu 0.693381 0 +tensorConvCutlass 0.86818 0 +tensorBatchNorm 0.438246 0 +tensorRelu 0.420747 0 +tensorConvApprox 3.64405 0 +tensorBatchNorm 0.888827 0 +tensorRelu 0.877646 0 +tensorConvCutlass 2.86639 0 +tensorBatchNorm 0.246218 0 +tensorRelu 0.241441 0 +tensorConvApprox 1.98201 0 +tensorBatchNorm 0.486885 0 +tensorRelu 0.455583 0 +tensorConvCutlass 2.25638 0 +tensorBatchNorm 0.429762 0 +tensorRelu 0.425944 0 +tensorConvApprox 2.55321 0 +tensorBatchNorm 0.478803 0 +tensorRelu 0.464266 0 +tensorConvCutlass 2.06851 0 +tensorBatchNorm 0.160336 0 +tensorRelu 0.147377 0 +tensorConvApprox 1.38658 0 +tensorBatchNorm 0.276843 0 +tensorRelu 0.245167 0 +tensorConvCutlass 1.25279 0 +tensorBatchNorm 0.261347 0 +tensorRelu 0.242219 0 +tensorConvApprox 1.72877 0 +tensorBatchNorm 0.286292 0 +tensorRelu 0.260727 0 +tensorConvCutlass 0.974662 0 +tensorBatchNorm 0.236358 0 +tensorRelu 0.102523 0 +tensorConvApprox 1.05661 0 +tensorBatchNorm 0.417304 0 +tensorRelu 0.143981 0 +tensorConvCutlass 0.788655 0 +tensorBatchNorm 0.409153 0 +tensorRelu 0.150299 0 +tensorConvApprox 1.30689 0 +tensorBatchNorm 0.418391 0 +tensorRelu 0.15042 0 +tensorConvCutlass 0.78943 0 +tensorBatchNorm 0.404583 0 +tensorRelu 0.14435 0 +tensorConvApprox 1.35444 0 +tensorBatchNorm 0.413114 0 +tensorRelu 0.149313 0 +tensorConvCutlass 0.766193 0 +tensorBatchNorm 0.40833 0 +tensorRelu 0.147593 0 +tensorConvApprox 1.32582 0 +tensorBatchNorm 0.414352 0 +tensorRelu 0.153469 0 +tensorConvCutlass 0.70409 0 +tensorBatchNorm 0.409507 0 +tensorRelu 0.135821 0 +tensorConvApprox 1.26394 0 +tensorBatchNorm 0.403406 0 +tensorRelu 0.146978 0 +tensorConvCutlass 0.712843 0 +tensorBatchNorm 0.526892 0 +tensorRelu 0.138957 0 +tensorConvApprox 1.23422 0 +tensorBatchNorm 0.418712 0 +tensorRelu 0.146598 0 +tensorConvCutlass 0.688029 0 +tensorBatchNorm 0.416884 0 +tensorRelu 0.077336 0 +tensorConvApprox 0.814616 0 +tensorBatchNorm 0.761053 0 +tensorRelu 0.0994 0 +tensorConvCutlass 0.428408 0 +tensorBatchNorm 0.739751 0 +tensorRelu 0.093004 0 +tensorConvApprox 1.0683 0 +tensorBatchNorm 0.765454 0 +tensorRelu 0.099207 0 +tensorPooling 0.263111 0 +tensorGemmGPU 0.11969 0 +tensorAdd 0.054568 0 +tensorSoftmax 0.074868 0 + +Iteration Compute Time : 57.1299 +Iteration Compute Energy : 0 +Iteration Control Time : 0.019048 +Iteration Control Energy : 0 +Iteration Config Time : 0 +Iteration Config Energy : 0 +Iteration End Frequency : 0 + + + + +Total Compute Time : 577.168 +Total Compute Energy: 0 + +Total Control Time : 0.215775 +Total Control Energy: 0 + +Total Config Time : 3.36941e-315 +Total Config Energy: 0 + +Total Time : 577.384 +Total Energy: 0