Skip to content
Snippets Groups Projects
Commit edc43964 authored by Akash Kothari's avatar Akash Kothari
Browse files

Revert "Try fixing potential runtime errors when running DNN benchmarks"

This reverts commit 125799a1.
parent 125799a1
No related branches found
No related tags found
No related merge requests found
......@@ -12,6 +12,7 @@ include_directories(${CMAKE_CURRENT_BINARY_DIR})
add_subdirectory(lib)
add_subdirectory(projects)
add_subdirectory(tools)
add_subdirectory(test)
# Add a global check rule now that all subdirectories have been traversed
......
......@@ -8,6 +8,3 @@ add_subdirectory(DFG2LLVM_WrapperAPI)
add_subdirectory(DFG2LLVM_CUDNN)
add_subdirectory(FuseHPVMTensorNodes)
add_subdirectory(InPlaceDFG)
set(TENSOR_RT_PREFIX ${CMAKE_LIBRARY_OUTPUT_DIRECTORY})
set(TENSOR_RT_LL ${TENSOR_RT_PREFIX}/tensor_runtime.ll)
Iteration 0
tensorConvApprox 11.4549 0
tensorAdd 0.987733 0
tensorTanh 0.89782 0
tensorPooling 3.92176 0
tensorConvApprox 30.4695 0
tensorAdd 0.756682 0
tensorTanh 0.659855 0
tensorPooling 3.87498 0
tensorConvApprox 12.9027 0
tensorAdd 0.529304 0
tensorTanh 0.360859 0
tensorConvApprox 14.1188 0
tensorAdd 0.572512 0
tensorTanh 0.269676 0
tensorConvApprox 12.5122 0
tensorAdd 0.37547 0
tensorTanh 0.260915 0
tensorPooling 2.68986 0
tensorGemmGPU 0.215392 0
tensorAdd 0.120663 0
tensorSoftmax 0.134412 0
Iteration Compute Time : 98.0861
Iteration Compute Energy : 0
Iteration Control Time : 0.025326
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 1
tensorConvApprox 14.367 0
tensorAdd 1.01869 0
tensorTanh 0.88308 0
tensorPooling 3.88747 0
tensorConvApprox 31.0697 0
tensorAdd 0.775118 0
tensorTanh 0.672938 0
tensorPooling 3.17431 0
tensorConvApprox 19.3971 0
tensorAdd 0.528325 0
tensorTanh 0.361839 0
tensorConvApprox 14.727 0
tensorAdd 0.367463 0
tensorTanh 0.258956 0
tensorConvApprox 15.0059 0
tensorAdd 0.361187 0
tensorTanh 0.258366 0
tensorPooling 2.70526 0
tensorGemmGPU 0.18939 0
tensorAdd 0.068016 0
tensorSoftmax 0.536969 0
Iteration Compute Time : 110.614
Iteration Compute Energy : 0
Iteration Control Time : 0.021939
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 2
tensorConvApprox 10.204 0
tensorAdd 1.0115 0
tensorTanh 0.900257 0
tensorPooling 3.90068 0
tensorConvApprox 27.5914 0
tensorAdd 0.822439 0
tensorTanh 0.849694 0
tensorPooling 2.97768 0
tensorConvApprox 17.8808 0
tensorAdd 0.55192 0
tensorTanh 0.378285 0
tensorConvApprox 13.1066 0
tensorAdd 0.621008 0
tensorTanh 0.258276 0
tensorConvApprox 18.631 0
tensorAdd 0.422421 0
tensorTanh 0.274397 0
tensorPooling 2.65083 0
tensorGemmGPU 0.409805 0
tensorAdd 0.068461 0
tensorSoftmax 0.104962 0
Iteration Compute Time : 103.616
Iteration Compute Energy : 0
Iteration Control Time : 0.152392
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 3
tensorConvApprox 16.3712 0
tensorAdd 1.03894 0
tensorTanh 0.891796 0
tensorPooling 4.02378 0
tensorConvApprox 25.7412 0
tensorAdd 0.820657 0
tensorTanh 0.702908 0
tensorPooling 3.12982 0
tensorConvApprox 17.68 0
tensorAdd 0.543514 0
tensorTanh 0.378761 0
tensorConvApprox 12.42 0
tensorAdd 0.408593 0
tensorTanh 0.403677 0
tensorConvApprox 14.9018 0
tensorAdd 0.408336 0
tensorTanh 0.269063 0
tensorPooling 2.6564 0
tensorGemmGPU 0.169787 0
tensorAdd 0.057522 0
tensorSoftmax 0.083927 0
Iteration Compute Time : 103.102
Iteration Compute Energy : 0
Iteration Control Time : 0.022148
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 4
tensorConvApprox 9.93468 0
tensorAdd 1.02456 0
tensorTanh 0.899216 0
tensorPooling 3.89126 0
tensorConvApprox 25.9977 0
tensorAdd 1.24468 0
tensorTanh 0.704036 0
tensorPooling 3.74854 0
tensorConvApprox 17.644 0
tensorAdd 0.55182 0
tensorTanh 0.37049 0
tensorConvApprox 12.5094 0
tensorAdd 0.407149 0
tensorTanh 0.248609 0
tensorConvApprox 13.9976 0
tensorAdd 0.403548 0
tensorTanh 0.26787 0
tensorPooling 2.65781 0
tensorGemmGPU 0.163417 0
tensorAdd 0.055534 0
tensorSoftmax 0.079422 0
Iteration Compute Time : 96.8013
Iteration Compute Energy : 0
Iteration Control Time : 0.021165
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 5
tensorConvApprox 9.80512 0
tensorAdd 1.00169 0
tensorTanh 0.921429 0
tensorPooling 3.91348 0
tensorConvApprox 25.9371 0
tensorAdd 0.815127 0
tensorTanh 0.673571 0
tensorPooling 7.36816 0
tensorConvApprox 16.8525 0
tensorAdd 0.525325 0
tensorTanh 0.375484 0
tensorConvApprox 13.4568 0
tensorAdd 0.413802 0
tensorTanh 0.250907 0
tensorConvApprox 14.0005 0
tensorAdd 0.408961 0
tensorTanh 0.248077 0
tensorPooling 2.66095 0
tensorGemmGPU 0.165503 0
tensorAdd 0.056493 0
tensorSoftmax 0.077052 0
Iteration Compute Time : 99.9281
Iteration Compute Energy : 0
Iteration Control Time : 0.021977
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 6
tensorConvApprox 9.80471 0
tensorAdd 0.999942 0
tensorTanh 0.915084 0
tensorPooling 3.91451 0
tensorConvApprox 25.834 0
tensorAdd 1.14345 0
tensorTanh 0.74676 0
tensorPooling 3.82146 0
tensorConvApprox 17.6392 0
tensorAdd 0.556454 0
tensorTanh 0.370676 0
tensorConvApprox 12.4922 0
tensorAdd 0.404391 0
tensorTanh 0.250653 0
tensorConvApprox 14.002 0
tensorAdd 0.403829 0
tensorTanh 0.25152 0
tensorPooling 2.66016 0
tensorGemmGPU 0.161451 0
tensorAdd 0.059232 0
tensorSoftmax 0.076951 0
Iteration Compute Time : 96.5086
Iteration Compute Energy : 0
Iteration Control Time : 0.021092
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 7
tensorConvApprox 9.79061 0
tensorAdd 1.00411 0
tensorTanh 0.923385 0
tensorPooling 3.89759 0
tensorConvApprox 25.887 0
tensorAdd 0.821996 0
tensorTanh 0.685319 0
tensorPooling 3.12557 0
tensorConvApprox 17.6648 0
tensorAdd 0.555787 0
tensorTanh 0.385425 0
tensorConvApprox 12.5025 0
tensorAdd 0.404708 0
tensorTanh 0.25099 0
tensorConvApprox 13.9948 0
tensorAdd 0.403679 0
tensorTanh 0.252795 0
tensorPooling 2.65976 0
tensorGemmGPU 0.154435 0
tensorAdd 0.058066 0
tensorSoftmax 0.102014 0
Iteration Compute Time : 95.5253
Iteration Compute Energy : 0
Iteration Control Time : 0.03922
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 8
tensorConvApprox 10.3697 0
tensorAdd 1.02741 0
tensorTanh 0.912612 0
tensorPooling 3.87946 0
tensorConvApprox 26.2823 0
tensorAdd 0.816944 0
tensorTanh 0.686638 0
tensorPooling 3.12319 0
tensorConvApprox 17.918 0
tensorAdd 0.530344 0
tensorTanh 0.362426 0
tensorConvApprox 12.8083 0
tensorAdd 0.360747 0
tensorTanh 0.257041 0
tensorConvApprox 14.2165 0
tensorAdd 0.528494 0
tensorTanh 0.257368 0
tensorPooling 2.53903 0
tensorGemmGPU 0.170898 0
tensorAdd 0.100506 0
tensorSoftmax 0.119107 0
Iteration Compute Time : 97.2669
Iteration Compute Energy : 0
Iteration Control Time : 0.041931
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 9
tensorConvApprox 10.2616 0
tensorAdd 0.98462 0
tensorTanh 0.895782 0
tensorPooling 3.9356 0
tensorConvApprox 26.2674 0
tensorAdd 0.786113 0
tensorTanh 0.676243 0
tensorPooling 3.163 0
tensorConvApprox 23.1817 0
tensorAdd 0.847734 0
tensorTanh 0.367746 0
tensorConvApprox 12.9463 0
tensorAdd 0.371015 0
tensorTanh 0.263012 0
tensorConvApprox 15.1519 0
tensorAdd 0.428313 0
tensorTanh 0.28493 0
tensorPooling 2.61769 0
tensorGemmGPU 0.167254 0
tensorAdd 0.056753 0
tensorSoftmax 0.082013 0
Iteration Compute Time : 103.737
Iteration Compute Energy : 0
Iteration Control Time : 0.022577
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Total Compute Time : 1005.19
Total Compute Energy: 6.91275e-310
Total Control Time : 0.389767
Total Control Energy: 3.51633e-315
Total Config Time : 3.51633e-315
Total Config Energy: 3.51633e-315
Total Time : 1005.57
Total Energy: 0
Iteration 0
tensorConvApprox 2.23701 0
tensorBatchNorm 0.529425 0
tensorRelu 0.479808 0
tensorConvCutlass 0.957134 0
tensorBatchNorm 0.429569 0
tensorRelu 0.498568 0
tensorConvApprox 3.54345 0
tensorBatchNorm 0.893891 0
tensorRelu 0.887381 0
tensorConvCutlass 2.66411 0
tensorBatchNorm 0.251158 0
tensorRelu 0.234899 0
tensorConvApprox 1.85116 0
tensorBatchNorm 0.618027 0
tensorRelu 0.426936 0
tensorConvCutlass 2.1397 0
tensorBatchNorm 0.453199 0
tensorRelu 0.434558 0
tensorConvApprox 2.51921 0
tensorBatchNorm 0.493453 0
tensorRelu 0.454728 0
tensorConvCutlass 2.09737 0
tensorBatchNorm 0.165386 0
tensorRelu 0.14034 0
tensorConvApprox 1.24949 0
tensorBatchNorm 0.271424 0
tensorRelu 0.246688 0
tensorConvCutlass 1.26528 0
tensorBatchNorm 0.249295 0
tensorRelu 0.233471 0
tensorConvApprox 1.61344 0
tensorBatchNorm 0.281014 0
tensorRelu 0.272026 0
tensorConvCutlass 0.89206 0
tensorBatchNorm 0.241964 0
tensorRelu 0.093642 0
tensorConvApprox 0.95434 0
tensorBatchNorm 0.425067 0
tensorRelu 0.133866 0
tensorConvCutlass 0.78137 0
tensorBatchNorm 0.397914 0
tensorRelu 0.138306 0
tensorConvApprox 1.24994 0
tensorBatchNorm 0.428037 0
tensorRelu 0.132677 0
tensorConvCutlass 0.715246 0
tensorBatchNorm 0.483229 0
tensorRelu 0.146812 0
tensorConvApprox 1.24338 0
tensorBatchNorm 0.43213 0
tensorRelu 0.134047 0
tensorConvCutlass 0.7264 0
tensorBatchNorm 0.424954 0
tensorRelu 0.139791 0
tensorConvApprox 1.23298 0
tensorBatchNorm 0.416953 0
tensorRelu 0.134486 0
tensorConvCutlass 0.679017 0
tensorBatchNorm 0.409803 0
tensorRelu 0.132933 0
tensorConvApprox 1.25983 0
tensorBatchNorm 0.431743 0
tensorRelu 0.132827 0
tensorConvCutlass 0.712734 0
tensorBatchNorm 0.393237 0
tensorRelu 0.156427 0
tensorConvApprox 1.23481 0
tensorBatchNorm 0.413014 0
tensorRelu 0.132817 0
tensorConvCutlass 0.700018 0
tensorBatchNorm 0.41414 0
tensorRelu 0.077319 0
tensorConvApprox 0.851131 0
tensorBatchNorm 0.764938 0
tensorRelu 0.088765 0
tensorConvCutlass 0.420801 0
tensorBatchNorm 0.740765 0
tensorRelu 0.09237 0
tensorConvApprox 1.13472 0
tensorBatchNorm 0.756702 0
tensorRelu 0.087387 0
tensorPooling 0.28436 0
tensorGemmGPU 0.118249 0
tensorAdd 0.070506 0
tensorSoftmax 0.086034 0
Iteration Compute Time : 55.9596
Iteration Compute Energy : 0
Iteration Control Time : 0.035796
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 1
tensorConvApprox 1.97561 0
tensorBatchNorm 0.473865 0
tensorRelu 0.453925 0
tensorConvCutlass 1.04306 0
tensorBatchNorm 0.436546 0
tensorRelu 0.411711 0
tensorConvApprox 3.43348 0
tensorBatchNorm 0.895659 0
tensorRelu 0.873215 0
tensorConvCutlass 1.98017 0
tensorBatchNorm 0.245242 0
tensorRelu 0.233063 0
tensorConvApprox 1.84183 0
tensorBatchNorm 0.478471 0
tensorRelu 0.443878 0
tensorConvCutlass 2.15217 0
tensorBatchNorm 0.422718 0
tensorRelu 0.414853 0
tensorConvApprox 2.35653 0
tensorBatchNorm 0.477226 0
tensorRelu 0.471601 0
tensorConvCutlass 2.11536 0
tensorBatchNorm 0.160139 0
tensorRelu 0.138821 0
tensorConvApprox 1.25131 0
tensorBatchNorm 0.268332 0
tensorRelu 0.23243 0
tensorConvCutlass 1.23673 0
tensorBatchNorm 0.248686 0
tensorRelu 0.232362 0
tensorConvApprox 1.6491 0
tensorBatchNorm 0.255622 0
tensorRelu 0.250747 0
tensorConvCutlass 1.00023 0
tensorBatchNorm 0.234396 0
tensorRelu 0.092353 0
tensorConvApprox 0.956705 0
tensorBatchNorm 0.401337 0
tensorRelu 0.132566 0
tensorConvCutlass 0.727656 0
tensorBatchNorm 0.39494 0
tensorRelu 0.135475 0
tensorConvApprox 1.21273 0
tensorBatchNorm 0.399682 0
tensorRelu 0.134199 0
tensorConvCutlass 0.707999 0
tensorBatchNorm 0.400456 0
tensorRelu 0.131647 0
tensorConvApprox 1.20442 0
tensorBatchNorm 0.408527 0
tensorRelu 0.133741 0
tensorConvCutlass 0.710463 0
tensorBatchNorm 0.400938 0
tensorRelu 0.147735 0
tensorConvApprox 1.25009 0
tensorBatchNorm 0.439884 0
tensorRelu 0.141364 0
tensorConvCutlass 0.722939 0
tensorBatchNorm 0.39408 0
tensorRelu 0.138576 0
tensorConvApprox 1.27766 0
tensorBatchNorm 0.400103 0
tensorRelu 0.133306 0
tensorConvCutlass 0.701348 0
tensorBatchNorm 0.531719 0
tensorRelu 0.13418 0
tensorConvApprox 1.20153 0
tensorBatchNorm 0.406742 0
tensorRelu 0.132218 0
tensorConvCutlass 0.680362 0
tensorBatchNorm 0.396494 0
tensorRelu 0.06394 0
tensorConvApprox 0.819766 0
tensorBatchNorm 0.741952 0
tensorRelu 0.085646 0
tensorConvCutlass 0.428946 0
tensorBatchNorm 0.738589 0
tensorRelu 0.092674 0
tensorConvApprox 1.06239 0
tensorBatchNorm 0.748871 0
tensorRelu 0.084497 0
tensorPooling 0.255138 0
tensorGemmGPU 0.115002 0
tensorAdd 0.05473 0
tensorSoftmax 0.072544 0
Iteration Compute Time : 54.0699
Iteration Compute Energy : 0
Iteration Control Time : 0.019171
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 2
tensorConvApprox 1.9938 0
tensorBatchNorm 0.483517 0
tensorRelu 0.456522 0
tensorConvCutlass 1.0357 0
tensorBatchNorm 0.429524 0
tensorRelu 0.412926 0
tensorConvApprox 3.42249 0
tensorBatchNorm 0.893502 0
tensorRelu 0.865981 0
tensorConvCutlass 2.60753 0
tensorBatchNorm 0.245639 0
tensorRelu 0.23249 0
tensorConvApprox 1.8282 0
tensorBatchNorm 0.473188 0
tensorRelu 0.44711 0
tensorConvCutlass 2.2097 0
tensorBatchNorm 0.422328 0
tensorRelu 0.41563 0
tensorConvApprox 2.35971 0
tensorBatchNorm 0.473869 0
tensorRelu 0.454988 0
tensorConvCutlass 2.14168 0
tensorBatchNorm 0.16224 0
tensorRelu 0.138312 0
tensorConvApprox 1.2518 0
tensorBatchNorm 0.268174 0
tensorRelu 0.236937 0
tensorConvCutlass 1.33004 0
tensorBatchNorm 0.314835 0
tensorRelu 0.264317 0
tensorConvApprox 1.58082 0
tensorBatchNorm 0.274042 0
tensorRelu 0.249993 0
tensorConvCutlass 1.03923 0
tensorBatchNorm 0.22875 0
tensorRelu 0.090939 0
tensorConvApprox 0.936895 0
tensorBatchNorm 0.402702 0
tensorRelu 0.132519 0
tensorConvCutlass 0.727177 0
tensorBatchNorm 0.395688 0
tensorRelu 0.135304 0
tensorConvApprox 1.22042 0
tensorBatchNorm 0.401043 0
tensorRelu 0.133329 0
tensorConvCutlass 0.734079 0
tensorBatchNorm 0.400517 0
tensorRelu 0.131085 0
tensorConvApprox 1.20347 0
tensorBatchNorm 0.408406 0
tensorRelu 0.13312 0
tensorConvCutlass 0.71914 0
tensorBatchNorm 0.400224 0
tensorRelu 0.131462 0
tensorConvApprox 1.1849 0
tensorBatchNorm 0.40678 0
tensorRelu 0.133044 0
tensorConvCutlass 0.700435 0
tensorBatchNorm 0.400726 0
tensorRelu 0.131443 0
tensorConvApprox 1.2809 0
tensorBatchNorm 0.408323 0
tensorRelu 0.130881 0
tensorConvCutlass 0.713094 0
tensorBatchNorm 0.400763 0
tensorRelu 0.134344 0
tensorConvApprox 1.32531 0
tensorBatchNorm 0.408491 0
tensorRelu 0.135343 0
tensorConvCutlass 0.683423 0
tensorBatchNorm 0.383472 0
tensorRelu 0.055856 0
tensorConvApprox 0.802831 0
tensorBatchNorm 0.750119 0
tensorRelu 0.086386 0
tensorConvCutlass 0.505849 0
tensorBatchNorm 0.757398 0
tensorRelu 0.104829 0
tensorConvApprox 1.0665 0
tensorBatchNorm 0.758638 0
tensorRelu 0.108774 0
tensorPooling 0.256465 0
tensorGemmGPU 0.107896 0
tensorAdd 0.053549 0
tensorSoftmax 0.073444 0
Iteration Compute Time : 54.9332
Iteration Compute Energy : 0
Iteration Control Time : 0.019819
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 3
tensorConvApprox 1.96858 0
tensorBatchNorm 0.481127 0
tensorRelu 0.453004 0
tensorConvCutlass 1.06524 0
tensorBatchNorm 0.438826 0
tensorRelu 0.413673 0
tensorConvApprox 3.45964 0
tensorBatchNorm 1.13057 0
tensorRelu 0.876152 0
tensorConvCutlass 3.06743 0
tensorBatchNorm 0.241919 0
tensorRelu 0.234321 0
tensorConvApprox 1.82083 0
tensorBatchNorm 0.473934 0
tensorRelu 0.444206 0
tensorConvCutlass 2.2154 0
tensorBatchNorm 0.425108 0
tensorRelu 0.413497 0
tensorConvApprox 3.79539 0
tensorBatchNorm 0.485401 0
tensorRelu 0.456188 0
tensorConvCutlass 1.67366 0
tensorBatchNorm 0.159367 0
tensorRelu 0.138328 0
tensorConvApprox 1.25087 0
tensorBatchNorm 0.280366 0
tensorRelu 0.238291 0
tensorConvCutlass 1.21768 0
tensorBatchNorm 0.25647 0
tensorRelu 0.232797 0
tensorConvApprox 1.583 0
tensorBatchNorm 0.274884 0
tensorRelu 0.250904 0
tensorConvCutlass 1.01374 0
tensorBatchNorm 0.232925 0
tensorRelu 0.092624 0
tensorConvApprox 0.929718 0
tensorBatchNorm 0.406899 0
tensorRelu 0.13354 0
tensorConvCutlass 0.715119 0
tensorBatchNorm 0.401388 0
tensorRelu 0.132678 0
tensorConvApprox 1.21308 0
tensorBatchNorm 0.411271 0
tensorRelu 0.136159 0
tensorConvCutlass 0.728206 0
tensorBatchNorm 0.40348 0
tensorRelu 0.131293 0
tensorConvApprox 1.2437 0
tensorBatchNorm 0.415867 0
tensorRelu 0.13346 0
tensorConvCutlass 0.728818 0
tensorBatchNorm 0.404325 0
tensorRelu 0.133644 0
tensorConvApprox 1.18933 0
tensorBatchNorm 0.408998 0
tensorRelu 0.135374 0
tensorConvCutlass 0.758913 0
tensorBatchNorm 0.401487 0
tensorRelu 0.133675 0
tensorConvApprox 1.22111 0
tensorBatchNorm 0.408526 0
tensorRelu 0.133156 0
tensorConvCutlass 0.713234 0
tensorBatchNorm 0.404604 0
tensorRelu 0.135062 0
tensorConvApprox 1.19223 0
tensorBatchNorm 0.422434 0
tensorRelu 0.151752 0
tensorConvCutlass 0.674382 0
tensorBatchNorm 0.399276 0
tensorRelu 0.055825 0
tensorConvApprox 0.813887 0
tensorBatchNorm 0.749868 0
tensorRelu 0.086545 0
tensorConvCutlass 0.43128 0
tensorBatchNorm 0.741291 0
tensorRelu 0.091888 0
tensorConvApprox 1.10082 0
tensorBatchNorm 0.742328 0
tensorRelu 0.086367 0
tensorPooling 0.260055 0
tensorGemmGPU 0.118566 0
tensorAdd 0.054399 0
tensorSoftmax 0.081731 0
Iteration Compute Time : 56.3574
Iteration Compute Energy : 0
Iteration Control Time : 0.019227
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 4
tensorConvApprox 2.75777 0
tensorBatchNorm 0.491652 0
tensorRelu 0.466859 0
tensorConvCutlass 1.04383 0
tensorBatchNorm 0.435906 0
tensorRelu 0.422722 0
tensorConvApprox 3.65792 0
tensorBatchNorm 0.893743 0
tensorRelu 0.87971 0
tensorConvCutlass 2.67706 0
tensorBatchNorm 0.246657 0
tensorRelu 0.239448 0
tensorConvApprox 2.00886 0
tensorBatchNorm 0.484758 0
tensorRelu 0.461588 0
tensorConvCutlass 2.19886 0
tensorBatchNorm 0.430631 0
tensorRelu 0.42929 0
tensorConvApprox 2.58732 0
tensorBatchNorm 0.483099 0
tensorRelu 0.462454 0
tensorConvCutlass 2.11366 0
tensorBatchNorm 0.16427 0
tensorRelu 0.151241 0
tensorConvApprox 1.39126 0
tensorBatchNorm 0.276029 0
tensorRelu 0.244813 0
tensorConvCutlass 1.29254 0
tensorBatchNorm 0.263643 0
tensorRelu 0.241842 0
tensorConvApprox 1.74431 0
tensorBatchNorm 0.277158 0
tensorRelu 0.264752 0
tensorConvCutlass 0.97804 0
tensorBatchNorm 0.236649 0
tensorRelu 0.100587 0
tensorConvApprox 1.07438 0
tensorBatchNorm 0.417747 0
tensorRelu 0.145716 0
tensorConvCutlass 0.78966 0
tensorBatchNorm 0.408245 0
tensorRelu 0.146282 0
tensorConvApprox 1.59601 0
tensorBatchNorm 0.417057 0
tensorRelu 0.148021 0
tensorConvCutlass 0.76697 0
tensorBatchNorm 0.410469 0
tensorRelu 0.143291 0
tensorConvApprox 1.37314 0
tensorBatchNorm 0.420813 0
tensorRelu 0.149074 0
tensorConvCutlass 0.785019 0
tensorBatchNorm 0.406105 0
tensorRelu 0.144829 0
tensorConvApprox 1.32397 0
tensorBatchNorm 0.41983 0
tensorRelu 0.147431 0
tensorConvCutlass 0.741959 0
tensorBatchNorm 0.411628 0
tensorRelu 0.149026 0
tensorConvApprox 1.35713 0
tensorBatchNorm 0.417514 0
tensorRelu 0.142716 0
tensorConvCutlass 0.78773 0
tensorBatchNorm 0.40768 0
tensorRelu 0.149051 0
tensorConvApprox 1.32946 0
tensorBatchNorm 0.415265 0
tensorRelu 0.144227 0
tensorConvCutlass 0.72498 0
tensorBatchNorm 0.403585 0
tensorRelu 0.067153 0
tensorConvApprox 0.931648 0
tensorBatchNorm 0.755898 0
tensorRelu 0.096675 0
tensorConvCutlass 0.47856 0
tensorBatchNorm 0.754063 0
tensorRelu 0.270367 0
tensorConvApprox 1.19719 0
tensorBatchNorm 0.760267 0
tensorRelu 0.09892 0
tensorPooling 0.305295 0
tensorGemmGPU 0.148363 0
tensorAdd 0.07344 0
tensorSoftmax 0.09613 0
Iteration Compute Time : 58.7509
Iteration Compute Energy : 0
Iteration Control Time : 0.026229
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 5
tensorConvApprox 2.1726 0
tensorBatchNorm 0.488988 0
tensorRelu 0.465871 0
tensorConvCutlass 1.04703 0
tensorBatchNorm 0.440283 0
tensorRelu 0.429357 0
tensorConvApprox 3.66603 0
tensorBatchNorm 0.893586 0
tensorRelu 0.877117 0
tensorConvCutlass 2.13282 0
tensorBatchNorm 0.249723 0
tensorRelu 0.241791 0
tensorConvApprox 1.99587 0
tensorBatchNorm 0.484073 0
tensorRelu 0.454081 0
tensorConvCutlass 2.22364 0
tensorBatchNorm 0.646739 0
tensorRelu 0.433169 0
tensorConvApprox 2.64464 0
tensorBatchNorm 0.47868 0
tensorRelu 0.468067 0
tensorConvCutlass 2.16956 0
tensorBatchNorm 0.167527 0
tensorRelu 0.149678 0
tensorConvApprox 1.38195 0
tensorBatchNorm 0.28112 0
tensorRelu 0.24312 0
tensorConvCutlass 1.26873 0
tensorBatchNorm 0.254428 0
tensorRelu 0.233827 0
tensorConvApprox 1.58365 0
tensorBatchNorm 0.278968 0
tensorRelu 0.25334 0
tensorConvCutlass 1.0156 0
tensorBatchNorm 0.244076 0
tensorRelu 0.093283 0
tensorConvApprox 0.965024 0
tensorBatchNorm 0.404682 0
tensorRelu 0.133879 0
tensorConvCutlass 0.747094 0
tensorBatchNorm 0.402763 0
tensorRelu 0.142304 0
tensorConvApprox 1.26048 0
tensorBatchNorm 0.406361 0
tensorRelu 0.133341 0
tensorConvCutlass 0.729584 0
tensorBatchNorm 0.396997 0
tensorRelu 0.136837 0
tensorConvApprox 1.21978 0
tensorBatchNorm 0.411099 0
tensorRelu 0.133601 0
tensorConvCutlass 0.726998 0
tensorBatchNorm 0.400946 0
tensorRelu 0.132781 0
tensorConvApprox 1.20036 0
tensorBatchNorm 0.409799 0
tensorRelu 0.136065 0
tensorConvCutlass 0.696289 0
tensorBatchNorm 0.406103 0
tensorRelu 0.132264 0
tensorConvApprox 1.26211 0
tensorBatchNorm 0.418643 0
tensorRelu 0.13414 0
tensorConvCutlass 0.738303 0
tensorBatchNorm 0.401346 0
tensorRelu 0.169646 0
tensorConvApprox 1.22581 0
tensorBatchNorm 0.409902 0
tensorRelu 0.133014 0
tensorConvCutlass 0.770886 0
tensorBatchNorm 0.40109 0
tensorRelu 0.057301 0
tensorConvApprox 0.820137 0
tensorBatchNorm 0.751111 0
tensorRelu 0.086029 0
tensorConvCutlass 0.428312 0
tensorBatchNorm 0.740022 0
tensorRelu 0.092405 0
tensorConvApprox 1.14936 0
tensorBatchNorm 0.762909 0
tensorRelu 0.091007 0
tensorPooling 0.264327 0
tensorGemmGPU 0.117489 0
tensorAdd 0.055461 0
tensorSoftmax 0.074949 0
Iteration Compute Time : 55.9742
Iteration Compute Energy : 0
Iteration Control Time : 0.018705
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 6
tensorConvApprox 1.96907 0
tensorBatchNorm 0.48141 0
tensorRelu 0.454234 0
tensorConvCutlass 1.04364 0
tensorBatchNorm 0.430053 0
tensorRelu 0.418357 0
tensorConvApprox 3.41283 0
tensorBatchNorm 0.888804 0
tensorRelu 0.869239 0
tensorConvCutlass 2.00479 0
tensorBatchNorm 0.246474 0
tensorRelu 0.234112 0
tensorConvApprox 1.8452 0
tensorBatchNorm 0.472051 0
tensorRelu 0.445524 0
tensorConvCutlass 2.24807 0
tensorBatchNorm 0.429071 0
tensorRelu 0.413047 0
tensorConvApprox 2.35288 0
tensorBatchNorm 0.482299 0
tensorRelu 0.453052 0
tensorConvCutlass 2.15299 0
tensorBatchNorm 0.168091 0
tensorRelu 0.140115 0
tensorConvApprox 1.22523 0
tensorBatchNorm 0.271412 0
tensorRelu 0.23909 0
tensorConvCutlass 1.19494 0
tensorBatchNorm 0.249981 0
tensorRelu 0.231756 0
tensorConvApprox 1.5918 0
tensorBatchNorm 0.270204 0
tensorRelu 0.250939 0
tensorConvCutlass 1.01044 0
tensorBatchNorm 0.229694 0
tensorRelu 0.091054 0
tensorConvApprox 0.931967 0
tensorBatchNorm 0.416154 0
tensorRelu 0.136451 0
tensorConvCutlass 0.746695 0
tensorBatchNorm 0.393873 0
tensorRelu 0.151156 0
tensorConvApprox 1.18598 0
tensorBatchNorm 0.414867 0
tensorRelu 0.136287 0
tensorConvCutlass 0.711735 0
tensorBatchNorm 0.394707 0
tensorRelu 0.158015 0
tensorConvApprox 1.24798 0
tensorBatchNorm 0.402511 0
tensorRelu 0.131976 0
tensorConvCutlass 0.89021 0
tensorBatchNorm 0.3973 0
tensorRelu 0.140986 0
tensorConvApprox 1.2066 0
tensorBatchNorm 0.416513 0
tensorRelu 0.133314 0
tensorConvCutlass 0.713295 0
tensorBatchNorm 0.397544 0
tensorRelu 0.139411 0
tensorConvApprox 1.24168 0
tensorBatchNorm 0.417975 0
tensorRelu 0.131993 0
tensorConvCutlass 0.710762 0
tensorBatchNorm 0.396162 0
tensorRelu 0.141993 0
tensorConvApprox 1.34712 0
tensorBatchNorm 0.405441 0
tensorRelu 0.13312 0
tensorConvCutlass 0.674823 0
tensorBatchNorm 0.395537 0
tensorRelu 0.057344 0
tensorConvApprox 0.806353 0
tensorBatchNorm 0.744342 0
tensorRelu 0.088901 0
tensorConvCutlass 0.417066 0
tensorBatchNorm 0.735117 0
tensorRelu 0.10534 0
tensorConvApprox 1.04822 0
tensorBatchNorm 0.743019 0
tensorRelu 0.104582 0
tensorPooling 0.452869 0
tensorGemmGPU 0.114989 0
tensorAdd 0.055767 0
tensorSoftmax 0.07351 0
Iteration Compute Time : 54.4515
Iteration Compute Energy : 0
Iteration Control Time : 0.018769
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 7
tensorConvApprox 1.97893 0
tensorBatchNorm 0.477949 0
tensorRelu 0.453688 0
tensorConvCutlass 1.03878 0
tensorBatchNorm 0.43087 0
tensorRelu 0.411306 0
tensorConvApprox 3.41754 0
tensorBatchNorm 0.887339 0
tensorRelu 0.869579 0
tensorConvCutlass 2.85909 0
tensorBatchNorm 0.244878 0
tensorRelu 0.231961 0
tensorConvApprox 1.94304 0
tensorBatchNorm 0.473929 0
tensorRelu 0.447546 0
tensorConvCutlass 2.17425 0
tensorBatchNorm 0.42527 0
tensorRelu 0.415782 0
tensorConvApprox 2.36657 0
tensorBatchNorm 0.469628 0
tensorRelu 0.451354 0
tensorConvCutlass 2.14711 0
tensorBatchNorm 0.160772 0
tensorRelu 0.137116 0
tensorConvApprox 1.23451 0
tensorBatchNorm 0.270616 0
tensorRelu 0.239943 0
tensorConvCutlass 1.19111 0
tensorBatchNorm 0.25036 0
tensorRelu 0.233829 0
tensorConvApprox 1.55993 0
tensorBatchNorm 0.269845 0
tensorRelu 0.250934 0
tensorConvCutlass 1.03172 0
tensorBatchNorm 0.228142 0
tensorRelu 0.107492 0
tensorConvApprox 0.930437 0
tensorBatchNorm 0.404727 0
tensorRelu 0.138149 0
tensorConvCutlass 0.738596 0
tensorBatchNorm 0.396348 0
tensorRelu 0.152118 0
tensorConvApprox 1.19572 0
tensorBatchNorm 0.399799 0
tensorRelu 0.136477 0
tensorConvCutlass 0.72083 0
tensorBatchNorm 0.395849 0
tensorRelu 0.134113 0
tensorConvApprox 1.20781 0
tensorBatchNorm 0.402487 0
tensorRelu 0.137157 0
tensorConvCutlass 0.721853 0
tensorBatchNorm 0.395229 0
tensorRelu 0.137036 0
tensorConvApprox 1.29612 0
tensorBatchNorm 0.39542 0
tensorRelu 0.136881 0
tensorConvCutlass 0.690593 0
tensorBatchNorm 0.400451 0
tensorRelu 0.15126 0
tensorConvApprox 1.24175 0
tensorBatchNorm 0.403511 0
tensorRelu 0.134717 0
tensorConvCutlass 0.703724 0
tensorBatchNorm 0.397863 0
tensorRelu 0.140934 0
tensorConvApprox 1.22323 0
tensorBatchNorm 0.415987 0
tensorRelu 0.231116 0
tensorConvCutlass 0.67374 0
tensorBatchNorm 0.395974 0
tensorRelu 0.056334 0
tensorConvApprox 0.800636 0
tensorBatchNorm 0.754627 0
tensorRelu 0.105969 0
tensorConvCutlass 0.416602 0
tensorBatchNorm 0.750834 0
tensorRelu 0.095854 0
tensorConvApprox 1.05905 0
tensorBatchNorm 0.744483 0
tensorRelu 0.103922 0
tensorPooling 0.254605 0
tensorGemmGPU 0.113178 0
tensorAdd 0.056922 0
tensorSoftmax 0.071876 0
Iteration Compute Time : 54.9456
Iteration Compute Energy : 0
Iteration Control Time : 0.016209
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 8
tensorConvApprox 1.97954 0
tensorBatchNorm 0.480376 0
tensorRelu 0.454296 0
tensorConvCutlass 1.02312 0
tensorBatchNorm 0.431889 0
tensorRelu 0.412319 0
tensorConvApprox 3.45825 0
tensorBatchNorm 0.891789 0
tensorRelu 0.867071 0
tensorConvCutlass 2.54057 0
tensorBatchNorm 0.243556 0
tensorRelu 0.232223 0
tensorConvApprox 1.83753 0
tensorBatchNorm 0.470948 0
tensorRelu 0.446066 0
tensorConvCutlass 2.16032 0
tensorBatchNorm 0.417745 0
tensorRelu 0.416572 0
tensorConvApprox 16.7109 0
tensorBatchNorm 0.553729 0
tensorRelu 0.511455 0
tensorConvCutlass 1.95369 0
tensorBatchNorm 0.342645 0
tensorRelu 0.150224 0
tensorConvApprox 1.27591 0
tensorBatchNorm 0.276026 0
tensorRelu 0.24126 0
tensorConvCutlass 1.23113 0
tensorBatchNorm 0.337165 0
tensorRelu 0.244851 0
tensorConvApprox 1.65138 0
tensorBatchNorm 0.273883 0
tensorRelu 0.258691 0
tensorConvCutlass 0.989746 0
tensorBatchNorm 0.231671 0
tensorRelu 0.311367 0
tensorConvApprox 1.65574 0
tensorBatchNorm 0.481729 0
tensorRelu 0.193443 0
tensorConvCutlass 0.993404 0
tensorBatchNorm 0.505042 0
tensorRelu 0.312148 0
tensorConvApprox 1.84112 0
tensorBatchNorm 0.471851 0
tensorRelu 0.188116 0
tensorConvCutlass 1.04833 0
tensorBatchNorm 0.457728 0
tensorRelu 0.184556 0
tensorConvApprox 2.11401 0
tensorBatchNorm 0.473132 0
tensorRelu 0.384353 0
tensorConvCutlass 0.934064 0
tensorBatchNorm 0.411389 0
tensorRelu 0.142564 0
tensorConvApprox 1.33875 0
tensorBatchNorm 0.414552 0
tensorRelu 0.150584 0
tensorConvCutlass 0.75521 0
tensorBatchNorm 0.410236 0
tensorRelu 0.146846 0
tensorConvApprox 1.39203 0
tensorBatchNorm 0.412432 0
tensorRelu 0.147136 0
tensorConvCutlass 0.771719 0
tensorBatchNorm 0.409913 0
tensorRelu 0.150244 0
tensorConvApprox 1.34712 0
tensorBatchNorm 0.415759 0
tensorRelu 0.151791 0
tensorConvCutlass 0.714632 0
tensorBatchNorm 0.40728 0
tensorRelu 0.148979 0
tensorConvApprox 0.930202 0
tensorBatchNorm 0.759239 0
tensorRelu 0.09663 0
tensorConvCutlass 0.484437 0
tensorBatchNorm 0.749036 0
tensorRelu 0.102897 0
tensorConvApprox 1.24439 0
tensorBatchNorm 0.760227 0
tensorRelu 0.096713 0
tensorPooling 0.302764 0
tensorGemmGPU 0.133992 0
tensorAdd 0.061821 0
tensorSoftmax 0.086043 0
Iteration Compute Time : 74.5962
Iteration Compute Energy : 0
Iteration Control Time : 0.022802
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Iteration 9
tensorConvApprox 2.17586 0
tensorBatchNorm 0.487215 0
tensorRelu 0.693381 0
tensorConvCutlass 0.86818 0
tensorBatchNorm 0.438246 0
tensorRelu 0.420747 0
tensorConvApprox 3.64405 0
tensorBatchNorm 0.888827 0
tensorRelu 0.877646 0
tensorConvCutlass 2.86639 0
tensorBatchNorm 0.246218 0
tensorRelu 0.241441 0
tensorConvApprox 1.98201 0
tensorBatchNorm 0.486885 0
tensorRelu 0.455583 0
tensorConvCutlass 2.25638 0
tensorBatchNorm 0.429762 0
tensorRelu 0.425944 0
tensorConvApprox 2.55321 0
tensorBatchNorm 0.478803 0
tensorRelu 0.464266 0
tensorConvCutlass 2.06851 0
tensorBatchNorm 0.160336 0
tensorRelu 0.147377 0
tensorConvApprox 1.38658 0
tensorBatchNorm 0.276843 0
tensorRelu 0.245167 0
tensorConvCutlass 1.25279 0
tensorBatchNorm 0.261347 0
tensorRelu 0.242219 0
tensorConvApprox 1.72877 0
tensorBatchNorm 0.286292 0
tensorRelu 0.260727 0
tensorConvCutlass 0.974662 0
tensorBatchNorm 0.236358 0
tensorRelu 0.102523 0
tensorConvApprox 1.05661 0
tensorBatchNorm 0.417304 0
tensorRelu 0.143981 0
tensorConvCutlass 0.788655 0
tensorBatchNorm 0.409153 0
tensorRelu 0.150299 0
tensorConvApprox 1.30689 0
tensorBatchNorm 0.418391 0
tensorRelu 0.15042 0
tensorConvCutlass 0.78943 0
tensorBatchNorm 0.404583 0
tensorRelu 0.14435 0
tensorConvApprox 1.35444 0
tensorBatchNorm 0.413114 0
tensorRelu 0.149313 0
tensorConvCutlass 0.766193 0
tensorBatchNorm 0.40833 0
tensorRelu 0.147593 0
tensorConvApprox 1.32582 0
tensorBatchNorm 0.414352 0
tensorRelu 0.153469 0
tensorConvCutlass 0.70409 0
tensorBatchNorm 0.409507 0
tensorRelu 0.135821 0
tensorConvApprox 1.26394 0
tensorBatchNorm 0.403406 0
tensorRelu 0.146978 0
tensorConvCutlass 0.712843 0
tensorBatchNorm 0.526892 0
tensorRelu 0.138957 0
tensorConvApprox 1.23422 0
tensorBatchNorm 0.418712 0
tensorRelu 0.146598 0
tensorConvCutlass 0.688029 0
tensorBatchNorm 0.416884 0
tensorRelu 0.077336 0
tensorConvApprox 0.814616 0
tensorBatchNorm 0.761053 0
tensorRelu 0.0994 0
tensorConvCutlass 0.428408 0
tensorBatchNorm 0.739751 0
tensorRelu 0.093004 0
tensorConvApprox 1.0683 0
tensorBatchNorm 0.765454 0
tensorRelu 0.099207 0
tensorPooling 0.263111 0
tensorGemmGPU 0.11969 0
tensorAdd 0.054568 0
tensorSoftmax 0.074868 0
Iteration Compute Time : 57.1299
Iteration Compute Energy : 0
Iteration Control Time : 0.019048
Iteration Control Energy : 0
Iteration Config Time : 0
Iteration Config Energy : 0
Iteration End Frequency : 0
Total Compute Time : 577.168
Total Compute Energy: 0
Total Control Time : 0.215775
Total Control Energy: 0
Total Config Time : 3.36941e-315
Total Config Energy: 0
Total Time : 577.384
Total Energy: 0
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment