diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet2_cifar10/alexnet2_cifar10.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet2_cifar10/alexnet2_cifar10.txt new file mode 100644 index 0000000000000000000000000000000000000000..208f154e02ef37a6ae87904844c826ce72012b32 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet2_cifar10/alexnet2_cifar10.txt @@ -0,0 +1,23 @@ +1114.3009809999999 ++++++ +conf1 1 1 84.76 0.0 +1 gpu conv fp32 11 add fp32 1 tanh fp32 1 +2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 11 add fp32 1 tanh fp32 1 +4 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 +5 gpu conv fp32 11 add fp32 1 tanh fp32 1 +6 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 +7 gpu mul fp32 11 add fp32 1 +8 gpu softmax fp32 1 +----- ++++++ +conf2 1.678391931801309 1.4393008204786808 84.76 0.0 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 +2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 +6 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +7 gpu mul fp16 12 add fp16 1 +8 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet_cifar10/alexnet_cifar10.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet_cifar10/alexnet_cifar10.txt new file mode 100644 index 0000000000000000000000000000000000000000..eba22e3f01e227041fcb406f87a996837cd5fa2b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet_cifar10/alexnet_cifar10.txt @@ -0,0 +1,421 @@ +2592.187221 ++++++ +conf1 1 1 78.78 0.0 +1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 +2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 +3 gpu conv fp32 11 add fp32 1 tanh fp32 1 +4 gpu conv fp32 11 add fp32 1 tanh fp32 1 +5 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 +6 gpu mul fp32 11 add fp32 1 +7 gpu softmax fp32 1 +----- ++++++ +conf2 1.7593976485873195 1.6193399031642917 78.78 0.0 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf3 2.081712090729918 1.9102226906341664 78.53999999999999 0.2400000000000091 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf4 2.081712090729918 1.9102226906341664 78.53999999999999 0.2400000000000091 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf5 2.2627828537139263 2.065683616898884 78.34 0.4399999999999977 +1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf6 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf7 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf8 2.3527290658539215 2.145832257234814 78.10000000000001 0.6799999999999926 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf9 2.2247938983110425 2.060416584958474 77.98 0.7999999999999972 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf10 2.2247938983110425 2.060416584958474 77.98 0.7999999999999972 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf11 2.4370818494175888 2.250857540113024 77.98 0.7999999999999972 +1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf12 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955 +1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf13 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955 +1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf14 2.432854949808342 2.2424500615508003 77.9 0.8799999999999955 +1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf15 2.228328207535687 2.0675123320068267 77.82 0.960000000000008 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf16 2.228328207535687 2.0675123320068267 77.82 0.960000000000008 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf17 2.3417491169395532 2.1355030360671465 77.78 1.0 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf18 2.3417491169395532 2.1355030360671465 77.78 1.0 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf19 2.3417491169395532 2.1355030360671465 77.78 1.0 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv fp16 12 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf20 2.5243776633638846 2.324968713897418 77.78 1.0 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf21 2.5243776633638846 2.324968713897418 77.78 1.0 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf22 2.5243776633638846 2.324968713897418 77.78 1.0 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf23 2.5371416718362823 2.3372173527293847 77.56 1.2199999999999989 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf24 2.5371416718362823 2.3372173527293847 77.56 1.2199999999999989 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf25 2.472472828611022 2.286262888143739 77.48 1.2999999999999972 +1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf26 2.574475112841438 2.3637004022727544 77.4 1.3799999999999955 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp_fp16 267 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf27 2.1200397577541747 1.951741010849448 77.3 1.480000000000004 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf28 2.1200397577541747 1.951741010849448 77.3 1.480000000000004 +1 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 266 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf29 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf30 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf31 2.5289288699015304 2.334007588396142 77.2 1.5799999999999983 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf32 2.541739061163583 2.3463519042470864 77.18 1.5999999999999943 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf33 2.541739061163583 2.3463519042470864 77.18 1.5999999999999943 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf34 2.580258965052788 2.3848508703934153 76.96 1.8200000000000074 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf35 2.580258965052788 2.3848508703934153 76.96 1.8200000000000074 +1 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf36 2.4768386387310675 2.295002745725082 76.94 1.8400000000000034 +1 gpu conv samp_fp16 263 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf37 2.5713008246729716 2.3684101116633007 76.94 1.8400000000000034 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf38 2.5713008246729716 2.3684101116633007 76.94 1.8400000000000034 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf39 2.5670585645212847 2.3720992406158463 76.92 1.8599999999999994 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf40 2.5670585645212847 2.3720992406158463 76.92 1.8599999999999994 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 +5 gpu conv samp_fp16 268 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf41 2.5760229577267673 2.3777906009584133 76.9 1.8799999999999955 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- ++++++ +conf42 2.5760229577267673 2.3777906009584133 76.9 1.8799999999999955 +1 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 tanh fp16 1 pool_max fp16 1 +3 gpu conv samp_fp16 269 add fp16 1 tanh fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 tanh fp16 1 +5 gpu conv fp16 12 add fp16 1 tanh fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 +7 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet_imagenet/alexnet_imagenet.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet_imagenet/alexnet_imagenet.txt new file mode 100644 index 0000000000000000000000000000000000000000..8ae986b90ce53e80d10e19525a51ec32f51397d8 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/alexnet_imagenet/alexnet_imagenet.txt @@ -0,0 +1,289 @@ +2739.950736 ++++++ +conf1 1 1 56.3 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 11 add fp32 1 relu fp32 1 +4 gpu conv fp32 11 add fp32 1 relu fp32 1 +5 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +6 gpu mul fp32 11 add fp32 1 relu fp32 1 +7 gpu mul fp32 11 add fp32 1 relu fp32 1 +8 gpu mul fp32 11 add fp32 1 +9 gpu softmax fp32 1 +----- ++++++ +conf2 1.802133644103582 1.8186433204507424 55.76 0.5399999999999991 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv fp16 12 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf3 2.0227701930718065 2.043112495268932 55.42 0.8799999999999955 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv fp16 12 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf4 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf5 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf6 1.8063132288735129 1.8239088223620996 54.96 1.3399999999999963 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf7 2.085011755614172 2.122606306624671 54.92 1.3799999999999955 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv fp16 12 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf8 2.085011755614172 2.122606306624671 54.92 1.3799999999999955 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 159 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv fp16 12 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf9 1.8052659214923805 1.8217111622759978 54.82 1.4799999999999969 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf10 2.0146435217865446 2.0367475358800102 54.58 1.7199999999999989 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv fp16 12 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf11 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf12 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf13 1.9101312060368951 1.9552389688678584 54.24 2.059999999999995 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf14 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf15 2.019868378233057 2.0433540129730265 54.17999999999999 2.1200000000000045 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf16 2.028037341700216 2.049760395549724 53.98 2.3200000000000003 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf17 2.028037341700216 2.049760395549724 53.98 2.3200000000000003 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf18 2.028037341700216 2.049760395549724 53.98 2.3200000000000003 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 163 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf19 1.8052659214923805 1.8217111622759978 53.879999999999995 2.4200000000000017 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 11 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf20 1.8052659214923805 1.8217111622759978 53.879999999999995 2.4200000000000017 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 11 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf21 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf22 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf23 2.0267172350289036 2.046985186681549 53.86 2.4399999999999977 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 166 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- ++++++ +conf24 2.0185588815268836 2.0405961127674277 53.559999999999995 2.740000000000002 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu conv perf_fp16 162 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv perf_fp16 164 add fp16 1 relu fp16 1 +5 gpu conv perf_fp16 157 add fp16 1 relu fp16 1 pool_max fp16 1 +6 gpu mul fp16 12 add fp16 1 relu fp16 1 +7 gpu mul fp16 12 add fp16 1 relu fp16 1 +8 gpu mul fp16 12 add fp16 1 +9 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/lenet_keras/lenet_keras.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/lenet_keras/lenet_keras.txt new file mode 100644 index 0000000000000000000000000000000000000000..da88f7cd26b049fd18644a834e4d34b944149cb2 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/lenet_keras/lenet_keras.txt @@ -0,0 +1,409 @@ +282.5141369999999 ++++++ +conf1 1 1 98.7 0.0 +1 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 +2 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 +3 gpu mul fp32 11 add fp32 1 tanh fp32 1 +4 gpu mul fp32 11 add fp32 1 tanh fp32 1 +5 gpu softmax fp32 1 +----- ++++++ +conf2 1.9343699741206566 2.1183040240042 98.68 0.01999999999999602 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf3 1.9343699741206566 2.1183040240042 98.68 0.01999999999999602 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf4 1.8936889628815377 2.139779619692146 98.68 0.01999999999999602 +1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf5 1.8936889628815377 2.139779619692146 98.68 0.01999999999999602 +1 gpu conv perf_fp16 152 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf6 1.6415764141643088 1.8012120076077847 98.66 0.04000000000000625 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 265 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf7 1.9358279784215788 2.1233340385374495 98.66 0.04000000000000625 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf8 1.9358279784215788 2.1233340385374495 98.66 0.04000000000000625 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf9 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf10 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf11 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf12 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf13 1.6319327047042609 1.8046853367113418 98.64 0.060000000000002274 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf14 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf15 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf16 1.5602284338468988 1.7102497386784767 98.61999999999999 0.0800000000000125 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf17 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf18 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf19 1.8224050632690918 1.9936046569348063 98.61999999999999 0.0800000000000125 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf20 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf21 2.2168527051833635 2.453341076720038 98.61999999999999 0.0800000000000125 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf22 1.9040998718547615 2.1501783570812565 98.61999999999999 0.0800000000000125 +1 gpu conv perf_fp16 151 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf23 1.9040998718547615 2.1501783570812565 98.61999999999999 0.0800000000000125 +1 gpu conv perf_fp16 151 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf24 1.5630416487818 1.7451546885860074 98.6 0.10000000000000853 +1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf25 1.5630416487818 1.7451546885860074 98.6 0.10000000000000853 +1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf26 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf27 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 +1 gpu conv fp16 12 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf28 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf29 1.8406161850501603 2.037849502542524 98.6 0.10000000000000853 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf30 2.1941568976363475 2.4445764373737644 98.6 0.10000000000000853 +1 gpu conv samp_fp16 269 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf31 2.1941568976363475 2.4445764373737644 98.6 0.10000000000000853 +1 gpu conv samp_fp16 269 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf32 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf33 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf34 1.5602284338468988 1.7102497386784767 98.58 0.12000000000000455 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 267 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf35 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf36 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf37 1.9209933607603906 2.123109543083542 98.58 0.12000000000000455 +1 gpu conv samp_fp16 264 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf38 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf39 1.8406161850501603 2.037849502542524 98.58 0.12000000000000455 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf40 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 +1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf41 1.8445326456180258 2.087601822059355 98.58 0.12000000000000455 +1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf42 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 +1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf43 1.8649226857257986 2.1076025277601325 98.56 0.14000000000000057 +1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf44 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 +1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf45 1.8463058650555446 2.067271423078985 98.56 0.14000000000000057 +1 gpu conv perf_fp16 157 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf46 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 +1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf47 1.9234076467497994 2.1864740913112275 98.56 0.14000000000000057 +1 gpu conv perf_fp16 153 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 262 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf48 1.6319327047042609 1.8046853367113418 98.54 0.1599999999999966 +1 gpu conv fp16 11 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf49 1.6350106933897723 1.8435952834193967 98.52 0.18000000000000682 +1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf50 1.6350106933897723 1.8435952834193967 98.52 0.18000000000000682 +1 gpu conv perf_fp16 156 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- ++++++ +conf51 1.6510114896409525 1.8591762752048948 98.48 0.21999999999999886 +1 gpu conv perf_fp16 168 add fp16 1 pool_max fp16 1 tanh fp16 1 +2 gpu conv samp_fp16 263 add fp16 1 pool_max fp16 1 tanh fp16 1 +3 gpu mul fp16 12 add fp16 1 tanh fp16 1 +4 gpu mul fp16 12 add fp16 1 tanh fp16 1 +5 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/mobilenet_cifar10/mobilenet_cifar10.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/mobilenet_cifar10/mobilenet_cifar10.txt new file mode 100644 index 0000000000000000000000000000000000000000..93ca37c00a73f1a1cfc72bf58e8067906269d813 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/mobilenet_cifar10/mobilenet_cifar10.txt @@ -0,0 +1,871 @@ +4077.307063200001 ++++++ +conf1 1 1 84.42 0.0 +1 gpu conv fp32 11 +2 gpu batchnorm fp32 11 +3 gpu relu fp32 11 +4 gpu group_conv fp32 11 +5 gpu batchnorm fp32 11 +6 gpu relu fp32 11 +7 gpu conv fp32 11 +8 gpu batchnorm fp32 11 +9 gpu relu fp32 11 +10 gpu group_conv fp32 11 +11 gpu batchnorm fp32 11 +12 gpu relu fp32 11 +13 gpu conv fp32 11 +14 gpu batchnorm fp32 11 +15 gpu relu fp32 11 +16 gpu group_conv fp32 11 +17 gpu batchnorm fp32 11 +18 gpu relu fp32 11 +19 gpu conv fp32 11 +20 gpu batchnorm fp32 11 +21 gpu relu fp32 11 +22 gpu group_conv fp32 11 +23 gpu batchnorm fp32 11 +24 gpu relu fp32 11 +25 gpu conv fp32 11 +26 gpu batchnorm fp32 11 +27 gpu relu fp32 11 +28 gpu group_conv fp32 11 +29 gpu batchnorm fp32 11 +30 gpu relu fp32 11 +31 gpu conv fp32 11 +32 gpu batchnorm fp32 11 +33 gpu relu fp32 11 +34 gpu group_conv fp32 11 +35 gpu batchnorm fp32 11 +36 gpu relu fp32 11 +37 gpu conv fp32 11 +38 gpu batchnorm fp32 11 +39 gpu relu fp32 11 +40 gpu group_conv fp32 11 +41 gpu batchnorm fp32 11 +42 gpu relu fp32 11 +43 gpu conv fp32 11 +44 gpu batchnorm fp32 11 +45 gpu relu fp32 11 +46 gpu group_conv fp32 11 +47 gpu batchnorm fp32 11 +48 gpu relu fp32 11 +49 gpu conv fp32 11 +50 gpu batchnorm fp32 11 +51 gpu relu fp32 11 +52 gpu group_conv fp32 11 +53 gpu batchnorm fp32 11 +54 gpu relu fp32 11 +55 gpu conv fp32 11 +56 gpu batchnorm fp32 11 +57 gpu relu fp32 11 +58 gpu group_conv fp32 11 +59 gpu batchnorm fp32 11 +60 gpu relu fp32 11 +61 gpu conv fp32 11 +62 gpu batchnorm fp32 11 +63 gpu relu fp32 11 +64 gpu group_conv fp32 11 +65 gpu batchnorm fp32 11 +66 gpu relu fp32 11 +67 gpu conv fp32 11 +68 gpu batchnorm fp32 11 +69 gpu relu fp32 11 +70 gpu group_conv fp32 11 +71 gpu batchnorm fp32 11 +72 gpu relu fp32 11 +73 gpu conv fp32 11 +74 gpu batchnorm fp32 11 +75 gpu relu fp32 11 +76 gpu group_conv fp32 11 +77 gpu batchnorm fp32 11 +78 gpu relu fp32 11 +79 gpu conv fp32 11 +80 gpu batchnorm fp32 11 +81 gpu relu fp32 11 +82 gpu pool_mean fp32 11 +83 gpu mul fp32 11 add fp32 1 +84 gpu softmax fp32 1 +----- ++++++ +conf2 1.504059255565631 1.4598468219902432 81.86 2.5600000000000023 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 161 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 152 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 151 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 152 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 151 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 152 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf3 1.5040783418076804 1.459845395800413 81.86 2.5600000000000023 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 161 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 152 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 152 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 151 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 151 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 152 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf4 1.5042737817275433 1.4598464522370567 81.74 2.680000000000007 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 161 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 155 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 151 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 152 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 152 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 152 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf5 1.5042737817275433 1.4598464522370567 81.74 2.680000000000007 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 161 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 155 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 151 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 152 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 152 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 152 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf6 1.5070383438802568 1.463241585164149 81.69999999999999 2.720000000000013 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 168 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 152 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 151 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 152 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 151 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 152 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf7 1.5070575058058588 1.463240152333617 81.58 2.8400000000000034 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 168 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 152 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 152 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 151 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 151 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 152 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf8 1.5039678813445672 1.4598454486222088 81.56 2.8599999999999994 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 161 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 152 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 152 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 151 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 152 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 153 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf9 1.5038655354281372 1.4599130636549171 81.46 2.960000000000008 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv perf_fp16 161 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv perf_fp16 152 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv perf_fp16 161 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv perf_fp16 155 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv perf_fp16 152 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv perf_fp16 152 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv perf_fp16 151 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv perf_fp16 151 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv perf_fp16 153 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- ++++++ +conf10 1.4785375660713596 1.4280520288797043 84.42 0.0 +1 gpu conv fp16 12 +2 gpu batchnorm fp16 12 +3 gpu relu fp16 12 +4 gpu group_conv fp16 12 +5 gpu batchnorm fp16 12 +6 gpu relu fp16 12 +7 gpu conv fp16 12 +8 gpu batchnorm fp16 12 +9 gpu relu fp16 12 +10 gpu group_conv fp16 12 +11 gpu batchnorm fp16 12 +12 gpu relu fp16 12 +13 gpu conv fp16 12 +14 gpu batchnorm fp16 12 +15 gpu relu fp16 12 +16 gpu group_conv fp16 12 +17 gpu batchnorm fp16 12 +18 gpu relu fp16 12 +19 gpu conv fp16 12 +20 gpu batchnorm fp16 12 +21 gpu relu fp16 12 +22 gpu group_conv fp16 12 +23 gpu batchnorm fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu group_conv fp16 12 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv fp16 12 +32 gpu batchnorm fp16 12 +33 gpu relu fp16 12 +34 gpu group_conv fp16 12 +35 gpu batchnorm fp16 12 +36 gpu relu fp16 12 +37 gpu conv fp16 12 +38 gpu batchnorm fp16 12 +39 gpu relu fp16 12 +40 gpu group_conv fp16 12 +41 gpu batchnorm fp16 12 +42 gpu relu fp16 12 +43 gpu conv fp16 12 +44 gpu batchnorm fp16 12 +45 gpu relu fp16 12 +46 gpu group_conv fp16 12 +47 gpu batchnorm fp16 12 +48 gpu relu fp16 12 +49 gpu conv fp16 12 +50 gpu batchnorm fp16 12 +51 gpu relu fp16 12 +52 gpu group_conv fp16 12 +53 gpu batchnorm fp16 12 +54 gpu relu fp16 12 +55 gpu conv fp16 12 +56 gpu batchnorm fp16 12 +57 gpu relu fp16 12 +58 gpu group_conv fp16 12 +59 gpu batchnorm fp16 12 +60 gpu relu fp16 12 +61 gpu conv fp16 12 +62 gpu batchnorm fp16 12 +63 gpu relu fp16 12 +64 gpu group_conv fp16 12 +65 gpu batchnorm fp16 12 +66 gpu relu fp16 12 +67 gpu conv fp16 12 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu group_conv fp16 12 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv fp16 12 +74 gpu batchnorm fp16 12 +75 gpu relu fp16 12 +76 gpu group_conv fp16 12 +77 gpu batchnorm fp16 12 +78 gpu relu fp16 12 +79 gpu conv fp16 12 +80 gpu batchnorm fp16 12 +81 gpu relu fp16 12 +82 gpu pool_mean fp16 12 +83 gpu mul fp16 12 add fp16 1 +84 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/resnet18_cifar10/resnet18_cifar10.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/resnet18_cifar10/resnet18_cifar10.txt new file mode 100644 index 0000000000000000000000000000000000000000..d1d75a011e9ada7994dcd5a31ee5d56fc2ee3e2f --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/resnet18_cifar10/resnet18_cifar10.txt @@ -0,0 +1,91 @@ +2484.981244 ++++++ +conf1 1 1 89.42 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 +2 gpu conv fp32 11 add fp32 1 relu fp32 1 +3 gpu conv fp32 11 add fp32 1 +4 gpu add fp32 11 +5 gpu relu fp32 11 +6 gpu conv fp32 11 add fp32 1 relu fp32 1 +7 gpu conv fp32 11 add fp32 1 +8 gpu add fp32 11 +9 gpu relu fp32 11 +10 gpu conv fp32 11 add fp32 1 relu fp32 1 +11 gpu conv fp32 11 add fp32 1 +12 gpu add fp32 11 +13 gpu relu fp32 11 +14 gpu conv fp32 11 add fp32 1 relu fp32 1 +15 gpu conv fp32 11 add fp32 1 +16 gpu conv fp32 11 add fp32 1 +17 gpu add fp32 11 +18 gpu relu fp32 11 +19 gpu conv fp32 11 add fp32 1 relu fp32 1 +20 gpu conv fp32 11 add fp32 1 +21 gpu add fp32 11 +22 gpu relu fp32 11 +23 gpu conv fp32 11 add fp32 1 relu fp32 1 +24 gpu conv fp32 11 add fp32 1 +25 gpu add fp32 11 +26 gpu relu fp32 11 +27 gpu conv fp32 11 add fp32 1 relu fp32 1 +28 gpu conv fp32 11 add fp32 1 +29 gpu conv fp32 11 add fp32 1 +30 gpu add fp32 11 +31 gpu relu fp32 11 +32 gpu conv fp32 11 add fp32 1 relu fp32 1 +33 gpu conv fp32 11 add fp32 1 +34 gpu add fp32 11 +35 gpu relu fp32 11 +36 gpu conv fp32 11 add fp32 1 relu fp32 1 +37 gpu conv fp32 11 add fp32 1 +38 gpu add fp32 11 +39 gpu relu fp32 11 +40 gpu pool_mean fp32 11 +41 gpu mul fp32 11 add fp32 1 +42 gpu softmax fp32 1 +----- ++++++ +conf2 1.3617910209460897 1.3866827244386561 89.42 0.0 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 +3 gpu conv fp16 12 add fp16 1 +4 gpu add fp16 12 +5 gpu relu fp16 12 +6 gpu conv fp16 12 add fp16 1 relu fp16 1 +7 gpu conv fp16 12 add fp16 1 +8 gpu add fp16 12 +9 gpu relu fp16 12 +10 gpu conv fp16 12 add fp16 1 relu fp16 1 +11 gpu conv fp16 12 add fp16 1 +12 gpu add fp16 12 +13 gpu relu fp16 12 +14 gpu conv fp16 12 add fp16 1 relu fp16 1 +15 gpu conv fp16 12 add fp16 1 +16 gpu conv fp16 12 add fp16 1 +17 gpu add fp16 12 +18 gpu relu fp16 12 +19 gpu conv fp16 12 add fp16 1 relu fp16 1 +20 gpu conv fp16 12 add fp16 1 +21 gpu add fp16 12 +22 gpu relu fp16 12 +23 gpu conv fp16 12 add fp16 1 relu fp16 1 +24 gpu conv fp16 12 add fp16 1 +25 gpu add fp16 12 +26 gpu relu fp16 12 +27 gpu conv fp16 12 add fp16 1 relu fp16 1 +28 gpu conv fp16 12 add fp16 1 +29 gpu conv fp16 12 add fp16 1 +30 gpu add fp16 12 +31 gpu relu fp16 12 +32 gpu conv fp16 12 add fp16 1 relu fp16 1 +33 gpu conv fp16 12 add fp16 1 +34 gpu add fp16 12 +35 gpu relu fp16 12 +36 gpu conv fp16 12 add fp16 1 relu fp16 1 +37 gpu conv fp16 12 add fp16 1 +38 gpu add fp16 12 +39 gpu relu fp16 12 +40 gpu pool_mean fp16 12 +41 gpu mul fp16 12 add fp16 1 +42 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/resnet50_imagenet/resnet50_imagenet.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/resnet50_imagenet/resnet50_imagenet.txt new file mode 100644 index 0000000000000000000000000000000000000000..a045011580adb912289364d35fb85668e74261e7 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/resnet50_imagenet/resnet50_imagenet.txt @@ -0,0 +1,1233 @@ +7161.053769000008 ++++++ +conf1 1 1 75.7 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +2 gpu batchnorm fp32 11 +3 gpu conv fp32 11 add fp32 1 +4 gpu batchnorm fp32 11 +5 gpu relu fp32 11 +6 gpu conv fp32 11 add fp32 1 +7 gpu batchnorm fp32 11 +8 gpu relu fp32 11 +9 gpu conv fp32 11 add fp32 1 +10 gpu batchnorm fp32 11 +11 gpu conv fp32 11 add fp32 1 +12 gpu batchnorm fp32 11 +13 gpu add fp32 11 +14 gpu relu fp32 11 +15 gpu conv fp32 11 add fp32 1 +16 gpu batchnorm fp32 11 +17 gpu relu fp32 11 +18 gpu conv fp32 11 add fp32 1 +19 gpu batchnorm fp32 11 +20 gpu relu fp32 11 +21 gpu conv fp32 11 add fp32 1 +22 gpu batchnorm fp32 11 +23 gpu add fp32 11 +24 gpu relu fp32 11 +25 gpu conv fp32 11 add fp32 1 +26 gpu batchnorm fp32 11 +27 gpu relu fp32 11 +28 gpu conv fp32 11 add fp32 1 +29 gpu batchnorm fp32 11 +30 gpu relu fp32 11 +31 gpu conv fp32 11 add fp32 1 +32 gpu batchnorm fp32 11 +33 gpu add fp32 11 +34 gpu relu fp32 11 +35 gpu conv fp32 11 add fp32 1 +36 gpu batchnorm fp32 11 +37 gpu relu fp32 11 +38 gpu conv fp32 11 add fp32 1 +39 gpu batchnorm fp32 11 +40 gpu relu fp32 11 +41 gpu conv fp32 11 add fp32 1 +42 gpu batchnorm fp32 11 +43 gpu conv fp32 11 add fp32 1 +44 gpu batchnorm fp32 11 +45 gpu add fp32 11 +46 gpu relu fp32 11 +47 gpu conv fp32 11 add fp32 1 +48 gpu batchnorm fp32 11 +49 gpu relu fp32 11 +50 gpu conv fp32 11 add fp32 1 +51 gpu batchnorm fp32 11 +52 gpu relu fp32 11 +53 gpu conv fp32 11 add fp32 1 +54 gpu batchnorm fp32 11 +55 gpu add fp32 11 +56 gpu relu fp32 11 +57 gpu conv fp32 11 add fp32 1 +58 gpu batchnorm fp32 11 +59 gpu relu fp32 11 +60 gpu conv fp32 11 add fp32 1 +61 gpu batchnorm fp32 11 +62 gpu relu fp32 11 +63 gpu conv fp32 11 add fp32 1 +64 gpu batchnorm fp32 11 +65 gpu add fp32 11 +66 gpu relu fp32 11 +67 gpu conv fp32 11 add fp32 1 +68 gpu batchnorm fp32 11 +69 gpu relu fp32 11 +70 gpu conv fp32 11 add fp32 1 +71 gpu batchnorm fp32 11 +72 gpu relu fp32 11 +73 gpu conv fp32 11 add fp32 1 +74 gpu batchnorm fp32 11 +75 gpu add fp32 11 +76 gpu relu fp32 11 +77 gpu conv fp32 11 add fp32 1 +78 gpu batchnorm fp32 11 +79 gpu relu fp32 11 +80 gpu conv fp32 11 add fp32 1 +81 gpu batchnorm fp32 11 +82 gpu relu fp32 11 +83 gpu conv fp32 11 add fp32 1 +84 gpu batchnorm fp32 11 +85 gpu conv fp32 11 add fp32 1 +86 gpu batchnorm fp32 11 +87 gpu add fp32 11 +88 gpu relu fp32 11 +89 gpu conv fp32 11 add fp32 1 +90 gpu batchnorm fp32 11 +91 gpu relu fp32 11 +92 gpu conv fp32 11 add fp32 1 +93 gpu batchnorm fp32 11 +94 gpu relu fp32 11 +95 gpu conv fp32 11 add fp32 1 +96 gpu batchnorm fp32 11 +97 gpu add fp32 11 +98 gpu relu fp32 11 +99 gpu conv fp32 11 add fp32 1 +100 gpu batchnorm fp32 11 +101 gpu relu fp32 11 +102 gpu conv fp32 11 add fp32 1 +103 gpu batchnorm fp32 11 +104 gpu relu fp32 11 +105 gpu conv fp32 11 add fp32 1 +106 gpu batchnorm fp32 11 +107 gpu add fp32 11 +108 gpu relu fp32 11 +109 gpu conv fp32 11 add fp32 1 +110 gpu batchnorm fp32 11 +111 gpu relu fp32 11 +112 gpu conv fp32 11 add fp32 1 +113 gpu batchnorm fp32 11 +114 gpu relu fp32 11 +115 gpu conv fp32 11 add fp32 1 +116 gpu batchnorm fp32 11 +117 gpu add fp32 11 +118 gpu relu fp32 11 +119 gpu conv fp32 11 add fp32 1 +120 gpu batchnorm fp32 11 +121 gpu relu fp32 11 +122 gpu conv fp32 11 add fp32 1 +123 gpu batchnorm fp32 11 +124 gpu relu fp32 11 +125 gpu conv fp32 11 add fp32 1 +126 gpu batchnorm fp32 11 +127 gpu add fp32 11 +128 gpu relu fp32 11 +129 gpu conv fp32 11 add fp32 1 +130 gpu batchnorm fp32 11 +131 gpu relu fp32 11 +132 gpu conv fp32 11 add fp32 1 +133 gpu batchnorm fp32 11 +134 gpu relu fp32 11 +135 gpu conv fp32 11 add fp32 1 +136 gpu batchnorm fp32 11 +137 gpu add fp32 11 +138 gpu relu fp32 11 +139 gpu conv fp32 11 add fp32 1 +140 gpu batchnorm fp32 11 +141 gpu relu fp32 11 +142 gpu conv fp32 11 add fp32 1 +143 gpu batchnorm fp32 11 +144 gpu relu fp32 11 +145 gpu conv fp32 11 add fp32 1 +146 gpu batchnorm fp32 11 +147 gpu conv fp32 11 add fp32 1 +148 gpu batchnorm fp32 11 +149 gpu add fp32 11 +150 gpu relu fp32 11 +151 gpu conv fp32 11 add fp32 1 +152 gpu batchnorm fp32 11 +153 gpu relu fp32 11 +154 gpu conv fp32 11 add fp32 1 +155 gpu batchnorm fp32 11 +156 gpu relu fp32 11 +157 gpu conv fp32 11 add fp32 1 +158 gpu batchnorm fp32 11 +159 gpu add fp32 11 +160 gpu relu fp32 11 +161 gpu conv fp32 11 add fp32 1 +162 gpu batchnorm fp32 11 +163 gpu relu fp32 11 +164 gpu conv fp32 11 add fp32 1 +165 gpu batchnorm fp32 11 +166 gpu relu fp32 11 +167 gpu conv fp32 11 add fp32 1 +168 gpu batchnorm fp32 11 +169 gpu add fp32 11 +170 gpu relu fp32 11 +171 gpu pool_max fp32 11 +172 gpu mul fp32 11 add fp32 1 +173 gpu softmax fp32 1 +----- ++++++ +conf2 1.8254789092281507 1.4527803526239977 75.7 0.0 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu batchnorm fp16 12 +3 gpu conv fp16 12 add fp16 1 +4 gpu batchnorm fp16 12 +5 gpu relu fp16 12 +6 gpu conv fp16 12 add fp16 1 +7 gpu batchnorm fp16 12 +8 gpu relu fp16 12 +9 gpu conv fp16 12 add fp16 1 +10 gpu batchnorm fp16 12 +11 gpu conv fp16 12 add fp16 1 +12 gpu batchnorm fp16 12 +13 gpu add fp16 12 +14 gpu relu fp16 12 +15 gpu conv fp16 12 add fp16 1 +16 gpu batchnorm fp16 12 +17 gpu relu fp16 12 +18 gpu conv fp16 12 add fp16 1 +19 gpu batchnorm fp16 12 +20 gpu relu fp16 12 +21 gpu conv fp16 12 add fp16 1 +22 gpu batchnorm fp16 12 +23 gpu add fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 add fp16 1 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu conv fp16 12 add fp16 1 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv fp16 12 add fp16 1 +32 gpu batchnorm fp16 12 +33 gpu add fp16 12 +34 gpu relu fp16 12 +35 gpu conv fp16 12 add fp16 1 +36 gpu batchnorm fp16 12 +37 gpu relu fp16 12 +38 gpu conv fp16 12 add fp16 1 +39 gpu batchnorm fp16 12 +40 gpu relu fp16 12 +41 gpu conv fp16 12 add fp16 1 +42 gpu batchnorm fp16 12 +43 gpu conv fp16 12 add fp16 1 +44 gpu batchnorm fp16 12 +45 gpu add fp16 12 +46 gpu relu fp16 12 +47 gpu conv fp16 12 add fp16 1 +48 gpu batchnorm fp16 12 +49 gpu relu fp16 12 +50 gpu conv fp16 12 add fp16 1 +51 gpu batchnorm fp16 12 +52 gpu relu fp16 12 +53 gpu conv fp16 12 add fp16 1 +54 gpu batchnorm fp16 12 +55 gpu add fp16 12 +56 gpu relu fp16 12 +57 gpu conv fp16 12 add fp16 1 +58 gpu batchnorm fp16 12 +59 gpu relu fp16 12 +60 gpu conv fp16 12 add fp16 1 +61 gpu batchnorm fp16 12 +62 gpu relu fp16 12 +63 gpu conv fp16 12 add fp16 1 +64 gpu batchnorm fp16 12 +65 gpu add fp16 12 +66 gpu relu fp16 12 +67 gpu conv fp16 12 add fp16 1 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu conv fp16 12 add fp16 1 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv fp16 12 add fp16 1 +74 gpu batchnorm fp16 12 +75 gpu add fp16 12 +76 gpu relu fp16 12 +77 gpu conv fp16 12 add fp16 1 +78 gpu batchnorm fp16 12 +79 gpu relu fp16 12 +80 gpu conv fp16 12 add fp16 1 +81 gpu batchnorm fp16 12 +82 gpu relu fp16 12 +83 gpu conv fp16 12 add fp16 1 +84 gpu batchnorm fp16 12 +85 gpu conv fp16 12 add fp16 1 +86 gpu batchnorm fp16 12 +87 gpu add fp16 12 +88 gpu relu fp16 12 +89 gpu conv fp16 12 add fp16 1 +90 gpu batchnorm fp16 12 +91 gpu relu fp16 12 +92 gpu conv fp16 12 add fp16 1 +93 gpu batchnorm fp16 12 +94 gpu relu fp16 12 +95 gpu conv fp16 12 add fp16 1 +96 gpu batchnorm fp16 12 +97 gpu add fp16 12 +98 gpu relu fp16 12 +99 gpu conv fp16 12 add fp16 1 +100 gpu batchnorm fp16 12 +101 gpu relu fp16 12 +102 gpu conv fp16 12 add fp16 1 +103 gpu batchnorm fp16 12 +104 gpu relu fp16 12 +105 gpu conv fp16 12 add fp16 1 +106 gpu batchnorm fp16 12 +107 gpu add fp16 12 +108 gpu relu fp16 12 +109 gpu conv fp16 12 add fp16 1 +110 gpu batchnorm fp16 12 +111 gpu relu fp16 12 +112 gpu conv fp16 12 add fp16 1 +113 gpu batchnorm fp16 12 +114 gpu relu fp16 12 +115 gpu conv fp16 12 add fp16 1 +116 gpu batchnorm fp16 12 +117 gpu add fp16 12 +118 gpu relu fp16 12 +119 gpu conv fp16 12 add fp16 1 +120 gpu batchnorm fp16 12 +121 gpu relu fp16 12 +122 gpu conv fp16 12 add fp16 1 +123 gpu batchnorm fp16 12 +124 gpu relu fp16 12 +125 gpu conv fp16 12 add fp16 1 +126 gpu batchnorm fp16 12 +127 gpu add fp16 12 +128 gpu relu fp16 12 +129 gpu conv fp16 12 add fp16 1 +130 gpu batchnorm fp16 12 +131 gpu relu fp16 12 +132 gpu conv fp16 12 add fp16 1 +133 gpu batchnorm fp16 12 +134 gpu relu fp16 12 +135 gpu conv fp16 12 add fp16 1 +136 gpu batchnorm fp16 12 +137 gpu add fp16 12 +138 gpu relu fp16 12 +139 gpu conv fp16 12 add fp16 1 +140 gpu batchnorm fp16 12 +141 gpu relu fp16 12 +142 gpu conv fp16 12 add fp16 1 +143 gpu batchnorm fp16 12 +144 gpu relu fp16 12 +145 gpu conv fp16 12 add fp16 1 +146 gpu batchnorm fp16 12 +147 gpu conv fp16 12 add fp16 1 +148 gpu batchnorm fp16 12 +149 gpu add fp16 12 +150 gpu relu fp16 12 +151 gpu conv fp16 12 add fp16 1 +152 gpu batchnorm fp16 12 +153 gpu relu fp16 12 +154 gpu conv fp16 12 add fp16 1 +155 gpu batchnorm fp16 12 +156 gpu relu fp16 12 +157 gpu conv fp16 12 add fp16 1 +158 gpu batchnorm fp16 12 +159 gpu add fp16 12 +160 gpu relu fp16 12 +161 gpu conv fp16 12 add fp16 1 +162 gpu batchnorm fp16 12 +163 gpu relu fp16 12 +164 gpu conv fp16 12 add fp16 1 +165 gpu batchnorm fp16 12 +166 gpu relu fp16 12 +167 gpu conv fp16 12 add fp16 1 +168 gpu batchnorm fp16 12 +169 gpu add fp16 12 +170 gpu relu fp16 12 +171 gpu pool_max fp16 12 +172 gpu mul fp16 12 add fp16 1 +173 gpu softmax fp32 1 +----- ++++++ +conf3 1.8521749055745271 1.472492365706726 75.02 0.6800000000000068 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu batchnorm fp16 12 +3 gpu conv fp16 12 add fp16 1 +4 gpu batchnorm fp16 12 +5 gpu relu fp16 12 +6 gpu conv fp16 12 add fp16 1 +7 gpu batchnorm fp16 12 +8 gpu relu fp16 12 +9 gpu conv fp16 12 add fp16 1 +10 gpu batchnorm fp16 12 +11 gpu conv fp16 12 add fp16 1 +12 gpu batchnorm fp16 12 +13 gpu add fp16 12 +14 gpu relu fp16 12 +15 gpu conv fp16 12 add fp16 1 +16 gpu batchnorm fp16 12 +17 gpu relu fp16 12 +18 gpu conv fp16 12 add fp16 1 +19 gpu batchnorm fp16 12 +20 gpu relu fp16 12 +21 gpu conv fp16 12 add fp16 1 +22 gpu batchnorm fp16 12 +23 gpu add fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 add fp16 1 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu conv perf_fp16 160 add fp16 1 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv fp16 11 add fp16 1 +32 gpu batchnorm fp16 12 +33 gpu add fp16 12 +34 gpu relu fp16 12 +35 gpu conv fp16 12 add fp16 1 +36 gpu batchnorm fp16 12 +37 gpu relu fp16 12 +38 gpu conv fp16 12 add fp16 1 +39 gpu batchnorm fp16 12 +40 gpu relu fp16 12 +41 gpu conv fp16 12 add fp16 1 +42 gpu batchnorm fp16 12 +43 gpu conv fp16 12 add fp16 1 +44 gpu batchnorm fp16 12 +45 gpu add fp16 12 +46 gpu relu fp16 12 +47 gpu conv fp16 12 add fp16 1 +48 gpu batchnorm fp16 12 +49 gpu relu fp16 12 +50 gpu conv fp16 12 add fp16 1 +51 gpu batchnorm fp16 12 +52 gpu relu fp16 12 +53 gpu conv fp16 12 add fp16 1 +54 gpu batchnorm fp16 12 +55 gpu add fp16 12 +56 gpu relu fp16 12 +57 gpu conv fp16 12 add fp16 1 +58 gpu batchnorm fp16 12 +59 gpu relu fp16 12 +60 gpu conv fp16 12 add fp16 1 +61 gpu batchnorm fp16 12 +62 gpu relu fp16 12 +63 gpu conv fp16 12 add fp16 1 +64 gpu batchnorm fp16 12 +65 gpu add fp16 12 +66 gpu relu fp16 12 +67 gpu conv fp16 12 add fp16 1 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu conv fp16 12 add fp16 1 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv fp16 12 add fp16 1 +74 gpu batchnorm fp16 12 +75 gpu add fp16 12 +76 gpu relu fp16 12 +77 gpu conv fp16 12 add fp16 1 +78 gpu batchnorm fp16 12 +79 gpu relu fp16 12 +80 gpu conv fp16 12 add fp16 1 +81 gpu batchnorm fp16 12 +82 gpu relu fp16 12 +83 gpu conv fp16 12 add fp16 1 +84 gpu batchnorm fp16 12 +85 gpu conv fp16 12 add fp16 1 +86 gpu batchnorm fp16 12 +87 gpu add fp16 12 +88 gpu relu fp16 12 +89 gpu conv fp16 12 add fp16 1 +90 gpu batchnorm fp16 12 +91 gpu relu fp16 12 +92 gpu conv fp16 12 add fp16 1 +93 gpu batchnorm fp16 12 +94 gpu relu fp16 12 +95 gpu conv fp16 11 add fp16 1 +96 gpu batchnorm fp16 12 +97 gpu add fp16 12 +98 gpu relu fp16 12 +99 gpu conv fp16 12 add fp16 1 +100 gpu batchnorm fp16 12 +101 gpu relu fp16 12 +102 gpu conv perf_fp16 164 add fp16 1 +103 gpu batchnorm fp16 12 +104 gpu relu fp16 12 +105 gpu conv fp16 12 add fp16 1 +106 gpu batchnorm fp16 12 +107 gpu add fp16 12 +108 gpu relu fp16 12 +109 gpu conv fp16 12 add fp16 1 +110 gpu batchnorm fp16 12 +111 gpu relu fp16 12 +112 gpu conv fp16 12 add fp16 1 +113 gpu batchnorm fp16 12 +114 gpu relu fp16 12 +115 gpu conv fp16 12 add fp16 1 +116 gpu batchnorm fp16 12 +117 gpu add fp16 12 +118 gpu relu fp16 12 +119 gpu conv fp16 12 add fp16 1 +120 gpu batchnorm fp16 12 +121 gpu relu fp16 12 +122 gpu conv fp16 12 add fp16 1 +123 gpu batchnorm fp16 12 +124 gpu relu fp16 12 +125 gpu conv fp16 12 add fp16 1 +126 gpu batchnorm fp16 12 +127 gpu add fp16 12 +128 gpu relu fp16 12 +129 gpu conv fp16 12 add fp16 1 +130 gpu batchnorm fp16 12 +131 gpu relu fp16 12 +132 gpu conv fp16 12 add fp16 1 +133 gpu batchnorm fp16 12 +134 gpu relu fp16 12 +135 gpu conv fp16 12 add fp16 1 +136 gpu batchnorm fp16 12 +137 gpu add fp16 12 +138 gpu relu fp16 12 +139 gpu conv fp16 12 add fp16 1 +140 gpu batchnorm fp16 12 +141 gpu relu fp16 12 +142 gpu conv fp16 12 add fp16 1 +143 gpu batchnorm fp16 12 +144 gpu relu fp16 12 +145 gpu conv fp16 12 add fp16 1 +146 gpu batchnorm fp16 12 +147 gpu conv fp16 12 add fp16 1 +148 gpu batchnorm fp16 12 +149 gpu add fp16 12 +150 gpu relu fp16 12 +151 gpu conv fp16 12 add fp16 1 +152 gpu batchnorm fp16 12 +153 gpu relu fp16 12 +154 gpu conv fp16 12 add fp16 1 +155 gpu batchnorm fp16 12 +156 gpu relu fp16 12 +157 gpu conv fp16 12 add fp16 1 +158 gpu batchnorm fp16 12 +159 gpu add fp16 12 +160 gpu relu fp16 12 +161 gpu conv fp16 12 add fp16 1 +162 gpu batchnorm fp16 12 +163 gpu relu fp16 12 +164 gpu conv fp16 12 add fp16 1 +165 gpu batchnorm fp16 12 +166 gpu relu fp16 12 +167 gpu conv fp16 12 add fp16 1 +168 gpu batchnorm fp16 12 +169 gpu add fp16 12 +170 gpu relu fp16 12 +171 gpu pool_max fp16 12 +172 gpu mul fp16 12 add fp16 1 +173 gpu softmax fp32 1 +----- ++++++ +conf4 1.8509087142956673 1.4713858340895483 74.68 1.019999999999996 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu batchnorm fp16 12 +3 gpu conv fp16 12 add fp16 1 +4 gpu batchnorm fp16 12 +5 gpu relu fp16 12 +6 gpu conv fp16 12 add fp16 1 +7 gpu batchnorm fp16 12 +8 gpu relu fp16 12 +9 gpu conv fp16 12 add fp16 1 +10 gpu batchnorm fp16 12 +11 gpu conv fp16 12 add fp16 1 +12 gpu batchnorm fp16 12 +13 gpu add fp16 12 +14 gpu relu fp16 12 +15 gpu conv fp16 12 add fp16 1 +16 gpu batchnorm fp16 12 +17 gpu relu fp16 12 +18 gpu conv fp16 12 add fp16 1 +19 gpu batchnorm fp16 12 +20 gpu relu fp16 12 +21 gpu conv fp16 12 add fp16 1 +22 gpu batchnorm fp16 12 +23 gpu add fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 add fp16 1 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu conv perf_fp16 160 add fp16 1 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv fp16 12 add fp16 1 +32 gpu batchnorm fp16 12 +33 gpu add fp16 12 +34 gpu relu fp16 12 +35 gpu conv fp16 12 add fp16 1 +36 gpu batchnorm fp16 12 +37 gpu relu fp16 12 +38 gpu conv fp16 12 add fp16 1 +39 gpu batchnorm fp16 12 +40 gpu relu fp16 12 +41 gpu conv fp16 12 add fp16 1 +42 gpu batchnorm fp16 12 +43 gpu conv fp16 12 add fp16 1 +44 gpu batchnorm fp16 12 +45 gpu add fp16 12 +46 gpu relu fp16 12 +47 gpu conv fp16 12 add fp16 1 +48 gpu batchnorm fp16 12 +49 gpu relu fp16 12 +50 gpu conv fp16 12 add fp16 1 +51 gpu batchnorm fp16 12 +52 gpu relu fp16 12 +53 gpu conv fp16 12 add fp16 1 +54 gpu batchnorm fp16 12 +55 gpu add fp16 12 +56 gpu relu fp16 12 +57 gpu conv fp16 12 add fp16 1 +58 gpu batchnorm fp16 12 +59 gpu relu fp16 12 +60 gpu conv fp16 12 add fp16 1 +61 gpu batchnorm fp16 12 +62 gpu relu fp16 12 +63 gpu conv fp16 12 add fp16 1 +64 gpu batchnorm fp16 12 +65 gpu add fp16 12 +66 gpu relu fp16 12 +67 gpu conv fp16 12 add fp16 1 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu conv fp16 12 add fp16 1 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv fp16 12 add fp16 1 +74 gpu batchnorm fp16 12 +75 gpu add fp16 12 +76 gpu relu fp16 12 +77 gpu conv fp16 12 add fp16 1 +78 gpu batchnorm fp16 12 +79 gpu relu fp16 12 +80 gpu conv fp16 12 add fp16 1 +81 gpu batchnorm fp16 12 +82 gpu relu fp16 12 +83 gpu conv fp16 12 add fp16 1 +84 gpu batchnorm fp16 12 +85 gpu conv fp16 12 add fp16 1 +86 gpu batchnorm fp16 12 +87 gpu add fp16 12 +88 gpu relu fp16 12 +89 gpu conv fp16 12 add fp16 1 +90 gpu batchnorm fp16 12 +91 gpu relu fp16 12 +92 gpu conv fp16 12 add fp16 1 +93 gpu batchnorm fp16 12 +94 gpu relu fp16 12 +95 gpu conv fp16 12 add fp16 1 +96 gpu batchnorm fp16 12 +97 gpu add fp16 12 +98 gpu relu fp16 12 +99 gpu conv fp16 12 add fp16 1 +100 gpu batchnorm fp16 12 +101 gpu relu fp16 12 +102 gpu conv fp16 12 add fp16 1 +103 gpu batchnorm fp16 12 +104 gpu relu fp16 12 +105 gpu conv fp16 12 add fp16 1 +106 gpu batchnorm fp16 12 +107 gpu add fp16 12 +108 gpu relu fp16 12 +109 gpu conv fp16 12 add fp16 1 +110 gpu batchnorm fp16 12 +111 gpu relu fp16 12 +112 gpu conv fp16 12 add fp16 1 +113 gpu batchnorm fp16 12 +114 gpu relu fp16 12 +115 gpu conv fp16 12 add fp16 1 +116 gpu batchnorm fp16 12 +117 gpu add fp16 12 +118 gpu relu fp16 12 +119 gpu conv fp16 12 add fp16 1 +120 gpu batchnorm fp16 12 +121 gpu relu fp16 12 +122 gpu conv fp16 12 add fp16 1 +123 gpu batchnorm fp16 12 +124 gpu relu fp16 12 +125 gpu conv fp16 12 add fp16 1 +126 gpu batchnorm fp16 12 +127 gpu add fp16 12 +128 gpu relu fp16 12 +129 gpu conv fp16 12 add fp16 1 +130 gpu batchnorm fp16 12 +131 gpu relu fp16 12 +132 gpu conv fp16 12 add fp16 1 +133 gpu batchnorm fp16 12 +134 gpu relu fp16 12 +135 gpu conv fp16 12 add fp16 1 +136 gpu batchnorm fp16 12 +137 gpu add fp16 12 +138 gpu relu fp16 12 +139 gpu conv fp16 12 add fp16 1 +140 gpu batchnorm fp16 12 +141 gpu relu fp16 12 +142 gpu conv fp16 12 add fp16 1 +143 gpu batchnorm fp16 12 +144 gpu relu fp16 12 +145 gpu conv fp16 12 add fp16 1 +146 gpu batchnorm fp16 12 +147 gpu conv fp16 12 add fp16 1 +148 gpu batchnorm fp16 12 +149 gpu add fp16 12 +150 gpu relu fp16 12 +151 gpu conv fp16 12 add fp16 1 +152 gpu batchnorm fp16 12 +153 gpu relu fp16 12 +154 gpu conv fp16 12 add fp16 1 +155 gpu batchnorm fp16 12 +156 gpu relu fp16 12 +157 gpu conv fp16 12 add fp16 1 +158 gpu batchnorm fp16 12 +159 gpu add fp16 12 +160 gpu relu fp16 12 +161 gpu conv fp16 12 add fp16 1 +162 gpu batchnorm fp16 12 +163 gpu relu fp16 12 +164 gpu conv fp16 12 add fp16 1 +165 gpu batchnorm fp16 12 +166 gpu relu fp16 12 +167 gpu conv fp16 12 add fp16 1 +168 gpu batchnorm fp16 12 +169 gpu add fp16 12 +170 gpu relu fp16 12 +171 gpu pool_max fp16 12 +172 gpu mul fp16 12 add fp16 1 +173 gpu softmax fp32 1 +----- ++++++ +conf5 1.8538077719438253 1.4749308494814874 73.82 1.8800000000000097 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu batchnorm fp16 12 +3 gpu conv fp16 12 add fp16 1 +4 gpu batchnorm fp16 12 +5 gpu relu fp16 12 +6 gpu conv fp16 12 add fp16 1 +7 gpu batchnorm fp16 12 +8 gpu relu fp16 12 +9 gpu conv fp16 12 add fp16 1 +10 gpu batchnorm fp16 12 +11 gpu conv fp16 12 add fp16 1 +12 gpu batchnorm fp16 12 +13 gpu add fp16 12 +14 gpu relu fp16 12 +15 gpu conv fp16 12 add fp16 1 +16 gpu batchnorm fp16 12 +17 gpu relu fp16 12 +18 gpu conv fp16 12 add fp16 1 +19 gpu batchnorm fp16 12 +20 gpu relu fp16 12 +21 gpu conv fp16 12 add fp16 1 +22 gpu batchnorm fp16 12 +23 gpu add fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 add fp16 1 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu conv perf_fp16 160 add fp16 1 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv fp16 11 add fp16 1 +32 gpu batchnorm fp16 12 +33 gpu add fp16 12 +34 gpu relu fp16 12 +35 gpu conv fp16 12 add fp16 1 +36 gpu batchnorm fp16 12 +37 gpu relu fp16 12 +38 gpu conv fp16 12 add fp16 1 +39 gpu batchnorm fp16 12 +40 gpu relu fp16 12 +41 gpu conv fp16 12 add fp16 1 +42 gpu batchnorm fp16 12 +43 gpu conv fp16 12 add fp16 1 +44 gpu batchnorm fp16 12 +45 gpu add fp16 12 +46 gpu relu fp16 12 +47 gpu conv fp16 12 add fp16 1 +48 gpu batchnorm fp16 12 +49 gpu relu fp16 12 +50 gpu conv fp16 12 add fp16 1 +51 gpu batchnorm fp16 12 +52 gpu relu fp16 12 +53 gpu conv perf_fp16 153 add fp16 1 +54 gpu batchnorm fp16 12 +55 gpu add fp16 12 +56 gpu relu fp16 12 +57 gpu conv fp16 12 add fp16 1 +58 gpu batchnorm fp16 12 +59 gpu relu fp16 12 +60 gpu conv fp16 12 add fp16 1 +61 gpu batchnorm fp16 12 +62 gpu relu fp16 12 +63 gpu conv fp16 12 add fp16 1 +64 gpu batchnorm fp16 12 +65 gpu add fp16 12 +66 gpu relu fp16 12 +67 gpu conv fp16 12 add fp16 1 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu conv fp16 12 add fp16 1 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv fp16 12 add fp16 1 +74 gpu batchnorm fp16 12 +75 gpu add fp16 12 +76 gpu relu fp16 12 +77 gpu conv fp16 12 add fp16 1 +78 gpu batchnorm fp16 12 +79 gpu relu fp16 12 +80 gpu conv fp16 12 add fp16 1 +81 gpu batchnorm fp16 12 +82 gpu relu fp16 12 +83 gpu conv fp16 12 add fp16 1 +84 gpu batchnorm fp16 12 +85 gpu conv fp16 12 add fp16 1 +86 gpu batchnorm fp16 12 +87 gpu add fp16 12 +88 gpu relu fp16 12 +89 gpu conv fp16 12 add fp16 1 +90 gpu batchnorm fp16 12 +91 gpu relu fp16 12 +92 gpu conv fp16 12 add fp16 1 +93 gpu batchnorm fp16 12 +94 gpu relu fp16 12 +95 gpu conv fp16 11 add fp16 1 +96 gpu batchnorm fp16 12 +97 gpu add fp16 12 +98 gpu relu fp16 12 +99 gpu conv fp16 12 add fp16 1 +100 gpu batchnorm fp16 12 +101 gpu relu fp16 12 +102 gpu conv perf_fp16 164 add fp16 1 +103 gpu batchnorm fp16 12 +104 gpu relu fp16 12 +105 gpu conv fp16 12 add fp16 1 +106 gpu batchnorm fp16 12 +107 gpu add fp16 12 +108 gpu relu fp16 12 +109 gpu conv fp16 12 add fp16 1 +110 gpu batchnorm fp16 12 +111 gpu relu fp16 12 +112 gpu conv fp16 12 add fp16 1 +113 gpu batchnorm fp16 12 +114 gpu relu fp16 12 +115 gpu conv fp16 12 add fp16 1 +116 gpu batchnorm fp16 12 +117 gpu add fp16 12 +118 gpu relu fp16 12 +119 gpu conv samp_fp16 268 add fp16 1 +120 gpu batchnorm fp16 12 +121 gpu relu fp16 12 +122 gpu conv fp16 12 add fp16 1 +123 gpu batchnorm fp16 12 +124 gpu relu fp16 12 +125 gpu conv fp16 12 add fp16 1 +126 gpu batchnorm fp16 12 +127 gpu add fp16 12 +128 gpu relu fp16 12 +129 gpu conv fp16 12 add fp16 1 +130 gpu batchnorm fp16 12 +131 gpu relu fp16 12 +132 gpu conv fp16 12 add fp16 1 +133 gpu batchnorm fp16 12 +134 gpu relu fp16 12 +135 gpu conv fp16 11 add fp16 1 +136 gpu batchnorm fp16 12 +137 gpu add fp16 12 +138 gpu relu fp16 12 +139 gpu conv fp16 12 add fp16 1 +140 gpu batchnorm fp16 12 +141 gpu relu fp16 12 +142 gpu conv fp16 12 add fp16 1 +143 gpu batchnorm fp16 12 +144 gpu relu fp16 12 +145 gpu conv fp16 12 add fp16 1 +146 gpu batchnorm fp16 12 +147 gpu conv fp16 12 add fp16 1 +148 gpu batchnorm fp16 12 +149 gpu add fp16 12 +150 gpu relu fp16 12 +151 gpu conv fp16 12 add fp16 1 +152 gpu batchnorm fp16 12 +153 gpu relu fp16 12 +154 gpu conv fp16 12 add fp16 1 +155 gpu batchnorm fp16 12 +156 gpu relu fp16 12 +157 gpu conv fp16 11 add fp16 1 +158 gpu batchnorm fp16 12 +159 gpu add fp16 12 +160 gpu relu fp16 12 +161 gpu conv fp16 12 add fp16 1 +162 gpu batchnorm fp16 12 +163 gpu relu fp16 12 +164 gpu conv fp16 12 add fp16 1 +165 gpu batchnorm fp16 12 +166 gpu relu fp16 12 +167 gpu conv fp16 12 add fp16 1 +168 gpu batchnorm fp16 12 +169 gpu add fp16 12 +170 gpu relu fp16 12 +171 gpu pool_max fp16 12 +172 gpu mul fp16 12 add fp16 1 +173 gpu softmax fp32 1 +----- ++++++ +conf6 1.8538077719438253 1.4749308494814874 73.7 2.0 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu batchnorm fp16 12 +3 gpu conv fp16 12 add fp16 1 +4 gpu batchnorm fp16 12 +5 gpu relu fp16 12 +6 gpu conv fp16 12 add fp16 1 +7 gpu batchnorm fp16 12 +8 gpu relu fp16 12 +9 gpu conv fp16 12 add fp16 1 +10 gpu batchnorm fp16 12 +11 gpu conv fp16 12 add fp16 1 +12 gpu batchnorm fp16 12 +13 gpu add fp16 12 +14 gpu relu fp16 12 +15 gpu conv fp16 12 add fp16 1 +16 gpu batchnorm fp16 12 +17 gpu relu fp16 12 +18 gpu conv fp16 12 add fp16 1 +19 gpu batchnorm fp16 12 +20 gpu relu fp16 12 +21 gpu conv fp16 12 add fp16 1 +22 gpu batchnorm fp16 12 +23 gpu add fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 add fp16 1 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu conv perf_fp16 160 add fp16 1 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv fp16 11 add fp16 1 +32 gpu batchnorm fp16 12 +33 gpu add fp16 12 +34 gpu relu fp16 12 +35 gpu conv fp16 12 add fp16 1 +36 gpu batchnorm fp16 12 +37 gpu relu fp16 12 +38 gpu conv fp16 12 add fp16 1 +39 gpu batchnorm fp16 12 +40 gpu relu fp16 12 +41 gpu conv fp16 12 add fp16 1 +42 gpu batchnorm fp16 12 +43 gpu conv fp16 12 add fp16 1 +44 gpu batchnorm fp16 12 +45 gpu add fp16 12 +46 gpu relu fp16 12 +47 gpu conv fp16 12 add fp16 1 +48 gpu batchnorm fp16 12 +49 gpu relu fp16 12 +50 gpu conv fp16 12 add fp16 1 +51 gpu batchnorm fp16 12 +52 gpu relu fp16 12 +53 gpu conv perf_fp16 153 add fp16 1 +54 gpu batchnorm fp16 12 +55 gpu add fp16 12 +56 gpu relu fp16 12 +57 gpu conv fp16 12 add fp16 1 +58 gpu batchnorm fp16 12 +59 gpu relu fp16 12 +60 gpu conv fp16 12 add fp16 1 +61 gpu batchnorm fp16 12 +62 gpu relu fp16 12 +63 gpu conv fp16 12 add fp16 1 +64 gpu batchnorm fp16 12 +65 gpu add fp16 12 +66 gpu relu fp16 12 +67 gpu conv fp16 12 add fp16 1 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu conv fp16 12 add fp16 1 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv fp16 12 add fp16 1 +74 gpu batchnorm fp16 12 +75 gpu add fp16 12 +76 gpu relu fp16 12 +77 gpu conv fp16 12 add fp16 1 +78 gpu batchnorm fp16 12 +79 gpu relu fp16 12 +80 gpu conv fp16 12 add fp16 1 +81 gpu batchnorm fp16 12 +82 gpu relu fp16 12 +83 gpu conv fp16 12 add fp16 1 +84 gpu batchnorm fp16 12 +85 gpu conv fp16 12 add fp16 1 +86 gpu batchnorm fp16 12 +87 gpu add fp16 12 +88 gpu relu fp16 12 +89 gpu conv fp16 12 add fp16 1 +90 gpu batchnorm fp16 12 +91 gpu relu fp16 12 +92 gpu conv fp16 12 add fp16 1 +93 gpu batchnorm fp16 12 +94 gpu relu fp16 12 +95 gpu conv fp16 11 add fp16 1 +96 gpu batchnorm fp16 12 +97 gpu add fp16 12 +98 gpu relu fp16 12 +99 gpu conv fp16 12 add fp16 1 +100 gpu batchnorm fp16 12 +101 gpu relu fp16 12 +102 gpu conv perf_fp16 164 add fp16 1 +103 gpu batchnorm fp16 12 +104 gpu relu fp16 12 +105 gpu conv fp16 12 add fp16 1 +106 gpu batchnorm fp16 12 +107 gpu add fp16 12 +108 gpu relu fp16 12 +109 gpu conv fp16 12 add fp16 1 +110 gpu batchnorm fp16 12 +111 gpu relu fp16 12 +112 gpu conv fp16 12 add fp16 1 +113 gpu batchnorm fp16 12 +114 gpu relu fp16 12 +115 gpu conv fp16 12 add fp16 1 +116 gpu batchnorm fp16 12 +117 gpu add fp16 12 +118 gpu relu fp16 12 +119 gpu conv samp_fp16 268 add fp16 1 +120 gpu batchnorm fp16 12 +121 gpu relu fp16 12 +122 gpu conv fp16 12 add fp16 1 +123 gpu batchnorm fp16 12 +124 gpu relu fp16 12 +125 gpu conv fp16 12 add fp16 1 +126 gpu batchnorm fp16 12 +127 gpu add fp16 12 +128 gpu relu fp16 12 +129 gpu conv fp16 12 add fp16 1 +130 gpu batchnorm fp16 12 +131 gpu relu fp16 12 +132 gpu conv fp16 12 add fp16 1 +133 gpu batchnorm fp16 12 +134 gpu relu fp16 12 +135 gpu conv fp16 12 add fp16 1 +136 gpu batchnorm fp16 12 +137 gpu add fp16 12 +138 gpu relu fp16 12 +139 gpu conv fp16 12 add fp16 1 +140 gpu batchnorm fp16 12 +141 gpu relu fp16 12 +142 gpu conv fp16 12 add fp16 1 +143 gpu batchnorm fp16 12 +144 gpu relu fp16 12 +145 gpu conv fp16 12 add fp16 1 +146 gpu batchnorm fp16 12 +147 gpu conv fp16 12 add fp16 1 +148 gpu batchnorm fp16 12 +149 gpu add fp16 12 +150 gpu relu fp16 12 +151 gpu conv fp16 12 add fp16 1 +152 gpu batchnorm fp16 12 +153 gpu relu fp16 12 +154 gpu conv fp16 12 add fp16 1 +155 gpu batchnorm fp16 12 +156 gpu relu fp16 12 +157 gpu conv fp16 12 add fp16 1 +158 gpu batchnorm fp16 12 +159 gpu add fp16 12 +160 gpu relu fp16 12 +161 gpu conv fp16 12 add fp16 1 +162 gpu batchnorm fp16 12 +163 gpu relu fp16 12 +164 gpu conv fp16 12 add fp16 1 +165 gpu batchnorm fp16 12 +166 gpu relu fp16 12 +167 gpu conv fp16 12 add fp16 1 +168 gpu batchnorm fp16 12 +169 gpu add fp16 12 +170 gpu relu fp16 12 +171 gpu pool_max fp16 12 +172 gpu mul fp16 12 add fp16 1 +173 gpu softmax fp32 1 +----- ++++++ +conf7 1.8577902325643394 1.478552049679054 72.82 2.8800000000000097 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +2 gpu batchnorm fp16 12 +3 gpu conv fp16 12 add fp16 1 +4 gpu batchnorm fp16 12 +5 gpu relu fp16 12 +6 gpu conv fp16 12 add fp16 1 +7 gpu batchnorm fp16 12 +8 gpu relu fp16 12 +9 gpu conv fp16 12 add fp16 1 +10 gpu batchnorm fp16 12 +11 gpu conv fp16 12 add fp16 1 +12 gpu batchnorm fp16 12 +13 gpu add fp16 12 +14 gpu relu fp16 12 +15 gpu conv fp16 12 add fp16 1 +16 gpu batchnorm fp16 12 +17 gpu relu fp16 12 +18 gpu conv fp16 12 add fp16 1 +19 gpu batchnorm fp16 12 +20 gpu relu fp16 12 +21 gpu conv fp16 12 add fp16 1 +22 gpu batchnorm fp16 12 +23 gpu add fp16 12 +24 gpu relu fp16 12 +25 gpu conv fp16 12 add fp16 1 +26 gpu batchnorm fp16 12 +27 gpu relu fp16 12 +28 gpu conv perf_fp16 160 add fp16 1 +29 gpu batchnorm fp16 12 +30 gpu relu fp16 12 +31 gpu conv fp16 11 add fp16 1 +32 gpu batchnorm fp16 12 +33 gpu add fp16 12 +34 gpu relu fp16 12 +35 gpu conv fp16 12 add fp16 1 +36 gpu batchnorm fp16 12 +37 gpu relu fp16 12 +38 gpu conv fp16 12 add fp16 1 +39 gpu batchnorm fp16 12 +40 gpu relu fp16 12 +41 gpu conv fp16 12 add fp16 1 +42 gpu batchnorm fp16 12 +43 gpu conv fp16 12 add fp16 1 +44 gpu batchnorm fp16 12 +45 gpu add fp16 12 +46 gpu relu fp16 12 +47 gpu conv fp16 12 add fp16 1 +48 gpu batchnorm fp16 12 +49 gpu relu fp16 12 +50 gpu conv fp16 12 add fp16 1 +51 gpu batchnorm fp16 12 +52 gpu relu fp16 12 +53 gpu conv fp16 11 add fp16 1 +54 gpu batchnorm fp16 12 +55 gpu add fp16 12 +56 gpu relu fp16 12 +57 gpu conv samp_fp16 268 add fp16 1 +58 gpu batchnorm fp16 12 +59 gpu relu fp16 12 +60 gpu conv fp16 12 add fp16 1 +61 gpu batchnorm fp16 12 +62 gpu relu fp16 12 +63 gpu conv fp16 12 add fp16 1 +64 gpu batchnorm fp16 12 +65 gpu add fp16 12 +66 gpu relu fp16 12 +67 gpu conv fp16 12 add fp16 1 +68 gpu batchnorm fp16 12 +69 gpu relu fp16 12 +70 gpu conv fp16 12 add fp16 1 +71 gpu batchnorm fp16 12 +72 gpu relu fp16 12 +73 gpu conv fp16 12 add fp16 1 +74 gpu batchnorm fp16 12 +75 gpu add fp16 12 +76 gpu relu fp16 12 +77 gpu conv fp16 12 add fp16 1 +78 gpu batchnorm fp16 12 +79 gpu relu fp16 12 +80 gpu conv fp16 12 add fp16 1 +81 gpu batchnorm fp16 12 +82 gpu relu fp16 12 +83 gpu conv fp16 12 add fp16 1 +84 gpu batchnorm fp16 12 +85 gpu conv fp16 12 add fp16 1 +86 gpu batchnorm fp16 12 +87 gpu add fp16 12 +88 gpu relu fp16 12 +89 gpu conv fp16 12 add fp16 1 +90 gpu batchnorm fp16 12 +91 gpu relu fp16 12 +92 gpu conv fp16 12 add fp16 1 +93 gpu batchnorm fp16 12 +94 gpu relu fp16 12 +95 gpu conv fp16 11 add fp16 1 +96 gpu batchnorm fp16 12 +97 gpu add fp16 12 +98 gpu relu fp16 12 +99 gpu conv fp16 12 add fp16 1 +100 gpu batchnorm fp16 12 +101 gpu relu fp16 12 +102 gpu conv perf_fp16 164 add fp16 1 +103 gpu batchnorm fp16 12 +104 gpu relu fp16 12 +105 gpu conv fp16 12 add fp16 1 +106 gpu batchnorm fp16 12 +107 gpu add fp16 12 +108 gpu relu fp16 12 +109 gpu conv fp16 12 add fp16 1 +110 gpu batchnorm fp16 12 +111 gpu relu fp16 12 +112 gpu conv fp16 12 add fp16 1 +113 gpu batchnorm fp16 12 +114 gpu relu fp16 12 +115 gpu conv fp16 12 add fp16 1 +116 gpu batchnorm fp16 12 +117 gpu add fp16 12 +118 gpu relu fp16 12 +119 gpu conv samp_fp16 268 add fp16 1 +120 gpu batchnorm fp16 12 +121 gpu relu fp16 12 +122 gpu conv fp16 12 add fp16 1 +123 gpu batchnorm fp16 12 +124 gpu relu fp16 12 +125 gpu conv fp16 12 add fp16 1 +126 gpu batchnorm fp16 12 +127 gpu add fp16 12 +128 gpu relu fp16 12 +129 gpu conv fp16 12 add fp16 1 +130 gpu batchnorm fp16 12 +131 gpu relu fp16 12 +132 gpu conv fp16 12 add fp16 1 +133 gpu batchnorm fp16 12 +134 gpu relu fp16 12 +135 gpu conv perf_fp16 158 add fp16 1 +136 gpu batchnorm fp16 12 +137 gpu add fp16 12 +138 gpu relu fp16 12 +139 gpu conv fp16 12 add fp16 1 +140 gpu batchnorm fp16 12 +141 gpu relu fp16 12 +142 gpu conv fp16 12 add fp16 1 +143 gpu batchnorm fp16 12 +144 gpu relu fp16 12 +145 gpu conv fp16 12 add fp16 1 +146 gpu batchnorm fp16 12 +147 gpu conv fp16 12 add fp16 1 +148 gpu batchnorm fp16 12 +149 gpu add fp16 12 +150 gpu relu fp16 12 +151 gpu conv fp16 12 add fp16 1 +152 gpu batchnorm fp16 12 +153 gpu relu fp16 12 +154 gpu conv fp16 12 add fp16 1 +155 gpu batchnorm fp16 12 +156 gpu relu fp16 12 +157 gpu conv fp16 11 add fp16 1 +158 gpu batchnorm fp16 12 +159 gpu add fp16 12 +160 gpu relu fp16 12 +161 gpu conv fp16 12 add fp16 1 +162 gpu batchnorm fp16 12 +163 gpu relu fp16 12 +164 gpu conv fp16 12 add fp16 1 +165 gpu batchnorm fp16 12 +166 gpu relu fp16 12 +167 gpu conv fp16 12 add fp16 1 +168 gpu batchnorm fp16 12 +169 gpu add fp16 12 +170 gpu relu fp16 12 +171 gpu pool_max fp16 12 +172 gpu mul fp16 12 add fp16 1 +173 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_cifar10/vgg16_cifar10.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_cifar10/vgg16_cifar10.txt new file mode 100644 index 0000000000000000000000000000000000000000..f4e185f358dbd2282b14c0865d829903d2d270e9 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_cifar10/vgg16_cifar10.txt @@ -0,0 +1,58 @@ +3776.508929999999 ++++++ +conf1 1 1 89.96 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 +2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 11 add fp32 1 relu fp32 1 +4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp32 11 add fp32 1 relu fp32 1 +6 gpu conv fp32 11 add fp32 1 relu fp32 1 +7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv fp32 11 add fp32 1 relu fp32 1 +9 gpu conv fp32 11 add fp32 1 relu fp32 1 +10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +11 gpu conv fp32 11 add fp32 1 relu fp32 1 +12 gpu conv fp32 11 add fp32 1 relu fp32 1 +13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +14 gpu mul fp32 11 add fp32 1 relu fp32 1 +15 gpu mul fp32 11 add fp32 1 +16 gpu softmax fp32 1 +----- ++++++ +conf2 2.4192803184847484 2.2393153800931898 89.22 0.7399999999999949 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 +2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv samp_fp16 266 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 12 add fp16 1 relu fp16 1 +6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +7 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +10 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 +12 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 +13 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 12 add fp16 1 relu fp16 1 +15 gpu mul fp16 12 add fp16 1 +16 gpu softmax fp32 1 +----- ++++++ +conf3 2.1240075032467187 1.9749367321301132 88.64 1.3199999999999932 +1 gpu conv fp16 11 add fp16 1 relu fp16 1 +2 gpu conv perf_fp16 167 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 12 add fp16 1 relu fp16 1 +6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +7 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf_fp16 152 add fp16 1 relu fp16 1 +9 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 +10 gpu conv fp16 11 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 +12 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 +13 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 12 add fp16 1 relu fp16 1 +15 gpu mul fp16 12 add fp16 1 +16 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_cifar100/vgg16_cifar100.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_cifar100/vgg16_cifar100.txt new file mode 100644 index 0000000000000000000000000000000000000000..b55bb668b140ebcc9ee911f728726afed7274f85 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_cifar100/vgg16_cifar100.txt @@ -0,0 +1,77 @@ +3768.819777999999 ++++++ +conf1 1 1 66.5 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 +2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 11 add fp32 1 relu fp32 1 +4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp32 11 add fp32 1 relu fp32 1 +6 gpu conv fp32 11 add fp32 1 relu fp32 1 +7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv fp32 11 add fp32 1 relu fp32 1 +9 gpu conv fp32 11 add fp32 1 relu fp32 1 +10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +11 gpu conv fp32 11 add fp32 1 relu fp32 1 +12 gpu conv fp32 11 add fp32 1 relu fp32 1 +13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +14 gpu mul fp32 11 add fp32 1 relu fp32 1 +15 gpu mul fp32 11 add fp32 1 +16 gpu softmax fp32 1 +----- ++++++ +conf2 2.2793321208062913 2.0502797911533945 66.42 0.0799999999999983 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 +2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 12 add fp16 1 relu fp16 1 +6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 +7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 12 add fp16 1 relu fp16 1 +9 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 +10 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +12 gpu conv fp16 11 add fp16 1 relu fp16 1 +13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 12 add fp16 1 relu fp16 1 +15 gpu mul fp16 12 add fp16 1 +16 gpu softmax fp32 1 +----- ++++++ +conf3 2.2793321208062913 2.0502797911533945 66.42 0.0799999999999983 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 +2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 12 add fp16 1 relu fp16 1 +6 gpu conv samp_fp16 269 add fp16 1 relu fp16 1 +7 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 12 add fp16 1 relu fp16 1 +9 gpu conv samp_fp16 267 add fp16 1 relu fp16 1 +10 gpu conv samp_fp16 268 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +12 gpu conv fp16 11 add fp16 1 relu fp16 1 +13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 12 add fp16 1 relu fp16 1 +15 gpu mul fp16 12 add fp16 1 +16 gpu softmax fp32 1 +----- ++++++ +conf4 2.664296720624579 2.427276363573644 64.7 1.7999999999999972 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 +2 gpu conv perf_fp16 153 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +4 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 12 add fp16 1 relu fp16 1 +6 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +7 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv perf_fp16 155 add fp16 1 relu fp16 1 +9 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 +10 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv perf_fp16 151 add fp16 1 relu fp16 1 +12 gpu conv samp_fp16 261 add fp16 1 relu fp16 1 +13 gpu conv samp_fp16 262 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 12 add fp16 1 relu fp16 1 +15 gpu mul fp16 12 add fp16 1 +16 gpu softmax fp32 1 +----- diff --git a/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_imagenet/vgg16_imagenet.txt b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_imagenet/vgg16_imagenet.txt new file mode 100644 index 0000000000000000000000000000000000000000..d0a23ffb10367c45ab76e4477f29932a5431e68b --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/PPoPP_results/soc_sim_results/emp_time/vgg16_imagenet/vgg16_imagenet.txt @@ -0,0 +1,41 @@ +19194.623482 ++++++ +conf1 1 1 72.84 0.0 +1 gpu conv fp32 11 add fp32 1 relu fp32 1 +2 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +3 gpu conv fp32 11 add fp32 1 relu fp32 1 +4 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +5 gpu conv fp32 11 add fp32 1 relu fp32 1 +6 gpu conv fp32 11 add fp32 1 relu fp32 1 +7 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +8 gpu conv fp32 11 add fp32 1 relu fp32 1 +9 gpu conv fp32 11 add fp32 1 relu fp32 1 +10 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +11 gpu conv fp32 11 add fp32 1 relu fp32 1 +12 gpu conv fp32 11 add fp32 1 relu fp32 1 +13 gpu conv fp32 11 add fp32 1 relu fp32 1 pool_max fp32 1 +14 gpu mul fp32 11 add fp32 1 relu fp32 1 +15 gpu mul fp32 11 add fp32 1 relu fp32 1 +16 gpu mul fp32 11 add fp32 1 +17 gpu softmax fp32 1 +----- ++++++ +conf2 1.7719381411481732 1.5850925672384186 72.84 0.0 +1 gpu conv fp16 12 add fp16 1 relu fp16 1 +2 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +3 gpu conv fp16 12 add fp16 1 relu fp16 1 +4 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +5 gpu conv fp16 12 add fp16 1 relu fp16 1 +6 gpu conv fp16 12 add fp16 1 relu fp16 1 +7 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +8 gpu conv fp16 12 add fp16 1 relu fp16 1 +9 gpu conv fp16 12 add fp16 1 relu fp16 1 +10 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +11 gpu conv fp16 12 add fp16 1 relu fp16 1 +12 gpu conv fp16 12 add fp16 1 relu fp16 1 +13 gpu conv fp16 12 add fp16 1 relu fp16 1 pool_max fp16 1 +14 gpu mul fp16 12 add fp16 1 relu fp16 1 +15 gpu mul fp16 12 add fp16 1 relu fp16 1 +16 gpu mul fp16 12 add fp16 1 +17 gpu softmax fp32 1 +-----