diff --git a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
index 820d41b9745e9893d4052ae500b7940aa05f0f7d..4f24d164c1a3a3a8081b9d98cd70977e81055dca 100644
--- a/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
+++ b/llvm/projects/hpvm-tensor-rt/CMakeLists.txt
@@ -129,11 +129,11 @@ target_link_libraries(fc2_cpu tensor_cpu_runtime ${GPU_PROFILER_LIB} ${SOC_SIMU
 add_executable(lenet_keras dnn_sources/src/lenet_keras.cc)
 target_link_libraries(lenet_keras tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(alexnet_cifar10 dnn_sources/src/alexnet_cifar10_front.cc)
+add_executable(alexnet_cifar10 dnn_sources/src/alexnet_cifar10.cc)
 target_link_libraries(alexnet_cifar10 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(alexnet_cifar10_tuner dnn_sources/src/alexnet_cifar10_tuner.cc)
-target_link_libraries(alexnet_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#-- add_executable(alexnet_cifar10_tuner dnn_sources/src/alexnet_cifar10_tuner.cc)
+#-- target_link_libraries(alexnet_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
 #add_executable(alexnet_cifar10_approx dnn_sources/src/alexnet_cifar10_approx.cc)
 #target_link_libraries(alexnet_cifar10_approx tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
@@ -141,14 +141,14 @@ target_link_libraries(alexnet_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB}
 add_executable(alexnet2_cifar10 dnn_sources/src/alexnet2_cifar10.cc)
 target_link_libraries(alexnet2_cifar10 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(alexnet2_cifar10_tuner dnn_sources/src/alexnet2_cifar10_tuner.cc)
-target_link_libraries(alexnet2_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#-- add_executable(alexnet2_cifar10_tuner dnn_sources/src/alexnet2_cifar10_tuner.cc)
+#-- target_link_libraries(alexnet2_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
 add_executable(vgg16_cifar10 dnn_sources/src/vgg16_cifar10.cc)
 target_link_libraries(vgg16_cifar10 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(vgg16_cifar10_tuner dnn_sources/src/vgg16_cifar10_tuner.cc)
-target_link_libraries(vgg16_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#-- add_executable(vgg16_cifar10_tuner dnn_sources/src/vgg16_cifar10_tuner.cc)
+#-- target_link_libraries(vgg16_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
 add_executable(resnet18_cifar10 dnn_sources/src/resnet18_cifar10.cc)
 target_link_libraries(resnet18_cifar10 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
@@ -159,29 +159,26 @@ target_link_libraries(resnet18_cifar10 tensor_runtime ${GPU_PROFILER_LIB} ${SOC
 #add_executable(resnet18_cifar10_inputapprox dnn_sources/src/resnet18_cifar10_inputapprox.cc)
 #target_link_libraries(resnet18_cifar10_inputapprox tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(resnet18_cifar10_tuner dnn_sources/src/resnet18_cifar10_tuner.cc)
-target_link_libraries(resnet18_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#-- add_executable(resnet18_cifar10_tuner dnn_sources/src/resnet18_cifar10_tuner.cc)
+#-- target_link_libraries(resnet18_cifar10_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
 add_executable(vgg16_cifar100 dnn_sources/src/vgg16_cifar100.cc)
 target_link_libraries(vgg16_cifar100 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(vgg16_cifar100_tuner dnn_sources/src/vgg16_cifar100_tuner.cc)
-target_link_libraries(vgg16_cifar100_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#-- add_executable(vgg16_cifar100_tuner dnn_sources/src/vgg16_cifar100_tuner.cc)
+#-- target_link_libraries(vgg16_cifar100_tuner tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(vgg16_cifar100_top5 dnn_sources/src/vgg16_cifar100_5.cc)
-target_link_libraries(vgg16_cifar100_top5 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#-- add_executable(vgg16_cifar100_top5 dnn_sources/src/vgg16_cifar100_5.cc)
+#-- target_link_libraries(vgg16_cifar100_top5 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
 
 # REF binaries
-add_executable(mobilenet_cifar10 dnn_sources/src/mobilenet_cifar10.cc)
-target_link_libraries(mobilenet_cifar10 tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-add_executable(mobilenet_depthwise dnn_sources/src/mobilenet_depthwise.cc)
+add_executable(mobilenet_depthwise dnn_sources/src/mobilenet.cc)
 target_link_libraries(mobilenet_depthwise tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
-
-add_executable(mobilenet_cifar10_shallow dnn_sources/src/mobilenet_cifar10_shallow.cc)
-target_link_libraries(mobilenet_cifar10_shallow tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#-- add_executable(mobilenet_cifar10_shallow dnn_sources/src/mobilenet_cifar10_shallow.cc)
+#-- target_link_libraries(mobilenet_cifar10_shallow tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
 add_executable(mobilenet_shallow_depthwise dnn_sources/src/mobilenet_shallow_depthwise.cc)
 target_link_libraries(mobilenet_shallow_depthwise tensor_runtime ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
@@ -189,8 +186,8 @@ target_link_libraries(mobilenet_shallow_depthwise tensor_runtime ${GPU_PROFILER
 add_executable(resnet_imagenet dnn_sources/src/resnet_imagenet.cc)
 target_link_libraries(resnet_imagenet tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
 
-add_executable(mobilenet_imagenet dnn_sources/src/mobilenet_imagenet.cc)
-target_link_libraries(mobilenet_imagenet tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
+#add_executable(mobilenet_imagenet dnn_sources/src/mobilenet_imagenet.cc)
+#target_link_libraries(mobilenet_imagenet tensor_runtime_online ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB})
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc
index fe71eb14caedba8d5813bbb0fa7feadcf0c72950..ee7f50bed8dd2dfccf00489f5fcca6b2aa941595 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet2_cifar10.cc
@@ -59,8 +59,8 @@ void testCifarNet(){
 
   startMemTracking();
 
-  int test_input_size = 10000;
-  int batch_size = 2500;
+  int test_input_size = 1000;
+  int batch_size = 1000;
   int batch_count = test_input_size / batch_size;
   float final_accuracy = 0.0;
 
@@ -121,6 +121,7 @@ void testCifarNet(){
 
     float accuracy = computeAccuracy2(labels, batch_size, result);
     final_accuracy += accuracy;
+    freeBatchMemory();
 
   }
diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc
index 3e5cec7d0760252ebff1b31293a51bdf570415f4..7d2da0ce7fdef3b76d26c1d9d4a2050f3a16a692 100644
--- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc
+++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10.cc
@@ -1,196 +1,106 @@
+#include <stdio.h>
+#include
<stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> +int main(){ + llvm_hpvm_initTensorRt(0); -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -bool Opentuner_run = false; - - -/* NOTE: Reference Architecture to use for profiling */ -void testCifarNet(){ - - int total_runs = 100; - if(Opentuner_run){ - total_runs = 1000000; - } - - - printf("********* CIFAR-10 DNN ********** \n"); - // FIXIT: Extend this to batch of images - currently 5 images - - int test_batch_size = 5000; - - //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); - uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size); - - void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin", - float_type, - test_batch_size, 3, 32, 32); - - void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin", - float_type, 64, 3, 11, 11); - void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin", - float_type, 1, 64, 1, 1); - void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin", - float_type, 192, 64, 5, 5); - void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin", - float_type, 1, 192, 1, 1); - - void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin", - float_type, 384, 192, 3, 3); - void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin", - float_type, 1, 384, 1, 1); - void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin", - float_type, 256, 384, 3, 3); - void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin", - float_type, 1, 256, 1, 1); - void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin", - float_type, 256, 256, 3, 3); - void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin", - float_type, 1, 256, 1, 1); - - void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin", - float_type, 1, 1, 4096, 10); - void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin", - float_type, 1, 10, 1, 1); + //std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); + std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); + std::string input_path = dir_prefix + std::string("input.bin"); + //void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); + std::string labels_path = dir_prefix + std::string("labels.bin"); + //uint8_t* labels = readLabels(labels_path.c_str(),10000); + std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); + void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); + std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); + void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); + std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin"); + void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); + std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin"); + void* 
conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); + std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin"); + void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); + std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin"); + void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); + std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin"); + void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); + std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin"); + void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); + std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin"); + void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); + std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin"); + void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); + std::string dense_1_w_path = dir_prefix + std::string("fc12.bin"); + void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); + std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin"); + void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); + + + startMemTracking(); + + int test_input_size = 1000; + int batch_size = 1000; + int batch_count = test_input_size / batch_size; + float final_accuracy = 0.0; + + // NOTE: Starting time profiling + startProfiling(); - clearTensorMap(); - - for(int i = 0; i < total_runs; i++){ - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd = open(myfifo, O_RDONLY); - - int ret_val = fcntl(fd, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - char str[100]; - read(fd, str, 80); - if(strcmp(str, "stop_run") == 0){ - abort(); - } - - close(fd); - } - - - readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters - - // Start power and performnce profiling - startProfiling(); - - int conv_mode = 1; // NOTE: using CROSS_CORRELATION - int conv_precision = 0; // NOTE: using Float as compute precision. 
FIXIT: use enum - - // NOTE: 'SAME' convolution - void* conv1out = tensorConvolution(input, conv1_filter, 5, 5, 1, 1, - conv_mode, conv_precision); - - tensorAdd(conv1out, conv1_bias); // NOTE: In place operation - - void* conv1_tanh = tensorTanh(conv1out); - - void* pool1out = tensorPooling(conv1_tanh, 0, 2, 2, 0, 0, 2, 2); - - // 2nd Layer - void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv2out, conv2_bias); // NOTE: In place operation - - void* conv2_tanh = tensorTanh(conv2out); - - void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); - - - // 3rd Layer - void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv3out, conv3_bias); // NOTE: In place operation - - void* conv3_tanh = tensorTanh(conv3out); - - // 4th Layer - void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv4out, conv4_bias); // NOTE: In place operation - - void* conv4_tanh = tensorTanh(conv4out); - - // 5th Layer - void* conv5out = tensorConvolution(conv4_tanh, conv5_filter, 1, 1, 1, 1, - conv_mode, conv_precision); - tensorAdd(conv5out, conv5_bias); // NOTE: In place operation - - void* conv5_tanh = tensorTanh(conv5out); - - void* pool5out = tensorPooling(conv5_tanh, 0, 2, 2, 0, 0, 2, 2); - - // final FC Layer - void* gemm1out = tensorGemmGPU(pool5out, fc1_weights); - - void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); - - void* result = tensorSoftmax(gemm1biasout); - - printTensorDims(result); - - // End profiling and dump output to profile.txt - stopProfiling(); - - computeAccuracy2(labels, test_batch_size, result); - - dumpAccuracyNorms(); - freeOutputTensors(); - - if(Opentuner_run){ - - const char* myfifo = "/tmp/myfifo"; - int fd_out = open(myfifo, O_WRONLY); - int ret_val = fcntl(fd_out, F_GETFD); - if(ret_val == -1){ - printf("Invalid descriptor \n"); - abort(); - } - - const char* str = "completed***!\n\0"; - write(fd_out, str, 80); - close(fd_out); - } + for(int i = 0; i < batch_count; i++){ + + int start = i * batch_size; + int end = (i + 1) * batch_size; + void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); + + void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0); + void* var_1 = tensorAdd(var_0, conv2d_1_b); + void* var_2 = tensorTanh(var_1); + void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); + void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); + void* var_6 = tensorAdd(var_5, conv2d_2_b); + void* var_7 = tensorTanh(var_6); + void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2); + void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); + void* var_11 = tensorAdd(var_10, conv2d_3_b); + void* var_12 = tensorTanh(var_11); + void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); + void* var_14 = tensorAdd(var_13, conv2d_4_b); + void* var_15 = tensorTanh(var_14); + void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); + void* var_17 = tensorAdd(var_16, conv2d_5_b); + void* var_18 = tensorTanh(var_17); + void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2); + void* var_22 = tensorGemmGPU(var_19, dense_1_w); + void* var_23 = tensorAdd(var_22, dense_1_b); + void* var_24 = tensorSoftmax(var_23); + + uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); + + float accuracy = computeAccuracy2(labels,batch_size,var_24); + final_accuracy += accuracy; + freeBatchMemory(); } + stopProfiling(); - -} - + 
final_accuracy = final_accuracy / batch_count; + dumpFinalAccuracy(final_accuracy); -int main(int argc, char* argv[]){ - if(argc > 1) - Opentuner_run = true; + llvm_hpvm_cleanupTensorRt(); - llvm_hpvm_initTensorRt(1); + return 0; - testCifarNet(); - - llvm_hpvm_cleanupTensorRt(); - - return 0; } - diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10_front.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10_front.cc deleted file mode 100644 index 84510c5342811eb20c8c7e834f4fcf34d5561ccb..0000000000000000000000000000000000000000 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10_front.cc +++ /dev/null @@ -1,104 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - - llvm_hpvm_initTensorRt(0); - - - std::string dir_prefix = std::string("../model_params/alexnet_cifar10_front/"); - std::string input_path = dir_prefix + std::string("input.bin"); - //void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - //uint8_t* labels = readLabels(labels_path.c_str(),10000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("fc12.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - startMemTracking(); - - int test_input_size = 10000; - int batch_size = 2500; - int batch_count = test_input_size / batch_size; - float final_accuracy = 0.0; - - // NOTE: Starting time profiling - startProfiling(); - - for(int i = 0; i < batch_count; i++){ - - int start = i * batch_size; - int end = (i + 1) * batch_size; - void* input = readInputBatch(input_path.c_str(), 0,start,end,3,32,32); - - 
void* var_0 = tensorConvolution(input, conv2d_1_w, 5, 5, 1, 1, 1, 0); - void* var_1 = tensorAdd(var_0, conv2d_1_b); - void* var_2 = tensorTanh(var_1); - void* var_3 = tensorPooling(var_2,0,2,2,0,0,2,2); - void* var_5 = tensorConvolution(var_3, conv2d_2_w, 2, 2, 1, 1, 1, 0); - void* var_6 = tensorAdd(var_5, conv2d_2_b); - void* var_7 = tensorTanh(var_6); - void* var_8 = tensorPooling(var_7,0,2,2,0,0,2,2); - void* var_10 = tensorConvolution(var_8, conv2d_3_w, 1, 1, 1, 1, 1, 0); - void* var_11 = tensorAdd(var_10, conv2d_3_b); - void* var_12 = tensorTanh(var_11); - void* var_13 = tensorConvolution(var_12, conv2d_4_w, 1, 1, 1, 1, 1, 0); - void* var_14 = tensorAdd(var_13, conv2d_4_b); - void* var_15 = tensorTanh(var_14); - void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); - void* var_17 = tensorAdd(var_16, conv2d_5_b); - void* var_18 = tensorTanh(var_17); - void* var_19 = tensorPooling(var_18,0,2,2,0,0,2,2); - void* var_22 = tensorGemmGPU(var_19, dense_1_w); - void* var_23 = tensorAdd(var_22, dense_1_b); - void* var_24 = tensorSoftmax(var_23); - - uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end); - - float accuracy = computeAccuracy2(labels,batch_size,var_24); - final_accuracy += accuracy; - - freeBatchMemory(); - } - - stopProfiling(); - - final_accuracy = final_accuracy / batch_count; - dumpFinalAccuracy(final_accuracy); - - - llvm_hpvm_cleanupTensorRt(); - - return 0; - -} diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10_old.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10_old.cc new file mode 100644 index 0000000000000000000000000000000000000000..3e5cec7d0760252ebff1b31293a51bdf570415f4 --- /dev/null +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/alexnet_cifar10_old.cc @@ -0,0 +1,196 @@ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <string.h> + + +#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../include/utils.h" + + +bool Opentuner_run = false; + + +/* NOTE: Reference Architecture to use for profiling */ +void testCifarNet(){ + + int total_runs = 100; + if(Opentuner_run){ + total_runs = 1000000; + } + + + printf("********* CIFAR-10 DNN ********** \n"); + // FIXIT: Extend this to batch of images - currently 5 images + + int test_batch_size = 5000; + + //uint8_t* labels = readLabels("../model_params/cifar_keras/labels.bin", test_batch_size); + uint8_t* labels = readLabels("../model_params/alexnet_cifar10/test_labels.bin", test_batch_size); + + void* input = readTrainedWeights("../model_params/alexnet_cifar10/norm_cifar_input.bin", + float_type, + test_batch_size, 3, 32, 32); + + void* conv1_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv1.bin", + float_type, 64, 3, 11, 11); + void* conv1_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv1_bias.bin", + float_type, 1, 64, 1, 1); + void* conv2_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv2.bin", + float_type, 192, 64, 5, 5); + void* conv2_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv2_bias.bin", + float_type, 1, 192, 1, 1); + + void* conv3_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv3.bin", + float_type, 384, 192, 3, 3); + void* conv3_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv3_bias.bin", + float_type, 1, 384, 1, 1); + void* conv4_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv4.bin", + float_type, 256, 384, 3, 3); + 
void* conv4_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv4_bias.bin", + float_type, 1, 256, 1, 1); + void* conv5_filter = readTrainedWeights("../model_params/alexnet_cifar10/conv5.bin", + float_type, 256, 256, 3, 3); + void* conv5_bias = readTrainedWeights("../model_params/alexnet_cifar10/conv5_bias.bin", + float_type, 1, 256, 1, 1); + + void* fc1_weights = readTrainedWeights("../model_params/alexnet_cifar10/fc1.bin", + float_type, 1, 1, 4096, 10); + void* fc1_bias = readTrainedWeights("../model_params/alexnet_cifar10/fc1_bias.bin", + float_type, 1, 10, 1, 1); + + + clearTensorMap(); + + for(int i = 0; i < total_runs; i++){ + + if(Opentuner_run){ + + const char* myfifo = "/tmp/myfifo"; + int fd = open(myfifo, O_RDONLY); + + int ret_val = fcntl(fd, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + char str[100]; + read(fd, str, 80); + if(strcmp(str, "stop_run") == 0){ + abort(); + } + + close(fd); + } + + + readOpenTunerFlags("opentuner_flags"); // Resets the OpenTuner counters + + // Start power and performnce profiling + startProfiling(); + + int conv_mode = 1; // NOTE: using CROSS_CORRELATION + int conv_precision = 0; // NOTE: using Float as compute precision. FIXIT: use enum + + // NOTE: 'SAME' convolution + void* conv1out = tensorConvolution(input, conv1_filter, 5, 5, 1, 1, + conv_mode, conv_precision); + + tensorAdd(conv1out, conv1_bias); // NOTE: In place operation + + void* conv1_tanh = tensorTanh(conv1out); + + void* pool1out = tensorPooling(conv1_tanh, 0, 2, 2, 0, 0, 2, 2); + + // 2nd Layer + void* conv2out = tensorConvolution(pool1out, conv2_filter, 2, 2, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv2out, conv2_bias); // NOTE: In place operation + + void* conv2_tanh = tensorTanh(conv2out); + + void* pool2out = tensorPooling(conv2_tanh, 0, 2, 2, 0, 0, 2, 2); + + + // 3rd Layer + void* conv3out = tensorConvolution(pool2out, conv3_filter, 1, 1, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv3out, conv3_bias); // NOTE: In place operation + + void* conv3_tanh = tensorTanh(conv3out); + + // 4th Layer + void* conv4out = tensorConvolution(conv3_tanh, conv4_filter, 1, 1, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv4out, conv4_bias); // NOTE: In place operation + + void* conv4_tanh = tensorTanh(conv4out); + + // 5th Layer + void* conv5out = tensorConvolution(conv4_tanh, conv5_filter, 1, 1, 1, 1, + conv_mode, conv_precision); + tensorAdd(conv5out, conv5_bias); // NOTE: In place operation + + void* conv5_tanh = tensorTanh(conv5out); + + void* pool5out = tensorPooling(conv5_tanh, 0, 2, 2, 0, 0, 2, 2); + + // final FC Layer + void* gemm1out = tensorGemmGPU(pool5out, fc1_weights); + + void* gemm1biasout = tensorAdd(gemm1out, fc1_bias); + + void* result = tensorSoftmax(gemm1biasout); + + printTensorDims(result); + + // End profiling and dump output to profile.txt + stopProfiling(); + + computeAccuracy2(labels, test_batch_size, result); + + dumpAccuracyNorms(); + freeOutputTensors(); + + if(Opentuner_run){ + + const char* myfifo = "/tmp/myfifo"; + int fd_out = open(myfifo, O_WRONLY); + int ret_val = fcntl(fd_out, F_GETFD); + if(ret_val == -1){ + printf("Invalid descriptor \n"); + abort(); + } + + const char* str = "completed***!\n\0"; + write(fd_out, str, 80); + close(fd_out); + } + + } + + + +} + + +int main(int argc, char* argv[]){ + + if(argc > 1) + Opentuner_run = true; + + llvm_hpvm_initTensorRt(1); + + testCifarNet(); + + llvm_hpvm_cleanupTensorRt(); + + return 0; +} + diff --git 
a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet.cc index ba7af9846916057fedc05757bdad77fefb01590e..107024c81a7d8124a46528f7a59fac5af340bcac 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet.cc @@ -1,4 +1,5 @@ + #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -6,15 +7,15 @@ #include <sys/types.h> #include <sys/stat.h> #include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../../tensor_runtime/include/tensor_runtime.h" #include "../include/utils.h" int main(){ - llvm_hpvm_initTensorRt(1); + llvm_hpvm_initTensorRt(0); - std::string dir_prefix = std::string("../model_params/mobilenet_hpvm_3/"); + std::string dir_prefix = std::string("../model_params/mobilenet/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -296,8 +297,8 @@ int main(){ startMemTracking(); - int test_input_size = 3000; - int batch_size = 1000; + int test_input_size = 5000; + int batch_size = 2500; int batch_count = test_input_size / batch_size; float final_accuracy = 0.0; @@ -311,95 +312,95 @@ int main(){ void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); void* var_2 = tensorRelu(var_1); - void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_5 = tensorRelu(var_4); - void* var_6 = tensorConvolution(var_5, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_8 = tensorRelu(var_7); - void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_11 = tensorRelu(var_10); - void* var_12 = tensorConvolution(var_11, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_14 = tensorRelu(var_13); - void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); - void* var_17 = tensorRelu(var_16); - void* var_18 = tensorConvolution(var_17, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); + void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); + void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); + void* var_6 = tensorRelu(var_5); + void* var_7 = 
tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); + void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); + void* var_9 = tensorRelu(var_8); + void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); + void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); + void* var_13 = tensorRelu(var_12); + void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); + void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); + void* var_16 = tensorRelu(var_15); + void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); + void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); void* var_20 = tensorRelu(var_19); - void* var_22 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_23 = tensorBatchNorm(var_22, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_24 = tensorRelu(var_23); - void* var_25 = tensorConvolution(var_24, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_26 = tensorBatchNorm(var_25, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_27 = tensorRelu(var_26); - void* var_28 = tensorConvolution(var_27, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_29 = tensorBatchNorm(var_28, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_30 = tensorRelu(var_29); - void* var_31 = tensorConvolution(var_30, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_32 = tensorBatchNorm(var_31, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_33 = tensorRelu(var_32); - void* var_35 = tensorConvolution(var_33, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_36 = tensorBatchNorm(var_35, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); - void* var_37 = tensorRelu(var_36); - void* var_38 = tensorConvolution(var_37, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_39 = tensorBatchNorm(var_38, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); - void* var_40 = tensorRelu(var_39); - void* var_41 = tensorConvolution(var_40, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); + void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); + void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); + void* var_23 = tensorRelu(var_22); + void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); + void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, 
batch_normalization_8_mean, batch_normalization_8_variance, 0.001); + void* var_28 = tensorRelu(var_27); + void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); + void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); + void* var_31 = tensorRelu(var_30); + void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); + void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); + void* var_35 = tensorRelu(var_34); + void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); + void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); + void* var_38 = tensorRelu(var_37); + void* var_41 = tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); + void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); + void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); + void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); void* var_46 = tensorRelu(var_45); - void* var_47 = tensorConvolution(var_46, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_48 = tensorBatchNorm(var_47, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_49 = tensorRelu(var_48); - void* var_50 = tensorConvolution(var_49, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_51 = tensorBatchNorm(var_50, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_52 = tensorRelu(var_51); - void* var_54 = tensorConvolution(var_52, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_55 = tensorBatchNorm(var_54, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); - void* var_56 = tensorRelu(var_55); - void* var_57 = tensorConvolution(var_56, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_58 = tensorBatchNorm(var_57, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); - void* var_59 = tensorRelu(var_58); - void* var_60 = tensorConvolution(var_59, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_61 = tensorBatchNorm(var_60, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_62 = tensorRelu(var_61); - void* var_63 = tensorConvolution(var_62, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_64 = tensorBatchNorm(var_63, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); + void* var_48 = tensorConvCutlass(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); + void* 
var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); + void* var_50 = tensorRelu(var_49); + void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); + void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); + void* var_53 = tensorRelu(var_52); + void* var_55 = tensorConvCutlass(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); + void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); + void* var_57 = tensorRelu(var_56); + void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); + void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); + void* var_60 = tensorRelu(var_59); + void* var_63 = tensorConvCutlass(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); + void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); void* var_65 = tensorRelu(var_64); - void* var_66 = tensorConvolution(var_65, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); + void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); + void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); void* var_68 = tensorRelu(var_67); - void* var_69 = tensorConvolution(var_68, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_70 = tensorBatchNorm(var_69, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_71 = tensorRelu(var_70); - void* var_73 = tensorConvolution(var_71, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_74 = tensorBatchNorm(var_73, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); + void* var_70 = tensorConvCutlass(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); + void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); + void* var_72 = tensorRelu(var_71); + void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); + void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); void* var_75 = tensorRelu(var_74); - void* var_76 = tensorConvolution(var_75, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_77 = tensorBatchNorm(var_76, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_78 = tensorRelu(var_77); - void* var_79 = tensorConvolution(var_78, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_80 = tensorBatchNorm(var_79, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_81 = 
tensorRelu(var_80); - void* var_82 = tensorConvolution(var_81, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_83 = tensorBatchNorm(var_82, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_84 = tensorRelu(var_83); - void* var_86 = tensorPooling(var_84,1,2,2,0,0,2,2); - void* var_88 = tensorGemmGPU(var_86, dense_1_w); - void* var_89 = tensorAdd(var_88, dense_1_b); - void* var_90 = tensorSoftmax(var_89); + void* var_77 = tensorConvCutlass(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); + void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); + void* var_79 = tensorRelu(var_78); + void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); + void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); + void* var_82 = tensorRelu(var_81); + void* var_85 = tensorConvCutlass(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); + void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); + void* var_87 = tensorRelu(var_86); + void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); + void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); + void* var_90 = tensorRelu(var_89); + void* var_92 = tensorConvCutlass(var_90, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); + void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); + void* var_94 = tensorRelu(var_93); + void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); + void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); + void* var_97 = tensorRelu(var_96); + void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); + void* var_101 = tensorGemmGPU(var_99, dense_1_w); + void* var_102 = tensorAdd(var_101, dense_1_b); + void* var_103 = tensorSoftmax(var_102); uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - float accuracy = computeAccuracy2(labels, batch_size, var_90); + float accuracy = computeAccuracy2(labels, batch_size, var_103); final_accuracy += accuracy; freeBatchMemory(); - + } final_accuracy = final_accuracy / batch_count; diff --git a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_depthwise.cc b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_old.cc similarity index 88% rename from llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_depthwise.cc rename to llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_old.cc index 107024c81a7d8124a46528f7a59fac5af340bcac..ba7af9846916057fedc05757bdad77fefb01590e 100644 --- a/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_depthwise.cc +++ b/llvm/projects/hpvm-tensor-rt/dnn_sources/src/mobilenet_old.cc @@ -1,5 +1,4 @@ - #include <stdio.h> #include <stdlib.h> #include <unistd.h> @@ -7,15 +6,15 @@ #include <sys/types.h> #include <sys/stat.h> #include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" +#include "../../tensor_runtime/include/tensor_runtime.h" 
#include "../include/utils.h" int main(){ - llvm_hpvm_initTensorRt(0); + llvm_hpvm_initTensorRt(1); - std::string dir_prefix = std::string("../model_params/mobilenet/"); + std::string dir_prefix = std::string("../model_params/mobilenet_hpvm_3/"); std::string input_path = dir_prefix + std::string("input.bin"); std::string labels_path = dir_prefix + std::string("labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -297,8 +296,8 @@ int main(){ startMemTracking(); - int test_input_size = 5000; - int batch_size = 2500; + int test_input_size = 3000; + int batch_size = 1000; int batch_count = test_input_size / batch_size; float final_accuracy = 0.0; @@ -312,95 +311,95 @@ int main(){ void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 1); void* var_1 = tensorBatchNorm(var_0, batch_normalization_1_gamma, batch_normalization_1_beta, batch_normalization_1_mean, batch_normalization_1_variance, 0.001); void* var_2 = tensorRelu(var_1); - void* var_4 = tensorConvCutlass(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); - void* var_5 = tensorBatchNorm(var_4, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); - void* var_6 = tensorRelu(var_5); - void* var_7 = tensorConvolution(var_6, conv2d_2_w, 0, 0, 1, 1, 1, 1); - void* var_8 = tensorBatchNorm(var_7, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); - void* var_9 = tensorRelu(var_8); - void* var_11 = tensorConvCutlass(var_9, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); - void* var_12 = tensorBatchNorm(var_11, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); - void* var_13 = tensorRelu(var_12); - void* var_14 = tensorConvolution(var_13, conv2d_3_w, 0, 0, 1, 1, 1, 1); - void* var_15 = tensorBatchNorm(var_14, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); - void* var_16 = tensorRelu(var_15); - void* var_18 = tensorConvCutlass(var_16, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 128); - void* var_19 = tensorBatchNorm(var_18, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); + void* var_3 = tensorConvolution(var_2, depthwise_conv2d_1_w, 1, 1, 1, 1, 1, 32); + void* var_4 = tensorBatchNorm(var_3, batch_normalization_2_gamma, batch_normalization_2_beta, batch_normalization_2_mean, batch_normalization_2_variance, 0.001); + void* var_5 = tensorRelu(var_4); + void* var_6 = tensorConvolution(var_5, conv2d_2_w, 0, 0, 1, 1, 1, 1); + void* var_7 = tensorBatchNorm(var_6, batch_normalization_3_gamma, batch_normalization_3_beta, batch_normalization_3_mean, batch_normalization_3_variance, 0.001); + void* var_8 = tensorRelu(var_7); + void* var_9 = tensorConvolution(var_8, depthwise_conv2d_2_w, 1, 1, 2, 2, 1, 64); + void* var_10 = tensorBatchNorm(var_9, batch_normalization_4_gamma, batch_normalization_4_beta, batch_normalization_4_mean, batch_normalization_4_variance, 0.001); + void* var_11 = tensorRelu(var_10); + void* var_12 = tensorConvolution(var_11, conv2d_3_w, 0, 0, 1, 1, 1, 1); + void* var_13 = tensorBatchNorm(var_12, batch_normalization_5_gamma, batch_normalization_5_beta, batch_normalization_5_mean, batch_normalization_5_variance, 0.001); + void* var_14 = tensorRelu(var_13); + void* var_15 = tensorConvolution(var_14, depthwise_conv2d_3_w, 1, 1, 1, 1, 1, 
128); + void* var_16 = tensorBatchNorm(var_15, batch_normalization_6_gamma, batch_normalization_6_beta, batch_normalization_6_mean, batch_normalization_6_variance, 0.001); + void* var_17 = tensorRelu(var_16); + void* var_18 = tensorConvolution(var_17, conv2d_4_w, 0, 0, 1, 1, 1, 1); + void* var_19 = tensorBatchNorm(var_18, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); void* var_20 = tensorRelu(var_19); - void* var_21 = tensorConvolution(var_20, conv2d_4_w, 0, 0, 1, 1, 1, 1); - void* var_22 = tensorBatchNorm(var_21, batch_normalization_7_gamma, batch_normalization_7_beta, batch_normalization_7_mean, batch_normalization_7_variance, 0.001); - void* var_23 = tensorRelu(var_22); - void* var_26 = tensorConvCutlass(var_23, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); - void* var_27 = tensorBatchNorm(var_26, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); - void* var_28 = tensorRelu(var_27); - void* var_29 = tensorConvolution(var_28, conv2d_5_w, 0, 0, 1, 1, 1, 1); - void* var_30 = tensorBatchNorm(var_29, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); - void* var_31 = tensorRelu(var_30); - void* var_33 = tensorConvCutlass(var_31, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); - void* var_34 = tensorBatchNorm(var_33, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); - void* var_35 = tensorRelu(var_34); - void* var_36 = tensorConvolution(var_35, conv2d_6_w, 0, 0, 1, 1, 1, 1); - void* var_37 = tensorBatchNorm(var_36, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); - void* var_38 = tensorRelu(var_37); - void* var_41 = tensorConvCutlass(var_38, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); - void* var_42 = tensorBatchNorm(var_41, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); + void* var_22 = tensorConvolution(var_20, depthwise_conv2d_4_w, 1, 1, 2, 2, 1, 128); + void* var_23 = tensorBatchNorm(var_22, batch_normalization_8_gamma, batch_normalization_8_beta, batch_normalization_8_mean, batch_normalization_8_variance, 0.001); + void* var_24 = tensorRelu(var_23); + void* var_25 = tensorConvolution(var_24, conv2d_5_w, 0, 0, 1, 1, 1, 1); + void* var_26 = tensorBatchNorm(var_25, batch_normalization_9_gamma, batch_normalization_9_beta, batch_normalization_9_mean, batch_normalization_9_variance, 0.001); + void* var_27 = tensorRelu(var_26); + void* var_28 = tensorConvolution(var_27, depthwise_conv2d_5_w, 1, 1, 1, 1, 1, 256); + void* var_29 = tensorBatchNorm(var_28, batch_normalization_10_gamma, batch_normalization_10_beta, batch_normalization_10_mean, batch_normalization_10_variance, 0.001); + void* var_30 = tensorRelu(var_29); + void* var_31 = tensorConvolution(var_30, conv2d_6_w, 0, 0, 1, 1, 1, 1); + void* var_32 = tensorBatchNorm(var_31, batch_normalization_11_gamma, batch_normalization_11_beta, batch_normalization_11_mean, batch_normalization_11_variance, 0.001); + void* var_33 = tensorRelu(var_32); + void* var_35 = tensorConvolution(var_33, depthwise_conv2d_6_w, 1, 1, 2, 2, 1, 256); + void* var_36 = tensorBatchNorm(var_35, batch_normalization_12_gamma, batch_normalization_12_beta, batch_normalization_12_mean, batch_normalization_12_variance, 0.001); + 
void* var_37 = tensorRelu(var_36); + void* var_38 = tensorConvolution(var_37, conv2d_7_w, 0, 0, 1, 1, 1, 1); + void* var_39 = tensorBatchNorm(var_38, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); + void* var_40 = tensorRelu(var_39); + void* var_41 = tensorConvolution(var_40, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); + void* var_42 = tensorBatchNorm(var_41, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); void* var_43 = tensorRelu(var_42); - void* var_44 = tensorConvolution(var_43, conv2d_7_w, 0, 0, 1, 1, 1, 1); - void* var_45 = tensorBatchNorm(var_44, batch_normalization_13_gamma, batch_normalization_13_beta, batch_normalization_13_mean, batch_normalization_13_variance, 0.001); + void* var_44 = tensorConvolution(var_43, conv2d_8_w, 0, 0, 1, 1, 1, 1); + void* var_45 = tensorBatchNorm(var_44, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); void* var_46 = tensorRelu(var_45); - void* var_48 = tensorConvCutlass(var_46, depthwise_conv2d_7_w, 1, 1, 1, 1, 1, 512); - void* var_49 = tensorBatchNorm(var_48, batch_normalization_14_gamma, batch_normalization_14_beta, batch_normalization_14_mean, batch_normalization_14_variance, 0.001); - void* var_50 = tensorRelu(var_49); - void* var_51 = tensorConvolution(var_50, conv2d_8_w, 0, 0, 1, 1, 1, 1); - void* var_52 = tensorBatchNorm(var_51, batch_normalization_15_gamma, batch_normalization_15_beta, batch_normalization_15_mean, batch_normalization_15_variance, 0.001); - void* var_53 = tensorRelu(var_52); - void* var_55 = tensorConvCutlass(var_53, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); - void* var_56 = tensorBatchNorm(var_55, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); - void* var_57 = tensorRelu(var_56); - void* var_58 = tensorConvolution(var_57, conv2d_9_w, 0, 0, 1, 1, 1, 1); - void* var_59 = tensorBatchNorm(var_58, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); - void* var_60 = tensorRelu(var_59); - void* var_63 = tensorConvCutlass(var_60, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); - void* var_64 = tensorBatchNorm(var_63, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); + void* var_47 = tensorConvolution(var_46, depthwise_conv2d_8_w, 1, 1, 1, 1, 1, 512); + void* var_48 = tensorBatchNorm(var_47, batch_normalization_16_gamma, batch_normalization_16_beta, batch_normalization_16_mean, batch_normalization_16_variance, 0.001); + void* var_49 = tensorRelu(var_48); + void* var_50 = tensorConvolution(var_49, conv2d_9_w, 0, 0, 1, 1, 1, 1); + void* var_51 = tensorBatchNorm(var_50, batch_normalization_17_gamma, batch_normalization_17_beta, batch_normalization_17_mean, batch_normalization_17_variance, 0.001); + void* var_52 = tensorRelu(var_51); + void* var_54 = tensorConvolution(var_52, depthwise_conv2d_9_w, 1, 1, 1, 1, 1, 512); + void* var_55 = tensorBatchNorm(var_54, batch_normalization_18_gamma, batch_normalization_18_beta, batch_normalization_18_mean, batch_normalization_18_variance, 0.001); + void* var_56 = tensorRelu(var_55); + void* var_57 = tensorConvolution(var_56, conv2d_10_w, 0, 0, 1, 1, 1, 1); + void* var_58 = tensorBatchNorm(var_57, batch_normalization_19_gamma, 
batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); + void* var_59 = tensorRelu(var_58); + void* var_60 = tensorConvolution(var_59, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); + void* var_61 = tensorBatchNorm(var_60, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); + void* var_62 = tensorRelu(var_61); + void* var_63 = tensorConvolution(var_62, conv2d_11_w, 0, 0, 1, 1, 1, 1); + void* var_64 = tensorBatchNorm(var_63, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); void* var_65 = tensorRelu(var_64); - void* var_66 = tensorConvolution(var_65, conv2d_10_w, 0, 0, 1, 1, 1, 1); - void* var_67 = tensorBatchNorm(var_66, batch_normalization_19_gamma, batch_normalization_19_beta, batch_normalization_19_mean, batch_normalization_19_variance, 0.001); + void* var_66 = tensorConvolution(var_65, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); + void* var_67 = tensorBatchNorm(var_66, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); void* var_68 = tensorRelu(var_67); - void* var_70 = tensorConvCutlass(var_68, depthwise_conv2d_10_w, 1, 1, 1, 1, 1, 512); - void* var_71 = tensorBatchNorm(var_70, batch_normalization_20_gamma, batch_normalization_20_beta, batch_normalization_20_mean, batch_normalization_20_variance, 0.001); - void* var_72 = tensorRelu(var_71); - void* var_73 = tensorConvolution(var_72, conv2d_11_w, 0, 0, 1, 1, 1, 1); - void* var_74 = tensorBatchNorm(var_73, batch_normalization_21_gamma, batch_normalization_21_beta, batch_normalization_21_mean, batch_normalization_21_variance, 0.001); + void* var_69 = tensorConvolution(var_68, conv2d_12_w, 0, 0, 1, 1, 1, 1); + void* var_70 = tensorBatchNorm(var_69, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); + void* var_71 = tensorRelu(var_70); + void* var_73 = tensorConvolution(var_71, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); + void* var_74 = tensorBatchNorm(var_73, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); void* var_75 = tensorRelu(var_74); - void* var_77 = tensorConvCutlass(var_75, depthwise_conv2d_11_w, 1, 1, 1, 1, 1, 512); - void* var_78 = tensorBatchNorm(var_77, batch_normalization_22_gamma, batch_normalization_22_beta, batch_normalization_22_mean, batch_normalization_22_variance, 0.001); - void* var_79 = tensorRelu(var_78); - void* var_80 = tensorConvolution(var_79, conv2d_12_w, 0, 0, 1, 1, 1, 1); - void* var_81 = tensorBatchNorm(var_80, batch_normalization_23_gamma, batch_normalization_23_beta, batch_normalization_23_mean, batch_normalization_23_variance, 0.001); - void* var_82 = tensorRelu(var_81); - void* var_85 = tensorConvCutlass(var_82, depthwise_conv2d_12_w, 1, 1, 2, 2, 1, 512); - void* var_86 = tensorBatchNorm(var_85, batch_normalization_24_gamma, batch_normalization_24_beta, batch_normalization_24_mean, batch_normalization_24_variance, 0.001); - void* var_87 = tensorRelu(var_86); - void* var_88 = tensorConvolution(var_87, conv2d_13_w, 0, 0, 1, 1, 1, 1); - void* var_89 = tensorBatchNorm(var_88, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); - void* var_90 = tensorRelu(var_89); - void* var_92 = tensorConvCutlass(var_90, 
depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); - void* var_93 = tensorBatchNorm(var_92, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); - void* var_94 = tensorRelu(var_93); - void* var_95 = tensorConvolution(var_94, conv2d_14_w, 0, 0, 1, 1, 1, 1); - void* var_96 = tensorBatchNorm(var_95, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); - void* var_97 = tensorRelu(var_96); - void* var_99 = tensorPooling(var_97,1,2,2,0,0,2,2); - void* var_101 = tensorGemmGPU(var_99, dense_1_w); - void* var_102 = tensorAdd(var_101, dense_1_b); - void* var_103 = tensorSoftmax(var_102); + void* var_76 = tensorConvolution(var_75, conv2d_13_w, 0, 0, 1, 1, 1, 1); + void* var_77 = tensorBatchNorm(var_76, batch_normalization_25_gamma, batch_normalization_25_beta, batch_normalization_25_mean, batch_normalization_25_variance, 0.001); + void* var_78 = tensorRelu(var_77); + void* var_79 = tensorConvolution(var_78, depthwise_conv2d_13_w, 1, 1, 1, 1, 1, 1024); + void* var_80 = tensorBatchNorm(var_79, batch_normalization_26_gamma, batch_normalization_26_beta, batch_normalization_26_mean, batch_normalization_26_variance, 0.001); + void* var_81 = tensorRelu(var_80); + void* var_82 = tensorConvolution(var_81, conv2d_14_w, 0, 0, 1, 1, 1, 1); + void* var_83 = tensorBatchNorm(var_82, batch_normalization_27_gamma, batch_normalization_27_beta, batch_normalization_27_mean, batch_normalization_27_variance, 0.001); + void* var_84 = tensorRelu(var_83); + void* var_86 = tensorPooling(var_84,1,2,2,0,0,2,2); + void* var_88 = tensorGemmGPU(var_86, dense_1_w); + void* var_89 = tensorAdd(var_88, dense_1_b); + void* var_90 = tensorSoftmax(var_89); uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - float accuracy = computeAccuracy2(labels, batch_size, var_103); + float accuracy = computeAccuracy2(labels, batch_size, var_90); final_accuracy += accuracy; freeBatchMemory(); - + } final_accuracy = final_accuracy / batch_count;
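
Note: every benchmark source this patch rewrites (alexnet_cifar10.cc and the mobilenet variants) shares one batched-inference harness: startMemTracking(), a loop that reads a single slice of inputs and labels per iteration, accuracy accumulation, and the per-batch freeBatchMemory() call that the patch also adds to alexnet2_cifar10.cc. The sketch below distills that harness for reference; it is not a file from this patch. The runtime calls and their argument order are taken from the sources above, while the two-layer model, the example_net weight files, and all shapes are hypothetical placeholders.

#include <string>
#include <stdint.h>
#include "../../tensor_runtime/include/tensor_runtime.h"
#include "../include/utils.h"

int main(){
  llvm_hpvm_initTensorRt(0);  // argument selects the GPU, as in the rewritten sources

  // Hypothetical model and weight files; only the call pattern mirrors the patch.
  std::string dir_prefix  = std::string("../model_params/example_net/");
  std::string input_path  = dir_prefix + std::string("input.bin");
  std::string labels_path = dir_prefix + std::string("labels.bin");
  void* conv_w  = readTrainedWeights((dir_prefix + "conv_w.bin").c_str(),  0, 64, 3, 3, 3);
  void* dense_w = readTrainedWeights((dir_prefix + "dense_w.bin").c_str(), 0, 1, 1, 16384, 10);

  startMemTracking();  // lets freeBatchMemory() reclaim every tensor created in a batch

  int test_input_size = 1000;  // the patch shrinks these so one batch covers the set
  int batch_size      = 1000;
  int batch_count     = test_input_size / batch_size;
  float final_accuracy = 0.0;

  startProfiling();
  for(int i = 0; i < batch_count; i++){
    int start = i * batch_size;
    int end   = (i + 1) * batch_size;

    // Load only this batch's slice of the inputs (NCHW: 3 x 32 x 32 here).
    void* input = readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32);

    void* c1  = tensorConvolution(input, conv_w, 1, 1, 1, 1, 1, 0);  // SAME conv
    void* a1  = tensorTanh(c1);
    void* p1  = tensorPooling(a1, 0, 2, 2, 0, 0, 2, 2);   // 64 x 16 x 16 = 16384 feats
    void* fc  = tensorGemmGPU(p1, dense_w);
    void* out = tensorSoftmax(fc);

    uint8_t* labels = readLabelsBatch(labels_path.c_str(), start, end);
    final_accuracy += computeAccuracy2(labels, batch_size, out);

    freeBatchMemory();  // frees this batch's tensors before the next iteration
  }
  stopProfiling();

  dumpFinalAccuracy(final_accuracy / batch_count);
  llvm_hpvm_cleanupTensorRt();
  return 0;
}

The pairing of startMemTracking() with freeBatchMemory() at the end of each iteration is what lets these sources sweep a large test set in fixed GPU memory; the alexnet2_cifar10.cc hunk above adds exactly that call.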