From 03425866e7a72c1bf50cef375c75d26460f8c13e Mon Sep 17 00:00:00 2001 From: Yifan Zhao <yifanz16@illinois.edu> Date: Thu, 25 Mar 2021 03:51:03 -0500 Subject: [PATCH] Fixed bugs in hpvm-c benchmarks --- .../alexnet2_cifar10/alexnet2_cifar10.cpp | 72 +- .../alexnet2_cifar10_cudnn.cpp | 54 +- .../alexnet_cifar10/alexnet_cifar10.cpp | 69 +- .../alexnet_cifar10/alexnet_cifar10_cudnn.cpp | 48 +- .../alexnet_imagenet/alexnet_imagenet.cpp | 74 +- .../alexnet_imagenet_cudnn.cpp | 51 +- .../benchmarks/lenet_mnist/lenet_mnist.cpp | 48 +- .../lenet_mnist/lenet_mnist_cudnn.cpp | 53 +- .../mobilenet_cifar10/mobilenet_cifar10.cpp | 49 +- .../mobilenet_cifar10_cudnn.cpp | 49 +- .../resnet18_cifar10/resnet18_cifar10.cpp | 74 +- .../resnet18_cifar10_cudnn.cpp | 51 +- .../resnet50_imagenet/resnet50_imagenet.cpp | 64 +- .../resnet50_imagenet_cudnn.cpp | 48 +- .../vgg16_cifar10/vgg16_cifar10.cpp | 67 +- .../vgg16_cifar10/vgg16_cifar10_cudnn.cpp | 60 +- .../vgg16_cifar100/vgg16_cifar100.cpp | 68 +- .../vgg16_cifar100/vgg16_cifar100_cudnn.cpp | 51 +- .../vgg16_imagenet/vgg16_imagenet.cpp | 65 +- .../vgg16_imagenet/vgg16_imagenet_cudnn.cpp | 48 +- .../hpvm-c/include/tensorUtils.h | 663 ++++++++---------- 21 files changed, 771 insertions(+), 1055 deletions(-) diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp index dd689d202a..35f8188f78 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -411,10 +405,12 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; - + std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = @@ -458,12 +454,10 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - // void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); - // uint8_t* labels = readLabels(labels_path.c_str(),10000); - - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -493,45 +487,21 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - - std::string input_path = dir_prefix + std::string("test_input.bin"); - // void* input = create4DTensor(0,nchw,batch_size,3,32,32); - + __hpvm__init(); startMemTracking(); - startProfiling(); - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - // copyInputBatch(input_path.c_str(),start,end,3,32,32, input); - - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp index 448db0b4c8..5bcc5b627b 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -411,10 +405,17 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; + std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = @@ -458,9 +459,10 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -490,41 +492,25 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - - std::string input_path = dir_prefix + std::string("test_input.bin"); - void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); - + __hpvm__init(); + float total_accuracy = 0; startMemTracking(); +#pragma clang loop unroll(disable) for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - + int start = i * batch_size, end = start + batch_size; copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); - args->input = input; - args->input_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); - - computeAccuracy3(labels, result); - - // llvm_hpvm_invokeRtControl2(result, labels); - + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; freeBatchMemory(); } - + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp index ae5f31b7dc..51e0dd137d 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -362,12 +356,12 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; - std::string input_path = dir_prefix + std::string("test_input.bin"); - // void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint8_t *labels = readLabels(labels_path.c_str(), 5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -406,11 +400,10 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - - // args->input = input; - // args->input_bytes = 0; + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -436,43 +429,21 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - + __hpvm__init(); startMemTracking(); - startProfiling(); - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - // copyInputBatch(input_path.c_str(),start,end,3,32,32, input); - - // Replaced create4DTensor and copyInputBatch with readInputBatch - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp index ab4f6a2402..74c5420fd9 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -363,12 +357,17 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); - void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -407,9 +406,8 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -436,14 +434,26 @@ int main() { args->dense_1_w_bytes = 0; args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->r.tensor; - - hpvm_request_tensor(result, 0); + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp index d49c0d2d06..16bcecf939 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp @@ -1,14 +1,8 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <config.h> #include <hpvm.h> +#include <string> #include <tensorTypes.h> #include <tensorUtils.h> -#include <config.h> void var_0_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { __hpvm__hint(hpvm::TENSOR_TARGET); @@ -460,11 +454,11 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { - - std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; +const int batch_size = 100, input_size = 5000, + batch_count = input_size / batch_size; +int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -515,16 +509,11 @@ int main() { std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin"); void *dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1); - // void* input = readTrainedWeights(input_path.c_str(), 0, 1000,3,224,224); - // uint32_t* labels = readLabels2(labels_path.c_str(),6000); - // uint32_t* labels = readLabels3(labels_path.c_str(), 1000); - - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - - // args->input = input; - // args->input_bytes = 0; + void *input = create4DTensor(0, nchw, batch_size, 3, 224, 224); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -558,40 +547,21 @@ int main() { args->dense_3_b = dense_3_b; args->dense_3_b_bytes = 0; - int batch_size = 200; - int test_input_size = 4000; - int batch_count = test_input_size / batch_size; - + __hpvm__init(); startMemTracking(); - startProfiling(); - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp index 4acba95173..5ddd969432 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -460,8 +454,15 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} +const int batch_size = 100, input_size = 5000, + batch_count = input_size / batch_size; + +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); @@ -513,14 +514,9 @@ int main() { std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin"); void *dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1); - void *input = readTrainedWeights(input_path.c_str(), 0, 1000, 3, 224, 224); - // uint32_t* labels = readLabels2(labels_path.c_str(),6000); - - uint32_t *labels = readLabels3(labels_path.c_str(), 1000); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 224, 224); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -556,14 +552,25 @@ int main() { args->dense_3_b = dense_3_b; args->dense_3_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp index b67d585d01..ee81665ec9 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -264,13 +258,13 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +const int batch_size = 1000, input_size = 5000, + batch_count = input_size / batch_size; +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; - std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 1, 5, 5); @@ -294,15 +288,11 @@ int main() { readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 1024, 10); std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1); - // void* input = readTrainedWeights(input_path.c_str(), 0, 5000,1,28,28); - // uint32_t* labels = readLabels3(labels_path.c_str(), 5000); - - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - - // args->input = input; - // args->input_bytes = 0; + void *input = create4DTensor(0, nchw, batch_size, 1, 28, 28); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -320,37 +310,21 @@ int main() { args->dense_2_b = dense_2_b; args->dense_2_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - + __hpvm__init(); startMemTracking(); - startProfiling(); - +#pragma clang loop unroll(disable) for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void *input = readInputBatch(input_path.c_str(), 0, start, end, 1, 28, 28); - - args->input = input; - args->input_bytes = 0; + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 1, 28, 28, input); void *dfg = __hpvm__launch(0, root, (void *)args); - __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp index 2159cfe286..eecc7f5d60 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -264,13 +258,18 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; +const int batch_size = 1000, input_size = 5000, + batch_count = input_size / batch_size; +int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 1, 5, 5); @@ -294,13 +293,9 @@ int main() { readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 1024, 10); std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1); - void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 1, 28, 28); - - uint32_t *labels = readLabels3(labels_path.c_str(), 5000); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 1, 28, 28); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -320,15 +315,25 @@ int main() { args->dense_2_b = dense_2_b; args->dense_2_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 1, 28, 28, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - computeAccuracy3(labels, result); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp index a4de282621..58051e0993 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -1965,8 +1959,12 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; + int main() { - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; + std::string dir_prefix = + std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); @@ -2501,14 +2499,11 @@ int main() { readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 1024, 10); std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - // void* input = readTrainedWeights(input_path.c_str(), 0, 5000,3,32,32); - // uint8_t* labels = readLabels(labels_path.c_str(), 5000); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - - // args->input = input; - // args->input_bytes = 0; + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->batch_normalization_1_gamma = batch_normalization_1_gamma; @@ -2784,39 +2779,21 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - - // void* input = create4DTensor(0,nchw,batch_size,3,32,32); - + __hpvm__init(); startMemTracking(); - startProfiling(); - +#pragma clang loop unroll(disable) for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - // copyInputBatch(input_path.c_str(),start,end,3,32,32, input); - void *input = readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32); - - args->input = input; - args->input_bytes = 0; + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); void *dfg = __hpvm__launch(0, root, (void *)args); - __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->r.tensor; hpvm_request_tensor(result, 0); llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - freeBatchMemory(); } - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp index 9f4069b34b..482a37d4c4 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -1965,11 +1959,17 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; + +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; - std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -2503,12 +2503,10 @@ int main() { readTrainedWeights(dense_1_w_path.c_str(), 0, 1, 1, 1024, 10); std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -2786,14 +2784,25 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp index 66ab37cd33..a254a62570 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp @@ -1,11 +1,5 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -1300,14 +1294,13 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; + int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; - std::string input_path = dir_prefix + std::string("test_input.bin"); - // void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32); std::string labels_path = dir_prefix + std::string("test_labels.bin"); - // uint32_t* labels = readLabels3(labels_path.c_str(),5000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 16, 3, 3, 3); @@ -1439,11 +1432,10 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - - // args->input = input; - // args->input_bytes = 0; + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -1533,47 +1525,21 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - - // NOTE-HASHIM: commented out - // void* input = create4DTensor(0,nchw,batch_size,3,32,32); - + __hpvm__init(); startMemTracking(); - startProfiling(); - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - // copyInputBatch(input_path.c_str(),start,end,3,32,32, input); - - // NOTE-HASHIM: Commented out above line and line that does create4DTensor - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - printf("RUNNING BATCH = %d \n", i); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp index 1b6c98b886..da1ce91ba3 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10_cudnn.cpp @@ -1,11 +1,5 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -1226,12 +1220,17 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); - void *input = readTrainedWeights(input_path.c_str(), 0, 5000, 3, 32, 32); std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint32_t *labels = readLabels3(labels_path.c_str(), 5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -1365,9 +1364,8 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 10, 1, 1); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -1459,16 +1457,25 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - - computeAccuracy3(labels, result); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp index db6b64daa0..a3ece5fede 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -5132,8 +5126,10 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +const int batch_size = 25, input_size = 5000, + batch_count = input_size / batch_size; +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); @@ -6311,12 +6307,10 @@ int main() { void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 1000, 1, 1); - // void* input = readTrainedWeights(input_path.c_str(), 0,100,3,224,224); - // uint32_t* labels = readLabelsBatch3(labels_path.c_str(),0,100); - - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 224, 224); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -6958,39 +6952,21 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; + __hpvm__init(); startMemTracking(); - startProfiling(); - - unsigned int batch_size = 50; - unsigned int test_input_size = 1000; - unsigned int batch_count = test_input_size / batch_size; - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp index c7817caf53..03674b50a5 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -4903,8 +4897,15 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} +const int batch_size = 50, input_size = 5000, + batch_count = input_size / batch_size; + +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); @@ -6081,12 +6082,10 @@ int main() { std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); void *dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0, 1, 1000, 1, 1); - void *input = readTrainedWeights(input_path.c_str(), 0, 100, 3, 224, 224); uint32_t *labels = readLabels3(labels_path.c_str(), 100); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 224, 224); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -6730,14 +6729,25 @@ int main() { args->dense_1_b = dense_1_b; args->dense_1_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp index 39c2ffc876..cad22649fd 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -827,8 +821,10 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); @@ -920,14 +916,11 @@ int main() { readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 512, 10); std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1); - // void* input = readTrainedWeights(input_path.c_str(), 0,2000,3,32,32); - // uint32_t* labels = readLabels3(labels_path.c_str(),2000); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - - // args->input = input; - // args->input_bytes = 0; + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -989,41 +982,21 @@ int main() { args->dense_2_b = dense_2_b; args->dense_2_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - + __hpvm__init(); startMemTracking(); - startProfiling(); - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - // copyInputBatch(input_path.c_str(),start,end,3,32,32, input); - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp index 2f18dc17c6..6625202828 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -827,8 +821,15 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; + +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); @@ -920,12 +921,10 @@ int main() { readTrainedWeights(dense_2_w_path.c_str(), 0, 1, 1, 512, 10); std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 10, 1, 1); - void *input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32); uint32_t *labels = readLabels3(labels_path.c_str(), 2000); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -989,28 +988,25 @@ int main() { args->dense_2_b = dense_2_b; args->dense_2_b_bytes = 0; + __hpvm__init(); + float total_accuracy = 0; startMemTracking(); - startProfiling(); - - input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - computeAccuracy3(labels, result); - - freeBatchMemory(); - - stopProfiling(); - +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp index ce899cd0a2..54417171fb 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -827,10 +821,11 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; - std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -922,14 +917,10 @@ int main() { std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 100, 1, 1); - // void* input = readTrainedWeights(input_path.c_str(), 0,2000,3,32,32); - // uint32_t* labels = readLabels3(labels_path.c_str(),2000); - - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - - // args->input = input; - // args->input_bytes = 0; + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -991,40 +982,21 @@ int main() { args->dense_2_b = dense_2_b; args->dense_2_b_bytes = 0; - int batch_size = 500; - int test_input_size = 5000; - int batch_count = test_input_size / batch_size; - + __hpvm__init(); startMemTracking(); - startProfiling(); - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 32, 32); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp index a3e147cb1a..9f989e3610 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -827,10 +821,16 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} - std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; +const int batch_size = 500, input_size = 5000, + batch_count = input_size / batch_size; +int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -922,12 +922,8 @@ int main() { std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); void *dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0, 1, 100, 1, 1); - void *input = readTrainedWeights(input_path.c_str(), 0, 2000, 3, 32, 32); - uint32_t *labels = readLabels3(labels_path.c_str(), 2000); - - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 32, 32); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -991,14 +987,25 @@ int main() { args->dense_2_b = dense_2_b; args->dense_2_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 32, 32, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - computeAccuracy3(labels, result); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp index 91af01fe8e..12f7870a15 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -875,10 +869,11 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +const int batch_size = 10, input_size = 5000, + batch_count = input_size / batch_size; - std::string dir_prefix = - std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; +int main() { + std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); @@ -978,9 +973,10 @@ int main() { void *dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 224, 224); + args->input = input; + args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; args->conv2d_1_w_bytes = 0; args->conv2d_1_b = conv2d_1_b; @@ -1046,40 +1042,21 @@ int main() { args->dense_3_b = dense_3_b; args->dense_3_b_bytes = 0; + __hpvm__init(); startMemTracking(); - startProfiling(); - - unsigned int batch_size = 50; - unsigned int test_input_size = 1000; - unsigned int batch_count = test_input_size / batch_size; - - for (int j = 0; j < 1; j++) { - for (int i = 0; i < batch_count; i++) { - - int start = i * batch_size; - int end = (i + 1) * batch_size; - - void *input = - readInputBatch(input_path.c_str(), 0, start, end, 3, 224, 224); - - args->input = input; - args->input_bytes = 0; - - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - - llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); - - freeBatchMemory(); - } +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); + freeBatchMemory(); } - - stopProfiling(); __hpvm__cleanup(); - return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp index 995502f907..189460c928 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet_cudnn.cpp @@ -1,10 +1,4 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> +#include <string> #include <hpvm.h> #include <tensorTypes.h> #include <tensorUtils.h> @@ -875,8 +869,15 @@ typedef struct __attribute__((__packed__)) { struct ret_t r; } RootIn; -int main() { +void write_accuracy(float accuracy) { + std::ofstream fout("final_accuracy"); + fout << std::fixed << accuracy; +} +const int batch_size = 25, input_size = 5000, + batch_count = input_size / batch_size; + +int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; std::string input_path = dir_prefix + std::string("test_input.bin"); std::string labels_path = dir_prefix + std::string("test_labels.bin"); @@ -976,12 +977,10 @@ int main() { std::string dense_3_b_path = dir_prefix + std::string("dense_3_b.bin"); void *dense_3_b = readTrainedWeights(dense_3_b_path.c_str(), 0, 1, 1000, 1, 1); - void *input = readTrainedWeights(input_path.c_str(), 0, 100, 3, 224, 224); uint32_t *labels = readLabelsBatch3(labels_path.c_str(), 0, 100); - __hpvm__init(); RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); - + void *input = create4DTensor(0, nchw, batch_size, 3, 224, 224); args->input = input; args->input_bytes = 0; args->conv2d_1_w = conv2d_1_w; @@ -1049,14 +1048,25 @@ int main() { args->dense_3_b = dense_3_b; args->dense_3_b_bytes = 0; - void *dfg = __hpvm__launch(0, root, (void *)args); - - __hpvm__wait(dfg); - - void *result = static_cast<RootIn *>(args)->r.tensor; - hpvm_request_tensor(result, 0); - + __hpvm__init(); + float total_accuracy = 0; + startMemTracking(); +#pragma clang loop unroll(disable) + for (int i = 0; i < batch_count; i++) { + int start = i * batch_size, end = start + batch_size; + copyInputBatch(input_path.c_str(), start, end, 3, 224, 224, input); + + void *dfg = __hpvm__launch(0, root, (void *)args); + __hpvm__wait(dfg); + void *result = static_cast<RootIn *>(args)->r.tensor; + hpvm_request_tensor(result, 0); + + uint32_t *labels = readLabelsBatch3(labels_path.c_str(), start, end); + float accuracy = computeAccuracy3(labels, result); + total_accuracy += accuracy * batch_size; + freeBatchMemory(); + } + write_accuracy(total_accuracy / input_size); __hpvm__cleanup(); - computeAccuracy3(labels, result); return 0; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h index 71e1c26872..05d9157a64 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h +++ b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h @@ -3,7 +3,6 @@ #ifndef UTILS_HEADER #define UTILS_HEADER - #include <sstream> #include <vector> #include <bits/stdc++.h> @@ -11,15 +10,13 @@ #include <tensor.h> #include <cmath> - std::vector<float> run_accuracies; +void printTensorInfo(void *tensor_ptr) { -void printTensorInfo(void* tensor_ptr){ + struct Tensor *tensor = (struct Tensor *)tensor_ptr; - struct Tensor* tensor = (struct Tensor*) tensor_ptr; - - if(tensor->gpu_data != NULL){ + if (tensor->gpu_data != NULL) { printf("Successful cudaMalloc \n"); } @@ -29,376 +26,354 @@ void printTensorInfo(void* tensor_ptr){ printf("num_elems = %lu \n", tensor->num_elems); } - // FIXIT: Move this to debug.h and include in all files -void dumpWeightsToFile(char* file_name, void* weights_ptr){ +void dumpWeightsToFile(char *file_name, void *weights_ptr) { - struct Tensor* weights = (Tensor*) weights_ptr; + struct Tensor *weights = (Tensor *)weights_ptr; // Move data back to host hpvm_request_tensor(weights, 0); - - FILE* fp = fopen(file_name, "wb"); - if(fp == NULL){ - printf("File %s could not be created. Check if directory exists \n", file_name); + + FILE *fp = fopen(file_name, "wb"); + if (fp == NULL) { + printf("File %s could not be created. Check if directory exists \n", + file_name); abort(); } - //printf("size_in_bytes = %lu \n", weights->size_in_bytes); - size_t bytes_written = fwrite(weights->host_data, 1, weights->size_in_bytes, fp); - //printf("bytes_written = %lu \n", bytes_written); + // printf("size_in_bytes = %lu \n", weights->size_in_bytes); + size_t bytes_written = + fwrite(weights->host_data, 1, weights->size_in_bytes, fp); + // printf("bytes_written = %lu \n", bytes_written); fclose(fp); } +void fillTensorWithOnes(void *tensor_ptr) { + struct Tensor *tensor = (struct Tensor *)tensor_ptr; -void fillTensorWithOnes(void* tensor_ptr){ - - struct Tensor* tensor = (struct Tensor*) tensor_ptr; - hpvm_request_tensor(tensor, 0); - + // initialization is specific to the floating point type - if(tensor->data_type == CUDNN_DATA_FLOAT){ - float* data_arr = (float*) tensor->host_data; - for(unsigned int i = 0; i < tensor->num_elems; i++){ - data_arr[i] = 1.0; + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems; i++) { + data_arr[i] = 1.0; } } } +void fillWithOnesAndTwos(void *tensor_ptr) { -void fillWithOnesAndTwos(void* tensor_ptr){ + struct Tensor *tensor = (struct Tensor *)tensor_ptr; - struct Tensor* tensor = (struct Tensor*) tensor_ptr; - hpvm_request_tensor(tensor, 0); - + // initialization is specific to the floating point type - if(tensor->data_type == CUDNN_DATA_FLOAT){ - float* data_arr = (float*) tensor->host_data; - for(unsigned int i = 0; i < tensor->num_elems/2; i++){ - data_arr[i] = 1.0; + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems / 2; i++) { + data_arr[i] = 1.0; } - for(unsigned int i = tensor->num_elems/2; i < tensor->num_elems; i++){ - data_arr[i] = 2.0; + for (unsigned int i = tensor->num_elems / 2; i < tensor->num_elems; i++) { + data_arr[i] = 2.0; } - } } +void fillTensorWithVal(void *tensor_ptr, float target_value) { -void fillTensorWithVal(void* tensor_ptr, float target_value){ + struct Tensor *tensor = (struct Tensor *)tensor_ptr; - struct Tensor* tensor = (struct Tensor*) tensor_ptr; - hpvm_request_tensor(tensor, 0); - + // initialization is specific to the floating point type - if(tensor->data_type == CUDNN_DATA_FLOAT){ - float* data_arr = (float*) tensor->host_data; - for(unsigned int i = 0; i < tensor->num_elems; i++){ - data_arr[i] = target_value; + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems; i++) { + data_arr[i] = target_value; } } } +void fillTensorWithNegOnes(void *tensor_ptr) { -void fillTensorWithNegOnes(void* tensor_ptr){ + struct Tensor *tensor = (struct Tensor *)tensor_ptr; - struct Tensor* tensor = (struct Tensor*) tensor_ptr; - hpvm_request_tensor(tensor, 0); - + // initialization is specific to the floating point type - if(tensor->data_type == CUDNN_DATA_FLOAT){ - float* data_arr = (float*) tensor->host_data; - for(unsigned int i = 0; i < tensor->num_elems; i++){ - data_arr[i] = -1.0; + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems; i++) { + data_arr[i] = -1.0; } } } +void fillTensorVals(void *tensor_ptr) { -void fillTensorVals(void* tensor_ptr){ - - struct Tensor* tensor = (struct Tensor*) tensor_ptr; + struct Tensor *tensor = (struct Tensor *)tensor_ptr; // initialization is specific to the floating point type - if(tensor->data_type == CUDNN_DATA_FLOAT){ - float* data_arr = (float*) tensor->host_data; - for(unsigned int i = 0; i < tensor->num_elems; i++){ - data_arr[i] = i + 1; + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems; i++) { + data_arr[i] = i + 1; } } } +void printTensorValues(void *tensor_ptr) { -void printTensorValues(void* tensor_ptr){ - - struct Tensor* tensor = (struct Tensor*) tensor_ptr; + struct Tensor *tensor = (struct Tensor *)tensor_ptr; hpvm_request_tensor(tensor, 0); - + // printing is specific to the floating point type - if(tensor->data_type == CUDNN_DATA_FLOAT){ - float* data_arr = (float*) tensor->host_data; - for(unsigned int i = 0; i < tensor->num_elems; i++){ - printf("%f,", data_arr[i]); + if (tensor->data_type == CUDNN_DATA_FLOAT) { + float *data_arr = (float *)tensor->host_data; + for (unsigned int i = 0; i < tensor->num_elems; i++) { + printf("%f,", data_arr[i]); } } printf("\n"); } +void printTensorDims(void *tensor_ptr) { -void printTensorDims(void* tensor_ptr){ - - struct Tensor* tensor = (struct Tensor*) tensor_ptr; + struct Tensor *tensor = (struct Tensor *)tensor_ptr; printf("Num_elems = %lu \n", tensor->num_elems); - for (int i = 0; i < tensor->dims.num_dims; i++){ + for (int i = 0; i < tensor->dims.num_dims; i++) { printf("dim[%d] = %lu \n", i, tensor->dims.dim_sizes[i]); } } +void compareTensors(void *tensor1_ptr, void *tensor2_ptr) { - -void compareTensors(void* tensor1_ptr, void* tensor2_ptr){ - - struct Tensor* tensor1 = (struct Tensor*) tensor1_ptr; - struct Tensor* tensor2 = (struct Tensor*) tensor2_ptr; + struct Tensor *tensor1 = (struct Tensor *)tensor1_ptr; + struct Tensor *tensor2 = (struct Tensor *)tensor2_ptr; hpvm_request_tensor(tensor1, 0); hpvm_request_tensor(tensor2, 0); - float* tensor_data1 = (float*) tensor1->host_data; - float* tensor_data2 = (float*) tensor2->host_data; - - for(unsigned int i = 0; i < tensor1->num_elems; i++){ - if(tensor_data1[i] != tensor_data2[i]){ + float *tensor_data1 = (float *)tensor1->host_data; + float *tensor_data2 = (float *)tensor2->host_data; + + for (unsigned int i = 0; i < tensor1->num_elems; i++) { + if (tensor_data1[i] != tensor_data2[i]) { printf("Tensor data mismatch at index %d \n", i); abort(); } } } +void compareValues(void *tensor_ptr, float *data, size_t num_elems) { + struct Tensor *tensor = (struct Tensor *)tensor_ptr; -void compareValues(void* tensor_ptr, float* data, size_t num_elems){ - - struct Tensor* tensor = (struct Tensor*) tensor_ptr; - hpvm_request_tensor(tensor, 0); - - float* tensor_data = (float*) tensor->host_data; - for(unsigned int i = 0; i < num_elems; i++){ - if(tensor_data[i] != data[i]){ + + float *tensor_data = (float *)tensor->host_data; + for (unsigned int i = 0; i < num_elems; i++) { + if (tensor_data[i] != data[i]) { printf("Tensor data mismatch"); abort(); } } } - -void* readInputTensor(const char* file_name, int data_type, int dim1_size, int dim2_size, - int dim3_size, int dim4_size){ +void *readInputTensor(const char *file_name, int data_type, int dim1_size, + int dim2_size, int dim3_size, int dim4_size) { int type_size = 4; // NOTE: Assuming floating point tensors int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; - uint8_t* file_data = (uint8_t*) malloc(sizeof(char) * num_elems); - float* tensor_data = (float*) malloc(sizeof(float) * num_elems); + uint8_t *file_data = (uint8_t *)malloc(sizeof(char) * num_elems); + float *tensor_data = (float *)malloc(sizeof(float) * num_elems); int file_header_size = 16; - - FILE* file = fopen(file_name, "rb"); - if(file == NULL){ + + FILE *file = fopen(file_name, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting... \n", file_name); abort(); } - fseek(file, file_header_size, SEEK_CUR); // Skipping the file header size_t bytes_read = fread(file_data, 1, sizeof(uint8_t) * num_elems, file); fclose(file); - - for (size_t i = 0; i < num_elems; ++i){ - tensor_data[i] = (float) file_data[i] / 255.0f; + + for (size_t i = 0; i < num_elems; ++i) { + tensor_data[i] = (float)file_data[i] / 255.0f; } // NOTE: Using NCHW format - struct Tensor* input = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size, - dim3_size, dim4_size); - + struct Tensor *input = (struct Tensor *)create4DTensor( + data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size); + initTensorData(input, tensor_data, size_in_bytes); // compareValues(input, tensor_data, num_elems); - - return input; -} + return input; +} //*** FIXIT: Move this to CPU-only -struct Tensor* readTrainedWeightsCPU(const char* file_name, int data_type, - int dim1_size, int dim2_size, - int dim3_size, int dim4_size){ +struct Tensor *readTrainedWeightsCPU(const char *file_name, int data_type, + int dim1_size, int dim2_size, + int dim3_size, int dim4_size) { // FIXIT: Don't assume floating point types int type_size = 4; // NOTE: Assuming floating point tensors long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; - long int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; - float* tensor_data = (float*) malloc(sizeof(float) * num_elems); + long int size_in_bytes = + type_size * dim1_size * dim2_size * dim3_size * dim4_size; + float *tensor_data = (float *)malloc(sizeof(float) * num_elems); int file_header_size = 0; - - FILE* file = fopen(file_name, "rb"); - if(file == NULL){ + + FILE *file = fopen(file_name, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting... \n", file_name); abort(); } - + fseek(file, file_header_size, SEEK_CUR); // Skipping the file header size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes, bytes_read); fclose(file); - - - struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size, - dim3_size, dim4_size); - + + struct Tensor *weights = (struct Tensor *)create4DTensor( + data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size); + initTensorData(weights, tensor_data, size_in_bytes); - //compareValues(weights, tensor_data, num_elems); + // compareValues(weights, tensor_data, num_elems); free(tensor_data); return weights; } - -struct Tensor* readTrainedWeights(const char* file_name, int data_type, - long int dim1_size, long int dim2_size, - long int dim3_size, long int dim4_size){ +struct Tensor *readTrainedWeights(const char *file_name, int data_type, + long int dim1_size, long int dim2_size, + long int dim3_size, long int dim4_size) { // FIXIT: Don't assume floating point types int type_size = 4; // NOTE: Assuming floating point tensors long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; - long int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; - float* tensor_data = (float*) malloc(sizeof(float) * num_elems); + long int size_in_bytes = + type_size * dim1_size * dim2_size * dim3_size * dim4_size; + float *tensor_data = (float *)malloc(sizeof(float) * num_elems); printf("size_in_bytes = %lu \n", size_in_bytes); - + int file_header_size = 0; - - FILE* file = fopen(file_name, "rb"); - if(file == NULL){ + + FILE *file = fopen(file_name, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting... \n", file_name); abort(); } - + fseek(file, file_header_size, SEEK_CUR); // Skipping the file header size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); - // printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes, bytes_read); + // printf("size in bytes = %lu, bytes read = %lu \n", size_in_bytes, + // bytes_read); fclose(file); - - - struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size, - dim3_size, dim4_size); - + + struct Tensor *weights = (struct Tensor *)create4DTensor( + data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size); + initTensorData(weights, tensor_data, size_in_bytes); - //compareValues(weights, tensor_data, num_elems); + // compareValues(weights, tensor_data, num_elems); free(tensor_data); return weights; } - - - -struct Tensor* readInputBatch(const char* file_name, int data_type, - int start, int end, - int dim2_size, int dim3_size, int dim4_size){ +struct Tensor *readInputBatch(const char *file_name, long data_type, long start, + long end, long dim2_size, long dim3_size, + long dim4_size) { long int dim1_size = end - start; // FIXIT: Don't assume floating point types long int type_size = 4; // NOTE: Assuming floating point tensors long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; - long int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; - float* tensor_data = (float*) malloc(sizeof(float) * num_elems); - long int file_header_size = type_size * start * dim2_size * dim3_size * dim4_size; - - FILE* file = fopen(file_name, "rb"); - if(file == NULL){ + long int size_in_bytes = + type_size * dim1_size * dim2_size * dim3_size * dim4_size; + float *tensor_data = (float *)malloc(sizeof(float) * num_elems); + long int file_header_size = + type_size * start * dim2_size * dim3_size * dim4_size; + + FILE *file = fopen(file_name, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting... \n", file_name); abort(); } - + fseek(file, file_header_size, SEEK_SET); // Skipping the file header size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); fclose(file); - //printf ("FIXED input BATCH read \n"); - - struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size, - dim3_size, dim4_size); - + // printf ("FIXED input BATCH read \n"); + + struct Tensor *weights = (struct Tensor *)create4DTensor( + data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size); + initTensorData(weights, tensor_data, size_in_bytes); free(tensor_data); return weights; } +void *copyInputBatch(const char *file_name, long start, long end, + long dim2_size, long dim3_size, long dim4_size, + void *inputTensor_ptr) { + struct Tensor *inputTensor = (struct Tensor *)inputTensor_ptr; -void* copyInputBatch(const char* file_name, - int start, int end, - int dim2_size, int dim3_size, int dim4_size, - void* inputTensor_ptr){ - - struct Tensor* inputTensor = (struct Tensor*) inputTensor_ptr; - int dim1_size = end - start; // FIXIT: Don't assume floating point types int type_size = 4; // NOTE: Assuming floating point tensors long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; - long int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; - float* tensor_data = (float*) malloc(sizeof(float) * num_elems); - int file_header_size = type_size * start * dim2_size * dim3_size * dim4_size; - - FILE* file = fopen(file_name, "rb"); - if(file == NULL){ + long int size_in_bytes = + type_size * dim1_size * dim2_size * dim3_size * dim4_size; + float *tensor_data = (float *)malloc(sizeof(float) * num_elems); + long int file_header_size = + type_size * start * dim2_size * dim3_size * dim4_size; + + FILE *file = fopen(file_name, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting... \n", file_name); abort(); } - + fseek(file, file_header_size, SEEK_SET); // Skipping the file header size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); fclose(file); - - + initTensorData(inputTensor, tensor_data, size_in_bytes); free(tensor_data); printf("******NOTE: tensor Dims = %d \n", inputTensor->dims.num_dims); - if(inputTensor->host_data == NULL || inputTensor->gpu_data == NULL) + if (inputTensor->host_data == NULL || inputTensor->gpu_data == NULL) printf("ERROR: NULL data pointers \n"); - - // Chaning Tensor Placement to HOST + // Chaning Tensor Placement to HOST changeTensorPlacement(inputTensor, HOST); - return inputTensor; } +uint8_t *readLabels(const char *labels_file, int num_labels) { - -uint8_t* readLabels(const char* labels_file, int num_labels){ - - uint8_t* labels = (uint8_t*) malloc(sizeof(uint8_t) * num_labels); - FILE* file = fopen(labels_file, "rb"); - if(file == NULL){ + uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels); + FILE *file = fopen(labels_file, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting...\n", labels_file); abort(); } @@ -406,17 +381,15 @@ uint8_t* readLabels(const char* labels_file, int num_labels){ size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file); fclose(file); - + return labels; } +uint32_t *readLabels3(const char *labels_file, int num_labels) { - -uint32_t* readLabels3(const char* labels_file, int num_labels){ - - uint32_t* labels = (uint32_t*) malloc(sizeof(uint32_t) * num_labels); - FILE* file = fopen(labels_file, "rb"); - if(file == NULL){ + uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels); + FILE *file = fopen(labels_file, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting...\n", labels_file); abort(); } @@ -424,264 +397,248 @@ uint32_t* readLabels3(const char* labels_file, int num_labels){ size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file); fclose(file); - + return labels; } - -uint8_t* readLabelsBatch(const char* labels_file, int start, int end){ +uint8_t *readLabelsBatch(const char *labels_file, int start, int end) { int num_labels = end - start; int file_header_size = sizeof(uint8_t) * start; - - uint8_t* labels = (uint8_t*) malloc(sizeof(uint8_t) * num_labels); - FILE* file = fopen(labels_file, "rb"); - if(file == NULL){ + + uint8_t *labels = (uint8_t *)malloc(sizeof(uint8_t) * num_labels); + FILE *file = fopen(labels_file, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting...\n", labels_file); abort(); } - + fseek(file, file_header_size, SEEK_SET); // Skipping the file header - - size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file); + size_t bytes_read = fread(labels, 1, sizeof(uint8_t) * num_labels, file); fclose(file); - + // printf("--labels bytes_read = %lu \n", bytes_read); return labels; } - -uint32_t* readLabelsBatch3(const char* labels_file, int start, int end){ +uint32_t *readLabelsBatch3(const char *labels_file, int start, int end) { int num_labels = end - start; int file_header_size = sizeof(uint32_t) * start; - - uint32_t* labels = (uint32_t*) malloc(sizeof(uint32_t) * num_labels); - FILE* file = fopen(labels_file, "rb"); - if(file == NULL){ + + uint32_t *labels = (uint32_t *)malloc(sizeof(uint32_t) * num_labels); + FILE *file = fopen(labels_file, "rb"); + if (file == NULL) { printf("Data file %s is not found. Aborting...\n", labels_file); abort(); } - + fseek(file, file_header_size, SEEK_SET); // Skipping the file header - - size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file); + size_t bytes_read = fread(labels, 1, sizeof(uint32_t) * num_labels, file); fclose(file); - + return labels; } +void computeAccuracy(const char *labels_file, int num_labels, + void *result_ptr) { + struct Tensor *result = (struct Tensor *)result_ptr; -void computeAccuracy(const char* labels_file, int num_labels, void* result_ptr){ - - struct Tensor* result = (struct Tensor*) result_ptr; - - uint8_t* labels = readLabels(labels_file, num_labels); + uint8_t *labels = readLabels(labels_file, num_labels); size_t batch_dim = result->dims.dim_sizes[0]; size_t channels = result->dims.dim_sizes[1]; - float* data = (float*) result->host_data; + float *data = (float *)result->host_data; int num_errors = 0; - - for(int i = 0; i < batch_dim; i++){ + + for (int i = 0; i < batch_dim; i++) { int chosen = 0; - for (int id = 1; id < 10; ++id){ - if (data[i * channels + chosen] < data[i * channels + id]) chosen = id; + for (int id = 1; id < 10; ++id) { + if (data[i * channels + chosen] < data[i * channels + id]) + chosen = id; } - - //printf("chosen = %d, label = %d \n", chosen, labels[i]); - if(chosen != labels[i]) + + // printf("chosen = %d, label = %d \n", chosen, labels[i]); + if (chosen != labels[i]) num_errors++; } float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; printf("****** Accuracy = %f \n\n", accuracy); - - FILE* fp = fopen("final_accuracy", "w+"); - if(fp != NULL){ + FILE *fp = fopen("final_accuracy", "w+"); + if (fp != NULL) { std::ostringstream ss; ss << std::fixed << accuracy; std::string print_str = ss.str(); - + fwrite(print_str.c_str(), 1, print_str.length(), fp); fclose(fp); } - } +// NOTE: batch_size and num_classes are Unused arguments +float computeAccuracy2(uint8_t *labels, int batch_size, void *result_ptr, + size_t num_classes = 10) { + struct Tensor *result = (struct Tensor *)result_ptr; - -// NOTE: batch_size and num_classes are Unused arguments -float computeAccuracy2(uint8_t* labels, int batch_size, - void* result_ptr, size_t num_classes = 10){ - - struct Tensor* result = (struct Tensor*) result_ptr; - size_t batch_dim = result->dims.dim_sizes[0]; num_classes = result->dims.dim_sizes[1]; - float* data = (float*) result->host_data; + float *data = (float *)result->host_data; int num_errors = 0; printf("batch_dim = %lu, channels = %lu \n", batch_dim, num_classes); - - for(unsigned int i = 0; i < batch_dim; i++){ - + + for (unsigned int i = 0; i < batch_dim; i++) { + int chosen = 0; - for (int id = 1; id < num_classes; ++id){ - if (data[i * num_classes + chosen] < data[i * num_classes + id]) chosen = id; + for (int id = 1; id < num_classes; ++id) { + if (data[i * num_classes + chosen] < data[i * num_classes + id]) + chosen = id; } - - if(chosen != labels[i]) - num_errors++; + if (chosen != labels[i]) + num_errors++; } float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; printf("****** Accuracy = %f \n\n", accuracy); - FILE* fp = fopen("final_accuracy", "w+"); - if(fp != NULL){ + FILE *fp = fopen("final_accuracy", "w+"); + if (fp != NULL) { std::ostringstream ss; ss << std::fixed << accuracy; std::string print_str = ss.str(); - + fwrite(print_str.c_str(), 1, print_str.length(), fp); } fclose(fp); - return accuracy; + return accuracy; } +float computeAccuracy3(uint32_t *labels, void *result_ptr) { + struct Tensor *result = (struct Tensor *)result_ptr; -float computeAccuracy3(uint32_t* labels, void* result_ptr){ - - struct Tensor* result = (struct Tensor*) result_ptr; - size_t batch_dim = result->dims.dim_sizes[0]; size_t num_classes = result->dims.dim_sizes[1]; - float* data = (float*) result->host_data; + float *data = (float *)result->host_data; int num_errors = 0; printf("batch_dim = %lu, num_classes = %lu \n", batch_dim, num_classes); - - for(int i = 0; i < batch_dim; i++){ - + + for (int i = 0; i < batch_dim; i++) { + int chosen = 0; - for (int id = 1; id < num_classes; ++id){ - if (data[i * num_classes + chosen] < data[i * num_classes + id]) chosen = id; + for (int id = 1; id < num_classes; ++id) { + if (data[i * num_classes + chosen] < data[i * num_classes + id]) + chosen = id; } - - if(chosen != labels[i]) + + if (chosen != labels[i]) num_errors++; } float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; printf("****** Accuracy = %f \n\n", accuracy); - FILE* fp = fopen("final_accuracy", "w+"); - if(fp != NULL){ + FILE *fp = fopen("final_accuracy", "w+"); + if (fp != NULL) { std::ostringstream ss; ss << std::fixed << accuracy; std::string print_str = ss.str(); - + fwrite(print_str.c_str(), 1, print_str.length(), fp); } fclose(fp); - return accuracy; + return accuracy; } - - -struct ClassProb{ +struct ClassProb { float prob; int index; }; - -bool descendFloatComp(ClassProb obj1, ClassProb obj2){ +bool descendFloatComp(ClassProb obj1, ClassProb obj2) { return obj1.prob > obj2.prob; } +float computeTop5Accuracy(uint8_t *labels, int num_labels, void *result_ptr, + unsigned num_classes = 10) { + + struct Tensor *result = (struct Tensor *)result_ptr; -float computeTop5Accuracy(uint8_t* labels, int num_labels, - void* result_ptr, unsigned num_classes = 10){ - - struct Tensor* result = (struct Tensor*) result_ptr; - size_t batch_dim = result->dims.dim_sizes[0]; size_t channels = result->dims.dim_sizes[1]; - float* data = (float*) result->host_data; + float *data = (float *)result->host_data; int num_errors = 0; printf("batch_dim = %lu, channels = %lu \n", batch_dim, channels); - - for(int i = 0; i < num_labels; i++){ + + for (int i = 0; i < num_labels; i++) { std::vector<ClassProb> elem_probs; - for (int id = 0; id < num_classes; ++id){ + for (int id = 0; id < num_classes; ++id) { ClassProb cProb; cProb.prob = data[i * channels + id]; cProb.index = id; - elem_probs.push_back(cProb); + elem_probs.push_back(cProb); } - std:sort(elem_probs.begin(), elem_probs.end(), descendFloatComp); + std: + sort(elem_probs.begin(), elem_probs.end(), descendFloatComp); // Check if any of top-5 predictions matches bool matched = false; - for(int j = 0; j < 5; j++){ + for (int j = 0; j < 5; j++) { ClassProb cProb = elem_probs[j]; - if(cProb.index == labels[i]) + if (cProb.index == labels[i]) matched = true; } - if(!matched) - num_errors +=1; + if (!matched) + num_errors += 1; } float accuracy = ((batch_dim - num_errors) * 1.0 / batch_dim * 1.0) * 100.0; printf("****** Accuracy = %f \n\n", accuracy); - FILE* fp = fopen("final_accuracy", "w+"); - if(fp != NULL){ + FILE *fp = fopen("final_accuracy", "w+"); + if (fp != NULL) { std::ostringstream ss; ss << std::fixed << accuracy; std::string print_str = ss.str(); - + fwrite(print_str.c_str(), 1, print_str.length(), fp); } fclose(fp); - return accuracy; + return accuracy; } - - - -void dumpFinalAccuracy(float accuracy){ +void dumpFinalAccuracy(float accuracy) { printf("\n\n **** Final Accuracy = %f \n", accuracy); - - FILE* fp = fopen("final_accuracy", "w+"); - if(fp != NULL){ + + FILE *fp = fopen("final_accuracy", "w+"); + if (fp != NULL) { std::ostringstream ss; ss << std::fixed << accuracy; std::string print_str = ss.str(); - + fwrite(print_str.c_str(), 1, print_str.length(), fp); } @@ -690,44 +647,37 @@ void dumpFinalAccuracy(float accuracy){ run_accuracies.push_back(accuracy); } +void dumpAvgPSNR(float avg_psnr) { - -void dumpAvgPSNR(float avg_psnr){ - - FILE* fp = fopen("avg_psnr", "w+"); - if(fp != NULL){ + FILE *fp = fopen("avg_psnr", "w+"); + if (fp != NULL) { std::ostringstream ss; ss << std::fixed << avg_psnr; - std::string print_str = ss.str(); + std::string print_str = ss.str(); fwrite(print_str.c_str(), 1, print_str.length(), fp); } fclose(fp); } +void dumpPSNRStd(float psnr_std) { -void dumpPSNRStd(float psnr_std){ - - FILE* fp = fopen("psnr_std.txt", "w+"); - if(fp != NULL){ + FILE *fp = fopen("psnr_std.txt", "w+"); + if (fp != NULL) { std::ostringstream ss; ss << std::fixed << psnr_std; - std::string print_str = ss.str(); + std::string print_str = ss.str(); fwrite(print_str.c_str(), 1, print_str.length(), fp); } fclose(fp); } +void dumpExecutionAccuracies() { - - - -void dumpExecutionAccuracies(){ - - FILE* fp = fopen("run_accuracies.txt", "w+"); - if(fp != NULL){ - for (int i = 0; i < run_accuracies.size(); i++){ + FILE *fp = fopen("run_accuracies.txt", "w+"); + if (fp != NULL) { + for (int i = 0; i < run_accuracies.size(); i++) { float accuracy = run_accuracies[i]; std::ostringstream ss; ss << std::fixed << accuracy; @@ -735,63 +685,60 @@ void dumpExecutionAccuracies(){ fwrite(print_str.c_str(), 1, print_str.length(), fp); fwrite("\n", 1, 1, fp); } - } fclose(fp); } - -float readPSNRFromFile(const char* file_name){ +float readPSNRFromFile(const char *file_name) { float psnr; - FILE* pFile = fopen(file_name, "r"); - if(pFile == NULL){ + FILE *pFile = fopen(file_name, "r"); + if (pFile == NULL) { printf("ERROR: psnr.txt not found! \n"); abort(); } - + fscanf(pFile, "%f", &psnr); printf("**** PSNR read = %f \n\n", psnr); - return psnr; + return psnr; } +float computePSNRViolation(void *gold_ptr, void *approx_ptr, + float PSNR_threshold) { -float computePSNRViolation(void* gold_ptr, void* approx_ptr, float PSNR_threshold){ - - PSNR_threshold = readPSNRFromFile("psnr.txt"); std::vector<float> psnr_list; - - struct Tensor* gold_tensor = (struct Tensor*) gold_ptr; - struct Tensor* approx_tensor = (struct Tensor*) approx_ptr; - size_t* dim_sizes = gold_tensor->dims.dim_sizes; + struct Tensor *gold_tensor = (struct Tensor *)gold_ptr; + struct Tensor *approx_tensor = (struct Tensor *)approx_ptr; + + size_t *dim_sizes = gold_tensor->dims.dim_sizes; size_t batch_dim = dim_sizes[0]; size_t image_size = dim_sizes[1] * dim_sizes[2] * dim_sizes[3]; - + printf("batch_dim = %lu, image_size = %lu \n", batch_dim, image_size); - - float* gold_data = (float*) gold_tensor->host_data; - float* approx_data = (float*) approx_tensor->host_data; - FILE* fp = fopen("img_psnr.txt", "w+"); + float *gold_data = (float *)gold_tensor->host_data; + float *approx_data = (float *)approx_tensor->host_data; + + FILE *fp = fopen("img_psnr.txt", "w+"); float sum_psnr = 0.0; - int num_errors = 0; - for(size_t i = 0; i < batch_dim; i++){ + int num_errors = 0; + for (size_t i = 0; i < batch_dim; i++) { float mse_sum = 0.0; - float max_val = -999999; + float max_val = -999999; size_t offset = i * image_size; - - for(size_t j = 0; j < image_size; j++){ + + for (size_t j = 0; j < image_size; j++) { float diff = gold_data[offset + j] - approx_data[offset + j]; float diff_square = diff * diff; mse_sum += diff_square; - if(max_val < gold_data[offset + j]){ - max_val = gold_data[offset + j]; - } + if (max_val < gold_data[offset + j]) { + max_val = gold_data[offset + j]; + } } mse_sum = mse_sum / image_size; @@ -799,7 +746,7 @@ float computePSNRViolation(void* gold_ptr, void* approx_ptr, float PSNR_threshol sum_psnr += psnr; if (psnr < PSNR_threshold) - num_errors += 1; + num_errors += 1; printf("PSNR value = %f \n", psnr); psnr_list.push_back(psnr); @@ -817,39 +764,35 @@ float computePSNRViolation(void* gold_ptr, void* approx_ptr, float PSNR_threshol float avg_psnr = sum_psnr / batch_dim; printf("*** avg_psnr = %f \n\n", avg_psnr); dumpAvgPSNR(avg_psnr); - + float success_rate = 100.0 - violation_rate; dumpFinalAccuracy(success_rate); fclose(fp); - float var = 0.0; - for(size_t i = 0; i < batch_dim; i++){ - var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr); + for (size_t i = 0; i < batch_dim; i++) { + var = var + (psnr_list[i] - avg_psnr) * (psnr_list[i] - avg_psnr); } var /= batch_dim; float std = sqrt(var); dumpPSNRStd(std); - - return violation_rate; -} + return violation_rate; +} -void dumpOutput(void* output_ptr, const char* file_name){ +void dumpOutput(void *output_ptr, const char *file_name) { - struct Tensor* out_tensor = (struct Tensor*) output_ptr; + struct Tensor *out_tensor = (struct Tensor *)output_ptr; size_t size_in_bytes = out_tensor->size_in_bytes; - printf ("** Output size = %lu \n", size_in_bytes); - - float* host_data = (float*) out_tensor->host_data; - FILE* fd = fopen(file_name, "w+"); + printf("** Output size = %lu \n", size_in_bytes); + + float *host_data = (float *)out_tensor->host_data; + FILE *fd = fopen(file_name, "w+"); fwrite(host_data, 1, size_in_bytes, fd); fclose(fd); } - - #endif -- GitLab