From 86323b34ee0793c288b058c2fef358f248e9ca40 Mon Sep 17 00:00:00 2001 From: Yifan Zhao <yifanz16@illinois.edu> Date: Tue, 5 Nov 2019 15:26:16 -0600 Subject: [PATCH] Benchmark I/O --- .../benchmarks/canny_test/Makefile | 3 +- .../benchmarks/canny_test/src/canny_test.cpp | 304 +++++++++--------- 2 files changed, 150 insertions(+), 157 deletions(-) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile index 3287830462..4b9b5baedd 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile @@ -24,8 +24,7 @@ LINKER_FLAGS = -L $(CUDA_TOOLKIT_ROOT_DIR)/lib64 -lpthread -lcudart -lcurand -lc HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib -# FIXME: -CONF_FILE_PATH= +CONF_FILE_PATH=$(realpath data/tuner_confs.txt) # NOTE: Needs proper handling in the WRAPPER backend because Quant range not needed for IMAGE Benchmarks WRAPPER_API_QUANT_FILE_PATH= diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp index 5f254f5e95..eb25c085e2 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp @@ -1,233 +1,227 @@ +#include <cstring> +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> #include <sys/stat.h> -#include <cstring> -#include <visc.h> #include <tensorTypes.h> #include <tensorUtils.h> - -// __device__ float avg(float x) { -// return __fdividef(x, 3.0f); -// } - -// __device__ void *avg_ptr = (void *) avg; +#include <unistd.h> +#include <visc.h> /* 0. Grayscale */ -void var_0_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::GPU_TARGET); - __visc__attributes(1, t1, 0); +void var_0_node(void *t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); - void *r = __visc__tensor_reduce(t1, 1, nullptr); //device::fadd_ptrptr); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_reduce(t1, 1, device::fadd_ptrptr); + __visc__return(2, r, (size_t)0); } -void var_1_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::GPU_TARGET); - __visc__attributes(1, t1, 0); +void var_1_node(void *t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); - void *r = __visc__tensor_map1(nullptr, t1); // (void *) &avg_ptr - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_map1(device::favg3_ptrptr, t1); + __visc__return(2, r, (size_t)0); } /* 1. Denoise */ -void var_2_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_2_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); + __visc__return(2, r, (size_t)0); } /* 2. Get edge gradient / direction */ -void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_3_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); + __visc__return(2, r, (size_t)0); } -void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_4_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); + __visc__return(2, r, (size_t)0); } -void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_5_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fhypot_ptrptr - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_map2(device::fhypot_ptrptr, t1, t2); + __visc__return(2, r, (size_t)0); } /* 3. Normalize grad magnitude */ -void var_6_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::GPU_TARGET); - __visc__attributes(1, t1, 0); +void var_6_node(void *t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); - void *r = __visc__tensor_reduce(t1, 2, nullptr); // device::fmax_ptrptr - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_reduce(t1, 2, device::fmax_ptrptr); + __visc__return(2, r, (size_t)0); } -void var_7_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::GPU_TARGET); - __visc__attributes(1, t1, 0); +void var_7_node(void *t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); - void *r = __visc__tensor_reduce(t1, 3, nullptr); // device::fmax_ptrptr - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_reduce(t1, 3, device::fmax_ptrptr); + __visc__return(2, r, (size_t)0); } -void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::PROMISE_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_8_node(void *t1, size_t bytes_t1, void *t2, size_t bytes_t2) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fdiv_ptrptr - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_map2(device::fdiv_ptrptr, t1, t2); + __visc__return(2, r, (size_t)0); } void root( - void* input, size_t input_bytes, - void* gaussian, size_t gaussian_bytes, - void* sobel_x, size_t sobel_x_bytes, - void* sobel_y, size_t sobel_y_bytes -){ - __visc__hint(visc::CPU_TARGET); - __visc__attributes(8, input, input_bytes, gaussian, gaussian_bytes, sobel_x, sobel_x_bytes, sobel_y, sobel_y_bytes, 0); + void *input, size_t input_bytes, void *gaussian, size_t gaussian_bytes, + void *sobel_x, size_t sobel_x_bytes, void *sobel_y, size_t sobel_y_bytes) { + __visc__hint(visc::CPU_TARGET); + __visc__attributes( + 8, input, input_bytes, gaussian, gaussian_bytes, sobel_x, sobel_x_bytes, + sobel_y, sobel_y_bytes, 0); - void* var_0 = __visc__createNodeND(0, var_0_node); + void *var_0 = __visc__createNodeND(0, var_0_node); - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); + __visc__bindIn(var_0, 0, 0, 0); + __visc__bindIn(var_0, 1, 1, 0); - void* var_1 = __visc__createNodeND(0, var_1_node); + void *var_1 = __visc__createNodeND(0, var_1_node); - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); + __visc__edge(var_0, var_1, 1, 0, 0, 0); + __visc__edge(var_0, var_1, 1, 1, 1, 0); - void* var_2 = __visc__createNodeND(0, var_2_node); + void *var_2 = __visc__createNodeND(0, var_2_node); - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); - __visc__bindIn(var_2, 2, 2, 0); - __visc__bindIn(var_2, 3, 3, 0); + __visc__edge(var_1, var_2, 1, 0, 0, 0); + __visc__edge(var_1, var_2, 1, 1, 1, 0); + __visc__bindIn(var_2, 2, 2, 0); + __visc__bindIn(var_2, 3, 3, 0); - void* var_3 = __visc__createNodeND(0, var_3_node); + void *var_3 = __visc__createNodeND(0, var_3_node); - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); - __visc__bindIn(var_3, 4, 2, 0); - __visc__bindIn(var_3, 5, 3, 0); + __visc__edge(var_2, var_3, 1, 0, 0, 0); + __visc__edge(var_2, var_3, 1, 1, 1, 0); + __visc__bindIn(var_3, 4, 2, 0); + __visc__bindIn(var_3, 5, 3, 0); - void* var_4 = __visc__createNodeND(0, var_4_node); + void *var_4 = __visc__createNodeND(0, var_4_node); - __visc__edge(var_2, var_4, 1, 0, 0, 0); - __visc__edge(var_2, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 6, 2, 0); - __visc__bindIn(var_4, 7, 3, 0); + __visc__edge(var_2, var_4, 1, 0, 0, 0); + __visc__edge(var_2, var_4, 1, 1, 1, 0); + __visc__bindIn(var_4, 6, 2, 0); + __visc__bindIn(var_4, 7, 3, 0); - void* var_5 = __visc__createNodeND(0, var_5_node); + void *var_5 = __visc__createNodeND(0, var_5_node); - __visc__edge(var_3, var_5, 1, 0, 0, 0); - __visc__edge(var_3, var_5, 1, 1, 1, 0); - __visc__edge(var_4, var_5, 1, 0, 2, 0); - __visc__edge(var_4, var_5, 1, 1, 3, 0); + __visc__edge(var_3, var_5, 1, 0, 0, 0); + __visc__edge(var_3, var_5, 1, 1, 1, 0); + __visc__edge(var_4, var_5, 1, 0, 2, 0); + __visc__edge(var_4, var_5, 1, 1, 3, 0); - void* var_6 = __visc__createNodeND(0, var_6_node); + void *var_6 = __visc__createNodeND(0, var_6_node); - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); + __visc__edge(var_5, var_6, 1, 0, 0, 0); + __visc__edge(var_5, var_6, 1, 1, 1, 0); - void* var_7 = __visc__createNodeND(0, var_7_node); + void *var_7 = __visc__createNodeND(0, var_7_node); - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); + __visc__edge(var_6, var_7, 1, 0, 0, 0); + __visc__edge(var_6, var_7, 1, 1, 1, 0); - void* var_8 = __visc__createNodeND(0, var_8_node); + void *var_8 = __visc__createNodeND(0, var_8_node); - __visc__edge(var_5, var_8, 1, 0, 0, 0); - __visc__edge(var_5, var_8, 1, 1, 1, 0); - __visc__edge(var_7, var_8, 1, 0, 2, 0); - __visc__edge(var_7, var_8, 1, 1, 3, 0); + __visc__edge(var_5, var_8, 1, 0, 0, 0); + __visc__edge(var_5, var_8, 1, 1, 1, 0); + __visc__edge(var_7, var_8, 1, 0, 2, 0); + __visc__edge(var_7, var_8, 1, 1, 3, 0); - __visc__bindOut(var_8, 0, 0, 0); - __visc__bindOut(var_8, 1, 1, 0); + __visc__bindOut(var_8, 0, 0, 0); + __visc__bindOut(var_8, 1, 1, 0); } struct ret_t { - void* tensor; - size_t bytes; + void *tensor; + size_t bytes; }; struct __attribute__((__packed__)) RootIn { - void* input; - size_t input_bytes; - void* gaussian; - size_t gaussian_bytes; - void* sobel_x; - size_t sobel_x_bytes; - void* sobel_y; - size_t sobel_y_bytes; - struct ret_t r; + void *input; + size_t input_bytes; + void *gaussian; + size_t gaussian_bytes; + void *sobel_x; + size_t sobel_x_bytes; + void *sobel_y; + size_t sobel_y_bytes; + struct ret_t r; }; -// Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { -// int64_t m = (w - 1) / 2, n = (h - 1) / 2; -// auto *data = new float[w * h]; -// float sum = 0.0f; -// for (int64_t i = -m; i <= m; i++) -// for (int64_t j = -n; j <= n; j++) { -// size_t idx = (i + m) * h + (j + n); -// float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); -// data[idx] = exp(exponent); -// sum += data[idx]; -// } -// if (sum != 0.0f) -// for (size_t i = 0; i < w * h; i++) -// data[i] /= sum; -// return (Tensor *) createFilterFromData(float_, data, w, h, n_chan); -// } - -// std::pair<Tensor *, Tensor *> getSobelKernels() { -// std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); -// std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); -// auto *t1 = (Tensor *) createFilterFromData(float_, k1.data(), 3, 3, 1); -// auto *t2 = (Tensor *) createFilterFromData(float_, k2.data(), 3, 3, 1); -// return std::make_pair(t1, t2); -// } +Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { + int64_t m = (w - 1) / 2, n = (h - 1) / 2; + auto *data = new float[w * h]; + float sum = 0.0f; + for (int64_t i = -m; i <= m; i++) + for (int64_t j = -n; j <= n; j++) { + size_t idx = (i + m) * h + (j + n); + float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); + data[idx] = exp(exponent); + sum += data[idx]; + } + if (sum != 0.0f) + for (size_t i = 0; i < w * h; i++) + data[i] /= sum; + return (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, data, w, h, n_chan); +} + +std::pair<Tensor *, Tensor *> getSobelKernels() { + std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); + std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); + auto *t1 = + (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k1.data(), 3, 3, 1); + auto *t2 = + (Tensor *)createFilterFromData(CUDNN_DATA_FLOAT, k2.data(), 3, 3, 1); + return std::make_pair(t1, t2); +} int main(int argc, char *argv[]) { - if (argc < 3) - return 0; - std::string output_path = argv[2]; - // Tensor *image = readDataSet(argv[1]); - // Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); - // Tensor *sobel_x, *sobel_y; - // std::tie(sobel_x, sobel_y) = getSobelKernels(); - Tensor *image = nullptr, *gaussian = nullptr, *sobel_x = nullptr, *sobel_y = nullptr; + if (argc < 3) + return 0; + Tensor *image = readDataSet(argv[1]); + Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); + Tensor *sobel_x = nullptr, *sobel_y = nullptr; + std::tie(sobel_x, sobel_y) = getSobelKernels(); - __visc__init(); - RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - *args = RootIn{image, 0, gaussian, 0, sobel_x, 0, sobel_y, 0}; + __visc__init(); + RootIn *args = static_cast<RootIn *>(malloc(sizeof(RootIn))); + *args = RootIn{image, 0, gaussian, 0, sobel_x, 0, sobel_y, 0}; - void* dfg = __visc__launch(0, root, (void*) args); + void *dfg = __visc__launch(0, root, (void *)args); - __visc__wait(dfg); + __visc__wait(dfg); - void *result = static_cast<RootIn*>(args)->input; - hpvm_request_tensor(result, 0); + void *result = static_cast<RootIn *>(args)->input; + hpvm_request_tensor(result, 0); - __visc__cleanup(); - // saveDataSet(output_path, (Tensor *) result); - // computeAccuracy2(labels, 10000, result); - return 0; + __visc__cleanup(); + saveDataSet(argv[2], "", (Tensor *)result); + + // computeAccuracy2(labels, 10000, result); + return 0; } -- GitLab