From 956206846f0555ebd7894365bd82bcd42e591612 Mon Sep 17 00:00:00 2001 From: Yifan Zhao <yifanz16@illinois.edu> Date: Fri, 1 Nov 2019 14:24:41 -0500 Subject: [PATCH] Canny benchmark, testing usage --- .../benchmarks/canny_test/Makefile | 59 +++++ .../benchmarks/canny_test/bin/setup_paths.sh | 14 ++ .../benchmarks/canny_test/data/promise_flags | 5 + .../canny_test/data/quant_ranges.txt | 5 + .../canny_test/data/quant_ranges_rt.txt | 6 + .../canny_test/data/tuner_confs.txt | 18 ++ .../benchmarks/canny_test/src/canny_test.cpp | 233 ++++++++++++++++++ 7 files changed, 340 insertions(+) create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/bin/setup_paths.sh create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/promise_flags create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges.txt create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges_rt.txt create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/tuner_confs.txt create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile new file mode 100644 index 0000000000..b022f2dc73 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/Makefile @@ -0,0 +1,59 @@ +# NOTE: $LLVM_SRC_ROOT and $HPVM_BUILD_ROOT have to be set +DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks +HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/build + +CC = $(HPVM_BUILD_DIR)/bin/clang++ +OPT = $(HPVM_BUILD_DIR)/bin/opt +LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis +LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link +LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include + +SRC_DIR = src +BUILD_DIR = build +# NOTE: Change to the name of your benchmark +APP = canny_test + +TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include +TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include +TENSOR_LIB_PATH = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a + +CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3 +CCFLAGS += -DDEVICE=CUDNN_TARGET +LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL + +HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib + + +# FIXME: +CONF_FILE_PATH= +# NOTE: Needs proper handling in the WRAPPER backend because Quant range not needed for IMAGE Benchmarks +WRAPPER_API_QUANT_FILE_PATH= + + +VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_WrapperAPI.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-wrapperapi -quantization-levels-filename=$(WRAPPER_API_QUANT_FILE_PATH) -configuration-inputs-filename=$(CONF_FILE_PATH) -dfg2llvm-x86 -clearDFG + + + +TARGET = $(BUILD_DIR)/$(APP).opt.bc +SOURCES = $(SRC_DIR)/$(APP).cpp +VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll + +#OBJS = $(BUILD_DIR)/$(wildcabrd *.ll) +.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll +default: $(BUILD_DIR) $(TARGET) + + +$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp + $(CC) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -c -o $(BUILD_DIR)/$(APP).ll + +$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll + $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll + $(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_wrapper.bc + $(LLVM_LINK) $(BUILD_DIR)/$(APP)_wrapper.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_wrapper_linked.bc + $(CC) $(BUILD_DIR)/$(APP)_wrapper_linked.bc $(TENSOR_LIB_PATH) -o $(BUILD_DIR)/$(APP)_final $(LINKER_FLAGS) + +$(BUILD_DIR): + mkdir -p $@ + +clean: + rm -rf $(BUILD_DIR) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/bin/setup_paths.sh b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/bin/setup_paths.sh new file mode 100644 index 0000000000..3548f182f1 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/bin/setup_paths.sh @@ -0,0 +1,14 @@ +#!/bin/bash + +# CUDNN Path setup +module load cuda-toolkit/9.1 +export CUDA_INCLUDE_PATH=/software/cuda-9.1/include +export CUDNN_PATH=/software/cuda-9.1/lib64/ +export LIBRARY_PATH=/software/cuda-9.1/lib64/:$LIBRARY_PATH +export LD_LIBRARY_PATH=/software/cuda-9.1/lib64/:$LD_LIBRARY_PATH + +# HPVM Path setup +export CPATH=$CPATH:/home/hsharif3/anaconda2/include/ +export PATH=/home/hsharif3/Gitlab/hpvm/build/bin/:$PATH +export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/ +export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/ diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/promise_flags b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/promise_flags new file mode 100644 index 0000000000..4926d75080 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/promise_flags @@ -0,0 +1,5 @@ +7 +7 +7 +7 +7 \ No newline at end of file diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges.txt new file mode 100644 index 0000000000..ad8fa364ab --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges.txt @@ -0,0 +1,5 @@ +0.0 1.0 -0.3 0.3 -0.041063767 0.031912163 0.0 1.5512946 +0.0 1.5512946 -0.15580177 0.1533 -0.041385915 0.05869476 0.0 4.916329 +0.0 4.916329 -0.20324017 0.18275258 -0.039915435 0.04589232 0.0 9.447418 +0.0 9.447418 -0.10757191 0.123126 -0.025070198 0.027000334 0.0 9.926857 +0.0 9.926857 -0.18867673 0.16425411 -0.012622595 0.04586973 0.0 42.018578 diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges_rt.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges_rt.txt new file mode 100644 index 0000000000..535066863d --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/quant_ranges_rt.txt @@ -0,0 +1,6 @@ +1 0.0 1.0 -0.3 0.3 -0.041063767 0.031912163 0.0 1.5512946 +2 0.0 1.5512946 -0.15580177 0.1533 -0.041385915 0.05869476 0.0 4.916329 +3 0.0 4.916329 -0.20324017 0.18275258 -0.039915435 0.04589232 0.0 9.447418 +4 0.0 9.447418 -0.10757191 0.123126 -0.025070198 0.027000334 0.0 9.926857 +5 0.0 9.926857 -0.18867673 0.16425411 -0.012622595 0.04586973 0.0 42.018578 +6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/tuner_confs.txt b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/tuner_confs.txt new file mode 100644 index 0000000000..6d896a39c1 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/data/tuner_confs.txt @@ -0,0 +1,18 @@ ++++++ +conf1 1.5 90 +1 promise swing_level 5 +2 promise swing_level 7 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu mul fp16 1 add fp32 1 relu fp32 1 +5 gpu mul fp16 1 add fp16 1 relu fp16 1 +6 gpu softmax fp32 1 +----- ++++++ +conf1 1.5 90 +1 promise swing_level 5 +2 gpu conv fp16 2 add fp16 1 relu fp16 1 +3 gpu conv fp16 1 add fp16 1 relu fp16 1 +4 gpu mul fp16 1 add fp32 1 relu fp32 1 +5 gpu mul fp16 1 add fp16 1 relu fp16 1 +6 gpu softmax fp32 1 +----- diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp new file mode 100644 index 0000000000..b4e3db13da --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/canny_test/src/canny_test.cpp @@ -0,0 +1,233 @@ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <cstring> +#include <visc.h> +#include <tensorTypes.h> +#include <tensorUtils.h> + +// __device__ float avg(float x) { +// return __fdividef(x, 3.0f); +// } + +// __device__ void *avg_ptr = (void *) avg; + +/* 0. Grayscale */ + +void var_0_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); + + void *r = __visc__tensor_reduce(t1, 1, nullptr); //device::fadd_ptrptr); + __visc__return(2, r, (size_t) 0); +} + +void var_1_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); + + void *r = __visc__tensor_map1(nullptr, t1); // (void *) &avg_ptr + __visc__return(2, r, (size_t) 0); +} + +/* 1. Denoise */ + +void var_2_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1, 1, 0); + __visc__return(2, r, (size_t) 0); +} + +/* 2. Get edge gradient / direction */ + +void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1, 1, 0); + __visc__return(2, r, (size_t) 0); +} + +void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1, 1, 0); + __visc__return(2, r, (size_t) 0); +} + +void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fhypot_ptrptr + __visc__return(2, r, (size_t) 0); +} + +/* 3. Normalize grad magnitude */ + +void var_6_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); + + void *r = __visc__tensor_reduce(t1, 2, nullptr); // device::fmax_ptrptr + __visc__return(2, r, (size_t) 0); +} + +void var_7_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::GPU_TARGET); + __visc__attributes(1, t1, 0); + + void *r = __visc__tensor_reduce(t1, 3, nullptr); // device::fmax_ptrptr + __visc__return(2, r, (size_t) 0); +} + +void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::PROMISE_TARGET); + __visc__attributes(2, t1, t2, 0); + + void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fdiv_ptrptr + __visc__return(2, r, (size_t) 0); +} + +void root( + void* input, size_t input_bytes, + void* gaussian, size_t gaussian_bytes, + void* sobel_x, size_t sobel_x_bytes, + void* sobel_y, size_t sobel_y_bytes +){ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(8, input, input_bytes, gaussian, gaussian_bytes, sobel_x, sobel_x_bytes, sobel_y, sobel_y_bytes, 0); + + void* var_0 = __visc__createNodeND(0, var_0_node); + + __visc__bindIn(var_0, 0, 0, 0); + __visc__bindIn(var_0, 1, 1, 0); + + void* var_1 = __visc__createNodeND(0, var_1_node); + + __visc__edge(var_0, var_1, 1, 0, 0, 0); + __visc__edge(var_0, var_1, 1, 1, 1, 0); + + void* var_2 = __visc__createNodeND(0, var_2_node); + + __visc__edge(var_1, var_2, 1, 0, 0, 0); + __visc__edge(var_1, var_2, 1, 1, 1, 0); + __visc__bindIn(var_2, 2, 2, 0); + __visc__bindIn(var_2, 3, 3, 0); + + void* var_3 = __visc__createNodeND(0, var_3_node); + + __visc__edge(var_2, var_3, 1, 0, 0, 0); + __visc__edge(var_2, var_3, 1, 1, 1, 0); + __visc__bindIn(var_3, 4, 2, 0); + __visc__bindIn(var_3, 5, 3, 0); + + void* var_4 = __visc__createNodeND(0, var_4_node); + + __visc__edge(var_2, var_4, 1, 0, 0, 0); + __visc__edge(var_2, var_4, 1, 1, 1, 0); + __visc__bindIn(var_4, 6, 2, 0); + __visc__bindIn(var_4, 7, 3, 0); + + void* var_5 = __visc__createNodeND(0, var_5_node); + + __visc__edge(var_3, var_5, 1, 0, 0, 0); + __visc__edge(var_3, var_5, 1, 1, 1, 0); + __visc__edge(var_4, var_5, 1, 0, 2, 0); + __visc__edge(var_4, var_5, 1, 1, 3, 0); + + void* var_6 = __visc__createNodeND(0, var_6_node); + + __visc__edge(var_5, var_6, 1, 0, 0, 0); + __visc__edge(var_5, var_6, 1, 1, 1, 0); + + void* var_7 = __visc__createNodeND(0, var_7_node); + + __visc__edge(var_6, var_7, 1, 0, 0, 0); + __visc__edge(var_6, var_7, 1, 1, 1, 0); + + void* var_8 = __visc__createNodeND(0, var_8_node); + + __visc__edge(var_5, var_8, 1, 0, 0, 0); + __visc__edge(var_5, var_8, 1, 1, 1, 0); + __visc__edge(var_7, var_8, 1, 0, 2, 0); + __visc__edge(var_7, var_8, 1, 1, 3, 0); + + __visc__bindOut(var_8, 0, 0, 0); + __visc__bindOut(var_8, 1, 1, 0); +} + +struct ret_t { + void* tensor; + size_t bytes; +}; + +struct __attribute__((__packed__)) RootIn { + void* input; + size_t input_bytes; + void* gaussian; + size_t gaussian_bytes; + void* sobel_x; + size_t sobel_x_bytes; + void* sobel_y; + size_t sobel_y_bytes; + struct ret_t r; +}; + +// Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) { +// int64_t m = (w - 1) / 2, n = (h - 1) / 2; +// auto *data = new float[w * h]; +// float sum = 0.0f; +// for (int64_t i = -m; i <= m; i++) +// for (int64_t j = -n; j <= n; j++) { +// size_t idx = (i + m) * h + (j + n); +// float exponent = -(i * i + j * j) / (2.0 * sigma * sigma); +// data[idx] = exp(exponent); +// sum += data[idx]; +// } +// if (sum != 0.0f) +// for (size_t i = 0; i < w * h; i++) +// data[i] /= sum; +// return (Tensor *) createFilterFromData(float_, data, w, h, n_chan); +// } + +// std::pair<Tensor *, Tensor *> getSobelKernels() { +// std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1}); +// std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1}); +// auto *t1 = (Tensor *) createFilterFromData(float_, k1.data(), 3, 3, 1); +// auto *t2 = (Tensor *) createFilterFromData(float_, k2.data(), 3, 3, 1); +// return std::make_pair(t1, t2); +// } + +int main(int argc, char *argv[]) { + if (argc < 3) + return 0; + std::string output_path = argv[2]; + // Tensor *image = readDataSet(argv[1]); + // Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1); + // Tensor *sobel_x, *sobel_y; + // std::tie(sobel_x, sobel_y) = getSobelKernels(); + Tensor *image = nullptr, *gaussian = nullptr, *sobel_x = nullptr, *sobel_y = nullptr; + + __visc__init(); + RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); + *args = RootIn{image, 0, gaussian, 0, sobel_x, 0, sobel_y, 0}; + + void* dfg = __visc__launch(0, root, (void*) args); + + __visc__wait(dfg); + + void *result = static_cast<RootIn*>(args)->input; + hpvm_request_tensor(result, 0); + + __visc__cleanup(); + // saveDataSet(output_path, (Tensor *) result); + // computeAccuracy2(labels, 10000, result); + return 0; +} -- GitLab