Skip to content
Snippets Groups Projects
Commit 95620684 authored by Yifan Zhao's avatar Yifan Zhao
Browse files

Canny benchmark, testing usage

parent 7f358651
No related branches found
No related tags found
No related merge requests found
# NOTE: $LLVM_SRC_ROOT and $HPVM_BUILD_ROOT have to be set
DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/build
CC = $(HPVM_BUILD_DIR)/bin/clang++
OPT = $(HPVM_BUILD_DIR)/bin/opt
LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis
LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link
LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
SRC_DIR = src
BUILD_DIR = build
# NOTE: Change to the name of your benchmark
APP = canny_test
TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
TENSOR_LIB_PATH = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a
CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3
CCFLAGS += -DDEVICE=CUDNN_TARGET
LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL
HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
# FIXME:
CONF_FILE_PATH=
# NOTE: Needs proper handling in the WRAPPER backend because Quant range not needed for IMAGE Benchmarks
WRAPPER_API_QUANT_FILE_PATH=
VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_WrapperAPI.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-wrapperapi -quantization-levels-filename=$(WRAPPER_API_QUANT_FILE_PATH) -configuration-inputs-filename=$(CONF_FILE_PATH) -dfg2llvm-x86 -clearDFG
TARGET = $(BUILD_DIR)/$(APP).opt.bc
SOURCES = $(SRC_DIR)/$(APP).cpp
VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll
#OBJS = $(BUILD_DIR)/$(wildcabrd *.ll)
.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll
default: $(BUILD_DIR) $(TARGET)
$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
$(CC) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -c -o $(BUILD_DIR)/$(APP).ll
$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll
$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll
$(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_wrapper.bc
$(LLVM_LINK) $(BUILD_DIR)/$(APP)_wrapper.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_wrapper_linked.bc
$(CC) $(BUILD_DIR)/$(APP)_wrapper_linked.bc $(TENSOR_LIB_PATH) -o $(BUILD_DIR)/$(APP)_final $(LINKER_FLAGS)
$(BUILD_DIR):
mkdir -p $@
clean:
rm -rf $(BUILD_DIR)
#!/bin/bash
# CUDNN Path setup
module load cuda-toolkit/9.1
export CUDA_INCLUDE_PATH=/software/cuda-9.1/include
export CUDNN_PATH=/software/cuda-9.1/lib64/
export LIBRARY_PATH=/software/cuda-9.1/lib64/:$LIBRARY_PATH
export LD_LIBRARY_PATH=/software/cuda-9.1/lib64/:$LD_LIBRARY_PATH
# HPVM Path setup
export CPATH=$CPATH:/home/hsharif3/anaconda2/include/
export PATH=/home/hsharif3/Gitlab/hpvm/build/bin/:$PATH
export LLVM_BUILD_ROOT=/home/hsharif3/Gitlab/hpvm/build/
export LLVM_SRC_ROOT=/home/hsharif3/Gitlab/hpvm/llvm/
7
7
7
7
7
\ No newline at end of file
0.0 1.0 -0.3 0.3 -0.041063767 0.031912163 0.0 1.5512946
0.0 1.5512946 -0.15580177 0.1533 -0.041385915 0.05869476 0.0 4.916329
0.0 4.916329 -0.20324017 0.18275258 -0.039915435 0.04589232 0.0 9.447418
0.0 9.447418 -0.10757191 0.123126 -0.025070198 0.027000334 0.0 9.926857
0.0 9.926857 -0.18867673 0.16425411 -0.012622595 0.04586973 0.0 42.018578
1 0.0 1.0 -0.3 0.3 -0.041063767 0.031912163 0.0 1.5512946
2 0.0 1.5512946 -0.15580177 0.1533 -0.041385915 0.05869476 0.0 4.916329
3 0.0 4.916329 -0.20324017 0.18275258 -0.039915435 0.04589232 0.0 9.447418
4 0.0 9.447418 -0.10757191 0.123126 -0.025070198 0.027000334 0.0 9.926857
5 0.0 9.926857 -0.18867673 0.16425411 -0.012622595 0.04586973 0.0 42.018578
6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
+++++
conf1 1.5 90
1 promise swing_level 5
2 promise swing_level 7
3 gpu conv fp16 1 add fp16 1 relu fp16 1
4 gpu mul fp16 1 add fp32 1 relu fp32 1
5 gpu mul fp16 1 add fp16 1 relu fp16 1
6 gpu softmax fp32 1
-----
+++++
conf1 1.5 90
1 promise swing_level 5
2 gpu conv fp16 2 add fp16 1 relu fp16 1
3 gpu conv fp16 1 add fp16 1 relu fp16 1
4 gpu mul fp16 1 add fp32 1 relu fp32 1
5 gpu mul fp16 1 add fp16 1 relu fp16 1
6 gpu softmax fp32 1
-----
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <visc.h>
#include <tensorTypes.h>
#include <tensorUtils.h>
// __device__ float avg(float x) {
// return __fdividef(x, 3.0f);
// }
// __device__ void *avg_ptr = (void *) avg;
/* 0. Grayscale */
void var_0_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::GPU_TARGET);
__visc__attributes(1, t1, 0);
void *r = __visc__tensor_reduce(t1, 1, nullptr); //device::fadd_ptrptr);
__visc__return(2, r, (size_t) 0);
}
void var_1_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::GPU_TARGET);
__visc__attributes(1, t1, 0);
void *r = __visc__tensor_map1(nullptr, t1); // (void *) &avg_ptr
__visc__return(2, r, (size_t) 0);
}
/* 1. Denoise */
void var_2_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1, 1, 0);
__visc__return(2, r, (size_t) 0);
}
/* 2. Get edge gradient / direction */
void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1, 1, 0);
__visc__return(2, r, (size_t) 0);
}
void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1, 1, 0);
__visc__return(2, r, (size_t) 0);
}
void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fhypot_ptrptr
__visc__return(2, r, (size_t) 0);
}
/* 3. Normalize grad magnitude */
void var_6_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::GPU_TARGET);
__visc__attributes(1, t1, 0);
void *r = __visc__tensor_reduce(t1, 2, nullptr); // device::fmax_ptrptr
__visc__return(2, r, (size_t) 0);
}
void var_7_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::GPU_TARGET);
__visc__attributes(1, t1, 0);
void *r = __visc__tensor_reduce(t1, 3, nullptr); // device::fmax_ptrptr
__visc__return(2, r, (size_t) 0);
}
void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::PROMISE_TARGET);
__visc__attributes(2, t1, t2, 0);
void* r = __visc__tensor_map2(nullptr, t1, t2); // device::fdiv_ptrptr
__visc__return(2, r, (size_t) 0);
}
void root(
void* input, size_t input_bytes,
void* gaussian, size_t gaussian_bytes,
void* sobel_x, size_t sobel_x_bytes,
void* sobel_y, size_t sobel_y_bytes
){
__visc__hint(visc::CPU_TARGET);
__visc__attributes(8, input, input_bytes, gaussian, gaussian_bytes, sobel_x, sobel_x_bytes, sobel_y, sobel_y_bytes, 0);
void* var_0 = __visc__createNodeND(0, var_0_node);
__visc__bindIn(var_0, 0, 0, 0);
__visc__bindIn(var_0, 1, 1, 0);
void* var_1 = __visc__createNodeND(0, var_1_node);
__visc__edge(var_0, var_1, 1, 0, 0, 0);
__visc__edge(var_0, var_1, 1, 1, 1, 0);
void* var_2 = __visc__createNodeND(0, var_2_node);
__visc__edge(var_1, var_2, 1, 0, 0, 0);
__visc__edge(var_1, var_2, 1, 1, 1, 0);
__visc__bindIn(var_2, 2, 2, 0);
__visc__bindIn(var_2, 3, 3, 0);
void* var_3 = __visc__createNodeND(0, var_3_node);
__visc__edge(var_2, var_3, 1, 0, 0, 0);
__visc__edge(var_2, var_3, 1, 1, 1, 0);
__visc__bindIn(var_3, 4, 2, 0);
__visc__bindIn(var_3, 5, 3, 0);
void* var_4 = __visc__createNodeND(0, var_4_node);
__visc__edge(var_2, var_4, 1, 0, 0, 0);
__visc__edge(var_2, var_4, 1, 1, 1, 0);
__visc__bindIn(var_4, 6, 2, 0);
__visc__bindIn(var_4, 7, 3, 0);
void* var_5 = __visc__createNodeND(0, var_5_node);
__visc__edge(var_3, var_5, 1, 0, 0, 0);
__visc__edge(var_3, var_5, 1, 1, 1, 0);
__visc__edge(var_4, var_5, 1, 0, 2, 0);
__visc__edge(var_4, var_5, 1, 1, 3, 0);
void* var_6 = __visc__createNodeND(0, var_6_node);
__visc__edge(var_5, var_6, 1, 0, 0, 0);
__visc__edge(var_5, var_6, 1, 1, 1, 0);
void* var_7 = __visc__createNodeND(0, var_7_node);
__visc__edge(var_6, var_7, 1, 0, 0, 0);
__visc__edge(var_6, var_7, 1, 1, 1, 0);
void* var_8 = __visc__createNodeND(0, var_8_node);
__visc__edge(var_5, var_8, 1, 0, 0, 0);
__visc__edge(var_5, var_8, 1, 1, 1, 0);
__visc__edge(var_7, var_8, 1, 0, 2, 0);
__visc__edge(var_7, var_8, 1, 1, 3, 0);
__visc__bindOut(var_8, 0, 0, 0);
__visc__bindOut(var_8, 1, 1, 0);
}
struct ret_t {
void* tensor;
size_t bytes;
};
struct __attribute__((__packed__)) RootIn {
void* input;
size_t input_bytes;
void* gaussian;
size_t gaussian_bytes;
void* sobel_x;
size_t sobel_x_bytes;
void* sobel_y;
size_t sobel_y_bytes;
struct ret_t r;
};
// Tensor *gaussianFilter(float sigma, size_t w, size_t h, size_t n_chan) {
// int64_t m = (w - 1) / 2, n = (h - 1) / 2;
// auto *data = new float[w * h];
// float sum = 0.0f;
// for (int64_t i = -m; i <= m; i++)
// for (int64_t j = -n; j <= n; j++) {
// size_t idx = (i + m) * h + (j + n);
// float exponent = -(i * i + j * j) / (2.0 * sigma * sigma);
// data[idx] = exp(exponent);
// sum += data[idx];
// }
// if (sum != 0.0f)
// for (size_t i = 0; i < w * h; i++)
// data[i] /= sum;
// return (Tensor *) createFilterFromData(float_, data, w, h, n_chan);
// }
// std::pair<Tensor *, Tensor *> getSobelKernels() {
// std::vector<float> k1({-1, 0, 1, -2, 0, 2, -1, 0, 1});
// std::vector<float> k2({1, 2, 1, 0, 0, 0, -1, -2, -1});
// auto *t1 = (Tensor *) createFilterFromData(float_, k1.data(), 3, 3, 1);
// auto *t2 = (Tensor *) createFilterFromData(float_, k2.data(), 3, 3, 1);
// return std::make_pair(t1, t2);
// }
int main(int argc, char *argv[]) {
if (argc < 3)
return 0;
std::string output_path = argv[2];
// Tensor *image = readDataSet(argv[1]);
// Tensor *gaussian = gaussianFilter(1.4, 5, 5, 1);
// Tensor *sobel_x, *sobel_y;
// std::tie(sobel_x, sobel_y) = getSobelKernels();
Tensor *image = nullptr, *gaussian = nullptr, *sobel_x = nullptr, *sobel_y = nullptr;
__visc__init();
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
*args = RootIn{image, 0, gaussian, 0, sobel_x, 0, sobel_y, 0};
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
__visc__cleanup();
// saveDataSet(output_path, (Tensor *) result);
// computeAccuracy2(labels, 10000, result);
return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment