Commit 8fabc743 authored by Yifan Zhao

Removed legacy benchmarks

parent 13cea338
Showing with 0 additions and 8072 deletions
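# NOTE: $LLVM_SRC_ROOT and $CUDA_INCLUDE_PATH have to be set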
DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
# NOTE: can configure build directory
HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_dsoc/
CC = $(HPVM_BUILD_DIR)/bin/clang++
OPT = $(HPVM_BUILD_DIR)/bin/opt
LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis
LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link
LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
SRC_DIR = src
BUILD_DIR = build
APP = alexnet
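# Macro expanding to a literal newline, used to separate the commands
# emitted per module by $(foreach) in the %.opt.bc recipe below.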
define \n
endef
COMMON_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
DNN_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/dnn_sources/include
TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
TENSOR_RT_SRC_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/src
# -std=c++11 -D_GLIBCXX_USE_CXX11_ABI=0
# -I $(TENSOR_INCLUDE_DIR)
CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(DNN_INCLUDE_DIR) -I $(COMMON_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3
LINKER_FLAGS = -lpthread -lOpenCL
HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
OPTFLAGS1 = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/ReplaceIntrinsics.so -load $(HPVM_LIB_DIR)/DFG2LLVM_X86_dsoc.so -load $(HPVM_LIB_DIR)/ExtractHPVMLeafNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -replace-intrinsics -dfg2llvm-x86-dsoc -hpvm-extract-leaf-gen -clearDFG
OPTFLAGS2 = -load $(HPVM_LIB_DIR)/InlineTensorCalls.so -inline-tensor-calls
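# OPTFLAGS1 lowers the DFG to x86 with direct tensor-runtime calls and
# extracts leaf-node functions into separate modules; OPTFLAGS2 inlines
# those tensor calls once the runtime bitcode has been linked in.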
TARGET = $(BUILD_DIR)/$(APP).opt.bc
SOURCES = $(SRC_DIR)/$(APP).cpp
VISC_RT_PATH = $(LLVM_SRC_ROOT)/projects/visc-cpu-rt/visc-rt.ll
#VISC_RT_PATH = $(HPVM_BUILD_DIR)/projects/visc-cpu-rt/visc-rt.ll
.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll
default: $(BUILD_DIR) $(TARGET)

$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
	$(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $<

# -visc-timers-gen
$(BUILD_DIR)/%.visc.ll: $(BUILD_DIR)/%.ll
	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $< -S -o $@
expanded_modules := $(wildcard *_module.ll)

$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.visc.ll
	$(OPT) $(OPTFLAGS1) $< -o $@
	$(CC) -emit-llvm -c $(TENSOR_RT_SRC_DIR)/tensor_cpu_runtime.cc -o $(BUILD_DIR)/tensor_cpu_runtime.bc
	$(OPT) -always-inline $(BUILD_DIR)/tensor_cpu_runtime.bc -o $(BUILD_DIR)/tensor_cpu_runtime.bc
	$(LLVM_LINK) $@ $(shell find ./build -name "*module.ll") $(BUILD_DIR)/tensor_cpu_runtime.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_tensor_rt.bc
	$(OPT) $(OPTFLAGS2) $(BUILD_DIR)/$(APP)_tensor_rt.bc -o $(BUILD_DIR)/$(APP)_inline.bc
	$(CC) $(BUILD_DIR)/$(APP)_inline.bc -o $(BUILD_DIR)/$(APP)_final $(LINKER_FLAGS)
	$(foreach module, $(expanded_modules), $(LLVM_LINK) $(module) $(BUILD_DIR)/tensor_cpu_runtime.bc -o $(BUILD_DIR)/$(module)_linked ${\n} $(OPT) $(OPTFLAGS2) $(BUILD_DIR)/$(module)_linked -o $(BUILD_DIR)/$(module)_inline ${\n} )
$(BUILD_DIR):
	mkdir -p $@

clean:
	rm -rf $(BUILD_DIR)
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/stat.h>
#include <cstring>
#include <visc.h>
#include <utils_cpu.h>
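// HPVM/VISC source for the AlexNet CIFAR-10 benchmark (CPU target).
// Each var_N_node below wraps a single tensor intrinsic; nodes return the
// result tensor plus a byte count (unused here, hence the literal 0).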
void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 5, 5, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_2_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
void var_3_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_6_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
void var_7_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_10_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_13_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
__visc__return(2, r, (size_t) 0);
}
void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_16_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_tanh(t1);
__visc__return(2, r, (size_t) 0);
}
void var_17_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
__visc__return(2, r, (size_t) 0);
}
void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_mul(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(2, t1, t2, 0);
void *r = __visc__tensor_add(t1, t2);
__visc__return(2, r, (size_t) 0);
}
void var_20_node(void* t1, size_t bytes_t1) {
__visc__hint(visc::CPU_TARGET);
__visc__attributes(1, t1, 0);
void* r = __visc__tensor_softmax(t1);
__visc__return(2, r, (size_t) 0);
}
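// root() builds the dataflow graph. As used by the generated code below:
//   __visc__createNodeND(0, f) creates a 0-dimensional node running f;
//   __visc__bindIn(node, i, j, 0) binds root's input #i to the node's
//     input #j (each tensor argument occupies two consecutive slots:
//     the pointer and its byte size);
//   __visc__edge(src, dst, 1, p, q, 0) forwards output #p of src to
//     input #q of dst;
//   __visc__bindOut(node, p, q, 0) exposes the node's output #p as
//     root's output #q.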
void root(void* input, size_t input_bytes,
void* conv2d_1_w, size_t conv2d_1_w_bytes,
void* conv2d_1_b, size_t conv2d_1_b_bytes,
void* conv2d_2_w, size_t conv2d_2_w_bytes,
void* conv2d_2_b, size_t conv2d_2_b_bytes,
void* conv2d_3_w, size_t conv2d_3_w_bytes,
void* conv2d_3_b, size_t conv2d_3_b_bytes,
void* conv2d_4_w, size_t conv2d_4_w_bytes,
void* conv2d_4_b, size_t conv2d_4_b_bytes,
void* conv2d_5_w, size_t conv2d_5_w_bytes,
void* conv2d_5_b, size_t conv2d_5_b_bytes,
void* dense_1_w, size_t dense_1_w_bytes,
void* dense_1_b, size_t dense_1_b_bytes){
__visc__hint(visc::CPU_TARGET);
__visc__attributes(13, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, dense_1_w, dense_1_b, 0);
void* var_0 = __visc__createNodeND(0, var_0_node);
__visc__bindIn(var_0, 0, 0, 0);
__visc__bindIn(var_0, 1, 1, 0);
__visc__bindIn(var_0, 2, 2, 0);
__visc__bindIn(var_0, 3, 3, 0);
void* var_1 = __visc__createNodeND(0, var_1_node);
__visc__edge(var_0, var_1, 1, 0, 0, 0);
__visc__edge(var_0, var_1, 1, 1, 1, 0);
__visc__bindIn(var_1, 4, 2, 0);
__visc__bindIn(var_1, 5, 3, 0);
void* var_2 = __visc__createNodeND(0, var_2_node);
__visc__edge(var_1, var_2, 1, 0, 0, 0);
__visc__edge(var_1, var_2, 1, 1, 1, 0);
void* var_3 = __visc__createNodeND(0, var_3_node);
__visc__edge(var_2, var_3, 1, 0, 0, 0);
__visc__edge(var_2, var_3, 1, 1, 1, 0);
void* var_4 = __visc__createNodeND(0, var_4_node);
__visc__edge(var_3, var_4, 1, 0, 0, 0);
__visc__edge(var_3, var_4, 1, 1, 1, 0);
__visc__bindIn(var_4, 6, 2, 0);
__visc__bindIn(var_4, 7, 3, 0);
void* var_5 = __visc__createNodeND(0, var_5_node);
__visc__edge(var_4, var_5, 1, 0, 0, 0);
__visc__edge(var_4, var_5, 1, 1, 1, 0);
__visc__bindIn(var_5, 8, 2, 0);
__visc__bindIn(var_5, 9, 3, 0);
void* var_6 = __visc__createNodeND(0, var_6_node);
__visc__edge(var_5, var_6, 1, 0, 0, 0);
__visc__edge(var_5, var_6, 1, 1, 1, 0);
void* var_7 = __visc__createNodeND(0, var_7_node);
__visc__edge(var_6, var_7, 1, 0, 0, 0);
__visc__edge(var_6, var_7, 1, 1, 1, 0);
void* var_8 = __visc__createNodeND(0, var_8_node);
__visc__edge(var_7, var_8, 1, 0, 0, 0);
__visc__edge(var_7, var_8, 1, 1, 1, 0);
__visc__bindIn(var_8, 10, 2, 0);
__visc__bindIn(var_8, 11, 3, 0);
void* var_9 = __visc__createNodeND(0, var_9_node);
__visc__edge(var_8, var_9, 1, 0, 0, 0);
__visc__edge(var_8, var_9, 1, 1, 1, 0);
__visc__bindIn(var_9, 12, 2, 0);
__visc__bindIn(var_9, 13, 3, 0);
void* var_10 = __visc__createNodeND(0, var_10_node);
__visc__edge(var_9, var_10, 1, 0, 0, 0);
__visc__edge(var_9, var_10, 1, 1, 1, 0);
void* var_11 = __visc__createNodeND(0, var_11_node);
__visc__edge(var_10, var_11, 1, 0, 0, 0);
__visc__edge(var_10, var_11, 1, 1, 1, 0);
__visc__bindIn(var_11, 14, 2, 0);
__visc__bindIn(var_11, 15, 3, 0);
void* var_12 = __visc__createNodeND(0, var_12_node);
__visc__edge(var_11, var_12, 1, 0, 0, 0);
__visc__edge(var_11, var_12, 1, 1, 1, 0);
__visc__bindIn(var_12, 16, 2, 0);
__visc__bindIn(var_12, 17, 3, 0);
void* var_13 = __visc__createNodeND(0, var_13_node);
__visc__edge(var_12, var_13, 1, 0, 0, 0);
__visc__edge(var_12, var_13, 1, 1, 1, 0);
void* var_14 = __visc__createNodeND(0, var_14_node);
__visc__edge(var_13, var_14, 1, 0, 0, 0);
__visc__edge(var_13, var_14, 1, 1, 1, 0);
__visc__bindIn(var_14, 18, 2, 0);
__visc__bindIn(var_14, 19, 3, 0);
void* var_15 = __visc__createNodeND(0, var_15_node);
__visc__edge(var_14, var_15, 1, 0, 0, 0);
__visc__edge(var_14, var_15, 1, 1, 1, 0);
__visc__bindIn(var_15, 20, 2, 0);
__visc__bindIn(var_15, 21, 3, 0);
void* var_16 = __visc__createNodeND(0, var_16_node);
__visc__edge(var_15, var_16, 1, 0, 0, 0);
__visc__edge(var_15, var_16, 1, 1, 1, 0);
void* var_17 = __visc__createNodeND(0, var_17_node);
__visc__edge(var_16, var_17, 1, 0, 0, 0);
__visc__edge(var_16, var_17, 1, 1, 1, 0);
void* var_18 = __visc__createNodeND(0, var_18_node);
__visc__edge(var_17, var_18, 1, 0, 0, 0);
__visc__edge(var_17, var_18, 1, 1, 1, 0);
__visc__bindIn(var_18, 22, 2, 0);
__visc__bindIn(var_18, 23, 3, 0);
void* var_19 = __visc__createNodeND(0, var_19_node);
__visc__edge(var_18, var_19, 1, 0, 0, 0);
__visc__edge(var_18, var_19, 1, 1, 1, 0);
__visc__bindIn(var_19, 24, 2, 0);
__visc__bindIn(var_19, 25, 3, 0);
void* var_20 = __visc__createNodeND(0, var_20_node);
__visc__edge(var_19, var_20, 1, 0, 0, 0);
__visc__edge(var_19, var_20, 1, 1, 1, 0);
__visc__bindOut(var_20, 0, 0, 0);
__visc__bindOut(var_20, 1, 1, 0);
}
struct ret_t {
void* tensor;
size_t bytes;
};
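// Packed argument struct for __visc__launch; the field order must match the
// parameter list of root() exactly, followed by storage for the result.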
typedef struct __attribute__((__packed__)) {
void* input;
size_t input_bytes;
void* conv2d_1_w;
size_t conv2d_1_w_bytes;
void* conv2d_1_b;
size_t conv2d_1_b_bytes;
void* conv2d_2_w;
size_t conv2d_2_w_bytes;
void* conv2d_2_b;
size_t conv2d_2_b_bytes;
void* conv2d_3_w;
size_t conv2d_3_w_bytes;
void* conv2d_3_b;
size_t conv2d_3_b_bytes;
void* conv2d_4_w;
size_t conv2d_4_w_bytes;
void* conv2d_4_b;
size_t conv2d_4_b_bytes;
void* conv2d_5_w;
size_t conv2d_5_w_bytes;
void* conv2d_5_b;
size_t conv2d_5_b_bytes;
void* dense_1_w;
size_t dense_1_w_bytes;
void* dense_1_b;
size_t dense_1_b_bytes;
struct ret_t r;
} RootIn;
int main(){
std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/alexnet_cifar10_test/");
int input_size = 50;
std::string input_path = dir_prefix + std::string("input.bin");
void* input = readTrainedWeights(input_path.c_str(), 0, input_size,3,32,32);
std::string labels_path = dir_prefix + std::string("labels.bin");
uint8_t* labels = readLabels(labels_path.c_str(), input_size);
std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin");
void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11);
std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin");
void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1);
std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin");
void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5);
std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin");
void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1);
std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin");
void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3);
std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin");
void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1);
std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin");
void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3);
std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin");
void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1);
std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin");
void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3);
std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin");
void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1);
std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin");
void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10);
std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin");
void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1);
__visc__init();
RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
args->input = input;
args->input_bytes = 0;
args->conv2d_1_w = conv2d_1_w;
args->conv2d_1_w_bytes = 0;
args->conv2d_1_b = conv2d_1_b;
args->conv2d_1_b_bytes = 0;
args->conv2d_2_w = conv2d_2_w;
args->conv2d_2_w_bytes = 0;
args->conv2d_2_b = conv2d_2_b;
args->conv2d_2_b_bytes = 0;
args->conv2d_3_w = conv2d_3_w;
args->conv2d_3_w_bytes = 0;
args->conv2d_3_b = conv2d_3_b;
args->conv2d_3_b_bytes = 0;
args->conv2d_4_w = conv2d_4_w;
args->conv2d_4_w_bytes = 0;
args->conv2d_4_b = conv2d_4_b;
args->conv2d_4_b_bytes = 0;
args->conv2d_5_w = conv2d_5_w;
args->conv2d_5_w_bytes = 0;
args->conv2d_5_b = conv2d_5_b;
args->conv2d_5_b_bytes = 0;
args->dense_1_w = dense_1_w;
args->dense_1_w_bytes = 0;
args->dense_1_b = dense_1_b;
args->dense_1_b_bytes = 0;
void* dfg = __visc__launch(0, root, (void*) args);
__visc__wait(dfg);
void *result = static_cast<RootIn*>(args)->input;
hpvm_request_tensor(result, 0);
__visc__cleanup();
computeAccuracy2(labels, input_size, result);
return 0;
}
# NOTE: $LLVM_SRC_ROOT and $CUDA_TOOLKIT_ROOT_DIR have to be set
# HPVM_BUILD_DIR can be optionally set
DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
HPVM_BUILD_DIR ?= $(LLVM_SRC_ROOT)/../build
CCLANG ?= $(HPVM_BUILD_DIR)/bin/clang++
OPT = $(HPVM_BUILD_DIR)/bin/opt
LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis
LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link
LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
SRC_DIR = src
BUILD_DIR = build
# NOTE: Change to the name of your benchmark
APP = blend
TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
CUSTOM_LIB_PATHS = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a \
$(LLVM_SRC_ROOT)/projects/gpu_profiler/lib/libgpu_profiler.a \
$(LLVM_SRC_ROOT)/projects/soc_simulator/lib/libpromise_profiler.a
CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_TOOLKIT_ROOT_DIR)/include -ffast-math -std=c++11 -O3
CCFLAGS += -DDEVICE=CUDNN_TARGET
LINKER_FLAGS = -L $(CUDA_TOOLKIT_ROOT_DIR)/lib64 -lstdc++fs -lpthread -lcudart -lcurand -lcudnn -lcublas -lcufft
HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
CONF_FILE_PATH=$(realpath data/tuner_confs.txt)
# NOTE: Left empty because quantization ranges are not needed for the image
# benchmarks; this still needs proper handling in the WRAPPER backend.
WRAPPER_API_QUANT_FILE_PATH =
VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_WrapperAPI.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-wrapperapi -quantization-levels-filename=$(WRAPPER_API_QUANT_FILE_PATH) -configuration-inputs-filename=$(CONF_FILE_PATH) -dfg2llvm-x86 -clearDFG
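# The pipeline above fuses approximable tensor nodes (-hpvm-fuse), lowers
# them to the tensor runtime's wrapper API driven by the configurations in
# $(CONF_FILE_PATH), and lowers the remaining graph to x86.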
TARGET = $(BUILD_DIR)/$(APP).opt.bc direct
SOURCES = $(SRC_DIR)/$(APP).cpp
VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll
#OBJS = $(BUILD_DIR)/$(wildcard *.ll)
.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll
default: $(BUILD_DIR) $(TARGET)

$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
	$(CCLANG) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -S -o $(BUILD_DIR)/$(APP).ll

direct: $(SRC_DIR)/$(APP)_direct_call.cpp $(BUILD_DIR)
	$(CCLANG) $(CC_FLAGS) src/$(APP)_direct_call.cpp $(CUSTOM_LIB_PATHS) -o $(BUILD_DIR)/$(APP)_direct_call $(LINKER_FLAGS)

$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll
	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll
	$(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_wrapper.bc
	$(LLVM_LINK) $(BUILD_DIR)/$(APP)_wrapper.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_wrapper_linked.bc
	$(CCLANG) $(BUILD_DIR)/$(APP)_wrapper_linked.bc $(CUSTOM_LIB_PATHS) $(PROMISE_PROFILER_LIB_PATH) -o $(BUILD_DIR)/$(APP)_final $(LINKER_FLAGS)
$(BUILD_DIR):
	mkdir -p $@

clean:
	rm -rf $(BUILD_DIR)
7
7
7
7
7
0.0 1.0 -0.3 0.3 -0.041063767 0.031912163 0.0 1.5512946
0.0 1.5512946 -0.15580177 0.1533 -0.041385915 0.05869476 0.0 4.916329
0.0 4.916329 -0.20324017 0.18275258 -0.039915435 0.04589232 0.0 9.447418
0.0 9.447418 -0.10757191 0.123126 -0.025070198 0.027000334 0.0 9.926857
0.0 9.926857 -0.18867673 0.16425411 -0.012622595 0.04586973 0.0 42.018578
1 0.0 1.0 -0.3 0.3 -0.041063767 0.031912163 0.0 1.5512946
2 0.0 1.5512946 -0.15580177 0.1533 -0.041385915 0.05869476 0.0 4.916329
3 0.0 4.916329 -0.20324017 0.18275258 -0.039915435 0.04589232 0.0 9.447418
4 0.0 9.447418 -0.10757191 0.123126 -0.025070198 0.027000334 0.0 9.926857
5 0.0 9.926857 -0.18867673 0.16425411 -0.012622595 0.04586973 0.0 42.018578
6 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
+++++
conf1 1.5 90 1.0 2.0
1 gpu conv fp32 1
2 gpu reduce fp32 1
3 gpu reduce fp32 1
4 gpu map2 fp32 1
5 gpu reduce fp32 1
6 gpu reduce fp32 1
7 gpu map2 fp32 1
8 gpu map2 fp32 1
9 gpu conv fp32 1
10 gpu reduce fp32 1
11 gpu reduce fp32 1
12 gpu map2 fp32 1
13 gpu reduce fp32 1
14 gpu reduce fp32 1
15 gpu map2 fp32 1
16 gpu map2 fp32 1
17 gpu map2 fp32 1
-----
#include "tensor_runtime.h"
#include "visc.h"
#include <iostream>
#include <cassert>
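// Direct-call version of the image blending benchmark: invokes the tensor
// runtime's wrapper API directly rather than going through an HPVM DFG.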
const size_t n_channels = 3;
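// Returns a 5x5 binomial (Gaussian-like) convolution filter with every
// entry divided by div.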
Tensor *gaussianFilter_(float div) {
std::vector<float> gauss_data = {1, 4, 6, 4, 1, 4, 16, 24, 16,
4, 6, 24, 36, 24, 6, 4, 16, 24,
16, 4, 1, 4, 6, 4, 1};
for (float &f : gauss_data)
f /= div;
return (Tensor *)createFilterFromData(
CUDNN_DATA_FLOAT, gauss_data.data(), 5, 5, 1);
}
Tensor *gaussianFilter() { return gaussianFilter_(16.0); }
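// Divides the image by its maximum over the spatial dimensions (reductions
// over dims 2 and 3), bringing pixel values into a normalized range.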
void *normalize(size_t &id, void *image) {
auto *max_1D = wrapper_tensorReduce(
std::to_string(id++).c_str(), image, 2, (int)MathOp::Max);
auto *max = wrapper_tensorReduce(
std::to_string(id++).c_str(), max_1D, 3, (int)MathOp::Max);
auto *img_norm = wrapper_tensorMap2(
std::to_string(id++).c_str(), (int)MathOp::Div, image, max);
freeTensor(max_1D);
freeTensor(max);
return img_norm;
}
extern std::vector<size_t> sizes(Tensor *t);
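// forward_reshape folds the 3 channels into the batch dimension so that the
// single-channel Gaussian filter convolves each channel independently;
// backward_reshape restores the original layout.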
void forward_reshape(void *t) {
auto *tensor = (Tensor *)t;
std::vector<size_t> sz = sizes(tensor);
assert(sz[1] == 3);
sz[0] = sz[0] * sz[1];
sz[1] = 1;
reshape(tensor, sz);
}
void backward_reshape(void *t) {
auto *tensor = (Tensor *)t;
std::vector<size_t> sz = sizes(tensor);
assert(sz[0] % 3 == 0);
sz[0] = sz[0] / 3;
sz[1] = 3;
reshape(tensor, sz);
}
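// Unsharp-mask-style sharpening: blur with the Gaussian filter, normalize
// the blurred and original images, and combine them with a weighted add.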
void *sharpen(size_t &id, void *image) {
void *gaussian = gaussianFilter();
forward_reshape(image);
void *blurred = wrapper_ConvLayer(
std::to_string(id++).c_str(), image, gaussian, nullptr, 2, 2, 1, 1, 0, 0, -1, 0.0, 0.0);
backward_reshape(image);
backward_reshape(blurred);
void *blurred_norm = normalize(id, blurred);
void *image_norm = normalize(id, image);
void *ret = wrapper_tensorMap2(
std::to_string(id++).c_str(), (int)MathOp::AddWeighted, blurred_norm,
image_norm);
freeTensor(gaussian);
freeTensor(blurred);
freeTensor(blurred_norm);
freeTensor(image_norm);
return ret;
}
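// Sharpens the background and foreground independently, then blends them.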
void *main_procedure(void *fg, void *bg) {
size_t id = 1;
void *g_bg = sharpen(id, bg);
void *g_fg = sharpen(id, fg);
void *ret = wrapper_tensorMap2(
std::to_string(id++).c_str(), (int)MathOp::Blend2, g_bg, g_fg);
std::cout << "**********************" << id << '\n';
freeTensor(g_bg);
freeTensor(g_fg);
return ret;
}
extern void llvm_hpvm_initializeRuntimeController(
    const char *ConfigFile, const char *QRangeFile);
extern void llvm_hpvm_clearRuntimeController();
extern void llvm_hpvm_initTensorRt(int gpuid);
extern void llvm_hpvm_nextIter();
// Assumed signature, inferred from the call sites in main() below.
extern void llvm_hpvm_imgInvokeRtControl(
    void *result, void *gold, size_t start, size_t end);
const size_t batch_size = 250;
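// Usage: blend_direct_call <background> <foreground> <output> [gold output]
// Datasets are processed in batches of batch_size images; when a gold
// output set is supplied (argc > 4), each batch of it is read and passed
// to the runtime controller along with the result.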
int main(int argc, char *argv[]) {
if (argc < 4) {
    std::cerr << "Usage: " << argv[0]
              << " <background> <foreground> <output> [gold output]\n";
    return 1;
}
llvm_hpvm_initTensorRt(0);
llvm_hpvm_initializeRuntimeController("data/tuner_confs.txt", "");
startMemTracking();
startProfiling();
size_t bstart = 0;
while (true) {
auto *background = readDataSet(argv[1], bstart, batch_size, n_channels),
*foreground = readDataSet(argv[2], bstart, batch_size, n_channels);
if (!background || !foreground)
break;
auto *result = main_procedure(foreground, background);
if (argc == 4) {
saveDataSet(argv[3], (Tensor *)result, bstart);
llvm_hpvm_imgInvokeRtControl(result, nullptr, bstart, bstart + batch_size);
}
else {
saveDataSet(argv[3], (Tensor *)result, bstart, 10);
auto *gold_output = readDataSet(argv[4], bstart, batch_size, n_channels);
llvm_hpvm_imgInvokeRtControl(result, gold_output, bstart, bstart + batch_size);
}
bstart += batch_size;
freeBatchMemory();
clearTensorMap();
}
stopProfiling();
llvm_hpvm_clearRuntimeController();
}
# NOTE: $LLVM_SRC_ROOT and $CUDA_TOOLKIT_ROOT_DIR have to be set
# HPVM_BUILD_DIR can be optionally set
DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
HPVM_BUILD_DIR ?= $(LLVM_SRC_ROOT)/../build
CCLANG ?= $(HPVM_BUILD_DIR)/bin/clang++
OPT = $(HPVM_BUILD_DIR)/bin/opt
LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis
LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link
LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
SRC_DIR = src
BUILD_DIR = build
# NOTE: Change to the name of your benchmark
APP = canny
TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
CUSTOM_LIB_PATHS = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a \
$(LLVM_SRC_ROOT)/projects/gpu_profiler/lib/libgpu_profiler.a \
$(LLVM_SRC_ROOT)/projects/soc_simulator/lib/libpromise_profiler.a
CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_TOOLKIT_ROOT_DIR)/include -ffast-math -std=c++11 -O3
CCFLAGS += -DDEVICE=CUDNN_TARGET
LINKER_FLAGS = -L $(CUDA_TOOLKIT_ROOT_DIR)/lib64 -lstdc++fs -lpthread -lcudart -lcurand -lcudnn -lcublas -lcufft
HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
CONF_FILE_PATH=$(realpath data/tuner_confs.txt)
# NOTE: Left empty because quantization ranges are not needed for the image
# benchmarks; this still needs proper handling in the WRAPPER backend.
WRAPPER_API_QUANT_FILE_PATH =
VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_WrapperAPI.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-wrapperapi -quantization-levels-filename=$(WRAPPER_API_QUANT_FILE_PATH) -configuration-inputs-filename=$(CONF_FILE_PATH) -dfg2llvm-x86 -clearDFG
TARGET = $(BUILD_DIR)/$(APP).opt.bc direct
SOURCES = $(SRC_DIR)/$(APP).cpp
VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll
#OBJS = $(BUILD_DIR)/$(wildcard *.ll)
.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll
default: $(BUILD_DIR) $(TARGET)

$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
	$(CCLANG) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -S -o $(BUILD_DIR)/$(APP).ll

direct: $(SRC_DIR)/$(APP)_direct_call.cpp $(BUILD_DIR)
	$(CCLANG) $(CC_FLAGS) src/$(APP)_direct_call.cpp $(CUSTOM_LIB_PATHS) -o $(BUILD_DIR)/$(APP)_direct_call $(LINKER_FLAGS)

autotuner: $(SRC_DIR)/$(APP)_autotuner.cpp $(BUILD_DIR)
	$(CCLANG) $(CC_FLAGS) src/$(APP)_autotuner.cpp $(CUSTOM_LIB_PATHS) -o $(BUILD_DIR)/$(APP)_autotuner $(LINKER_FLAGS)

$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll
	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll
	$(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_wrapper.bc
	$(LLVM_LINK) $(BUILD_DIR)/$(APP)_wrapper.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_wrapper_linked.bc
	$(CCLANG) $(BUILD_DIR)/$(APP)_wrapper_linked.bc $(CUSTOM_LIB_PATHS) $(PROMISE_PROFILER_LIB_PATH) -o $(BUILD_DIR)/$(APP)_final $(LINKER_FLAGS)
$(BUILD_DIR):
	mkdir -p $@

clean:
	rm -rf $(BUILD_DIR)
7
7
7
7
7