From 7183547b1862b6f80dcfc18a67745f262092bb9e Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Thu, 14 Feb 2019 20:29:19 -0600
Subject: [PATCH] Porting Alexnet in the ApproxIR representation - model matching

---
 .../benchmarks/alexnet/Makefile        |  57 ++
 .../benchmarks/alexnet/src/alexnet.cpp | 556 ++++++++++++++++++
 2 files changed, 613 insertions(+)
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile
 create mode 100644 llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp

diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile
new file mode 100644
index 0000000000..76f4b47917
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile
@@ -0,0 +1,57 @@
+DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks
+# NOTE: can configure build directory
+HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_new/
+
+CC = $(HPVM_BUILD_DIR)/bin/clang++
+OPT = $(HPVM_BUILD_DIR)/bin/opt
+LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis
+LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link
+LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include
+
+
+SRC_DIR = src
+BUILD_DIR = build
+APP = alexnet
+
+TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include
+TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include
+TENSOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a
+TENSOR_AUTOTUNER_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_autotuner.a
+
+CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3
+CC_FLAGS += -DDEVICE=CUDNN_TARGET
+LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL
+
+HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib
+# To use a different build tree, run: make HPVM_LIB_DIR=/path/to/build/lib
+
+
+VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG
+
+TARGET = $(BUILD_DIR)/$(APP).opt.bc
+SOURCES = $(SRC_DIR)/$(APP).cpp
+VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll
+
+#OBJS = $(BUILD_DIR)/$(wildcard *.ll)
+.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll
+default: $(BUILD_DIR) $(TARGET)
+
+
+$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp
+	$(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $<
+
+#-visc-timers-gen
+$(BUILD_DIR)/%.visc.ll: $(BUILD_DIR)/%.ll
+	$(OPT) -load LLVMGenVISC.so -genvisc -globaldce $< -S -o $@
+
+$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.visc.ll
+	$(OPT) $(VISC_OPTFLAGS) $< -o $@
+	$(LLVM_LINK) $@ $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_linked.bc
+	$(CC) $(BUILD_DIR)/$(APP)_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_linked $(LINKER_FLAGS)
+	$(CC) $(BUILD_DIR)/$(APP)_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/$(APP)_tune $(LINKER_FLAGS)
+
+$(BUILD_DIR):
+	mkdir -p $@
+
+clean:
+	rm -rf $(BUILD_DIR)
diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp
new file mode 100644
index 0000000000..a8cab99d52
--- /dev/null
+++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp
@@ -0,0 +1,556 @@
+
+// AlexNet for CIFAR-10 expressed as a VISC dataflow graph: one leaf node per
+// tensor operation, wired together in root() and launched from main().
+#include <iostream>
+#include <cstdio>
+#include <cstring>
+#include <cinttypes>
+#include <cstdlib>
+#include <string>
+#include <visc.h>
+#include <tensorTypes.h>
+#include <tensorUtils.h>
+
+using namespace std;
+
+
+
+// Leaf-node convention for every tensor*Node* function below: each tensor
+// pointer is paired with a byte-size parameter, and __visc__return hands back
+// the result tensor together with a size of 0.
+// NOTE(review): the integer arguments are presumably (pad_h, pad_w, stride_h,
+// stride_w) for convolution and (window, padding, stride) pairs for max-pool -
+// confirm against the runtime API.
+/* DNN Layer 1: convolution -> bias add -> tanh -> max-pool (order set in root) **/
+void tensorConvNode1(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void *r = __visc__tensor_convolution(t1, t2, 5, 5, 1, 1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+
+void tensorAddNode1(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void* r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+
+void tensorPoolNode1(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode1(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_tanh(t1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 1 **/
+
+
+
+/* DNN Layer 2: convolution -> bias add -> tanh -> max-pool (order set in root) **/
+void tensorConvNode2(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  // t1: activations from the previous node, t2: this layer's weights
+  void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+
+void tensorAddNode2(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void* r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorPoolNode2(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode2(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_tanh(t1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 2 **/
+
+
+/* DNN Layer 3: convolution -> bias add -> tanh (order set in root) **/
+void tensorConvNode3(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  // t1: activations from the previous node, t2: this layer's weights
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+// TODO: consider reusing the add, tanh, and pool node functions across layers.
+void tensorAddNode3(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void* r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode3(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_tanh(t1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 3 **/
+
+
+/* DNN Layer 4: convolution -> bias add -> tanh (order set in root) **/
+void tensorConvNode4(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  // t1: activations from the previous node, t2: this layer's weights
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+// TODO: consider reusing the add, tanh, and pool node functions across layers.
+void tensorAddNode4(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void* r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode4(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_tanh(t1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 4 **/
+
+
+
+/*** DNN Layer 5: convolution -> bias add -> tanh -> max-pool (order set in root) ****/
+void tensorConvNode5(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+
+void tensorAddNode5(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void* r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorPoolNode5(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorTanhNode5(void *t1, size_t bytest1) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(1, t1, 0);
+
+  void* r = __visc__tensor_tanh(t1);
+  __visc__return(2, r, (size_t) 0);
+}
+
+/** End of Layer 5 **/
+
+
+/***** DNN Layer 6 - FC layer: tensor mul -> bias add (order set in root) ****/
+void tensorMulNode6(void *t1, size_t bytes1, void *t2, size_t bytes2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  // t1: activations from the previous node, t2: this layer's weights
+  void *r = __visc__tensor_mul(t1, t2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+void tensorAddNode6(void *t1, size_t bytest1, void *t2, size_t bytest2) {
+  __visc__hint(visc::CUDNN_TARGET);
+  __visc__attributes(2, t1, t2, 0);
+
+  void* r = __visc__tensor_add(t1, t2);
+  __visc__return(2, r, (size_t) 0);
+}
+
+
+/** End of Layer 6 **/
+
+
+
+
+// Root node of the graph. Parameters come in (tensor pointer, byte size) pairs,
+// so the k-th pair occupies root argument indices 2k and 2k+1 in __visc__bindIn.
+// Activations travel between nodes over edges into ports 0/1, while each
+// layer's weights or biases are bound from the root arguments into ports 2/3.
+void root(void *x, size_t x_bytes,
+          void *conv1_w, size_t conv1_w_bytes,
+          void *conv1_b, size_t conv1_b_bytes,
+          void *conv2_w, size_t conv2_w_bytes,
+          void *conv2_b, size_t conv2_b_bytes,
+          void *conv3_w, size_t conv3_w_bytes,
+          void *conv3_b, size_t conv3_b_bytes,
+          void *conv4_w, size_t conv4_w_bytes,
+          void *conv4_b, size_t conv4_b_bytes,
+          void *conv5_w, size_t conv5_w_bytes,
+          void *conv5_b, size_t conv5_b_bytes,
+          void *fc1_w, size_t fc1_w_bytes,
+          void *fc1_b, size_t fc1_b_bytes){
+
+  __visc__hint(visc::CPU_TARGET);
+  __visc__attributes(13, x, conv1_w, conv1_b, conv2_w, conv2_b, conv3_w, conv3_b,
+                     conv4_w, conv4_b, conv5_w, conv5_b, fc1_w, fc1_b, 0);
+
+  // Conv1 Nodes
+  void *nodeConv1 = __visc__createNodeND(0, tensorConvNode1);
+  void *nodeAdd1 = __visc__createNodeND(0, tensorAddNode1);
+  void *nodeTanh1 = __visc__createNodeND(0, tensorTanhNode1);
+  void *nodePool1 = __visc__createNodeND(0, tensorPoolNode1);
+  // Conv2 Nodes
+  void *nodeConv2 = __visc__createNodeND(0, tensorConvNode2);
+  void *nodeAdd2 = __visc__createNodeND(0, tensorAddNode2);
+  void *nodeTanh2 = __visc__createNodeND(0, tensorTanhNode2);
+  void *nodePool2 = __visc__createNodeND(0, tensorPoolNode2);
+  // Conv3 Nodes
+  void *nodeConv3 = __visc__createNodeND(0, tensorConvNode3);
+  void *nodeAdd3 = __visc__createNodeND(0, tensorAddNode3);
+  void *nodeTanh3 = __visc__createNodeND(0, tensorTanhNode3);
+  // Conv4 Nodes
+  void *nodeConv4 = __visc__createNodeND(0, tensorConvNode4);
+  void *nodeAdd4 = __visc__createNodeND(0, tensorAddNode4);
+  void *nodeTanh4 = __visc__createNodeND(0, tensorTanhNode4);
+  // Conv5 Nodes
+  void *nodeConv5 = __visc__createNodeND(0, tensorConvNode5);
+  void *nodeAdd5 = __visc__createNodeND(0, tensorAddNode5);
+  void *nodeTanh5 = __visc__createNodeND(0, tensorTanhNode5);
+  void *nodePool5 = __visc__createNodeND(0, tensorPoolNode5);
+  // FC1 Nodes
+  void *nodeMul6 = __visc__createNodeND(0, tensorMulNode6);
+  void *nodeAdd6 = __visc__createNodeND(0, tensorAddNode6);
+
+  //***** Conv Layer 1 *******/
+  // node, src, dst, stream
+  __visc__bindIn(nodeConv1, 0, 0, 0);
+  __visc__bindIn(nodeConv1, 1, 1, 0);
+  __visc__bindIn(nodeConv1, 2, 2, 0);
+  __visc__bindIn(nodeConv1, 3, 3, 0);
+
+  // node, node, type, src, dst, stream
+  __visc__edge(nodeConv1, nodeAdd1, 1, 0, 0, 0);
+  __visc__edge(nodeConv1, nodeAdd1, 1, 1, 1, 0);
+
+  // parent_index, dest_index, bind_type
+  __visc__bindIn(nodeAdd1, 4, 2, 0);
+  __visc__bindIn(nodeAdd1, 5, 3, 0);
+
+  // node, node, type, src, dst, stream
+  __visc__edge(nodeAdd1, nodeTanh1, 1, 0, 0, 0);
+  __visc__edge(nodeAdd1, nodeTanh1, 1, 1, 1, 0);
+
+  // node, node, type, src, dst, stream
+  __visc__edge(nodeTanh1, nodePool1, 1, 0, 0, 0);
+  __visc__edge(nodeTanh1, nodePool1, 1, 1, 1, 0);
+
+
+  /**** Conv Layer 2 ****/
+  // ConvOp2
+  __visc__bindIn(nodeConv2, 6, 2, 0);
+  __visc__bindIn(nodeConv2, 7, 3, 0);
+
+  __visc__edge(nodePool1, nodeConv2, 1, 0, 0, 0);
+  __visc__edge(nodePool1, nodeConv2, 1, 1, 1, 0);
+
+  // AddOp2
+  __visc__bindIn(nodeAdd2, 8, 2, 0);
+  __visc__bindIn(nodeAdd2, 9, 3, 0);
+
+  __visc__edge(nodeConv2, nodeAdd2, 1, 0, 0, 0);
+  __visc__edge(nodeConv2, nodeAdd2, 1, 1, 1, 0);
+
+  // TanhOp2
+  __visc__edge(nodeAdd2, nodeTanh2, 1, 0, 0, 0);
+  __visc__edge(nodeAdd2, nodeTanh2, 1, 1, 1, 0);
+
+  // PoolOp2
+  __visc__edge(nodeTanh2, nodePool2, 1, 0, 0, 0);
+  __visc__edge(nodeTanh2, nodePool2, 1, 1, 1, 0);
+
+
+  /**** Conv Layer 3 ****/
+  // ConvOp3
+  __visc__bindIn(nodeConv3, 10, 2, 0);
+  __visc__bindIn(nodeConv3, 11, 3, 0);
+
+  __visc__edge(nodePool2, nodeConv3, 1, 0, 0, 0);
+  __visc__edge(nodePool2, nodeConv3, 1, 1, 1, 0);
+
+  // AddOp3
+  __visc__bindIn(nodeAdd3, 12, 2, 0);
+  __visc__bindIn(nodeAdd3, 13, 3, 0);
+
+  __visc__edge(nodeConv3, nodeAdd3, 1, 0, 0, 0);
+  __visc__edge(nodeConv3, nodeAdd3, 1, 1, 1, 0);
+
+  // TanhOp3
+  __visc__edge(nodeAdd3, nodeTanh3, 1, 0, 0, 0);
+  __visc__edge(nodeAdd3, nodeTanh3, 1, 1, 1, 0);
+
+
+  /**** Conv Layer 4 ****/
+  // ConvOp4
+  __visc__bindIn(nodeConv4, 14, 2, 0);
+  __visc__bindIn(nodeConv4, 15, 3, 0);
+
+  __visc__edge(nodeTanh3, nodeConv4, 1, 0, 0, 0);
+  __visc__edge(nodeTanh3, nodeConv4, 1, 1, 1, 0);
+
+  // AddOp4
+  __visc__bindIn(nodeAdd4, 16, 2, 0);
+  __visc__bindIn(nodeAdd4, 17, 3, 0);
+
+  __visc__edge(nodeConv4, nodeAdd4, 1, 0, 0, 0);
+  __visc__edge(nodeConv4, nodeAdd4, 1, 1, 1, 0);
+
+  // TanhOp4
+  __visc__edge(nodeAdd4, nodeTanh4, 1, 0, 0, 0);
+  __visc__edge(nodeAdd4, nodeTanh4, 1, 1, 1, 0);
+
+
+  /**** Conv Layer 5 ****/
+  // ConvOp5
+  __visc__bindIn(nodeConv5, 18, 2, 0);
+  __visc__bindIn(nodeConv5, 19, 3, 0);
+
+  __visc__edge(nodeTanh4, nodeConv5, 1, 0, 0, 0);
+  __visc__edge(nodeTanh4, nodeConv5, 1, 1, 1, 0);
+
+  // AddOp5
+  __visc__bindIn(nodeAdd5, 20, 2, 0);
+  __visc__bindIn(nodeAdd5, 21, 3, 0);
+
+  __visc__edge(nodeConv5, nodeAdd5, 1, 0, 0, 0);
+  __visc__edge(nodeConv5, nodeAdd5, 1, 1, 1, 0);
+
+  // TanhOp5
+  __visc__edge(nodeAdd5, nodeTanh5, 1, 0, 0, 0);
+  __visc__edge(nodeAdd5, nodeTanh5, 1, 1, 1, 0);
+
+  // PoolOp5
+  __visc__edge(nodeTanh5, nodePool5, 1, 0, 0, 0);
+  __visc__edge(nodeTanh5, nodePool5, 1, 1, 1, 0);
+
+
+
+  /**** FC Layer 1 ****/
+  // MulOp6
+  __visc__bindIn(nodeMul6, 22, 2, 0);
+  __visc__bindIn(nodeMul6, 23, 3, 0);
+
+  __visc__edge(nodePool5, nodeMul6, 1, 0, 0, 0);
+  __visc__edge(nodePool5, nodeMul6, 1, 1, 1, 0);
+
+  // AddOp6
+  __visc__bindIn(nodeAdd6, 24, 2, 0);
+  __visc__bindIn(nodeAdd6, 25, 3, 0);
+
+  __visc__edge(nodeMul6, nodeAdd6, 1, 0, 0, 0);
+  __visc__edge(nodeMul6, nodeAdd6, 1, 1, 1, 0);
+
+  /***** Output Binding ****/
+  __visc__bindOut(nodeAdd6, 0, 0, 0);
+  __visc__bindOut(nodeAdd6, 1, 1, 0);
+
+}
+
+
+// Return type for the nodes
+struct ret_t {
+  void *tensor;
+  size_t bytes;
+};
+
+// Packed argument block passed to __visc__launch: the fields mirror root()'s
+// parameter list in order, followed by a ret_t slot (presumably the graph output).
+typedef struct __attribute__((__packed__)) {
+  void *x;
+  size_t x_bytes;
+  // 1st Layer parameters
+  void* conv1_w;
+  size_t conv1_w_bytes;
+  void* conv1_b;
+  size_t conv1_b_bytes;
+  // 2nd Layer parameters
+  void* conv2_w;
+  size_t conv2_w_bytes;
+  void* conv2_b;
+  size_t conv2_b_bytes;
+  // 3rd Layer parameters
+  void* conv3_w;
+  size_t conv3_w_bytes;
+  void* conv3_b;
+  size_t conv3_b_bytes;
+  // 4th Layer parameters
+  void* conv4_w;
+  size_t conv4_w_bytes;
+  void* conv4_b;
+  size_t conv4_b_bytes;
+  // 5th Layer parameters
+  void* conv5_w;
+  size_t conv5_w_bytes;
+  void* conv5_b;
+  size_t conv5_b_bytes;
+  // 6th Layer parameters
+  void* fc1_w;
+  size_t fc1_w_bytes;
+  void* fc1_b;
+  size_t fc1_b_bytes;
+
+  struct ret_t r;
+}
+RootIn;
+
+// Loads the CIFAR-10 inputs, labels and trained AlexNet parameters, launches
+// the graph once on the whole test batch, and hands the output to computeAccuracy2.
+int main() {
+
+  int test_batch_size = 5000;
+  std::string prefix = "../../../../../../projects/hpvm-tensor-rt/model_params";
+
+  std::string input_data_path = prefix + std::string("/alexnet_cifar10/norm_cifar_input.bin");
+  std::string labels_path = prefix + std::string("/alexnet_cifar10/test_labels.bin");
+
+  std::string conv1_w_path = prefix + std::string("/alexnet_cifar10/conv1.bin");
+  std::string conv1_b_path = prefix + std::string("/alexnet_cifar10/conv1_bias.bin");
+
+  std::string conv2_w_path = prefix + std::string("/alexnet_cifar10/conv2.bin");
+  std::string conv2_b_path = prefix + std::string("/alexnet_cifar10/conv2_bias.bin");
+
+  std::string conv3_w_path = prefix + std::string("/alexnet_cifar10/conv3.bin");
+  std::string conv3_b_path = prefix + std::string("/alexnet_cifar10/conv3_bias.bin");
+
+  std::string conv4_w_path = prefix + std::string("/alexnet_cifar10/conv4.bin");
+  std::string conv4_b_path = prefix + std::string("/alexnet_cifar10/conv4_bias.bin");
+
+  std::string conv5_w_path = prefix + std::string("/alexnet_cifar10/conv5.bin");
+  std::string conv5_b_path = prefix + std::string("/alexnet_cifar10/conv5_bias.bin");
+
+  std::string fc1_w_path = prefix + std::string("/alexnet_cifar10/fc1.bin");
+  std::string fc1_b_path = prefix + std::string("/alexnet_cifar10/fc1_bias.bin");
+
+  printf("Reading Input Data from = %s \n", input_data_path.c_str());
+
+  uint8_t* labels = readLabels(labels_path.c_str(), test_batch_size);
+  void* x = readTrainedWeights(input_data_path.c_str(), float_type,
+                               test_batch_size, 3, 32, 32);
+
+  // Loading convolution weights
+  void* conv1_w = readTrainedWeights(conv1_w_path.c_str(), float_type, 64, 3, 11, 11);
+  void* conv1_b = readTrainedWeights(conv1_b_path.c_str(), float_type, 1, 64, 1, 1);
+  void* conv2_w = readTrainedWeights(conv2_w_path.c_str(), float_type, 192, 64, 5, 5);
+  void* conv2_b = readTrainedWeights(conv2_b_path.c_str(), float_type, 1, 192, 1, 1);
+  void* conv3_w = readTrainedWeights(conv3_w_path.c_str(), float_type, 384, 192, 3, 3);
+  void* conv3_b = readTrainedWeights(conv3_b_path.c_str(), float_type, 1, 384, 1, 1);
+  void* conv4_w = readTrainedWeights(conv4_w_path.c_str(), float_type, 256, 384, 3, 3);
+  void* conv4_b = readTrainedWeights(conv4_b_path.c_str(), float_type, 1, 256, 1, 1);
+  void* conv5_w = readTrainedWeights(conv5_w_path.c_str(), float_type, 256, 256, 3, 3);
+  void* conv5_b = readTrainedWeights(conv5_b_path.c_str(), float_type, 1, 256, 1, 1);
+
+  // Loading FC layer weights
+  void* fc1_w = readTrainedWeights(fc1_w_path.c_str(), float_type, 1, 1, 4096, 10);
+  void* fc1_b = readTrainedWeights(fc1_b_path.c_str(), float_type, 1, 10, 1, 1);
+
+  __visc__init();
+
+  RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn)));
+  args->x = x;
+  args->x_bytes = 0;
+  // Conv Layers params
+  args->conv1_w = conv1_w;
+  args->conv1_w_bytes = 0;
+  args->conv1_b = conv1_b;
+  args->conv1_b_bytes = 0;
+
+  args->conv2_w = conv2_w;
+  args->conv2_w_bytes = 0;
+  args->conv2_b = conv2_b;
+  args->conv2_b_bytes = 0;
+
+  args->conv3_w = conv3_w;
+  args->conv3_w_bytes = 0;
+  args->conv3_b = conv3_b;
+  args->conv3_b_bytes = 0;
+
+  args->conv4_w = conv4_w;
+  args->conv4_w_bytes = 0;
+  args->conv4_b = conv4_b;
+  args->conv4_b_bytes = 0;
+
+  args->conv5_w = conv5_w;
+  args->conv5_w_bytes = 0;
+  args->conv5_b = conv5_b;
+  args->conv5_b_bytes = 0;
+
+  // FC Layers params
+  args->fc1_w = fc1_w;
+  args->fc1_w_bytes = 0;
+  args->fc1_b = fc1_b;
+  args->fc1_b_bytes = 0;
+
+  void *dfg = __visc__launch(0, root, (void *)args);
+
+  __visc__wait(dfg);
+
+  // FIXME: Value returned in the wrong index!! Output is read from x, not r.tensor.
+  //void *r = args->r.tensor;
+  void *result = args->x;
+  hpvm_request_tensor(result, 0);
+
+  __visc__cleanup();
+
+  computeAccuracy2(labels, test_batch_size, result);
+
+  return 0;
+}
+
+
-- 
GitLab