diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile index 76f4b4791780d468f206aba64a0e66f4d4fc6f2a..86e93cb809336f51b18eaeb110b18dd666367e56 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/Makefile @@ -1,6 +1,6 @@ DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks # NOTE: can configure build directory -HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_new/ +HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_fresh/ CC = $(HPVM_BUILD_DIR)/bin/clang++ OPT = $(HPVM_BUILD_DIR)/bin/opt @@ -23,7 +23,7 @@ CCFLAGS += -DDEVICE=CUDNN_TARGET LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib -HPVM_LIB_DIR = /home/hsharif3/Gitlab/hpvm/build_new/lib +#HPVM_LIB_DIR = /home/hsharif3/Gitlab/hpvm/build_new/lib VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp index a8cab99d52acae5b88816da8d51539331a5590f2..ef6fda1d3194a97f4ec48f7ef8040744e24c7fa1 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet/src/alexnet.cpp @@ -1,538 +1,446 @@ -#include <iostream> -#include <cstdio> -#include <cstring> -#include <cinttypes> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -using namespace std; +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <cstring> +#include <visc.h> +#include <tensorTypes.h> +#include <tensorUtils.h> + +void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 5, 5, 1, 1); + __visc__return(2, r, (size_t) 0); +} +void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); +} -/* DNN Layer 1 **/ -void tensorConvNode1(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_2_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); - void *r = __visc__tensor_convolution(t1, t2, 5, 5, 1, 1); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_tanh(t1); + __visc__return(2, r, (size_t) 0); } +void var_3_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); -void tensorAddNode1(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); + __visc__return(2, r, (size_t) 0); } +void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); -void tensorPoolNode1(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - 
__visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); + __visc__return(2, r, (size_t) 0); } -void tensorTanhNode1(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); } -/** End of Layer 1 **/ +void var_6_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + void* r = __visc__tensor_tanh(t1); + __visc__return(2, r, (size_t) 0); +} +void var_7_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); -/* DNN Layer 2 **/ -void tensorConvNode2(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); + __visc__return(2, r, (size_t) 0); +} + +void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - // X * W = t2 * t1 - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); + __visc__return(2, r, (size_t) 0); } +void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); -void tensorAddNode2(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); +} - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); +void var_10_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + + void* r = __visc__tensor_tanh(t1); + __visc__return(2, r, (size_t) 0); } -void tensorPoolNode2(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); + __visc__return(2, r, (size_t) 0); } -void tensorTanhNode2(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); } -/** End of Layer 2 **/ +void var_13_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + void* r = __visc__tensor_tanh(t1); + __visc__return(2, r, (size_t) 0); +} -/* DNN Layer 3 **/ -void tensorConvNode3(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t 
bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - // X * W = t2 * t1 - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); + __visc__return(2, r, (size_t) 0); } -// FIXIT: Think if we can reuse the add, tanh, and pool nodes? -void tensorAddNode3(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); } -void tensorTanhNode3(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_16_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_tanh(t1); + __visc__return(2, r, (size_t) 0); } -/** End of Layer 3 **/ +void var_17_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); + __visc__return(2, r, (size_t) 0); +} -/* DNN Layer 4 **/ -void tensorConvNode4(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - // X * W = t2 * t1 - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_mul(t1, t2); + __visc__return(2, r, (size_t) 0); } -// FIXIT: Think if we can reuse the add, tanh, and pool nodes? 
-void tensorAddNode4(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); } -void tensorTanhNode4(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_20_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_softmax(t1); + __visc__return(2, r, (size_t) 0); } -/** End of Layer 4 **/ +void root(void* input, size_t input_bytes, + void* conv2d_1_w, size_t conv2d_1_w_bytes, + void* conv2d_1_b, size_t conv2d_1_b_bytes, + void* conv2d_2_w, size_t conv2d_2_w_bytes, + void* conv2d_2_b, size_t conv2d_2_b_bytes, + void* conv2d_3_w, size_t conv2d_3_w_bytes, + void* conv2d_3_b, size_t conv2d_3_b_bytes, + void* conv2d_4_w, size_t conv2d_4_w_bytes, + void* conv2d_4_b, size_t conv2d_4_b_bytes, + void* conv2d_5_w, size_t conv2d_5_w_bytes, + void* conv2d_5_b, size_t conv2d_5_b_bytes, + void* dense_1_w, size_t dense_1_w_bytes, + void* dense_1_b, size_t dense_1_b_bytes){ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(13, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, dense_1_w, dense_1_b, 0); -/*** DNN Layer 5 ****/ -void tensorConvNode5(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} + void* var_0 = __visc__createNodeND(0, var_0_node); + __visc__bindIn(var_0, 0, 0, 0); + __visc__bindIn(var_0, 1, 1, 0); + __visc__bindIn(var_0, 2, 2, 0); + __visc__bindIn(var_0, 3, 3, 0); -void tensorAddNode5(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + void* var_1 = __visc__createNodeND(0, var_1_node); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} + __visc__edge(var_0, var_1, 1, 0, 0, 0); + __visc__edge(var_0, var_1, 1, 1, 1, 0); + __visc__bindIn(var_1, 4, 2, 0); + __visc__bindIn(var_1, 5, 3, 0); -void tensorPoolNode5(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + void* var_2 = __visc__createNodeND(0, var_2_node); - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} + __visc__edge(var_1, var_2, 1, 0, 0, 0); + __visc__edge(var_1, var_2, 1, 1, 1, 0); -void tensorTanhNode5(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + void* var_3 = __visc__createNodeND(0, var_3_node); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); -} + __visc__edge(var_2, var_3, 1, 0, 0, 0); + __visc__edge(var_2, var_3, 1, 1, 1, 0); -/** End of Layer 5 **/ + void* var_4 = __visc__createNodeND(0, var_4_node); + __visc__edge(var_3, var_4, 1, 0, 0, 0); + __visc__edge(var_3, var_4, 1, 1, 1, 0); + __visc__bindIn(var_4, 6, 2, 0); + __visc__bindIn(var_4, 7, 3, 0); -/***** DNN Layer 6 - FC layer ****/ -void tensorMulNode6(void *t1, size_t bytes1, void *t2, size_t 
bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + void* var_5 = __visc__createNodeND(0, var_5_node); - // X * W = t2 * t1 - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} + __visc__edge(var_4, var_5, 1, 0, 0, 0); + __visc__edge(var_4, var_5, 1, 1, 1, 0); + __visc__bindIn(var_5, 8, 2, 0); + __visc__bindIn(var_5, 9, 3, 0); -void tensorAddNode6(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + void* var_6 = __visc__createNodeND(0, var_6_node); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} + __visc__edge(var_5, var_6, 1, 0, 0, 0); + __visc__edge(var_5, var_6, 1, 1, 1, 0); + void* var_7 = __visc__createNodeND(0, var_7_node); -/** End of Layer 6 **/ - - - - -void root(void *x, size_t x_bytes, - void *conv1_w, size_t conv1_w_bytes, - void *conv1_b, size_t conv1_b_bytes, - void *conv2_w, size_t conv2_w_bytes, - void *conv2_b, size_t conv2_b_bytes, - void *conv3_w, size_t conv3_w_bytes, - void *conv3_b, size_t conv3_b_bytes, - void *conv4_w, size_t conv4_w_bytes, - void *conv4_b, size_t conv4_b_bytes, - void *conv5_w, size_t conv5_w_bytes, - void *conv5_b, size_t conv5_b_bytes, - void *fc1_w, size_t fc1_w_bytes, - void *fc1_b, size_t fc1_b_bytes){ - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(13, x, conv1_w, conv1_b, conv2_w, conv2_b, conv3_w, conv3_b, - conv4_w, conv4_b, conv5_w, conv5_b, fc1_w, fc1_b, 0); - - // Conv1 Nodes - void *nodeConv1 = __visc__createNodeND(0, tensorConvNode1); - void *nodeAdd1 = __visc__createNodeND(0, tensorAddNode1); - void *nodeTanh1 = __visc__createNodeND(0, tensorTanhNode1); - void *nodePool1 = __visc__createNodeND(0, tensorPoolNode1); - // Conv2 Nodes - void *nodeConv2 = __visc__createNodeND(0, tensorConvNode2); - void *nodeAdd2 = __visc__createNodeND(0, tensorAddNode2); - void *nodeTanh2 = __visc__createNodeND(0, tensorTanhNode2); - void *nodePool2 = __visc__createNodeND(0, tensorPoolNode2); - // Conv3 Nodes - void *nodeConv3 = __visc__createNodeND(0, tensorConvNode3); - void *nodeAdd3 = __visc__createNodeND(0, tensorAddNode3); - void *nodeTanh3 = __visc__createNodeND(0, tensorTanhNode3); - // Conv4 Nodes - void *nodeConv4 = __visc__createNodeND(0, tensorConvNode4); - void *nodeAdd4 = __visc__createNodeND(0, tensorAddNode4); - void *nodeTanh4 = __visc__createNodeND(0, tensorTanhNode4); - // Conv5 Nodes - void *nodeConv5 = __visc__createNodeND(0, tensorConvNode5); - void *nodeAdd5 = __visc__createNodeND(0, tensorAddNode5); - void *nodeTanh5 = __visc__createNodeND(0, tensorTanhNode5); - void *nodePool5 = __visc__createNodeND(0, tensorPoolNode5); - // FC1 Nodes - void *nodeMul6 = __visc__createNodeND(0, tensorMulNode6); - void *nodeAdd6 = __visc__createNodeND(0, tensorAddNode6); - - //***** Conv Layer 1 *******/ - // node, src, dst, stream - __visc__bindIn(nodeConv1, 0, 0, 0); - __visc__bindIn(nodeConv1, 1, 1, 0); - __visc__bindIn(nodeConv1, 2, 2, 0); - __visc__bindIn(nodeConv1, 3, 3, 0); - - // node, node, type, src, dst, stream - __visc__edge(nodeConv1, nodeAdd1, 1, 0, 0, 0); - __visc__edge(nodeConv1, nodeAdd1, 1, 1, 1, 0); - - // parent_index, dest_index, bind_type - __visc__bindIn(nodeAdd1, 4, 2, 0); - __visc__bindIn(nodeAdd1, 5, 3, 0); - - // node, node, type, src, dst, stream - __visc__edge(nodeAdd1, nodeTanh1, 1, 0, 0, 0); - __visc__edge(nodeAdd1, nodeTanh1, 1, 1, 1, 0); - - // node, node, type, src, dst, stream - __visc__edge(nodeTanh1, nodePool1, 1, 0, 0, 0); - 
__visc__edge(nodeTanh1, nodePool1, 1, 1, 1, 0); - - - /**** Conv Layer 2 ****/ - // ConvOp2 - __visc__bindIn(nodeConv2, 6, 2, 0); - __visc__bindIn(nodeConv2, 7, 3, 0); - - __visc__edge(nodePool1, nodeConv2, 1, 0, 0, 0); - __visc__edge(nodePool1, nodeConv2, 1, 1, 1, 0); - - // AddOp2 - __visc__bindIn(nodeAdd2, 8, 2, 0); - __visc__bindIn(nodeAdd2, 9, 3, 0); - - __visc__edge(nodeConv2, nodeAdd2, 1, 0, 0, 0); - __visc__edge(nodeConv2, nodeAdd2, 1, 1, 1, 0); - - // TanhOp2 - __visc__edge(nodeAdd2, nodeTanh2, 1, 0, 0, 0); - __visc__edge(nodeAdd2, nodeTanh2, 1, 1, 1, 0); - - // PoolOp2 - __visc__edge(nodeTanh2, nodePool2, 1, 0, 0, 0); - __visc__edge(nodeTanh2, nodePool2, 1, 1, 1, 0); - - - /**** Conv Layer 3 ****/ - // ConvOp3 - __visc__bindIn(nodeConv3, 10, 2, 0); - __visc__bindIn(nodeConv3, 11, 3, 0); - - __visc__edge(nodePool2, nodeConv3, 1, 0, 0, 0); - __visc__edge(nodePool2, nodeConv3, 1, 1, 1, 0); - - // AddOp3 - __visc__bindIn(nodeAdd3, 12, 2, 0); - __visc__bindIn(nodeAdd3, 13, 3, 0); - - __visc__edge(nodeConv3, nodeAdd3, 1, 0, 0, 0); - __visc__edge(nodeConv3, nodeAdd3, 1, 1, 1, 0); - - // TanhOp3 - __visc__edge(nodeAdd3, nodeTanh3, 1, 0, 0, 0); - __visc__edge(nodeAdd3, nodeTanh3, 1, 1, 1, 0); - - - /**** Conv Layer 4 ****/ - // ConvOp4 - __visc__bindIn(nodeConv4, 14, 2, 0); - __visc__bindIn(nodeConv4, 15, 3, 0); - - __visc__edge(nodeTanh3, nodeConv4, 1, 0, 0, 0); - __visc__edge(nodeTanh3, nodeConv4, 1, 1, 1, 0); - - // AddOp4 - __visc__bindIn(nodeAdd4, 16, 2, 0); - __visc__bindIn(nodeAdd4, 17, 3, 0); - - __visc__edge(nodeConv4, nodeAdd4, 1, 0, 0, 0); - __visc__edge(nodeConv4, nodeAdd4, 1, 1, 1, 0); - - // TanhOp4 - __visc__edge(nodeAdd4, nodeTanh4, 1, 0, 0, 0); - __visc__edge(nodeAdd4, nodeTanh4, 1, 1, 1, 0); - - - /**** Conv Layer 5 ****/ - // ConvOp5 - __visc__bindIn(nodeConv5, 18, 2, 0); - __visc__bindIn(nodeConv5, 19, 3, 0); - - __visc__edge(nodeTanh4, nodeConv5, 1, 0, 0, 0); - __visc__edge(nodeTanh4, nodeConv5, 1, 1, 1, 0); - - // AddOp5 - __visc__bindIn(nodeAdd5, 20, 2, 0); - __visc__bindIn(nodeAdd5, 21, 3, 0); - - __visc__edge(nodeConv5, nodeAdd5, 1, 0, 0, 0); - __visc__edge(nodeConv5, nodeAdd5, 1, 1, 1, 0); + __visc__edge(var_6, var_7, 1, 0, 0, 0); + __visc__edge(var_6, var_7, 1, 1, 1, 0); - // TanhOp5 - __visc__edge(nodeAdd5, nodeTanh5, 1, 0, 0, 0); - __visc__edge(nodeAdd5, nodeTanh5, 1, 1, 1, 0); + void* var_8 = __visc__createNodeND(0, var_8_node); - // PoolOp5 - __visc__edge(nodeTanh5, nodePool5, 1, 0, 0, 0); - __visc__edge(nodeTanh5, nodePool5, 1, 1, 1, 0); + __visc__edge(var_7, var_8, 1, 0, 0, 0); + __visc__edge(var_7, var_8, 1, 1, 1, 0); + __visc__bindIn(var_8, 10, 2, 0); + __visc__bindIn(var_8, 11, 3, 0); - + void* var_9 = __visc__createNodeND(0, var_9_node); - /**** FC Layer 1 ****/ - // MulOp6 - __visc__bindIn(nodeMul6, 22, 2, 0); - __visc__bindIn(nodeMul6, 23, 3, 0); - - __visc__edge(nodePool5, nodeMul6, 1, 0, 0, 0); - __visc__edge(nodePool5, nodeMul6, 1, 1, 1, 0); + __visc__edge(var_8, var_9, 1, 0, 0, 0); + __visc__edge(var_8, var_9, 1, 1, 1, 0); + __visc__bindIn(var_9, 12, 2, 0); + __visc__bindIn(var_9, 13, 3, 0); - // AddOp6 - __visc__bindIn(nodeAdd6, 24, 2, 0); - __visc__bindIn(nodeAdd6, 25, 3, 0); - - __visc__edge(nodeMul6, nodeAdd6, 1, 0, 0, 0); - __visc__edge(nodeMul6, nodeAdd6, 1, 1, 1, 0); + void* var_10 = __visc__createNodeND(0, var_10_node); - /***** Output Binding ****/ - __visc__bindOut(nodeAdd6, 0, 0, 0); - __visc__bindOut(nodeAdd6, 1, 1, 0); + __visc__edge(var_9, var_10, 1, 0, 0, 0); + __visc__edge(var_9, var_10, 1, 1, 1, 0); -} + void* var_11 = 
__visc__createNodeND(0, var_11_node); + + __visc__edge(var_10, var_11, 1, 0, 0, 0); + __visc__edge(var_10, var_11, 1, 1, 1, 0); + __visc__bindIn(var_11, 14, 2, 0); + __visc__bindIn(var_11, 15, 3, 0); + + void* var_12 = __visc__createNodeND(0, var_12_node); + + __visc__edge(var_11, var_12, 1, 0, 0, 0); + __visc__edge(var_11, var_12, 1, 1, 1, 0); + __visc__bindIn(var_12, 16, 2, 0); + __visc__bindIn(var_12, 17, 3, 0); + void* var_13 = __visc__createNodeND(0, var_13_node); + + __visc__edge(var_12, var_13, 1, 0, 0, 0); + __visc__edge(var_12, var_13, 1, 1, 1, 0); + + void* var_14 = __visc__createNodeND(0, var_14_node); + + __visc__edge(var_13, var_14, 1, 0, 0, 0); + __visc__edge(var_13, var_14, 1, 1, 1, 0); + __visc__bindIn(var_14, 18, 2, 0); + __visc__bindIn(var_14, 19, 3, 0); + + void* var_15 = __visc__createNodeND(0, var_15_node); + + __visc__edge(var_14, var_15, 1, 0, 0, 0); + __visc__edge(var_14, var_15, 1, 1, 1, 0); + __visc__bindIn(var_15, 20, 2, 0); + __visc__bindIn(var_15, 21, 3, 0); + + void* var_16 = __visc__createNodeND(0, var_16_node); + + __visc__edge(var_15, var_16, 1, 0, 0, 0); + __visc__edge(var_15, var_16, 1, 1, 1, 0); + + void* var_17 = __visc__createNodeND(0, var_17_node); + + __visc__edge(var_16, var_17, 1, 0, 0, 0); + __visc__edge(var_16, var_17, 1, 1, 1, 0); + + void* var_18 = __visc__createNodeND(0, var_18_node); + + __visc__edge(var_17, var_18, 1, 0, 0, 0); + __visc__edge(var_17, var_18, 1, 1, 1, 0); + __visc__bindIn(var_18, 22, 2, 0); + __visc__bindIn(var_18, 23, 3, 0); + + void* var_19 = __visc__createNodeND(0, var_19_node); + + __visc__edge(var_18, var_19, 1, 0, 0, 0); + __visc__edge(var_18, var_19, 1, 1, 1, 0); + __visc__bindIn(var_19, 24, 2, 0); + __visc__bindIn(var_19, 25, 3, 0); + + void* var_20 = __visc__createNodeND(0, var_20_node); + + __visc__edge(var_19, var_20, 1, 0, 0, 0); + __visc__edge(var_19, var_20, 1, 1, 1, 0); + + __visc__bindOut(var_20, 0, 0, 0); + __visc__bindOut(var_20, 1, 1, 0); + +} -// Return type for the nodes struct ret_t { - void *tensor; - size_t bytes; -}; + void* tensor; + size_t bytes; +}; typedef struct __attribute__((__packed__)) { - void *x; - size_t x_bytes; - // 1st Layer parameters - void* conv1_w; - size_t conv1_w_bytes; - void* conv1_b; - size_t conv1_b_bytes; - // 2nd Layer parameters - void* conv2_w; - size_t conv2_w_bytes; - void* conv2_b; - size_t conv2_b_bytes; - // 3rd Layer parameters - void* conv3_w; - size_t conv3_w_bytes; - void* conv3_b; - size_t conv3_b_bytes; - // 4th Layer parameters - void* conv4_w; - size_t conv4_w_bytes; - void* conv4_b; - size_t conv4_b_bytes; - // 5th Layer parameters - void* conv5_w; - size_t conv5_w_bytes; - void* conv5_b; - size_t conv5_b_bytes; - // 6th Layer parameters - void* fc1_w; - size_t fc1_w_bytes; - void* fc1_b; - size_t fc1_b_bytes; - - struct ret_t r; + void* input; + size_t input_bytes; + void* conv2d_1_w; + size_t conv2d_1_w_bytes; + void* conv2d_1_b; + size_t conv2d_1_b_bytes; + void* conv2d_2_w; + size_t conv2d_2_w_bytes; + void* conv2d_2_b; + size_t conv2d_2_b_bytes; + void* conv2d_3_w; + size_t conv2d_3_w_bytes; + void* conv2d_3_b; + size_t conv2d_3_b_bytes; + void* conv2d_4_w; + size_t conv2d_4_w_bytes; + void* conv2d_4_b; + size_t conv2d_4_b_bytes; + void* conv2d_5_w; + size_t conv2d_5_w_bytes; + void* conv2d_5_b; + size_t conv2d_5_b_bytes; + void* dense_1_w; + size_t dense_1_w_bytes; + void* dense_1_b; + size_t dense_1_b_bytes; + + struct ret_t r; } RootIn; -int main() { - - int test_batch_size = 5000; - std::string prefix = 
"../../../../../../projects/hpvm-tensor-rt/model_params"; - - std::string input_data_path = prefix + std::string("/alexnet_cifar10/norm_cifar_input.bin"); - std::string labels_path = prefix + std::string("/alexnet_cifar10/test_labels.bin"); - - std::string conv1_w_path = prefix + std::string("/alexnet_cifar10/conv1.bin"); - std::string conv1_b_path = prefix + std::string("/alexnet_cifar10/conv1_bias.bin"); - - std::string conv2_w_path = prefix + std::string("/alexnet_cifar10/conv2.bin"); - std::string conv2_b_path = prefix + std::string("/alexnet_cifar10/conv2_bias.bin"); - - std::string conv3_w_path = prefix + std::string("/alexnet_cifar10/conv3.bin"); - std::string conv3_b_path = prefix + std::string("/alexnet_cifar10/conv3_bias.bin"); - - std::string conv4_w_path = prefix + std::string("/alexnet_cifar10/conv4.bin"); - std::string conv4_b_path = prefix + std::string("/alexnet_cifar10/conv4_bias.bin"); - - std::string conv5_w_path = prefix + std::string("/alexnet_cifar10/conv5.bin"); - std::string conv5_b_path = prefix + std::string("/alexnet_cifar10/conv5_bias.bin"); - - std::string fc1_w_path = prefix + std::string("/alexnet_cifar10/fc1.bin"); - std::string fc1_b_path = prefix + std::string("/alexnet_cifar10/fc1_bias.bin"); - - printf("Reading Input Data from = %s \n", input_data_path.c_str()); - - uint8_t* labels = readLabels(labels_path.c_str(), test_batch_size); - void* x = readTrainedWeights(input_data_path.c_str(), float_type, - test_batch_size, 3, 32, 32); - - // Loading convolution weights - void* conv1_w = readTrainedWeights(conv1_w_path.c_str(), float_type, 64, 3, 11, 11); - void* conv1_b = readTrainedWeights(conv1_b_path.c_str(), float_type, 1, 64, 1, 1); - void* conv2_w = readTrainedWeights(conv2_w_path.c_str(), float_type, 192, 64, 5, 5); - void* conv2_b = readTrainedWeights(conv2_b_path.c_str(), float_type, 1, 192, 1, 1); - void* conv3_w = readTrainedWeights(conv3_w_path.c_str(), float_type, 384, 192, 3, 3); - void* conv3_b = readTrainedWeights(conv3_b_path.c_str(), float_type, 1, 384, 1, 1); - void* conv4_w = readTrainedWeights(conv4_w_path.c_str(), float_type, 256, 384, 3, 3); - void* conv4_b = readTrainedWeights(conv4_b_path.c_str(), float_type, 1, 256, 1, 1); - void* conv5_w = readTrainedWeights(conv5_w_path.c_str(), float_type, 256, 256, 3, 3); - void* conv5_b = readTrainedWeights(conv5_b_path.c_str(), float_type, 1, 256, 1, 1); - - // Loading FC layer weights - void* fc1_w = readTrainedWeights(fc1_w_path.c_str(), float_type, 1, 1, 4096, 10); - void* fc1_b = readTrainedWeights(fc1_b_path.c_str(), float_type, 1, 10, 1, 1); - - __visc__init(); - - RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - args->x = x; - args->x_bytes = 0; - // Conv Layers params - args->conv1_w = conv1_w; - args->conv1_w_bytes = 0; - args->conv1_b = conv1_b; - args->conv1_b_bytes = 0; - - args->conv2_w = conv2_w; - args->conv2_w_bytes = 0; - args->conv2_b = conv2_b; - args->conv2_b_bytes = 0; - - args->conv3_w = conv3_w; - args->conv3_w_bytes = 0; - args->conv3_b = conv3_b; - args->conv3_b_bytes = 0; - - args->conv4_w = conv4_w; - args->conv4_w_bytes = 0; - args->conv4_b = conv4_b; - args->conv4_b_bytes = 0; - - args->conv5_w = conv5_w; - args->conv5_w_bytes = 0; - args->conv5_b = conv5_b; - args->conv5_b_bytes = 0; - - // FC Layers params - args->fc1_w = fc1_w; - args->fc1_w_bytes = 0; - args->fc1_b = fc1_b; - args->fc1_b_bytes = 0; - - void *dfg = __visc__launch(0, root, (void *)args); - - __visc__wait(dfg); - - // FIXME: Value returned in the wrong index!! 
- //void *r = static_cast<RootIn*>(args)->r.tensor; - void *result = static_cast<RootIn*>(args)->x; - hpvm_request_tensor(result, 0); - - __visc__cleanup(); - - computeAccuracy2(labels, test_batch_size, result); - - return 0; -} - - +int main(){ + + std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/alexnet_cifar10_front/"); + //std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/alexnet_cifar10/"); + + std::string input_path = dir_prefix + std::string("input.bin"); + void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); + std::string labels_path = dir_prefix + std::string("labels.bin"); + uint8_t* labels = readLabels(labels_path.c_str(),10000); + std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); + void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); + std::string conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); + void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); + std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin"); + void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); + std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin"); + void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); + std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin"); + void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); + std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin"); + void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); + std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin"); + void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); + std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin"); + void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); + std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin"); + void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); + std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin"); + void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); + std::string dense_1_w_path = dir_prefix + std::string("fc12.bin"); + void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); + std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin"); + void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); + + + __visc__init(); + RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); + + args->input = input; + args->input_bytes = 0; + args->conv2d_1_w = conv2d_1_w; + args->conv2d_1_w_bytes = 0; + args->conv2d_1_b = conv2d_1_b; + args->conv2d_1_b_bytes = 0; + args->conv2d_2_w = conv2d_2_w; + args->conv2d_2_w_bytes = 0; + args->conv2d_2_b = conv2d_2_b; + args->conv2d_2_b_bytes = 0; + args->conv2d_3_w = conv2d_3_w; + args->conv2d_3_w_bytes = 0; + args->conv2d_3_b = conv2d_3_b; + args->conv2d_3_b_bytes = 0; + args->conv2d_4_w = conv2d_4_w; + args->conv2d_4_w_bytes = 0; + args->conv2d_4_b = conv2d_4_b; + args->conv2d_4_b_bytes = 0; + args->conv2d_5_w = conv2d_5_w; + args->conv2d_5_w_bytes = 0; + args->conv2d_5_b = conv2d_5_b; + args->conv2d_5_b_bytes = 0; + args->dense_1_w = dense_1_w; + args->dense_1_w_bytes = 0; + args->dense_1_b = dense_1_b; + args->dense_1_b_bytes = 0; + + void* dfg = __visc__launch(0, root, (void*) args); + + __visc__wait(dfg); + + 
void *result = static_cast<RootIn*>(args)->input; + hpvm_request_tensor(result, 0); + + __visc__cleanup(); + computeAccuracy2(labels, 10000, result); + return 0; + +} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet_front/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet_front/Makefile deleted file mode 100644 index 86e93cb809336f51b18eaeb110b18dd666367e56..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet_front/Makefile +++ /dev/null @@ -1,57 +0,0 @@ -DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks -# NOTE: can configure build directory -HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_fresh/ - -CC = $(HPVM_BUILD_DIR)/bin/clang++ -OPT = $(HPVM_BUILD_DIR)/bin/opt -LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis -LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link -LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include - - -SRC_DIR = src -BUILD_DIR = build -APP = alexnet - -TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include -TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include -TENSOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a -TENSOR_AUTOTUNER_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_autotuner.a - -CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3 -CCFLAGS += -DDEVICE=CUDNN_TARGET -LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL - -HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib -#HPVM_LIB_DIR = /home/hsharif3/Gitlab/hpvm/build_new/lib - - -VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG - -TARGET = $(BUILD_DIR)/$(APP).opt.bc -SOURCES = $(SRC_DIR)/$(APP).cpp -VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll - -#OBJS = $(BUILD_DIR)/$(wildcabrd *.ll) -.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll -default: $(BUILD_DIR) $(TARGET) - - -$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp - $(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $< - -#-visc-timers-gen -$(BUILD_DIR)/%.visc.ll: $(BUILD_DIR)/%.ll - $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $< -S -o $@ - -$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.visc.ll - $(OPT) $(VISC_OPTFLAGS) $< -o $@ - $(LLVM_LINK) $@ $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_linked.bc - $(CC) $(BUILD_DIR)/$(APP)_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_linked $(LINKER_FLAGS) - $(CC) $(BUILD_DIR)/$(APP)_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/$(APP)_tune $(LINKER_FLAGS) - -$(BUILD_DIR): - mkdir -p $@ - -clean: - rm -rf $(BUILD_DIR) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet_front/src/alexnet.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet_front/src/alexnet.cpp deleted file mode 100644 index ef6fda1d3194a97f4ec48f7ef8040744e24c7fa1..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/alexnet_front/src/alexnet.cpp +++ /dev/null @@ -1,446 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = 
__visc__tensor_convolution(t1, t2, 5, 5, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_2_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_3_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_6_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_7_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_9_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_10_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_12_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_13_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_16_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - 
void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_17_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_19_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_20_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t) 0); -} - -void root(void* input, size_t input_bytes, - void* conv2d_1_w, size_t conv2d_1_w_bytes, - void* conv2d_1_b, size_t conv2d_1_b_bytes, - void* conv2d_2_w, size_t conv2d_2_w_bytes, - void* conv2d_2_b, size_t conv2d_2_b_bytes, - void* conv2d_3_w, size_t conv2d_3_w_bytes, - void* conv2d_3_b, size_t conv2d_3_b_bytes, - void* conv2d_4_w, size_t conv2d_4_w_bytes, - void* conv2d_4_b, size_t conv2d_4_b_bytes, - void* conv2d_5_w, size_t conv2d_5_w_bytes, - void* conv2d_5_b, size_t conv2d_5_b_bytes, - void* dense_1_w, size_t dense_1_w_bytes, - void* dense_1_b, size_t dense_1_b_bytes){ - - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(13, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, conv2d_4_b, conv2d_5_w, conv2d_5_b, dense_1_w, dense_1_b, 0); - - - void* var_0 = __visc__createNodeND(0, var_0_node); - - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); - - void* var_1 = __visc__createNodeND(0, var_1_node); - - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - __visc__bindIn(var_1, 5, 3, 0); - - void* var_2 = __visc__createNodeND(0, var_2_node); - - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); - - void* var_3 = __visc__createNodeND(0, var_3_node); - - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); - - void* var_4 = __visc__createNodeND(0, var_4_node); - - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 6, 2, 0); - __visc__bindIn(var_4, 7, 3, 0); - - void* var_5 = __visc__createNodeND(0, var_5_node); - - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - __visc__bindIn(var_5, 8, 2, 0); - __visc__bindIn(var_5, 9, 3, 0); - - void* var_6 = __visc__createNodeND(0, var_6_node); - - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); - - void* var_7 = __visc__createNodeND(0, var_7_node); - - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); - - void* var_8 = __visc__createNodeND(0, var_8_node); - - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - __visc__bindIn(var_8, 10, 2, 0); - __visc__bindIn(var_8, 11, 3, 0); - - void* var_9 = __visc__createNodeND(0, var_9_node); - - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - __visc__bindIn(var_9, 12, 2, 0); - __visc__bindIn(var_9, 13, 3, 0); 
- - void* var_10 = __visc__createNodeND(0, var_10_node); - - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); - - void* var_11 = __visc__createNodeND(0, var_11_node); - - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - __visc__bindIn(var_11, 14, 2, 0); - __visc__bindIn(var_11, 15, 3, 0); - - void* var_12 = __visc__createNodeND(0, var_12_node); - - __visc__edge(var_11, var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - __visc__bindIn(var_12, 16, 2, 0); - __visc__bindIn(var_12, 17, 3, 0); - - void* var_13 = __visc__createNodeND(0, var_13_node); - - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); - - void* var_14 = __visc__createNodeND(0, var_14_node); - - __visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - __visc__bindIn(var_14, 18, 2, 0); - __visc__bindIn(var_14, 19, 3, 0); - - void* var_15 = __visc__createNodeND(0, var_15_node); - - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - __visc__bindIn(var_15, 20, 2, 0); - __visc__bindIn(var_15, 21, 3, 0); - - void* var_16 = __visc__createNodeND(0, var_16_node); - - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); - - void* var_17 = __visc__createNodeND(0, var_17_node); - - __visc__edge(var_16, var_17, 1, 0, 0, 0); - __visc__edge(var_16, var_17, 1, 1, 1, 0); - - void* var_18 = __visc__createNodeND(0, var_18_node); - - __visc__edge(var_17, var_18, 1, 0, 0, 0); - __visc__edge(var_17, var_18, 1, 1, 1, 0); - __visc__bindIn(var_18, 22, 2, 0); - __visc__bindIn(var_18, 23, 3, 0); - - void* var_19 = __visc__createNodeND(0, var_19_node); - - __visc__edge(var_18, var_19, 1, 0, 0, 0); - __visc__edge(var_18, var_19, 1, 1, 1, 0); - __visc__bindIn(var_19, 24, 2, 0); - __visc__bindIn(var_19, 25, 3, 0); - - void* var_20 = __visc__createNodeND(0, var_20_node); - - __visc__edge(var_19, var_20, 1, 0, 0, 0); - __visc__edge(var_19, var_20, 1, 1, 1, 0); - - __visc__bindOut(var_20, 0, 0, 0); - __visc__bindOut(var_20, 1, 1, 0); - -} - -struct ret_t { - void* tensor; - size_t bytes; -}; - -typedef struct __attribute__((__packed__)) { - void* input; - size_t input_bytes; - void* conv2d_1_w; - size_t conv2d_1_w_bytes; - void* conv2d_1_b; - size_t conv2d_1_b_bytes; - void* conv2d_2_w; - size_t conv2d_2_w_bytes; - void* conv2d_2_b; - size_t conv2d_2_b_bytes; - void* conv2d_3_w; - size_t conv2d_3_w_bytes; - void* conv2d_3_b; - size_t conv2d_3_b_bytes; - void* conv2d_4_w; - size_t conv2d_4_w_bytes; - void* conv2d_4_b; - size_t conv2d_4_b_bytes; - void* conv2d_5_w; - size_t conv2d_5_w_bytes; - void* conv2d_5_b; - size_t conv2d_5_b_bytes; - void* dense_1_w; - size_t dense_1_w_bytes; - void* dense_1_b; - size_t dense_1_b_bytes; - - struct ret_t r; -} -RootIn; - -int main(){ - - std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/alexnet_cifar10_front/"); - //std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/alexnet_cifar10/"); - - std::string input_path = dir_prefix + std::string("input.bin"); - void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); - std::string labels_path = dir_prefix + std::string("labels.bin"); - uint8_t* labels = readLabels(labels_path.c_str(),10000); - std::string conv2d_1_w_path = dir_prefix + std::string("conv0.bin"); - void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,11,11); - std::string 
conv2d_1_b_path = dir_prefix + std::string("conv_bias0.bin"); - void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); - std::string conv2d_2_w_path = dir_prefix + std::string("conv3.bin"); - void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,192,64,5,5); - std::string conv2d_2_b_path = dir_prefix + std::string("conv_bias3.bin"); - void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,192,1,1); - std::string conv2d_3_w_path = dir_prefix + std::string("conv6.bin"); - void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,384,192,3,3); - std::string conv2d_3_b_path = dir_prefix + std::string("conv_bias6.bin"); - void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,384,1,1); - std::string conv2d_4_w_path = dir_prefix + std::string("conv7.bin"); - void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,256,384,3,3); - std::string conv2d_4_b_path = dir_prefix + std::string("conv_bias7.bin"); - void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,256,1,1); - std::string conv2d_5_w_path = dir_prefix + std::string("conv8.bin"); - void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,256,3,3); - std::string conv2d_5_b_path = dir_prefix + std::string("conv_bias8.bin"); - void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); - std::string dense_1_w_path = dir_prefix + std::string("fc12.bin"); - void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,4096,10); - std::string dense_1_b_path = dir_prefix + std::string("fc_bias12.bin"); - void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,10,1,1); - - - __visc__init(); - RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - - args->input = input; - args->input_bytes = 0; - args->conv2d_1_w = conv2d_1_w; - args->conv2d_1_w_bytes = 0; - args->conv2d_1_b = conv2d_1_b; - args->conv2d_1_b_bytes = 0; - args->conv2d_2_w = conv2d_2_w; - args->conv2d_2_w_bytes = 0; - args->conv2d_2_b = conv2d_2_b; - args->conv2d_2_b_bytes = 0; - args->conv2d_3_w = conv2d_3_w; - args->conv2d_3_w_bytes = 0; - args->conv2d_3_b = conv2d_3_b; - args->conv2d_3_b_bytes = 0; - args->conv2d_4_w = conv2d_4_w; - args->conv2d_4_w_bytes = 0; - args->conv2d_4_b = conv2d_4_b; - args->conv2d_4_b_bytes = 0; - args->conv2d_5_w = conv2d_5_w; - args->conv2d_5_w_bytes = 0; - args->conv2d_5_b = conv2d_5_b; - args->conv2d_5_b_bytes = 0; - args->dense_1_w = dense_1_w; - args->dense_1_w_bytes = 0; - args->dense_1_b = dense_1_b; - args->dense_1_b_bytes = 0; - - void* dfg = __visc__launch(0, root, (void*) args); - - __visc__wait(dfg); - - void *result = static_cast<RootIn*>(args)->input; - hpvm_request_tensor(result, 0); - - __visc__cleanup(); - computeAccuracy2(labels, 10000, result); - return 0; - -} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fc_mnist_3/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fc_mnist_3/Makefile deleted file mode 100644 index 17e0c11fec7b49617c2ad5a68cdc5cb3e394df7a..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fc_mnist_3/Makefile +++ /dev/null @@ -1,36 +0,0 @@ -DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks -CC = $(LLVM_SRC_ROOT)/../build/bin/clang++ -OPT = $(LLVM_SRC_ROOT)/../build/bin/opt -LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include - -SRC_DIR = src -BUILD_DIR = build -APP = fc_mnist_3 - -TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include - -CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -fno-exceptions -ffast-math 
-std=c++11 -O3 - -VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_CUDNN.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG - -TARGET = $(BUILD_DIR)/$(APP).opt.ll -SOURCES = $(SRC_DIR)/$(APP).cpp -#OBJS = $(BUILD_DIR)/$(wildcabrd *.ll) -.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll -default: $(BUILD_DIR) $(TARGET) - - -$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp - $(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $< - -$(BUILD_DIR)/%.visc.ll: $(BUILD_DIR)/%.ll - $(OPT) -load LLVMGenVISC.so -genvisc -globaldce -visc-timers-gen $< -S -o $@ - -$(BUILD_DIR)/%.opt.ll: $(BUILD_DIR)/%.visc.ll - $(OPT) $(VISC_OPTFLAGS) $< -o $@ - -$(BUILD_DIR): - mkdir -p $@ - -clean: - rm -rf $(BUILD_DIR) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fc_mnist_3/src/fc_mnist_3.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fc_mnist_3/src/fc_mnist_3.cpp deleted file mode 100644 index d1848504744af2897a1e741be676fa1338764ea1..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fc_mnist_3/src/fc_mnist_3.cpp +++ /dev/null @@ -1,305 +0,0 @@ -#include <iostream> -#include <cstdio> -#include <cstring> -#include <cinttypes> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -using namespace std; - -void L1_FC(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void L1_Bias(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void L1_Act(void *t, size_t bytest) { - __visc__hint(visc::DEVICE); - __visc__attributes(1, t, 0); - - void* r = __visc__tensor_relu(t); - __visc__return(2, r, (size_t) 0); -} - -void L2_FC(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void L2_Bias(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void L2_Act(void *t, size_t bytest) { - __visc__hint(visc::DEVICE); - __visc__attributes(1, t, 0); - - void* r = __visc__tensor_relu(t); - __visc__return(2, r, (size_t) 0); -} - -void L3_FC(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void L3_Bias(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void L3_Act(void *t, size_t bytest) { - __visc__hint(visc::DEVICE); - __visc__attributes(1, t, 0); - - void* r = __visc__tensor_relu(t); - __visc__return(2, r, (size_t) 0); -} - -void L4_FC(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void L4_Bias(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::DEVICE); - __visc__attributes(2, t1, t2, 0); - - void* r = __visc__tensor_add(t1, t2); - 
__visc__return(2, r, (size_t) 0); -} - -void L4_SoftMax(void *t, size_t bytest) { - __visc__hint(visc::DEVICE); - __visc__attributes(1, t, 0); - - void* r = __visc__tensor_softmax(t); - __visc__return(2, r, (size_t) 0); -} - -void root(void* input, size_t bytes_input, // 0 - void* fc1_weights, size_t bytes_fc1_weights, // 2 - void* fc1_bias, size_t bytes_fc1_bias, // 4 - void* fc2_weights, size_t bytes_fc2_weights, // 6 - void* fc2_bias, size_t bytes_fc2_bias, // 8 - void* fc3_weights, size_t bytes_fc3_weights, // 10 - void* fc3_bias, size_t bytes_fc3_bias, // 12 - void* fc4_weights, size_t bytes_fc4_weights, // 14 - void* fc4_bias, size_t bytes_fc4_bias // 16 - ) { - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(9, input, fc1_weights, fc1_bias, fc2_weights, fc2_bias, fc3_weights, fc3_bias, fc4_weights, fc4_bias, 0); - - void *node_L1_FC = __visc__createNodeND(0, L1_FC); - void *node_L1_Bias = __visc__createNodeND(0, L1_Bias); - void *node_L1_Act = __visc__createNodeND(0, L1_Act); - void *node_L2_FC = __visc__createNodeND(0, L2_FC); - void *node_L2_Bias = __visc__createNodeND(0, L2_Bias); - void *node_L2_Act = __visc__createNodeND(0, L2_Act); - void *node_L3_FC = __visc__createNodeND(0, L3_FC); - void *node_L3_Bias = __visc__createNodeND(0, L3_Bias); - void *node_L3_Act = __visc__createNodeND(0, L3_Act); - void *node_L4_FC = __visc__createNodeND(0, L4_FC); - void *node_L4_Bias = __visc__createNodeND(0, L4_Bias); - void *node_L4_Softmax = __visc__createNodeND(0, L4_SoftMax); - - - // Layer 1 - __visc__bindIn(node_L1_FC, 0, 0, 0); // input - __visc__bindIn(node_L1_FC, 1, 1, 0); // bytes_input - __visc__bindIn(node_L1_FC, 2, 2, 0); // fc1_weights - __visc__bindIn(node_L1_FC, 3, 3, 0); // bytes - - __visc__edge(node_L1_FC, node_L1_Bias, 1, 0, 0, 0); // fc1_out - __visc__edge(node_L1_FC, node_L1_Bias, 1, 1, 1, 0); // bytes - __visc__bindIn(node_L1_Bias, 4, 2, 0); // fc1_bias - __visc__bindIn(node_L1_Bias, 5, 3, 0); // bytes - - __visc__edge(node_L1_Bias, node_L1_Act, 1, 0, 0, 0); // fc1_bias_out - __visc__edge(node_L1_Bias, node_L1_Act, 1, 1, 1, 0); // bytes - - // Layer 2 - __visc__edge(node_L1_Act, node_L2_FC, 1, 0, 0, 0); // fc1_act_out - __visc__edge(node_L1_Act, node_L2_FC, 1, 1, 1, 0); // bytes - __visc__bindIn(node_L2_FC, 6, 2, 0); // fc2_weights - __visc__bindIn(node_L2_FC, 7, 3, 0); // bytes - - __visc__edge(node_L2_FC, node_L2_Bias, 1, 0, 0, 0); // fc2_out - __visc__edge(node_L2_FC, node_L2_Bias, 1, 1, 1, 0); // bytes - __visc__bindIn(node_L2_Bias, 8, 2, 0); // fc2_bias - __visc__bindIn(node_L2_Bias, 9, 3, 0); // bytes - - __visc__edge(node_L2_Bias, node_L2_Act, 1, 0, 0, 0); // fc2_bias_out - __visc__edge(node_L2_Bias, node_L2_Act, 1, 1, 1, 0); // bytes - - // Layer 3 - __visc__edge(node_L2_Act, node_L3_FC, 1, 0, 0, 0); // fc2_act_out - __visc__edge(node_L2_Act, node_L3_FC, 1, 1, 1, 0); // bytes - __visc__bindIn(node_L3_FC, 10, 2, 0); // fc3_weights - __visc__bindIn(node_L3_FC, 11, 3, 0); // bytes - - __visc__edge(node_L3_FC, node_L3_Bias, 1, 0, 0, 0); // fc3_out - __visc__edge(node_L3_FC, node_L3_Bias, 1, 1, 1, 0); // bytes - __visc__bindIn(node_L3_Bias, 12, 2, 0); // fc3_bias - __visc__bindIn(node_L3_Bias, 13, 3, 0); // bytes - - __visc__edge(node_L3_Bias, node_L3_Act, 1, 0, 0, 0); // fc3_bias_out - __visc__edge(node_L3_Bias, node_L3_Act, 1, 1, 1, 0); // bytes - - // Layer 4 - __visc__edge(node_L3_Act, node_L4_FC, 1, 0, 0, 0); // fc3_act_out - __visc__edge(node_L3_Act, node_L4_FC, 1, 1, 1, 0); // bytes - __visc__bindIn(node_L4_FC, 14, 2, 0); // fc4_weights - 
__visc__bindIn(node_L4_FC, 15, 3, 0); // bytes - - __visc__edge(node_L4_FC, node_L4_Bias, 1, 0, 0, 0); // fc4_out - __visc__edge(node_L4_FC, node_L4_Bias, 1, 1, 1, 0); // bytes - __visc__bindIn(node_L4_Bias, 16, 2, 0); // fc4_bias - __visc__bindIn(node_L4_Bias, 17, 3, 0); // bytes - - __visc__edge(node_L4_Bias, node_L4_Softmax, 1, 0, 0, 0); // fc3_bias_out - __visc__edge(node_L4_Bias, node_L4_Softmax, 1, 1, 1, 0); // bytes - - __visc__bindOut(node_L4_Softmax, 0, 0, 0); // softmax output - __visc__bindOut(node_L4_Softmax, 1, 1, 0); // bytes - -} - - -// Return type for the nodes -struct ret_t { - void *tensor; - size_t bytes; -}; - -typedef struct __attribute__((__packed__)) { - void *input; - size_t bytes_input; - void *fc1_weights; - size_t bytes_fc1_weights; - void *fc1_bias; - size_t bytes_fc1_bias; - void *fc2_weights; - size_t bytes_fc2_weights; - void *fc2_bias; - size_t bytes_fc2_bias; - void *fc3_weights; - size_t bytes_fc3_weights; - void *fc3_bias; - size_t bytes_fc3_bias; - void *fc4_weights; - size_t bytes_fc4_weights; - void *fc4_bias; - size_t bytes_fc4_bias; - - struct ret_t r; -} -RootIn; - -void packLaunchArguments(RootIn* args, - void* input, - void* fc1_weights, - void* fc1_bias, - void* fc2_weights, - void* fc2_bias, - void* fc3_weights, - void* fc3_bias, - void* fc4_weights, - void* fc4_bias) { - args->input = input; - args->bytes_input = 0; - args->fc1_weights = fc1_weights; - args->bytes_fc1_weights = 0; - args->fc1_bias = fc1_bias; - args->bytes_fc1_bias = 0; - args->fc2_weights = fc2_weights; - args->bytes_fc2_weights = 0; - args->fc2_bias = fc2_bias; - args->bytes_fc2_bias = 0; - args->fc3_weights = fc3_weights; - args->bytes_fc3_weights = 0; - args->fc3_bias = fc3_bias; - args->bytes_fc3_bias = 0; - args->fc4_weights = fc4_weights; - args->bytes_fc4_weights = 0; - args->fc4_bias = fc4_bias; - args->bytes_fc4_bias = 0; - -} - -int main() { - - int test_batch_size = 10000; - void* input = readInputTensor("t10k-images-idx3-ubyte", float_type, - test_batch_size, 1, 28, 28); - void* fc1_weights = readTrainedWeights("../model_params/FC_network1/fc1.bin", - float_type, 1, 1, 784, 1000); - void* fc1_bias = readTrainedWeights("../model_params/FC_network1/fc1_bias.bin", - float_type, 1, 1000, 1, 1); - void* fc2_weights = readTrainedWeights("../model_params/FC_network1/fc2.bin", - float_type, 1, 1, 1000, 500); - void* fc2_bias = readTrainedWeights("../model_params/FC_network1/fc2_bias.bin", - float_type, 1, 500, 1, 1); - void* fc3_weights = readTrainedWeights("../model_params/FC_network1/fc3.bin", - float_type, 1, 1, 500, 200); - void* fc3_bias = readTrainedWeights("../model_params/FC_network1/fc3_bias.bin", - float_type, 1, 200, 1, 1); - void* fc4_weights = readTrainedWeights("../model_params/FC_network1/fc4.bin", - float_type, 1, 1, 200, 10); - void* fc4_bias = readTrainedWeights("../model_params/FC_network1/fc4_bias.bin", - float_type, 1, 10, 1, 1); - - __visc__init(); - - RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - packLaunchArguments(args, - input, - fc1_weights, - fc1_bias, - fc2_weights, - fc2_bias, - fc3_weights, - fc3_bias, - fc4_weights, - fc4_bias); - - void *dfg = __visc__launch(0, root, (void *)args); - - __visc__wait(dfg); - - void *r = args->r.tensor; - hpvm_request_tensor(r, visc::CPU_TARGET); - - __visc__cleanup(); - return 0; -} - - diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/Makefile index 
479b0a2caf914e314b991577ada8d7dc20abff66..aeb8cd6bb1606c496258fcd92f28e1b3d0f10566 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/Makefile +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/Makefile @@ -1,8 +1,11 @@ DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks -CC = $(LLVM_SRC_ROOT)/../build/bin/clang++ -OPT = $(LLVM_SRC_ROOT)/../build/bin/opt -LLVM_DIS = $(LLVM_SRC_ROOT)/../build/bin/llvm-dis -LLVM_LINK = $(LLVM_SRC_ROOT)/../build/bin/llvm-link +# NOTE: can configure build directory +HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_hpvm/ + +CC = $(HPVM_BUILD_DIR)/bin/clang++ +OPT = $(HPVM_BUILD_DIR)/bin/opt +LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis +LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include SRC_DIR = src @@ -18,7 +21,15 @@ CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUD CCFLAGS += -DDEVICE=CUDNN_TARGET LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL -VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMInPlaceDFGAnalysis.so -load LLVMDFG2LLVM_CUDNN.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG +HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib + + +VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG + + +VISC_OPTFLAGS2 = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_PROMISE.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-promise -dfg2llvm-x86 -clearDFG + + TARGET = $(BUILD_DIR)/$(APP).opt.bc SOURCES = $(SRC_DIR)/$(APP).cpp @@ -30,17 +41,20 @@ default: $(BUILD_DIR) $(TARGET) $(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp - $(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $< - -#-visc-timers-gen -$(BUILD_DIR)/%.visc.ll: $(BUILD_DIR)/%.ll - $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $< -S -o $@ - -$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.visc.ll - $(OPT) $(VISC_OPTFLAGS) $< -o $@ - $(LLVM_LINK) $@ $(VISC_RT_PATH) -o $(BUILD_DIR)/lenet_linked.bc - $(CC) $(BUILD_DIR)/lenet_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/lenet_linked $(LINKER_FLAGS) - $(CC) $(BUILD_DIR)/lenet_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/lenet_tune $(LINKER_FLAGS) + $(CC) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -S -o $(BUILD_DIR)/$(APP).ll + $(CC) $(CC_FLAGS) -emit-llvm src/$(APP)_promise.cpp -S -o $(BUILD_DIR)/$(APP)_promise.ll + + +$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll + $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll + $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP)_promise.ll -S -o $(BUILD_DIR)/$(APP)_promise.visc.ll + $(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_cudnn.bc + $(OPT) $(VISC_OPTFLAGS2) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_promise.bc + $(LLVM_LINK) $(BUILD_DIR)/$(APP)_cudnn.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_cudnn_linked.bc + $(LLVM_LINK) $(BUILD_DIR)/$(APP)_promise.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_promise_linked.bc + $(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_cudnn_linked $(LINKER_FLAGS) + $(CC) $(BUILD_DIR)/$(APP)_promise_linked.bc 
$(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_promise_linked $(LINKER_FLAGS) + $(CC) $(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/lenet_tune $(LINKER_FLAGS) $(BUILD_DIR): mkdir -p $@ diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp index 7698e1511b4dbc8f15eea3386ec98b338178fd86..1746fc13dc4809f8c3d806fa144903fac50f3315 100644 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet.cpp @@ -1,382 +1,373 @@ -#include <iostream> -#include <cstdio> -#include <cstring> -#include <cinttypes> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -using namespace std; - -/* DNN Layer 1 **/ -void tensorConvNode1(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - // X * W = t2 * t1 - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <cstring> +#include <visc.h> +#include <tensorTypes.h> +#include <tensorUtils.h> + +void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); + __visc__return(2, r, (size_t) 0); } +void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); -void tensorAddNode1(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); +} + +void var_2_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); } -void tensorPoolNode1(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_3_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); + __visc__return(2, r, (size_t) 0); } -void tensorTanhNode1(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); + __visc__return(2, r, (size_t) 0); } -/** End of Layer 1 **/ +void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); +} +void var_6_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} -/* DNN Layer 2 **/ -void tensorConvNode2(void *t1, size_t bytes1, void *t2, size_t bytes2) { - 
__visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - // X * W = t2 * t1 - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_convolution(t1, t2, 1, 1, 2, 2); + __visc__return(2, r, (size_t) 0); } +void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); + + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); +} -void tensorAddNode2(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_9_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); } -void tensorPoolNode2(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_mul(t1, t2); + __visc__return(2, r, (size_t) 0); } -void tensorTanhNode2(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); } -/** End of Layer 2 **/ +void var_12_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); +} -/***** DNN Layer3 ****/ -void tensorMulNode3(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_13_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - // X * W = t2 * t1 - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_mul(t1, t2); + __visc__return(2, r, (size_t) 0); } -void tensorAddNode3(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); +void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); + void *r = __visc__tensor_add(t1, t2); + __visc__return(2, r, (size_t) 0); } -void tensorTanhNode3(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); +void var_15_node(void* t1, size_t bytes_t1) { + __visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); + void* r = __visc__tensor_relu(t1); + __visc__return(2, r, (size_t) 0); } -/** End of Layer 3 **/ +void var_16_node(void* t1, size_t bytes_t1) { + 
__visc__hint(visc::CUDNN_TARGET); + __visc__attributes(1, t1, 0); + void* r = __visc__tensor_softmax(t1); + __visc__return(2, r, (size_t) 0); +} +void root(void* input, size_t input_bytes, + void* conv2d_1_w, size_t conv2d_1_w_bytes, + void* conv2d_1_b, size_t conv2d_1_b_bytes, + void* conv2d_2_w, size_t conv2d_2_w_bytes, + void* conv2d_2_b, size_t conv2d_2_b_bytes, + void* conv2d_3_w, size_t conv2d_3_w_bytes, + void* conv2d_3_b, size_t conv2d_3_b_bytes, + void* dense_1_w, size_t dense_1_w_bytes, + void* dense_1_b, size_t dense_1_b_bytes, + void* dense_2_w, size_t dense_2_w_bytes, + void* dense_2_b, size_t dense_2_b_bytes){ -/***** DNN Layer4 ****/ -void tensorMulNode4(void *t1, size_t bytes1, void *t2, size_t bytes2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - // X * W = t2 * t1 - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} + __visc__hint(visc::CPU_TARGET); + __visc__attributes(11, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, 0); -void tensorAddNode4(void *t1, size_t bytest1, void *t2, size_t bytest2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - void* r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} + void* var_0 = __visc__createNodeND(0, var_0_node); -void tensorTanhNode4(void *t1, size_t bytest1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); + __visc__bindIn(var_0, 0, 0, 0); + __visc__bindIn(var_0, 1, 1, 0); + __visc__bindIn(var_0, 2, 2, 0); + __visc__bindIn(var_0, 3, 3, 0); - void* r = __visc__tensor_tanh(t1); - __visc__return(2, r, (size_t) 0); -} + void* var_1 = __visc__createNodeND(0, var_1_node); -/** End of Layer 4 **/ - - - -void root(void *x, size_t x_bytes, - void *conv1_w, size_t conv1_w_bytes, - void *conv1_b, size_t conv1_b_bytes, - void *conv2_w, size_t conv2_w_bytes, - void *conv2_b, size_t conv2_b_bytes, - void *fc1_w, size_t fc1_w_bytes, - void *fc1_b, size_t fc1_b_bytes, - void *fc2_w, size_t fc2_w_bytes, - void *fc2_b, size_t fc2_b_bytes){ - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(5, x, conv1_w, conv1_b, conv2_w, conv2_b, 0); - - // Conv1 Nodes - void *nodeConv1 = __visc__createNodeND(0, tensorConvNode1); - void *nodeAdd1 = __visc__createNodeND(0, tensorAddNode1); - void *nodePool1 = __visc__createNodeND(0, tensorPoolNode1); - void *nodeTanh1 = __visc__createNodeND(0, tensorTanhNode1); - // Conv2 Nodes - void *nodeConv2 = __visc__createNodeND(0, tensorConvNode2); - void *nodeAdd2 = __visc__createNodeND(0, tensorAddNode2); - void *nodePool2 = __visc__createNodeND(0, tensorPoolNode2); - void *nodeTanh2 = __visc__createNodeND(0, tensorTanhNode2); - // FC1 Nodes - void *nodeMul3 = __visc__createNodeND(0, tensorMulNode3); - void *nodeAdd3 = __visc__createNodeND(0, tensorAddNode3); - void *nodeTanh3 = __visc__createNodeND(0, tensorTanhNode3); - // FC2 Nodes - void *nodeMul4 = __visc__createNodeND(0, tensorMulNode4); - void *nodeAdd4 = __visc__createNodeND(0, tensorAddNode4); - void *nodeTanh4 = __visc__createNodeND(0, tensorTanhNode4); - - - //***** Conv Layer 1 *******/ - // node, src, dst, stream - __visc__bindIn(nodeConv1, 0, 0, 0); - __visc__bindIn(nodeConv1, 1, 1, 0); - __visc__bindIn(nodeConv1, 2, 2, 0); - __visc__bindIn(nodeConv1, 3, 3, 0); - - // node, node, type, src, dst, stream - __visc__edge(nodeConv1, nodeAdd1, 1, 0, 0, 0); - __visc__edge(nodeConv1, nodeAdd1, 1, 1, 1, 0); - - // parent_index, dest_index, bind_type - 
__visc__bindIn(nodeAdd1, 4, 2, 0); - __visc__bindIn(nodeAdd1, 5, 3, 0); - - // node, node, type, src, dst, stream - __visc__edge(nodeAdd1, nodePool1, 1, 0, 0, 0); - __visc__edge(nodeAdd1, nodePool1, 1, 1, 1, 0); - - // node, node, type, src, dst, stream - __visc__edge(nodePool1, nodeTanh1, 1, 0, 0, 0); - __visc__edge(nodePool1, nodeTanh1, 1, 1, 1, 0); - - - /**** Conv Layer 2 ****/ - // ConvOp2 - __visc__bindIn(nodeConv2, 6, 2, 0); - __visc__bindIn(nodeConv2, 7, 3, 0); - - __visc__edge(nodeTanh1, nodeConv2, 1, 0, 0, 0); - __visc__edge(nodeTanh1, nodeConv2, 1, 1, 1, 0); - - // AddOp2 - __visc__bindIn(nodeAdd2, 8, 2, 0); - __visc__bindIn(nodeAdd2, 9, 3, 0); - - __visc__edge(nodeConv2, nodeAdd2, 1, 0, 0, 0); - __visc__edge(nodeConv2, nodeAdd2, 1, 1, 1, 0); - - // PoolOp2 - __visc__edge(nodeAdd2, nodePool2, 1, 0, 0, 0); - __visc__edge(nodeAdd2, nodePool2, 1, 1, 1, 0); - - // TanhOp2 - __visc__edge(nodePool2, nodeTanh2, 1, 0, 0, 0); - __visc__edge(nodePool2, nodeTanh2, 1, 1, 1, 0); - - - /**** FC Layer 1 ****/ - // MulOp3 - __visc__bindIn(nodeMul3, 10, 2, 0); - __visc__bindIn(nodeMul3, 11, 3, 0); - - __visc__edge(nodeTanh2, nodeMul3, 1, 0, 0, 0); - __visc__edge(nodeTanh2, nodeMul3, 1, 1, 1, 0); - - // AddOp3 - __visc__bindIn(nodeAdd3, 12, 2, 0); - __visc__bindIn(nodeAdd3, 13, 3, 0); - - __visc__edge(nodeMul3, nodeAdd3, 1, 0, 0, 0); - __visc__edge(nodeMul3, nodeAdd3, 1, 1, 1, 0); - - // TanhOp3 - __visc__edge(nodeAdd3, nodeTanh3, 1, 0, 0, 0); - __visc__edge(nodeAdd3, nodeTanh3, 1, 1, 1, 0); - - - /**** FC Layer 2 ****/ - // MulOp4 - __visc__bindIn(nodeMul4, 14, 2, 0); - __visc__bindIn(nodeMul4, 15, 3, 0); - - __visc__edge(nodeTanh3, nodeMul4, 1, 0, 0, 0); - __visc__edge(nodeTanh3, nodeMul4, 1, 1, 1, 0); - - // AddOp4 - __visc__bindIn(nodeAdd4, 16, 2, 0); - __visc__bindIn(nodeAdd4, 17, 3, 0); - - __visc__edge(nodeMul4, nodeAdd4, 1, 0, 0, 0); - __visc__edge(nodeMul4, nodeAdd4, 1, 1, 1, 0); - - // TanhOp4 - __visc__edge(nodeAdd4, nodeTanh4, 1, 0, 0, 0); - __visc__edge(nodeAdd4, nodeTanh4, 1, 1, 1, 0); - - - - /***** Output Binding ****/ - __visc__bindOut(nodeTanh4, 0, 0, 0); - __visc__bindOut(nodeTanh4, 1, 1, 0); + __visc__edge(var_0, var_1, 1, 0, 0, 0); + __visc__edge(var_0, var_1, 1, 1, 1, 0); + __visc__bindIn(var_1, 4, 2, 0); + __visc__bindIn(var_1, 5, 3, 0); -} + void* var_2 = __visc__createNodeND(0, var_2_node); + + __visc__edge(var_1, var_2, 1, 0, 0, 0); + __visc__edge(var_1, var_2, 1, 1, 1, 0); + + void* var_3 = __visc__createNodeND(0, var_3_node); + + __visc__edge(var_2, var_3, 1, 0, 0, 0); + __visc__edge(var_2, var_3, 1, 1, 1, 0); + + void* var_4 = __visc__createNodeND(0, var_4_node); + + __visc__edge(var_3, var_4, 1, 0, 0, 0); + __visc__edge(var_3, var_4, 1, 1, 1, 0); + __visc__bindIn(var_4, 6, 2, 0); + __visc__bindIn(var_4, 7, 3, 0); + + void* var_5 = __visc__createNodeND(0, var_5_node); + + __visc__edge(var_4, var_5, 1, 0, 0, 0); + __visc__edge(var_4, var_5, 1, 1, 1, 0); + __visc__bindIn(var_5, 8, 2, 0); + __visc__bindIn(var_5, 9, 3, 0); + + void* var_6 = __visc__createNodeND(0, var_6_node); + + __visc__edge(var_5, var_6, 1, 0, 0, 0); + __visc__edge(var_5, var_6, 1, 1, 1, 0); + + void* var_7 = __visc__createNodeND(0, var_7_node); + + __visc__edge(var_6, var_7, 1, 0, 0, 0); + __visc__edge(var_6, var_7, 1, 1, 1, 0); + __visc__bindIn(var_7, 10, 2, 0); + __visc__bindIn(var_7, 11, 3, 0); + void* var_8 = __visc__createNodeND(0, var_8_node); + + __visc__edge(var_7, var_8, 1, 0, 0, 0); + __visc__edge(var_7, var_8, 1, 1, 1, 0); + __visc__bindIn(var_8, 12, 2, 0); + __visc__bindIn(var_8, 13, 3, 
0); + + void* var_9 = __visc__createNodeND(0, var_9_node); + + __visc__edge(var_8, var_9, 1, 0, 0, 0); + __visc__edge(var_8, var_9, 1, 1, 1, 0); + + void* var_10 = __visc__createNodeND(0, var_10_node); + + __visc__edge(var_9, var_10, 1, 0, 0, 0); + __visc__edge(var_9, var_10, 1, 1, 1, 0); + __visc__bindIn(var_10, 14, 2, 0); + __visc__bindIn(var_10, 15, 3, 0); + + void* var_11 = __visc__createNodeND(0, var_11_node); + + __visc__edge(var_10, var_11, 1, 0, 0, 0); + __visc__edge(var_10, var_11, 1, 1, 1, 0); + __visc__bindIn(var_11, 16, 2, 0); + __visc__bindIn(var_11, 17, 3, 0); + + void* var_12 = __visc__createNodeND(0, var_12_node); + + __visc__edge(var_11, var_12, 1, 0, 0, 0); + __visc__edge(var_11, var_12, 1, 1, 1, 0); + + void* var_13 = __visc__createNodeND(0, var_13_node); + + __visc__edge(var_12, var_13, 1, 0, 0, 0); + __visc__edge(var_12, var_13, 1, 1, 1, 0); + __visc__bindIn(var_13, 18, 2, 0); + __visc__bindIn(var_13, 19, 3, 0); + + void* var_14 = __visc__createNodeND(0, var_14_node); + + __visc__edge(var_13, var_14, 1, 0, 0, 0); + __visc__edge(var_13, var_14, 1, 1, 1, 0); + __visc__bindIn(var_14, 20, 2, 0); + __visc__bindIn(var_14, 21, 3, 0); + + void* var_15 = __visc__createNodeND(0, var_15_node); + + __visc__edge(var_14, var_15, 1, 0, 0, 0); + __visc__edge(var_14, var_15, 1, 1, 1, 0); + + void* var_16 = __visc__createNodeND(0, var_16_node); + + __visc__edge(var_15, var_16, 1, 0, 0, 0); + __visc__edge(var_15, var_16, 1, 1, 1, 0); + + __visc__bindOut(var_16, 0, 0, 0); + __visc__bindOut(var_16, 1, 1, 0); + +} -// Return type for the nodes struct ret_t { - void *tensor; - size_t bytes; -}; + void* tensor; + size_t bytes; +}; typedef struct __attribute__((__packed__)) { - void *x; - size_t x_bytes; - // 1st Layer parameters - void* conv1_w; - size_t conv1_w_bytes; - void* conv1_b; - size_t conv1_b_bytes; - // 2nd Layer parameters - void* conv2_w; - size_t conv2_w_bytes; - void* conv2_b; - size_t conv2_b_bytes; - // 3rd Layer parameters - void* fc1_w; - size_t fc1_w_bytes; - void* fc1_b; - size_t fc1_b_bytes; - // 4th Layer parameters - void* fc2_w; - size_t fc2_w_bytes; - void* fc2_b; - size_t fc2_b_bytes; - - struct ret_t r; + void* input; + size_t input_bytes; + void* conv2d_1_w; + size_t conv2d_1_w_bytes; + void* conv2d_1_b; + size_t conv2d_1_b_bytes; + void* conv2d_2_w; + size_t conv2d_2_w_bytes; + void* conv2d_2_b; + size_t conv2d_2_b_bytes; + void* conv2d_3_w; + size_t conv2d_3_w_bytes; + void* conv2d_3_b; + size_t conv2d_3_b_bytes; + void* dense_1_w; + size_t dense_1_w_bytes; + void* dense_1_b; + size_t dense_1_b_bytes; + void* dense_2_w; + size_t dense_2_w_bytes; + void* dense_2_b; + size_t dense_2_b_bytes; + + struct ret_t r; } RootIn; -int main() { - - int test_batch_size = 10000; - std::string prefix = "../../../../../../projects/hpvm-tensor-rt/model_params"; - std::string input_data_path = prefix + std::string("/FC_network2/mnist_float_input.bin"); - std::string labels_path = "../../../../../../projects/hpvm-tensor-rt/model_params/lenet_params/datasets/t10k-labels-idx1-ubyte"; - std::string conv1_w_path = prefix + std::string("/lenet_keras/conv1.bin"); - std::string conv1_b_path = prefix + std::string("/lenet_keras/conv1_bias.bin"); - std::string conv2_w_path = prefix + std::string("/lenet_keras/conv2.bin"); - std::string conv2_b_path = prefix + std::string("/lenet_keras/conv2_bias.bin"); - std::string fc1_w_path = prefix + std::string("/lenet_keras/fc1.bin"); - std::string fc1_b_path = prefix + std::string("/lenet_keras/fc1_bias.bin"); - std::string fc2_w_path = prefix + 
std::string("/lenet_keras/fc2.bin"); - std::string fc2_b_path = prefix + std::string("/lenet_keras/fc2_bias.bin"); - - - printf("Reading Input Data from = %s \n", input_data_path.c_str()); - - uint8_t* labels = readLabels(labels_path.c_str(), test_batch_size); - void* x = readTrainedWeights(input_data_path.c_str(), float_type, - test_batch_size, 1, 28, 28); - void* conv1_w = readTrainedWeights(conv1_w_path.c_str(), float_type, 32, 1, 5, 5); - void* conv1_b = readTrainedWeights(conv1_b_path.c_str(), float_type, 1, 32, 1, 1); - void* conv2_w = readTrainedWeights(conv2_w_path.c_str(), float_type, 64, 32, 5, 5); - void* conv2_b = readTrainedWeights(conv2_b_path.c_str(), float_type, 1, 64, 1, 1); - void* fc1_w = readTrainedWeights(fc1_w_path.c_str(), float_type, 1, 1, 7*7*64, 1024); - void* fc1_b = readTrainedWeights(fc1_b_path.c_str(), float_type, 1, 1024, 1, 1); - void* fc2_w = readTrainedWeights(fc2_w_path.c_str(), float_type, 1, 1, 1024, 10); - void* fc2_b = readTrainedWeights(fc2_b_path.c_str(), float_type, 1, 10, 1, 1); - - __visc__init(); - - RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - args->x = x; - args->x_bytes = 0; - // Conv Layers params - args->conv1_w = conv1_w; - args->conv1_w_bytes = 0; - args->conv1_b = conv1_b; - args->conv1_b_bytes = 0; - args->conv2_w = conv2_w; - args->conv2_w_bytes = 0; - args->conv2_b = conv2_b; - args->conv2_b_bytes = 0; - // FC Layers params - args->fc1_w = fc1_w; - args->fc1_w_bytes = 0; - args->fc1_b = fc1_b; - args->fc1_b_bytes = 0; - args->fc2_w = fc2_w; - args->fc2_w_bytes = 0; - args->fc2_b = fc2_b; - args->fc2_b_bytes = 0; - - void *dfg = __visc__launch(0, root, (void *)args); - - __visc__wait(dfg); - - // FIXME: Value returned in the wrong index!! - //void *r = static_cast<RootIn*>(args)->r.tensor; - void *result = static_cast<RootIn*>(args)->x; - hpvm_request_tensor(result, 0); - - __visc__cleanup(); - - computeAccuracy2(labels, test_batch_size, result); - - return 0; -} - - +int main(){ + +std::string dir_prefix = std::string("../../../../../../projects/hpvm-tensor-rt/model_params/lenet_hpvm/"); +std::string input_path = dir_prefix + std::string("input.bin"); +void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); +std::string labels_path = dir_prefix + std::string("labels.bin"); +uint8_t* labels = readLabels(labels_path.c_str(),10000); +std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); +void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); +std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); +void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); +std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); +void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); +std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); +void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); +std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); +void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); +std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); +void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); +std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); +void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); +std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); +void* dense_1_b = 
readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); +std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); +void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); +std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); +void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); + +__visc__init(); +RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); + +args->input = input; +args->input_bytes = 0; +args->conv2d_1_w = conv2d_1_w; +args->conv2d_1_w_bytes = 0; +args->conv2d_1_b = conv2d_1_b; +args->conv2d_1_b_bytes = 0; +args->conv2d_2_w = conv2d_2_w; +args->conv2d_2_w_bytes = 0; +args->conv2d_2_b = conv2d_2_b; +args->conv2d_2_b_bytes = 0; +args->conv2d_3_w = conv2d_3_w; +args->conv2d_3_w_bytes = 0; +args->conv2d_3_b = conv2d_3_b; +args->conv2d_3_b_bytes = 0; +args->dense_1_w = dense_1_w; +args->dense_1_w_bytes = 0; +args->dense_1_b = dense_1_b; +args->dense_1_b_bytes = 0; +args->dense_2_w = dense_2_w; +args->dense_2_w_bytes = 0; +args->dense_2_b = dense_2_b; +args->dense_2_b_bytes = 0; + +void* dfg = __visc__launch(0, root, (void*) args); + +__visc__wait(dfg); + +void *result = static_cast<RootIn*>(args)->input; +hpvm_request_tensor(result, 0); + +__visc__cleanup(); + computeAccuracy2(labels, 10000, result); +return 0; + +} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/src/lenet_promise.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet_promise.cpp similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/src/lenet_promise.cpp rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet/src/lenet_promise.cpp diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/Makefile deleted file mode 100644 index aeb8cd6bb1606c496258fcd92f28e1b3d0f10566..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/Makefile +++ /dev/null @@ -1,63 +0,0 @@ -DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks -# NOTE: can configure build directory -HPVM_BUILD_DIR = $(LLVM_SRC_ROOT)/../build_hpvm/ - -CC = $(HPVM_BUILD_DIR)/bin/clang++ -OPT = $(HPVM_BUILD_DIR)/bin/opt -LLVM_DIS = $(HPVM_BUILD_DIR)/bin/llvm-dis -LLVM_LINK = $(HPVM_BUILD_DIR)/bin/llvm-link -LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include - -SRC_DIR = src -BUILD_DIR = build -APP = lenet - -TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include -TENSOR_RT_INCLUDE_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/tensor_runtime/include -TENSOR_LIB_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_runtime.a -TENSOR_AUTOTUNER_DIR = $(LLVM_SRC_ROOT)/projects/hpvm-tensor-rt/lib/libtensor_autotuner.a - -CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -I $(TENSOR_RT_INCLUDE_DIR) -I $(CUDA_INCLUDE_PATH) -fno-exceptions -ffast-math -std=c++11 -O3 -CCFLAGS += -DDEVICE=CUDNN_TARGET -LINKER_FLAGS = -lpthread -lcudart -lcurand -lcudnn -lcublas -lOpenCL - -HPVM_LIB_DIR = $(HPVM_BUILD_DIR)/lib - - -VISC_OPTFLAGS = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_CUDNN.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -dfg2llvm-cudnn -dfg2llvm-x86 -clearDFG - - -VISC_OPTFLAGS2 = -load $(HPVM_LIB_DIR)/LLVMBuildDFG.so -load $(HPVM_LIB_DIR)/LLVMInPlaceDFGAnalysis.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_PROMISE.so -load $(HPVM_LIB_DIR)/LLVMDFG2LLVM_X86.so -load 
$(HPVM_LIB_DIR)/LLVMFuseHPVMTensorNodes.so -load $(HPVM_LIB_DIR)/LLVMClearDFG.so -inplace -hpvm-fuse -dfg2llvm-promise -dfg2llvm-x86 -clearDFG - - - -TARGET = $(BUILD_DIR)/$(APP).opt.bc -SOURCES = $(SRC_DIR)/$(APP).cpp -VISC_RT_PATH = $(LLVM_SRC_ROOT)/../build/projects/visc-rt/visc-rt.ll - -#OBJS = $(BUILD_DIR)/$(wildcabrd *.ll) -.PRECIOUS: $(BUILD_DIR)/$(APP).ll $(BUILD_DIR)/$(APP).visc.ll -default: $(BUILD_DIR) $(TARGET) - - -$(BUILD_DIR)/%.ll: $(SRC_DIR)/%.cpp - $(CC) $(CC_FLAGS) -emit-llvm src/$(APP).cpp -S -o $(BUILD_DIR)/$(APP).ll - $(CC) $(CC_FLAGS) -emit-llvm src/$(APP)_promise.cpp -S -o $(BUILD_DIR)/$(APP)_promise.ll - - -$(BUILD_DIR)/%.opt.bc: $(BUILD_DIR)/%.ll - $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP).ll -S -o $(BUILD_DIR)/$(APP).visc.ll - $(OPT) -load LLVMGenVISC.so -genvisc -globaldce $(BUILD_DIR)/$(APP)_promise.ll -S -o $(BUILD_DIR)/$(APP)_promise.visc.ll - $(OPT) $(VISC_OPTFLAGS) $(BUILD_DIR)/$(APP).visc.ll -o $(BUILD_DIR)/$(APP)_cudnn.bc - $(OPT) $(VISC_OPTFLAGS2) $(BUILD_DIR)/$(APP)_promise.visc.ll -o $(BUILD_DIR)/$(APP)_promise.bc - $(LLVM_LINK) $(BUILD_DIR)/$(APP)_cudnn.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_cudnn_linked.bc - $(LLVM_LINK) $(BUILD_DIR)/$(APP)_promise.bc $(VISC_RT_PATH) -o $(BUILD_DIR)/$(APP)_promise_linked.bc - $(CC) $(BUILD_DIR)/$(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_cudnn_linked $(LINKER_FLAGS) - $(CC) $(BUILD_DIR)/$(BUILD_DIR)/$(APP)_promise_linked.bc $(TENSOR_LIB_DIR) -o $(BUILD_DIR)/$(APP)_promise_linked $(LINKER_FLAGS) - $(CC) $(BUILD_DIR)/$(BUILD_DIR)/$(APP)_cudnn_linked.bc $(TENSOR_AUTOTUNER_DIR) -o $(BUILD_DIR)/lenet_tune $(LINKER_FLAGS) - -$(BUILD_DIR): - mkdir -p $@ - -clean: - rm -rf $(BUILD_DIR) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/src/lenet.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/src/lenet.cpp deleted file mode 100644 index 1746fc13dc4809f8c3d806fa144903fac50f3315..0000000000000000000000000000000000000000 --- a/llvm/test/VISC/DNN_Benchmarks/benchmarks/lenet_front/src/lenet.cpp +++ /dev/null @@ -1,373 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_2_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_3_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 2, 2, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_5_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - 
__visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_6_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_9_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_12_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_13_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_15_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_16_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t) 0); -} - -void root(void* input, size_t input_bytes, - void* conv2d_1_w, size_t conv2d_1_w_bytes, - void* conv2d_1_b, size_t conv2d_1_b_bytes, - void* conv2d_2_w, size_t conv2d_2_w_bytes, - void* conv2d_2_b, size_t conv2d_2_b_bytes, - void* conv2d_3_w, size_t conv2d_3_w_bytes, - void* conv2d_3_b, size_t conv2d_3_b_bytes, - void* dense_1_w, size_t dense_1_w_bytes, - void* dense_1_b, size_t dense_1_b_bytes, - void* dense_2_w, size_t dense_2_w_bytes, - void* dense_2_b, size_t dense_2_b_bytes){ - - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(11, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, 0); - - - void* var_0 = __visc__createNodeND(0, var_0_node); - - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); - - void* var_1 = __visc__createNodeND(0, var_1_node); - - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - 
__visc__bindIn(var_1, 5, 3, 0); - - void* var_2 = __visc__createNodeND(0, var_2_node); - - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); - - void* var_3 = __visc__createNodeND(0, var_3_node); - - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); - - void* var_4 = __visc__createNodeND(0, var_4_node); - - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 6, 2, 0); - __visc__bindIn(var_4, 7, 3, 0); - - void* var_5 = __visc__createNodeND(0, var_5_node); - - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - __visc__bindIn(var_5, 8, 2, 0); - __visc__bindIn(var_5, 9, 3, 0); - - void* var_6 = __visc__createNodeND(0, var_6_node); - - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); - - void* var_7 = __visc__createNodeND(0, var_7_node); - - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); - __visc__bindIn(var_7, 10, 2, 0); - __visc__bindIn(var_7, 11, 3, 0); - - void* var_8 = __visc__createNodeND(0, var_8_node); - - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - __visc__bindIn(var_8, 12, 2, 0); - __visc__bindIn(var_8, 13, 3, 0); - - void* var_9 = __visc__createNodeND(0, var_9_node); - - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - - void* var_10 = __visc__createNodeND(0, var_10_node); - - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); - __visc__bindIn(var_10, 14, 2, 0); - __visc__bindIn(var_10, 15, 3, 0); - - void* var_11 = __visc__createNodeND(0, var_11_node); - - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - __visc__bindIn(var_11, 16, 2, 0); - __visc__bindIn(var_11, 17, 3, 0); - - void* var_12 = __visc__createNodeND(0, var_12_node); - - __visc__edge(var_11, var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - - void* var_13 = __visc__createNodeND(0, var_13_node); - - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); - __visc__bindIn(var_13, 18, 2, 0); - __visc__bindIn(var_13, 19, 3, 0); - - void* var_14 = __visc__createNodeND(0, var_14_node); - - __visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - __visc__bindIn(var_14, 20, 2, 0); - __visc__bindIn(var_14, 21, 3, 0); - - void* var_15 = __visc__createNodeND(0, var_15_node); - - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - - void* var_16 = __visc__createNodeND(0, var_16_node); - - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); - - __visc__bindOut(var_16, 0, 0, 0); - __visc__bindOut(var_16, 1, 1, 0); - -} - -struct ret_t { - void* tensor; - size_t bytes; -}; - -typedef struct __attribute__((__packed__)) { - void* input; - size_t input_bytes; - void* conv2d_1_w; - size_t conv2d_1_w_bytes; - void* conv2d_1_b; - size_t conv2d_1_b_bytes; - void* conv2d_2_w; - size_t conv2d_2_w_bytes; - void* conv2d_2_b; - size_t conv2d_2_b_bytes; - void* conv2d_3_w; - size_t conv2d_3_w_bytes; - void* conv2d_3_b; - size_t conv2d_3_b_bytes; - void* dense_1_w; - size_t dense_1_w_bytes; - void* dense_1_b; - size_t dense_1_b_bytes; - void* dense_2_w; - size_t dense_2_w_bytes; - void* dense_2_b; - size_t dense_2_b_bytes; - - struct ret_t r; -} -RootIn; - -int main(){ - -std::string dir_prefix = 
std::string("../../../../../../projects/hpvm-tensor-rt/model_params/lenet_hpvm/"); -std::string input_path = dir_prefix + std::string("input.bin"); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,1,28,28); -std::string labels_path = dir_prefix + std::string("labels.bin"); -uint8_t* labels = readLabels(labels_path.c_str(),10000); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,32,1,5,5); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,32,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,32,5,5); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,64,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,3136,1024); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,1024,1,1); -std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); -void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,1024,10); -std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); -void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - -__visc__init(); -RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - -args->input = input; -args->input_bytes = 0; -args->conv2d_1_w = conv2d_1_w; -args->conv2d_1_w_bytes = 0; -args->conv2d_1_b = conv2d_1_b; -args->conv2d_1_b_bytes = 0; -args->conv2d_2_w = conv2d_2_w; -args->conv2d_2_w_bytes = 0; -args->conv2d_2_b = conv2d_2_b; -args->conv2d_2_b_bytes = 0; -args->conv2d_3_w = conv2d_3_w; -args->conv2d_3_w_bytes = 0; -args->conv2d_3_b = conv2d_3_b; -args->conv2d_3_b_bytes = 0; -args->dense_1_w = dense_1_w; -args->dense_1_w_bytes = 0; -args->dense_1_b = dense_1_b; -args->dense_1_b_bytes = 0; -args->dense_2_w = dense_2_w; -args->dense_2_w_bytes = 0; -args->dense_2_b = dense_2_b; -args->dense_2_b_bytes = 0; - -void* dfg = __visc__launch(0, root, (void*) args); - -__visc__wait(dfg); - -void *result = static_cast<RootIn*>(args)->input; -hpvm_request_tensor(result, 0); - -__visc__cleanup(); - computeAccuracy2(labels, 10000, result); -return 0; - -} diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18_front/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/Makefile similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18_front/Makefile rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/Makefile diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18_front/src/resnet18.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/src/resnet18.cpp similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18_front/src/resnet18.cpp rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/resnet18/src/resnet18.cpp diff --git 
a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/Makefile similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/Makefile rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/Makefile diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/final_accuracy b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/final_accuracy similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/final_accuracy rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/final_accuracy diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10.ll b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10.ll similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10.ll rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10.ll diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10.opt.bc b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10.opt.bc similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10.opt.bc rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10.opt.bc diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10.visc.ll b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10.visc.ll similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10.visc.ll rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10.visc.ll diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10_linked b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10_linked similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10_linked rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10_linked diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10_linked.bc b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10_linked.bc similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10_linked.bc rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10_linked.bc diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10_tune b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10_tune similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/build/vgg16_cifar10_tune rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/build/vgg16_cifar10_tune diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/src/vgg16_cifar10.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/src/vgg16_cifar10.cpp similarity index 100% rename from llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10_front/src/vgg16_cifar10.cpp rename to llvm/test/VISC/DNN_Benchmarks/benchmarks/vgg16_cifar10/src/vgg16_cifar10.cpp
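The rewritten benchmark sources above repeat a handful of idioms that are easy to lose in the diff noise. Every leaf node follows the same four-step template: declare a backend hint, declare which pointer arguments the node reads, apply exactly one tensor intrinsic, and return the result tensor together with a placeholder byte count. A minimal sketch of that template, using only intrinsics that appear in this patch (the name example_relu_node is illustrative, not from the patch):

    // Leaf-node template: hint, attributes, one tensor intrinsic, return.
    // Tensors always travel as a (void* tensor, size_t bytes) pair.
    void example_relu_node(void* t1, size_t bytes_t1) {
      __visc__hint(visc::CUDNN_TARGET);   // lower this node through cuDNN
      __visc__attributes(1, t1, 0);       // reads 1 pointer argument, writes 0

      void* r = __visc__tensor_relu(t1);
      __visc__return(2, r, (size_t) 0);   // 2 values returned: tensor + bytes
    }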
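The root() functions wire those nodes into a dataflow graph with a fixed index convention: each tensor parameter occupies two consecutive slots (pointer, then bytes); __visc__bindIn(node, root_index, node_input, stream) forwards a root parameter into a node input; __visc__edge(src, dst, type, src_output, dst_input, stream) forwards a node output downstream (argument order per the comments in the deleted lenet code); and __visc__bindOut exposes the final node's outputs as graph outputs. A minimal sketch wiring one convolution plus bias-add, reusing var_0_node and var_1_node from the lenet source above (mini_root itself is illustrative, not part of the patch):

    void mini_root(void* input, size_t input_bytes,
                   void* w, size_t w_bytes,
                   void* b, size_t b_bytes) {
      __visc__hint(visc::CPU_TARGET);
      __visc__attributes(3, input, w, b, 0);

      void* conv = __visc__createNodeND(0, var_0_node);  // 0-D: one instance
      __visc__bindIn(conv, 0, 0, 0);  // root arg 0 (input)       -> input 0
      __visc__bindIn(conv, 1, 1, 0);  // root arg 1 (input_bytes) -> input 1
      __visc__bindIn(conv, 2, 2, 0);  // root arg 2 (w)           -> input 2
      __visc__bindIn(conv, 3, 3, 0);  // root arg 3 (w_bytes)     -> input 3

      void* add = __visc__createNodeND(0, var_1_node);
      __visc__edge(conv, add, 1, 0, 0, 0);  // conv out 0 (tensor) -> add in 0
      __visc__edge(conv, add, 1, 1, 1, 0);  // conv out 1 (bytes)  -> add in 1
      __visc__bindIn(add, 4, 2, 0);         // root arg 4 (b)       -> add in 2
      __visc__bindIn(add, 5, 3, 0);         // root arg 5 (b_bytes) -> add in 3

      __visc__bindOut(add, 0, 0, 0);  // graph returns the add node's outputs
      __visc__bindOut(add, 1, 1, 0);
    }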
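The readTrainedWeights calls encode tensor shapes as four trailing NCHW-style dimensions: convolution filters are (output channels, input channels, kernel height, kernel width), biases are (1, channels, 1, 1), and fully connected weights are flattened to (1, 1, in_features, out_features). The leading literal 0 in the new code appears to stand in for the float_type constant used by the older code. A short extract mirroring the lenet loads above (paths and variable names are the patch's own; assumes <string> is in scope via the existing headers):

    std::string dir_prefix =
        std::string("../../../../../../projects/hpvm-tensor-rt/model_params/lenet_hpvm/");
    // 32 filters, 1 input channel, 5x5 kernel
    void* conv2d_1_w =
        readTrainedWeights((dir_prefix + "conv2d_1_w.bin").c_str(), 0, 32, 1, 5, 5);
    // one bias value per output channel
    void* conv2d_1_b =
        readTrainedWeights((dir_prefix + "conv2d_1_b.bin").c_str(), 0, 1, 32, 1, 1);
    // dense layer: 3136 (= 7*7*64) inputs flattened to 1024 outputs
    void* dense_1_w =
        readTrainedWeights((dir_prefix + "dense_1_w.bin").c_str(), 0, 1, 1, 3136, 1024);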
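Host code then packs one field per root parameter into a packed struct, launches the graph, waits, and pulls the output back. Note that both the old and new lenet main() read the result from the struct's first slot rather than from the ret_t field; the deleted code carries a FIXME about the value being returned in the wrong index, and the sketch below reproduces that workaround rather than endorsing it. A condensed sketch, assuming mini_root and the patch's ret_t, readTrainedWeights, hpvm_request_tensor and computeAccuracy2 are in scope (ExampleRootIn and example_run are illustrative names):

    typedef struct __attribute__((__packed__)) {
      void* input;  size_t input_bytes;  // one (pointer, bytes) pair per arg,
      void* w;      size_t w_bytes;      // in root() parameter order
      void* b;      size_t b_bytes;
      struct ret_t r;                    // slot for the graph's return value
    } ExampleRootIn;

    int example_run(void* input, void* w, void* b, uint8_t* labels) {
      __visc__init();

      ExampleRootIn* args = (ExampleRootIn*) malloc(sizeof(ExampleRootIn));
      args->input = input; args->input_bytes = 0;  // bytes fields left 0,
      args->w = w;         args->w_bytes = 0;      // as in the code above
      args->b = b;         args->b_bytes = 0;

      void* dfg = __visc__launch(0, mini_root, (void*) args);
      __visc__wait(dfg);

      void* result = args->input;      // workaround: result read from slot 0
      hpvm_request_tensor(result, 0);  // bring the tensor back to the CPU

      __visc__cleanup();
      computeAccuracy2(labels, 10000, result);  // batch size as in the patch
      free(args);
      return 0;
    }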