diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/Makefile b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..3cf770ebfe0c29b3f4ade39c89430884a5e8043f --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/Makefile @@ -0,0 +1,31 @@ +DNN_BENCHMARK_ROOT = $(LLVM_SRC_ROOT)/test/VISC/DNN_Benchmarks +CC = $(LLVM_SRC_ROOT)/../build/bin/clang++ +OPT = $(LLVM_SRC_ROOT)/../build/bin/opt +LLVM_INCLUDE_DIR = $(LLVM_SRC_ROOT)/include + +SRC_DIR = src +BUILD_DIR = build +APP = fcl + +TENSOR_INCLUDE_DIR = $(DNN_BENCHMARK_ROOT)/common/include + +CC_FLAGS = -I $(LLVM_INCLUDE_DIR) -I $(TENSOR_INCLUDE_DIR) -fno-exceptions -ffast-math -std=c++11 -O3 + +TARGET = $(BUILD_DIR)/$(APP).visc +SOURCES = $(SRC_DIR)/$(APP).cpp +#OBJS = $(BUILD_DIR)/$(wildcard *.ll) +.PRECIOUS: $(BUILD_DIR)/$(APP).ll +default: $(BUILD_DIR) $(TARGET) + + +$(BUILD_DIR)/%.ll: $(SOURCES) + $(CC) $(CC_FLAGS) -emit-llvm -S -o $@ $< + +$(BUILD_DIR)/%.visc: $(BUILD_DIR)/%.ll + $(OPT) -load LLVMGenVISC.so -genvisc -globaldce -visc-timers-gen $< -S -o $@ -debug + +$(BUILD_DIR): + mkdir -p $@ + +clean: + rm -rf $(BUILD_DIR) diff --git a/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/src/fcl.cpp b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/src/fcl.cpp new file mode 100644 index 0000000000000000000000000000000000000000..49e708d51196522f711544afdf9bb6997f76ee78 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/benchmarks/fcl/src/fcl.cpp @@ -0,0 +1,110 @@ +#include <iostream> +#include <cstdio> +#include <cstring> +#include <cinttypes> +#include <visc.h> +#include <tensorTypes.h> +#include <tensorUtils.h> + +using namespace std; + +void tensorMul(void *t1, size_t bytes1, void *t2, size_t bytes2) { + __visc__hint(visc::DEVICE); + __visc__attributes(2, t1, t2, 0); + + // Call to visc tensor mul intrinsic + // It will return a void * that we cast to Tensor_t + // Not in place operation + void *r = __visc__tensor_mul(t1, t2); + //struct ret_t res; + //res.tensor = r; + __visc__return(r, 0); +} + +void tensorAdd(void *t1, size_t bytest1, void *t2, size_t bytest2) { + __visc__hint(visc::DEVICE); + __visc__attributes(2, t1, t2, 0); + + // Call to visc tensor add intrinsic + // It will return a void * that we cast to Tensor_t + // In place operation + void* r = __visc__tensor_add(t1, t2); + __visc__return(r, 0); +} + +void root(void *w, size_t bytesw, void *x, size_t bytesx, void *b, size_t bytesb) { + __visc__hint(visc::CPU_TARGET); + __visc__attributes(3, w, x, b, 0); + + void *nodeMul = __visc__createNode(tensorMul); + void *nodeAdd = __visc__createNode(tensorAdd); + + // node, src, dst, stream + __visc__bindIn(nodeMul, 0, 0, 0); + __visc__bindIn(nodeMul, 1, 1, 0); + __visc__bindIn(nodeMul, 2, 2, 0); + __visc__bindIn(nodeMul, 3, 3, 0); + + // node, node, type, src, dst, stream + __visc__edge(nodeMul, nodeAdd, 1, 0, 0, 0); + __visc__edge(nodeMul, nodeAdd, 1, 1, 1, 0); + + __visc__bindIn(nodeAdd, 4, 2, 0); + __visc__bindIn(nodeAdd, 5, 3, 0); + + __visc__bindOut(nodeAdd, 0, 0, 0); + __visc__bindOut(nodeAdd, 1, 1, 0); + +} + + +// Return type for the nodes +struct ret_t { + void *tensor; + size_t bytes; +}; + +typedef struct __attribute__((__packed__)) { + void *w; + size_t bytesw; + void *x; + size_t bytesx; + void *b; + size_t bytesb; + struct ret_t r; +} +RootIn; + +int main() { + + void *w; + void *x; + void *b; + int test_batch_size = 10000; + x = readInputTensor("t10k-images-idx3-ubyte", float_type, + test_batch_size, 1, 28, 28); + w = readTrainedWeights("./model_params/lenet_params/ip1.bias.bin", float_type, 1, 1, 800, 500); + b = readTrainedWeights("./model_params/lenet_params/ip1.bin", float_type, 1, 1, 1, 500); + + __visc__init(); + + RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); + args->w = w; + args->bytesw = 0; + args->x = x; + args->bytesx = 0; + args->b = b; + args->bytesb = 0; + + void *dfg = __visc__launch(0, root, (void *)args); + + __visc__wait(dfg); + + void *r = static_cast<RootIn*>(dfg)->r.tensor; + hpvm_request_tensor(r); + + __visc__cleanup(); + return 0; +} + + diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorTypes.h b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorTypes.h new file mode 100644 index 0000000000000000000000000000000000000000..3479a94abec9d6357edc26e4507ec80f8b060acb --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorTypes.h @@ -0,0 +1,38 @@ + +#ifndef TYPES_HEADER +#define TYPES_HEADER + + +struct Dimension_t{ + int num_dims; + size_t* dim_sizes; +}; + + +struct Tensor_t{ + int tensor_id; // used for indexing (in the tensor runtime) + int data_type; // {float_type, double_type, half_type, int_type} + int data_format; // {nchw, nhwc} + void* host_data; + size_t num_elems; // Total elements + size_t size_in_bytes; // Total size in bytes + struct Dimension_t dims; +}; + + +enum Tensor_type_t{ + float_type, + double_type, + half_type, + int_type +}; + + +// NOTE: Currently only NCHW is supported due to limited cuDNN support +enum Tensor_format_t{ + nchw, + nhwc +}; + + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..de6da3d3a26b2d6a41a29f49fcfe9a5dd30074cd --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/common/include/tensorUtils.h @@ -0,0 +1,24 @@ +#include <tensorTypes.h> + +// Header guards +#ifndef UTILS_HEADER +#define UTILS_HEADER + +void printTensorInfo(void* tensor_ptr); +void dumpWeightsToFile(char* file_name, void* weights_ptr); +void fillTensorWithOnes(void* tensor_ptr); +void fillWithOnesAndTwos(void* tensor_ptr); +void fillTensorWithNegOnes(void* tensor_ptr); +void fillTensorVals(void* tensor_ptr); +void printTensorValues(void* tensor_ptr); +void printTensorDims(void* tensor_ptr); +void compareTensors(void* tensor1_ptr, void* tensor2_ptr); +void compareValues(void* tensor_ptr, float* data, size_t num_elems); +void* readInputTensor(char* file_name, int data_type, int dim1_size, int dim2_size, + int dim3_size, int dim4_size); +struct Tensor* readTrainedWeights(char* file_name, int data_type, int dim1_size, int dim2_size, + int dim3_size, int dim4_size); +uint8_t* readLabels(char* labels_file, int num_labels); +void computeAccuracy(char* labels_file, int num_labels, void* result_ptr); + +#endif diff --git a/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h new file mode 100644 index 0000000000000000000000000000000000000000..fe3a25233f3023b13d7e6cb799db497305b78bb4 --- /dev/null +++ b/llvm/test/VISC/DNN_Benchmarks/common/include/visc.h @@ -0,0 +1,112 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef DEVICE +#define DEVICE GPU_TARGET +#endif + +#include "llvm/SupportVISC/VISCHint.h" + +#ifdef __cplusplus +extern "C" { +void __visc__hint(visc::Target); +void __visc__wait(void*); +#else +void __visc__hint(enum Target); +void __visc__wait(unsigned); +#endif + +#ifdef __cplusplus +void* __visc__node(...); +void* __visc__createNode(...); +void* __visc__createNode1D(...); +void* __visc__createNode2D(...); +void* __visc__createNode3D(...); +void __visc__return(...); +#endif + +void __visc__attributes(unsigned, ...); +void __visc__init(); +void __visc__cleanup(); + +void __visc__bindIn(void*, unsigned, unsigned, unsigned); +void __visc__bindOut(void*, unsigned, unsigned, unsigned); +void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned); +void __visc__push(void*, void*); +void* __visc__pop(void*); +void* __visc__launch(unsigned, ...); + +void* __visc__getNode(); +void* __visc__getParentNode(void*); +void __visc__barrier(); +void* __visc__malloc(long); +long __visc__getNodeInstanceID_x(void*); +long __visc__getNodeInstanceID_y(void*); +long __visc__getNodeInstanceID_z(void*); +long __visc__getNumNodeInstances_x(void*); +long __visc__getNumNodeInstances_y(void*); +long __visc__getNumNodeInstances_z(void*); + +// Atomic +// signed int +int __visc__atomic_cmpxchg(int*, int, int); +int __visc__atomic_add(int*, int); +int __visc__atomic_sub(int*, int); +int __visc__atomic_xchg(int*, int); +int __visc__atomic_inc(int*); +int __visc__atomic_dec(int*); +int __visc__atomic_min(int*, int); +int __visc__atomic_max(int*, int); +int __visc__atomic_umax(int*, int); +int __visc__atomic_umin(int*, int); +int __visc__atomic_and(int*, int); +int __visc__atomic_or(int*, int); +int __visc__atomic_xor(int*, int); + +// Special Func +float __visc__floor(float); +float __visc__rsqrt(float); +float __visc__sqrt(float); +float __visc__sin(float); +float __visc__cos(float); +// unsigned int +//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned); +//unsigned __visc__atomic_add(unsigned*, unsigned); +//unsigned __visc__atomic_sub(unsigned*, unsigned); +//unsigned __visc__atomic_xchg(unsigned*, unsigned); +//unsigned __visc__atomic_inc(unsigned*); +//unsigned __visc__atomic_dec(unsigned*); +//unsigned __visc__atomic_min(unsigned*, unsigned); +//unsigned __visc__atomic_max(unsigned*, unsigned); +//unsigned __visc__atomic_and(unsigned*, unsigned); +//unsigned __visc__atomic_or(unsigned*, unsigned); +//unsigned __visc__atomic_xor(unsigned*, unsigned); + +/* + * ApproxHPVM specific function calls + */ + +void* __visc__tensor_add(void*, void*); +void* __visc__tensor_mul(void*, void*); + +#include <unistd.h> + +long get_global_id(int); +long get_group_id(int); +long get_local_id(int); +long get_local_size(int); + + +void llvm_visc_track_mem(void*, size_t); +void llvm_visc_untrack_mem(void*); +void llvm_visc_request_mem(void*, size_t); +void hpvm_request_tensor(void*); +#ifdef __cplusplus +} +#endif +