From fc36236a449b1ac088e4c0d394f3d3b53e148242 Mon Sep 17 00:00:00 2001 From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu> Date: Fri, 6 Dec 2019 01:51:13 -0600 Subject: [PATCH] Adding simple Saxpy test --- .../parboil/benchmarks/saxpy_test/Makefile | 176 ++++++++++++++ .../parboil/benchmarks/saxpy_test/src/defs.h | 224 ++++++++++++++++++ .../parboil/benchmarks/saxpy_test/src/main.c | 135 +++++++++++ .../parboil/benchmarks/saxpy_test/src/visc.h | 110 +++++++++ 4 files changed, 645 insertions(+) create mode 100644 llvm/test/VISC/parboil/benchmarks/saxpy_test/Makefile create mode 100644 llvm/test/VISC/parboil/benchmarks/saxpy_test/src/defs.h create mode 100644 llvm/test/VISC/parboil/benchmarks/saxpy_test/src/main.c create mode 100644 llvm/test/VISC/parboil/benchmarks/saxpy_test/src/visc.h diff --git a/llvm/test/VISC/parboil/benchmarks/saxpy_test/Makefile b/llvm/test/VISC/parboil/benchmarks/saxpy_test/Makefile new file mode 100644 index 0000000000..f452c0b2cc --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/saxpy_test/Makefile @@ -0,0 +1,176 @@ +# This Makefile compiles the HPVM-CAVA pilot project. +# It builds HPVM-related dependencies, then the native camera pipeline ISP code. +# +# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config, +# which can be copied from Makefile.config.example for a start. + +CONFIG_FILE := Makefile.config + +ifeq ($(wildcard $(CONFIG_FILE)),) + $(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example) +endif +include $(CONFIG_FILE) + +# Compiler Flags + +DLEVEL ?= 0 +LFLAGS += -lm -lrt + +# Build dirs +ifeq ($(VERSION),) + VERSION = IR_modules +endif +SRC_DIR = src/ +CAM_PIPE_SRC_DIR = $(SRC_DIR) +BUILD_DIR = build/$(TARGET)_$(VERSION) +CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) + +# Source files for the frontend camera pipeline +COMMON_SRCS = main.c \ + + +#CAM_PIPE_SRCS = load_cam_model.c \ + cam_pipe_utility.c \ + dma_interface.c # FIXME: This is a hack until external C++ files can be included in build. + + +# NOTE: We have temporarily removed gem5 and other dependencies for simplicity. +SRCS = $(COMMON_SRCS) + +# NATIVE_FULL_PATH_SRCS contains all the full path source files for the camera vision pipeline. +NATIVE_FULL_PATH_SRCS = $(patsubst %, $(SRC_DIR)/%, $(COMMON_SRCS)) +NATIVE_FULL_PATH_SRCS += $(patsubst %, $(CAM_PIPE_SRC_DIR)/%, $(CAM_PIPE_SRCS)) + +INCLUDES += -I$(SRC_DIR) \ + -I$(CAM_PIPE_SRC_DIR) \ + +ifneq ($(CONFUSE_ROOT),) +INCLUDES += -I$(CONFUSE_ROOT)/include +LFLAGS += -L$(CONFUSE_ROOT)/lib +endif + +EXE = vec_add + +CAM_CFLAGS += -mf16c -flax-vector-conversions +LFLAGS += -pthread + + +## BEGIN HPVM MAKEFILE +LANGUAGE=visc +#SRCDIR_OBJS= load_cam_model.ll cam_pipe_utility.ll dma_interface.ll utility.ll +#OBJS_SRC=src/cam_pipe.c src/pipe_stages.c src/load_cam_model.c src/cam_pipe_utility.c src/dma_interface.c src/utility.c +VISC_OBJS=main.visc.ll +APP = $(EXE) +APP_CUDALDFLAGS=-lm -lstdc++ +APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3 +APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include +APP_LDFLAGS=$(LFLAGS) +OPT_FLAGS = -tti -targetlibinfo -tbaa -scoped-noalias -assumption-cache-tracker -profile-summary-info -forceattrs -inferattrs -ipsccp -globalopt -domtree -mem2reg -deadargelim -domtree -basicaa -aa -simplifycfg -pgo-icall-prom -basiccg -globals-aa -prune-eh -always-inline -functionattrs -domtree -sroa -early-cse -lazy-value-info -jump-threading -correlated-propagation -simplifycfg -domtree -basicaa -aa -libcalls-shrinkwrap -tailcallelim -simplifycfg -reassociate -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -licm -loop-unswitch -simplifycfg -domtree -basicaa -aa -loops -loop-simplify -lcssa-verification -lcssa -scalar-evolution -indvars -loop-idiom -loop-deletion -memdep -memcpyopt -sccp -domtree -demanded-bits -bdce -basicaa -aa -lazy-value-info -jump-threading -correlated-propagation -domtree -basicaa -aa -memdep -dse -loops -loop-simplify -lcssa-verification -lcssa -aa -scalar-evolution -licm -postdomtree -adce -simplifycfg -domtree -basicaa -aa -barrier -basiccg -rpo-functionattrs -globals-aa -float2int -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -loop-accesses -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-distribute -loop-simplify -lcssa-verification -lcssa -branch-prob -block-freq -scalar-evolution -basicaa -aa -loop-accesses -demanded-bits -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-vectorize -loop-simplify -scalar-evolution -aa -loop-accesses -loop-load-elim -basicaa -aa -simplifycfg -domtree -basicaa -aa -loops -scalar-evolution -alignment-from-assumptions -strip-dead-prototypes -domtree -loops -branch-prob -block-freq -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -branch-prob -block-freq -loop-sink -instsimplify + +CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) +OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS) +CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS) +LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS) + +LIBCLC_LIB_PATH = $(LLVM_SRC_ROOT)/../libclc/built_libs +VISC_RT_PATH = $(LLVM_SRC_ROOT)/projects/visc-rt + +VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.ll +LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx64--nvidiacl.bc + +LLVM_34_AS = $(LLVM_34_ROOT)/build/bin/llvm-as + +TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce +KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl + + +DEVICE = CPU_TARGET +VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG +VISC_OPTFLAGS += -visc-timers-x86 +TESTGEN_OPTFLAGS += -visc-timers-gen + +CFLAGS += -DDEVICE=$(DEVICE) +CXXFLAGS += -DDEVICE=$(DEVICE) + + +# Add BUILDDIR as a prefix to each element of $1 +INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1)) + +PYTHON_LLVM_40_34 = ../llvm-40-34.py + +.PRECIOUS: $(BUILD_DIR)/%.ll + +OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS)) +TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS)) +KERNEL = $(TEST_OBJS).kernels.ll + +ifeq ($(TARGET),x86) + SPIR_ASSEMBLY = $(TEST_OBJS).kernels.bc +else ifeq ($(TARGET),seq) +else ifeq ($(TARGET),fpga) + AOC_CL = $(TEST_OBJS).kernels.cl + AOCL_ASSEMBLY = $(TEST_OBJS).kernels.aocx + BOARD = a10gx + ifeq ($(EMULATION),1) + EXE = cava-visc-emu + AOC_EMU = -march=emulator + BUILD_DIR = build/$(TARGET)-emu + endif +else + KERNEL_LINKED = $(BUILD_DIR)/$(APP).kernels.linked.ll + PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s +endif + +HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll +HOST = $(BUILD_DIR)/$(APP).host.ll + +ifeq ($(OPENCL_PATH),) +FAILSAFE=no_opencl +else +FAILSAFE= +endif + +# Targets +default: $(FAILSAFE) $(BUILD_DIR) $(EXE) +#default: $(FAILSAFE) $(BUILD_DIR) $(PTX_ASSEMBLY) $(SPIR_ASSEMBLY) $(AOC_CL) $(AOCL_ASSEMBLY) $(EXE) + +$(PTX_ASSEMBLY) : $(KERNEL_LINKED) + $(CC) $(KERNEL_GEN_FLAGS) -S $< -o $@ + +$(KERNEL_LINKED) : $(KERNEL) + $(LLVM_LINK) $(LIBCLC_NVPTX_LIB) -S $< -o $@ + +$(SPIR_ASSEMBLY) : $(KERNEL) + python $(PYTHON_LLVM_40_34) $< $(BUILD_DIR)/kernel_34.ll + $(LLVM_34_AS) $(BUILD_DIR)/kernel_34.ll -o $@ + +$(AOCL_ASSEMBLY) : $(AOC_CL) + aoc --report $(AOC_EMU) $(AOC_CL) -o $(AOCL_ASSEMBLY) -board=$(BOARD) + +$(AOC_CL) : $(KERNEL) + llvm-cbe --debug $(KERNEL) + +$(EXE) : $(HOST_LINKED) + $(CXX) -O3 $(LDFLAGS) $< -o $@ + +$(HOST_LINKED) : $(HOST) $(OBJS) $(VISC_RT_LIB) + $(LLVM_LINK) $^ -S -o $@ + +$(VISC_RT_LIB) : $(VISC_RT_PATH)/visc-rt.cpp + make -C $(LLVM_LIB_PATH) + +$(HOST) $(KERNEL): $(BUILD_DIR)/$(VISC_OBJS) + $(OPT) -debug $(VISC_OPTFLAGS) -S $< -o $(HOST) + +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) + +$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c + $(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $< + +$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c + $(CC) $(CFLAGS) -emit-llvm -S -o $@ $< + +$(BUILD_DIR)/main.visc.ll : $(BUILD_DIR)/main.ll + $(OPT) -debug-only=genvisc $(TESTGEN_OPTFLAGS) $< -S -o $@ + diff --git a/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/defs.h b/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/defs.h new file mode 100644 index 0000000000..ccc8acc857 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/defs.h @@ -0,0 +1,224 @@ +#ifndef _COMMON_DEFS_H_ +#define _COMMON_DEFS_H_ + +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long uint64_t; + +#define CACHELINE_SIZE 64 + +// Debugging message macros. +#if DEBUG_LEVEL >= 1 + #define INFO_MSG(args...) printf(args) + + #if DEBUG_LEVEL >= 2 + #define PRINT_MSG(args...) printf(args) + #define PRINT_DEBUG(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) \ + print_debug4d_fp16(hid, num, height, rows, cols) + + #if DEBUG_LEVEL >= 3 + #define PRINT_DEBUG_V(hid, rows, cols, num_cols) \ + print_debug(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) \ + print_debug4d(hid, rows, cols, height) + #define PRINT_MSG_V(args...) printf(args) + #else + #define PRINT_DEBUG_V(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) + #endif + #else + #define PRINT_MSG(args...) + #define PRINT_DEBUG(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) + #define PRINT_DEBUG_V(hid, rows, cols, height) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) + #endif +#else + #define INFO_MSG(args...) + #define PRINT_DEBUG(hid, rows, cols, num_cols) + #define PRINT_DEBUG4D(hid, rows, cols, height) + #define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) + #define PRINT_MSG(args...) + #define PRINT_DEBUG_V(hid, rows, cols, height) + #define PRINT_DEBUG4D_V(hid, rows, cols, height) + #define PRINT_MSG_V(args...) +#endif + +#define STRING(arg) #arg + +// This is to avoid a ton of spurious unused variable warnings when +// we're not building for gem5. +#define UNUSED(x) (void)(x) + +// Macros for computing the maximum of a group of elements. +// +// Why macros and not functions (or a loop)? A loop takes O(n) cycles to +// compute the maximum, when it could be done in O(log n) time with a tree +// based implementation. But Aladdin regards function calls as a hard +// dependency that it does not optimize across, so we would not get the +// parallelism we expect from the tree. Thus, these must be macros. +// +// I've only implemented a few of these. These are only meant for the pooling +// layers, and we shouldn't need more than a 3x3 pooling layer anyways. +#define max2(A, B) (((A) > (B)) ? (A) : (B)) +#define max3(e0, e1, e2) max2(max2(e0, e1), e2) +#define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3)) +#define max8(e0, e1, e2, e3, e4, e5, e6, e7) \ + max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7)) +#define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8) \ + max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8) + +#define min2(A, B) (((A) < (B)) ? (A) : (B)) + +#define FRAC_CEIL(A, B) ((A) / (B) + ((A) % (B) != 0)) +// Convenience macros to switch between invoking an accelerator (if building a +// binary for gem5) or just calling the kernel function in software. +// +// Usage: +// +// These macros expand differently based on whether the GEM5_HARNESS macro is +// defined. If so, then this binary is meant to be run under gem5, invoking +// accelerators; if not, this binary should run the pure software version of +// the accelerated kernels. +// +// If GEM5_HARNESS is defined: +// +// MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize) +// ===> mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, mySize) +// +// INVOKE_KERNEL(myReqCode, kernelFuncName, args...) +// ===> invokeAcceleratorAndBlock(myReqCode) +// +// Otherwise: +// MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize) +// expands to nothing +// +// INVOKE_KERNEL(myReqCode, kernelFuncName, args...) +// ===> kernelFuncName(args) +// +#ifdef GEM5_HARNESS + +#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ + mapArrayToAccelerator(req_code, name, base_addr, size) +#define INVOKE_KERNEL(req_code, kernel_ptr, args...) \ + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndBlock(req_code); \ + } while (0) +#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \ + do { \ + UNUSED(kernel_ptr); \ + invokeAcceleratorAndReturn2(req_code, finish_flag); \ + } while (0) + +#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, false); \ + } while (0) +#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, false); \ + } while (0) +#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, false, true); \ + } while (0) +#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + invokeAladdinTrafficGenAndBlock(start_addr, size, true, true); \ + } while (0) + +#else + +#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \ + do { \ + INFO_MSG("Mapping array %s @ %p, size %d.\n", \ + name, (void*)base_addr, (int)(size)); \ + UNUSED(req_code); \ + UNUSED(name); \ + UNUSED(base_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_KERNEL(req_code, kernel_ptr, args...) kernel_ptr(args) +#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \ + kernel_ptr(args) +#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) +#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \ + do { \ + UNUSED(start_addr); \ + UNUSED(size); \ + } while (0) + +#endif + +// Simplified version of MAP_ARRAY_TO_ACCEL. +// +// This assumes that the current name of the base pointer is also the name of +// the array in the top level function of the dynamic trace. THIS IS VERY +// IMPORTANT - if the argument passed to a top level function has been renamed in +// the function, then this WILL NOT WORK! +// +// MAP_ARRAY(myReqCode, myArray, mySize) +// ===> MAP_ARRAY_TO_ACCEL(myReqCode, "myArray", myArray, mySize) +#define MAP_ARRAY(req_code, name_and_base_addr, size) \ + MAP_ARRAY_TO_ACCEL( \ + req_code, STRING(name_and_base_addr), name_and_base_addr, size) + +// Use these convenience macros to cast a raw pointer into a multidimensional +// variable-length array, which lets us use [] notation inside of the ugly +// sub2ind syntax! +// +// Usage: +// If we have an array like array[5][4]: +// ARRAY_2D(TYPE, output_name, array, 4); +// +// If we have an array like array[5][4][3]: +// ARRAY_3D(TYPE, output_name, array, 4, 3); +// +// If we have an array like array[5][4][3][2] +// ARRAY_4D(TYPE, output_name, array, 4, 3, 2); +// +// And so on... +#define ARRAY_1D(TYPE, output_array_name, input_array_name) \ + TYPE* output_array_name = (TYPE*)input_array_name + +#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \ + TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name + +#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \ + TYPE(*output_array_name)[DIM_1][DIM_2] = \ + (TYPE(*)[DIM_1][DIM_2])input_array_name + +#define ARRAY_4D( \ + TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \ + TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] = \ + (TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name + +#define ARRAY_5D( \ + TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \ + TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] = \ + (TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name + +#endif diff --git a/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/main.c b/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/main.c new file mode 100644 index 0000000000..ef4e620be0 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/main.c @@ -0,0 +1,135 @@ + +#include <stdlib.h> +//#include "utility.h" +#include "visc.h" +#include "defs.h" + + +typedef struct __attribute__((__packed__)) { + float* input; size_t bytes_input; + float* result; size_t bytes_result; +} +RootIn; + + + +void scale_values(float* input, size_t num_elems) { + + __visc__hint(DEVICE); + __visc__attributes(1, input, 1, input); + + for (int ind = 0; ind < num_elems; ind++){ + input[ind] = input[ind] * 2.0; + } + + __visc__return(1, num_elems); +} + + + + +void graphRoot(/*0*/ float* input, /*1*/ size_t bytes_input, + /*2*/ float* result, /*3*/ size_t bytes_result) { + + //Specifies compilation target for current node + __visc__hint(CPU_TARGET); + + __visc__attributes(2, input, result, 2, input, result); + + // Create an 0D (specified by 1st argument) HPVM node - so a single node + // associated with node function ---_fxp_wrapper + + void* scaleNode = __visc__createNodeND(0, scale_values); + + // BindIn binds inputs of current node with specified node + // - destination node + // - argument position in argument list of function of source node + // - argument position in argument list of function of destination node + // - streaming (1) or non-streaming (0) + + // Edge transfers data between nodes within the same level of hierarchy. + // - source and destination dataflow nodes + // - edge type, all-all (1) or one-one(0) + // - source position (in output struct of source node) + // - destination position (in argument list of destination node) + // - streaming (1) or non-streaming (0) + + // scale_fxp inputs + __visc__bindIn(scaleNode, 0, 0, 0); // input -> ScNode:input + __visc__bindIn(scaleNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input + + // Similar to bindIn, but for the output. Output of a node is a struct, and + // we consider the fields in increasing ordering. + __visc__bindOut(scaleNode, 0, 0, 0); + +} + + + + + +int main(int argc, char* argv[]) { + + size_t input_size = 100; + size_t result_size = 100; + + size_t input_bytes = input_size * sizeof(float); + size_t result_bytes = result_size * sizeof(float); + + // This is host_input in cam_pipe() + float* input = (float*) malloc(input_bytes); + for(unsigned int i = 0; i < input_size; i++){ + input[i] = 1.0; + } + // This is host_result in cam_pipe() + float* result = (float*) malloc(result_bytes); + + + __visc__init(); + + RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn)); + + // Set up HPVM DFG inputs in the rootArgs struct. + rootArgs->input = input; + rootArgs->bytes_input = input_size; + + printf("input = %d input_bytes = %d \n", input, input_bytes); + + rootArgs->result = result; + rootArgs->bytes_result = result_size; + + + llvm_visc_track_mem(input, input_bytes); + llvm_visc_track_mem(result, result_bytes); + + + void* testDFG = __visc__launch(0, graphRoot, (void*) rootArgs); + __visc__wait(testDFG); + + + printf("input = %d \n", input); + + llvm_visc_request_mem(input, input_bytes); + //llvm_visc_request_mem(result, result_bytes); + + printf("requested mem \n"); + + for(unsigned int i = 0; i < input_size; i++){ + printf("input[%d] = %f \n", i, input[i]); + } + + //llvm_visc_untrack_mem(input); + //llvm_visc_untrack_mem(result); + + printf ("untracked mem \n"); + + __visc__cleanup(); + + printf ("cleaned up visc"); + + return 0; +} + + + + diff --git a/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/visc.h b/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/visc.h new file mode 100644 index 0000000000..a263e35252 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/saxpy_test/src/visc.h @@ -0,0 +1,110 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef DEVICE +#define DEVICE GPU_TARGET +#endif + +#include "llvm/SupportVISC/VISCHint.h" + +#ifdef __cplusplus +extern "C" { +void __visc__hint(visc::Target); +//void __visc__wait(void*); +#else +void __visc__hint(enum Target); +//void __visc__wait(unsigned); +#endif + +#ifdef __cplusplus +//void* __visc__node(...); +//void* __visc__createNode(...); +//void* __visc__createNode1D(...); +//void* __visc__createNode2D(...); +//void* __visc__createNode3D(...); +//void __visc__return(...); +#endif + +void* __visc__createNodeND(unsigned,...); +void __visc__return(unsigned, ...); + +void __visc__attributes(unsigned, ...); +void __visc__init(); +void __visc__cleanup(); + +void __visc__bindIn(void*, unsigned, unsigned, unsigned); +void __visc__bindOut(void*, unsigned, unsigned, unsigned); +void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned); +void __visc__push(void*, void*); +void* __visc__pop(void*); +void* __visc__launch(unsigned, ...); +void __visc__wait(void*); + +void* __visc__getNode(); +void* __visc__getParentNode(void*); +void __visc__barrier(); +void* __visc__malloc(long); +long __visc__getNodeInstanceID_x(void*); +long __visc__getNodeInstanceID_y(void*); +long __visc__getNodeInstanceID_z(void*); +long __visc__getNumNodeInstances_x(void*); +long __visc__getNumNodeInstances_y(void*); +long __visc__getNumNodeInstances_z(void*); + +// Atomic +// signed int +int __visc__atomic_cmpxchg(int*, int, int); +int __visc__atomic_add(int*, int); +int __visc__atomic_sub(int*, int); +int __visc__atomic_xchg(int*, int); +int __visc__atomic_inc(int*); +int __visc__atomic_dec(int*); +int __visc__atomic_min(int*, int); +int __visc__atomic_max(int*, int); +int __visc__atomic_umax(int*, int); +int __visc__atomic_umin(int*, int); +int __visc__atomic_and(int*, int); +int __visc__atomic_or(int*, int); +int __visc__atomic_xor(int*, int); + +// Special Func +float __visc__floor(float); +float __visc__rsqrt(float); +float __visc__sqrt(float); +float __visc__sin(float); +float __visc__cos(float); +// unsigned int +//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned); +//unsigned __visc__atomic_add(unsigned*, unsigned); +//unsigned __visc__atomic_sub(unsigned*, unsigned); +//unsigned __visc__atomic_xchg(unsigned*, unsigned); +//unsigned __visc__atomic_inc(unsigned*); +//unsigned __visc__atomic_dec(unsigned*); +//unsigned __visc__atomic_min(unsigned*, unsigned); +//unsigned __visc__atomic_max(unsigned*, unsigned); +//unsigned __visc__atomic_and(unsigned*, unsigned); +//unsigned __visc__atomic_or(unsigned*, unsigned); +//unsigned __visc__atomic_xor(unsigned*, unsigned); + + +#include <unistd.h> + +long get_global_id(int); +long get_group_id(int); +long get_local_id(int); +long get_local_size(int); + + +void llvm_visc_track_mem(void*, size_t); +void llvm_visc_untrack_mem(void*); +void llvm_visc_request_mem(void*, size_t); + +#ifdef __cplusplus +} +#endif + -- GitLab