Skip to content
Snippets Groups Projects
Commit fc36236a authored by Hashim Sharif's avatar Hashim Sharif
Browse files

Adding simple Saxpy test

parent a33d4118
No related branches found
No related tags found
No related merge requests found
# This Makefile compiles the HPVM-CAVA pilot project.
# It builds HPVM-related dependencies, then the native camera pipeline ISP code.
#
# Paths to some dependencies (e.g., HPVM, LLVM) must exist in Makefile.config,
# which can be copied from Makefile.config.example for a start.
CONFIG_FILE := Makefile.config
ifeq ($(wildcard $(CONFIG_FILE)),)
$(error $(CONFIG_FILE) not found. See $(CONFIG_FILE).example)
endif
include $(CONFIG_FILE)
# Compiler Flags
DLEVEL ?= 0
LFLAGS += -lm -lrt
# Build dirs
ifeq ($(VERSION),)
VERSION = IR_modules
endif
SRC_DIR = src/
CAM_PIPE_SRC_DIR = $(SRC_DIR)
BUILD_DIR = build/$(TARGET)_$(VERSION)
CURRENT_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
# Source files for the frontend camera pipeline
COMMON_SRCS = main.c \
#CAM_PIPE_SRCS = load_cam_model.c \
cam_pipe_utility.c \
dma_interface.c # FIXME: This is a hack until external C++ files can be included in build.
# NOTE: We have temporarily removed gem5 and other dependencies for simplicity.
SRCS = $(COMMON_SRCS)
# NATIVE_FULL_PATH_SRCS contains all the full path source files for the camera vision pipeline.
NATIVE_FULL_PATH_SRCS = $(patsubst %, $(SRC_DIR)/%, $(COMMON_SRCS))
NATIVE_FULL_PATH_SRCS += $(patsubst %, $(CAM_PIPE_SRC_DIR)/%, $(CAM_PIPE_SRCS))
INCLUDES += -I$(SRC_DIR) \
-I$(CAM_PIPE_SRC_DIR) \
ifneq ($(CONFUSE_ROOT),)
INCLUDES += -I$(CONFUSE_ROOT)/include
LFLAGS += -L$(CONFUSE_ROOT)/lib
endif
EXE = vec_add
CAM_CFLAGS += -mf16c -flax-vector-conversions
LFLAGS += -pthread
## BEGIN HPVM MAKEFILE
LANGUAGE=visc
#SRCDIR_OBJS= load_cam_model.ll cam_pipe_utility.ll dma_interface.ll utility.ll
#OBJS_SRC=src/cam_pipe.c src/pipe_stages.c src/load_cam_model.c src/cam_pipe_utility.c src/dma_interface.c src/utility.c
VISC_OBJS=main.visc.ll
APP = $(EXE)
APP_CUDALDFLAGS=-lm -lstdc++
APP_CFLAGS= $(INCLUDES) -DDMA_MODE -DDMA_INTERFACE_V3
APP_CXXFLAGS=-ffast-math -O0 -I/opt/opencv/include
APP_LDFLAGS=$(LFLAGS)
OPT_FLAGS = -tti -targetlibinfo -tbaa -scoped-noalias -assumption-cache-tracker -profile-summary-info -forceattrs -inferattrs -ipsccp -globalopt -domtree -mem2reg -deadargelim -domtree -basicaa -aa -simplifycfg -pgo-icall-prom -basiccg -globals-aa -prune-eh -always-inline -functionattrs -domtree -sroa -early-cse -lazy-value-info -jump-threading -correlated-propagation -simplifycfg -domtree -basicaa -aa -libcalls-shrinkwrap -tailcallelim -simplifycfg -reassociate -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -licm -loop-unswitch -simplifycfg -domtree -basicaa -aa -loops -loop-simplify -lcssa-verification -lcssa -scalar-evolution -indvars -loop-idiom -loop-deletion -memdep -memcpyopt -sccp -domtree -demanded-bits -bdce -basicaa -aa -lazy-value-info -jump-threading -correlated-propagation -domtree -basicaa -aa -memdep -dse -loops -loop-simplify -lcssa-verification -lcssa -aa -scalar-evolution -licm -postdomtree -adce -simplifycfg -domtree -basicaa -aa -barrier -basiccg -rpo-functionattrs -globals-aa -float2int -domtree -loops -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -loop-rotate -loop-accesses -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-distribute -loop-simplify -lcssa-verification -lcssa -branch-prob -block-freq -scalar-evolution -basicaa -aa -loop-accesses -demanded-bits -lazy-branch-prob -lazy-block-freq -opt-remark-emitter -loop-vectorize -loop-simplify -scalar-evolution -aa -loop-accesses -loop-load-elim -basicaa -aa -simplifycfg -domtree -basicaa -aa -loops -scalar-evolution -alignment-from-assumptions -strip-dead-prototypes -domtree -loops -branch-prob -block-freq -loop-simplify -lcssa-verification -lcssa -basicaa -aa -scalar-evolution -branch-prob -block-freq -loop-sink -instsimplify
CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
OBJS_CFLAGS = -O1 $(APP_CFLAGS) $(PLATFORM_CFLAGS)
CXXFLAGS = $(APP_CXXFLAGS) $(PLATFORM_CXXFLAGS)
LDFLAGS= $(APP_LDFLAGS) $(PLATFORM_LDFLAGS)
LIBCLC_LIB_PATH = $(LLVM_SRC_ROOT)/../libclc/built_libs
VISC_RT_PATH = $(LLVM_SRC_ROOT)/projects/visc-rt
VISC_RT_LIB = $(VISC_RT_PATH)/visc-rt.ll
LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx64--nvidiacl.bc
LLVM_34_AS = $(LLVM_34_ROOT)/build/bin/llvm-as
TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce
KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
DEVICE = CPU_TARGET
VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
VISC_OPTFLAGS += -visc-timers-x86
TESTGEN_OPTFLAGS += -visc-timers-gen
CFLAGS += -DDEVICE=$(DEVICE)
CXXFLAGS += -DDEVICE=$(DEVICE)
# Add BUILDDIR as a prefix to each element of $1
INBUILDDIR=$(addprefix $(BUILD_DIR)/,$(1))
PYTHON_LLVM_40_34 = ../llvm-40-34.py
.PRECIOUS: $(BUILD_DIR)/%.ll
OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS))
KERNEL = $(TEST_OBJS).kernels.ll
ifeq ($(TARGET),x86)
SPIR_ASSEMBLY = $(TEST_OBJS).kernels.bc
else ifeq ($(TARGET),seq)
else ifeq ($(TARGET),fpga)
AOC_CL = $(TEST_OBJS).kernels.cl
AOCL_ASSEMBLY = $(TEST_OBJS).kernels.aocx
BOARD = a10gx
ifeq ($(EMULATION),1)
EXE = cava-visc-emu
AOC_EMU = -march=emulator
BUILD_DIR = build/$(TARGET)-emu
endif
else
KERNEL_LINKED = $(BUILD_DIR)/$(APP).kernels.linked.ll
PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s
endif
HOST_LINKED = $(BUILD_DIR)/$(APP).linked.ll
HOST = $(BUILD_DIR)/$(APP).host.ll
ifeq ($(OPENCL_PATH),)
FAILSAFE=no_opencl
else
FAILSAFE=
endif
# Targets
default: $(FAILSAFE) $(BUILD_DIR) $(EXE)
#default: $(FAILSAFE) $(BUILD_DIR) $(PTX_ASSEMBLY) $(SPIR_ASSEMBLY) $(AOC_CL) $(AOCL_ASSEMBLY) $(EXE)
$(PTX_ASSEMBLY) : $(KERNEL_LINKED)
$(CC) $(KERNEL_GEN_FLAGS) -S $< -o $@
$(KERNEL_LINKED) : $(KERNEL)
$(LLVM_LINK) $(LIBCLC_NVPTX_LIB) -S $< -o $@
$(SPIR_ASSEMBLY) : $(KERNEL)
python $(PYTHON_LLVM_40_34) $< $(BUILD_DIR)/kernel_34.ll
$(LLVM_34_AS) $(BUILD_DIR)/kernel_34.ll -o $@
$(AOCL_ASSEMBLY) : $(AOC_CL)
aoc --report $(AOC_EMU) $(AOC_CL) -o $(AOCL_ASSEMBLY) -board=$(BOARD)
$(AOC_CL) : $(KERNEL)
llvm-cbe --debug $(KERNEL)
$(EXE) : $(HOST_LINKED)
$(CXX) -O3 $(LDFLAGS) $< -o $@
$(HOST_LINKED) : $(HOST) $(OBJS) $(VISC_RT_LIB)
$(LLVM_LINK) $^ -S -o $@
$(VISC_RT_LIB) : $(VISC_RT_PATH)/visc-rt.cpp
make -C $(LLVM_LIB_PATH)
$(HOST) $(KERNEL): $(BUILD_DIR)/$(VISC_OBJS)
$(OPT) -debug $(VISC_OPTFLAGS) -S $< -o $(HOST)
$(BUILD_DIR):
mkdir -p $(BUILD_DIR)
$(BUILD_DIR)/%.ll : $(SRC_DIR)/%.c
$(CC) $(OBJS_CFLAGS) -emit-llvm -S -o $@ $<
$(BUILD_DIR)/main.ll : $(SRC_DIR)/main.c
$(CC) $(CFLAGS) -emit-llvm -S -o $@ $<
$(BUILD_DIR)/main.visc.ll : $(BUILD_DIR)/main.ll
$(OPT) -debug-only=genvisc $(TESTGEN_OPTFLAGS) $< -S -o $@
#ifndef _COMMON_DEFS_H_
#define _COMMON_DEFS_H_
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef unsigned long uint64_t;
#define CACHELINE_SIZE 64
// Debugging message macros.
#if DEBUG_LEVEL >= 1
#define INFO_MSG(args...) printf(args)
#if DEBUG_LEVEL >= 2
#define PRINT_MSG(args...) printf(args)
#define PRINT_DEBUG(hid, rows, cols, num_cols) \
print_debug(hid, rows, cols, num_cols)
#define PRINT_DEBUG4D(hid, rows, cols, height) \
print_debug4d(hid, rows, cols, height)
#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols) \
print_debug4d_fp16(hid, num, height, rows, cols)
#if DEBUG_LEVEL >= 3
#define PRINT_DEBUG_V(hid, rows, cols, num_cols) \
print_debug(hid, rows, cols, num_cols)
#define PRINT_DEBUG4D_V(hid, rows, cols, height) \
print_debug4d(hid, rows, cols, height)
#define PRINT_MSG_V(args...) printf(args)
#else
#define PRINT_DEBUG_V(hid, rows, cols, num_cols)
#define PRINT_DEBUG4D_V(hid, rows, cols, height)
#define PRINT_MSG_V(args...)
#endif
#else
#define PRINT_MSG(args...)
#define PRINT_DEBUG(hid, rows, cols, num_cols)
#define PRINT_DEBUG4D(hid, rows, cols, height)
#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols)
#define PRINT_DEBUG_V(hid, rows, cols, height)
#define PRINT_DEBUG4D_V(hid, rows, cols, height)
#define PRINT_MSG_V(args...)
#endif
#else
#define INFO_MSG(args...)
#define PRINT_DEBUG(hid, rows, cols, num_cols)
#define PRINT_DEBUG4D(hid, rows, cols, height)
#define PRINT_DEBUG4D_FP16(hid, num, height, rows, cols)
#define PRINT_MSG(args...)
#define PRINT_DEBUG_V(hid, rows, cols, height)
#define PRINT_DEBUG4D_V(hid, rows, cols, height)
#define PRINT_MSG_V(args...)
#endif
#define STRING(arg) #arg
// This is to avoid a ton of spurious unused variable warnings when
// we're not building for gem5.
#define UNUSED(x) (void)(x)
// Macros for computing the maximum of a group of elements.
//
// Why macros and not functions (or a loop)? A loop takes O(n) cycles to
// compute the maximum, when it could be done in O(log n) time with a tree
// based implementation. But Aladdin regards function calls as a hard
// dependency that it does not optimize across, so we would not get the
// parallelism we expect from the tree. Thus, these must be macros.
//
// I've only implemented a few of these. These are only meant for the pooling
// layers, and we shouldn't need more than a 3x3 pooling layer anyways.
#define max2(A, B) (((A) > (B)) ? (A) : (B))
#define max3(e0, e1, e2) max2(max2(e0, e1), e2)
#define max4(e0, e1, e2, e3) max2(max2(e0, e1), max2(e2, e3))
#define max8(e0, e1, e2, e3, e4, e5, e6, e7) \
max2(max4(e0, e1, e2, e3), max4(e4, e5, e6, e7))
#define max9(e0, e1, e2, e3, e4, e5, e6, e7, e8) \
max2(max8(e0, e1, e2, e3, e4, e5, e6, e7), e8)
#define min2(A, B) (((A) < (B)) ? (A) : (B))
#define FRAC_CEIL(A, B) ((A) / (B) + ((A) % (B) != 0))
// Convenience macros to switch between invoking an accelerator (if building a
// binary for gem5) or just calling the kernel function in software.
//
// Usage:
//
// These macros expand differently based on whether the GEM5_HARNESS macro is
// defined. If so, then this binary is meant to be run under gem5, invoking
// accelerators; if not, this binary should run the pure software version of
// the accelerated kernels.
//
// If GEM5_HARNESS is defined:
//
// MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize)
// ===> mapArrayToAccelerator(myReqCode, myArrayName, myArrayPtr, mySize)
//
// INVOKE_KERNEL(myReqCode, kernelFuncName, args...)
// ===> invokeAcceleratorAndBlock(myReqCode)
//
// Otherwise:
// MAP_ARRAY_TO_ACCEL(myReqCode, myArrayName, myArrayPtr, mySize)
// expands to nothing
//
// INVOKE_KERNEL(myReqCode, kernelFuncName, args...)
// ===> kernelFuncName(args)
//
#ifdef GEM5_HARNESS
#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \
mapArrayToAccelerator(req_code, name, base_addr, size)
#define INVOKE_KERNEL(req_code, kernel_ptr, args...) \
do { \
UNUSED(kernel_ptr); \
invokeAcceleratorAndBlock(req_code); \
} while (0)
#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \
do { \
UNUSED(kernel_ptr); \
invokeAcceleratorAndReturn2(req_code, finish_flag); \
} while (0)
#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \
do { \
invokeAladdinTrafficGenAndBlock(start_addr, size, false, false); \
} while (0)
#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \
do { \
invokeAladdinTrafficGenAndBlock(start_addr, size, true, false); \
} while (0)
#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \
do { \
invokeAladdinTrafficGenAndBlock(start_addr, size, false, true); \
} while (0)
#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \
do { \
invokeAladdinTrafficGenAndBlock(start_addr, size, true, true); \
} while (0)
#else
#define MAP_ARRAY_TO_ACCEL(req_code, name, base_addr, size) \
do { \
INFO_MSG("Mapping array %s @ %p, size %d.\n", \
name, (void*)base_addr, (int)(size)); \
UNUSED(req_code); \
UNUSED(name); \
UNUSED(base_addr); \
UNUSED(size); \
} while (0)
#define INVOKE_KERNEL(req_code, kernel_ptr, args...) kernel_ptr(args)
#define INVOKE_KERNEL_NOBLOCK(req_code, finish_flag, kernel_ptr, args...) \
kernel_ptr(args)
#define INVOKE_DMA_READ_TRAFFIC_GEN(start_addr, size) \
do { \
UNUSED(start_addr); \
UNUSED(size); \
} while (0)
#define INVOKE_DMA_WRITE_TRAFFIC_GEN(start_addr, size) \
do { \
UNUSED(start_addr); \
UNUSED(size); \
} while (0)
#define INVOKE_ACP_READ_TRAFFIC_GEN(start_addr, size) \
do { \
UNUSED(start_addr); \
UNUSED(size); \
} while (0)
#define INVOKE_ACP_WRITE_TRAFFIC_GEN(start_addr, size) \
do { \
UNUSED(start_addr); \
UNUSED(size); \
} while (0)
#endif
// Simplified version of MAP_ARRAY_TO_ACCEL.
//
// This assumes that the current name of the base pointer is also the name of
// the array in the top level function of the dynamic trace. THIS IS VERY
// IMPORTANT - if the argument passed to a top level function has been renamed in
// the function, then this WILL NOT WORK!
//
// MAP_ARRAY(myReqCode, myArray, mySize)
// ===> MAP_ARRAY_TO_ACCEL(myReqCode, "myArray", myArray, mySize)
#define MAP_ARRAY(req_code, name_and_base_addr, size) \
MAP_ARRAY_TO_ACCEL( \
req_code, STRING(name_and_base_addr), name_and_base_addr, size)
// Use these convenience macros to cast a raw pointer into a multidimensional
// variable-length array, which lets us use [] notation inside of the ugly
// sub2ind syntax!
//
// Usage:
// If we have an array like array[5][4]:
// ARRAY_2D(TYPE, output_name, array, 4);
//
// If we have an array like array[5][4][3]:
// ARRAY_3D(TYPE, output_name, array, 4, 3);
//
// If we have an array like array[5][4][3][2]
// ARRAY_4D(TYPE, output_name, array, 4, 3, 2);
//
// And so on...
#define ARRAY_1D(TYPE, output_array_name, input_array_name) \
TYPE* output_array_name = (TYPE*)input_array_name
#define ARRAY_2D(TYPE, output_array_name, input_array_name, DIM_1) \
TYPE(*output_array_name)[DIM_1] = (TYPE(*)[DIM_1])input_array_name
#define ARRAY_3D(TYPE, output_array_name, input_array_name, DIM_1, DIM_2) \
TYPE(*output_array_name)[DIM_1][DIM_2] = \
(TYPE(*)[DIM_1][DIM_2])input_array_name
#define ARRAY_4D( \
TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3) \
TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3] = \
(TYPE(*)[DIM_1][DIM_2][DIM_3])input_array_name
#define ARRAY_5D( \
TYPE, output_array_name, input_array_name, DIM_1, DIM_2, DIM_3, DIM_4) \
TYPE(*output_array_name)[DIM_1][DIM_2][DIM_3][DIM_4] = \
(TYPE(*)[DIM_1][DIM_2][DIM_3][DIM_4])input_array_name
#endif
#include <stdlib.h>
//#include "utility.h"
#include "visc.h"
#include "defs.h"
typedef struct __attribute__((__packed__)) {
float* input; size_t bytes_input;
float* result; size_t bytes_result;
}
RootIn;
void scale_values(float* input, size_t num_elems) {
__visc__hint(DEVICE);
__visc__attributes(1, input, 1, input);
for (int ind = 0; ind < num_elems; ind++){
input[ind] = input[ind] * 2.0;
}
__visc__return(1, num_elems);
}
void graphRoot(/*0*/ float* input, /*1*/ size_t bytes_input,
/*2*/ float* result, /*3*/ size_t bytes_result) {
//Specifies compilation target for current node
__visc__hint(CPU_TARGET);
__visc__attributes(2, input, result, 2, input, result);
// Create an 0D (specified by 1st argument) HPVM node - so a single node
// associated with node function ---_fxp_wrapper
void* scaleNode = __visc__createNodeND(0, scale_values);
// BindIn binds inputs of current node with specified node
// - destination node
// - argument position in argument list of function of source node
// - argument position in argument list of function of destination node
// - streaming (1) or non-streaming (0)
// Edge transfers data between nodes within the same level of hierarchy.
// - source and destination dataflow nodes
// - edge type, all-all (1) or one-one(0)
// - source position (in output struct of source node)
// - destination position (in argument list of destination node)
// - streaming (1) or non-streaming (0)
// scale_fxp inputs
__visc__bindIn(scaleNode, 0, 0, 0); // input -> ScNode:input
__visc__bindIn(scaleNode, 1, 1, 0); // bytes_input -> ScNode:bytes_input
// Similar to bindIn, but for the output. Output of a node is a struct, and
// we consider the fields in increasing ordering.
__visc__bindOut(scaleNode, 0, 0, 0);
}
int main(int argc, char* argv[]) {
size_t input_size = 100;
size_t result_size = 100;
size_t input_bytes = input_size * sizeof(float);
size_t result_bytes = result_size * sizeof(float);
// This is host_input in cam_pipe()
float* input = (float*) malloc(input_bytes);
for(unsigned int i = 0; i < input_size; i++){
input[i] = 1.0;
}
// This is host_result in cam_pipe()
float* result = (float*) malloc(result_bytes);
__visc__init();
RootIn* rootArgs = (RootIn*) malloc(sizeof(RootIn));
// Set up HPVM DFG inputs in the rootArgs struct.
rootArgs->input = input;
rootArgs->bytes_input = input_size;
printf("input = %d input_bytes = %d \n", input, input_bytes);
rootArgs->result = result;
rootArgs->bytes_result = result_size;
llvm_visc_track_mem(input, input_bytes);
llvm_visc_track_mem(result, result_bytes);
void* testDFG = __visc__launch(0, graphRoot, (void*) rootArgs);
__visc__wait(testDFG);
printf("input = %d \n", input);
llvm_visc_request_mem(input, input_bytes);
//llvm_visc_request_mem(result, result_bytes);
printf("requested mem \n");
for(unsigned int i = 0; i < input_size; i++){
printf("input[%d] = %f \n", i, input[i]);
}
//llvm_visc_untrack_mem(input);
//llvm_visc_untrack_mem(result);
printf ("untracked mem \n");
__visc__cleanup();
printf ("cleaned up visc");
return 0;
}
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifndef DEVICE
#define DEVICE GPU_TARGET
#endif
#include "llvm/SupportVISC/VISCHint.h"
#ifdef __cplusplus
extern "C" {
void __visc__hint(visc::Target);
//void __visc__wait(void*);
#else
void __visc__hint(enum Target);
//void __visc__wait(unsigned);
#endif
#ifdef __cplusplus
//void* __visc__node(...);
//void* __visc__createNode(...);
//void* __visc__createNode1D(...);
//void* __visc__createNode2D(...);
//void* __visc__createNode3D(...);
//void __visc__return(...);
#endif
void* __visc__createNodeND(unsigned,...);
void __visc__return(unsigned, ...);
void __visc__attributes(unsigned, ...);
void __visc__init();
void __visc__cleanup();
void __visc__bindIn(void*, unsigned, unsigned, unsigned);
void __visc__bindOut(void*, unsigned, unsigned, unsigned);
void* __visc__edge(void*, void*, unsigned, unsigned, unsigned, unsigned);
void __visc__push(void*, void*);
void* __visc__pop(void*);
void* __visc__launch(unsigned, ...);
void __visc__wait(void*);
void* __visc__getNode();
void* __visc__getParentNode(void*);
void __visc__barrier();
void* __visc__malloc(long);
long __visc__getNodeInstanceID_x(void*);
long __visc__getNodeInstanceID_y(void*);
long __visc__getNodeInstanceID_z(void*);
long __visc__getNumNodeInstances_x(void*);
long __visc__getNumNodeInstances_y(void*);
long __visc__getNumNodeInstances_z(void*);
// Atomic
// signed int
int __visc__atomic_cmpxchg(int*, int, int);
int __visc__atomic_add(int*, int);
int __visc__atomic_sub(int*, int);
int __visc__atomic_xchg(int*, int);
int __visc__atomic_inc(int*);
int __visc__atomic_dec(int*);
int __visc__atomic_min(int*, int);
int __visc__atomic_max(int*, int);
int __visc__atomic_umax(int*, int);
int __visc__atomic_umin(int*, int);
int __visc__atomic_and(int*, int);
int __visc__atomic_or(int*, int);
int __visc__atomic_xor(int*, int);
// Special Func
float __visc__floor(float);
float __visc__rsqrt(float);
float __visc__sqrt(float);
float __visc__sin(float);
float __visc__cos(float);
// unsigned int
//unsigned __visc__atomic_cmpxchg(unsigned*, unsigned, unsigned);
//unsigned __visc__atomic_add(unsigned*, unsigned);
//unsigned __visc__atomic_sub(unsigned*, unsigned);
//unsigned __visc__atomic_xchg(unsigned*, unsigned);
//unsigned __visc__atomic_inc(unsigned*);
//unsigned __visc__atomic_dec(unsigned*);
//unsigned __visc__atomic_min(unsigned*, unsigned);
//unsigned __visc__atomic_max(unsigned*, unsigned);
//unsigned __visc__atomic_and(unsigned*, unsigned);
//unsigned __visc__atomic_or(unsigned*, unsigned);
//unsigned __visc__atomic_xor(unsigned*, unsigned);
#include <unistd.h>
long get_global_id(int);
long get_group_id(int);
long get_local_id(int);
long get_local_size(int);
void llvm_visc_track_mem(void*, size_t);
void llvm_visc_untrack_mem(void*);
void llvm_visc_request_mem(void*, size_t);
#ifdef __cplusplus
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment