diff --git a/.gitignore b/.gitignore index ed521a76d8d20f96025eaf7304abc13fe4bc0344..70639e367bc68fedc132d3b39f4c606e4f8a532f 100644 --- a/.gitignore +++ b/.gitignore @@ -28,10 +28,10 @@ lit.site.cfg *.dot JITTests.exports -hpvm/build -hpvm/install -llvm/ -llvm-*.src/ -llvm-*.src.tar.xz +hpvm/build/ +hpvm/install/ +hpvm/llvm/ +hpvm/llvm-*.src.tar.xz +hpvm/llvm-*.src/ hpvm/projects/visc-rt/visc-rt.ll hpvm/test/parboil/benchmarks/*/build/ diff --git a/hpvm/Makefile b/hpvm/Makefile deleted file mode 100644 index a4dab2b2426ab196333b921457eb530608cf9d5d..0000000000000000000000000000000000000000 --- a/hpvm/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -all: - /bin/bash llvm_installer/llvm_installer.sh diff --git a/hpvm/install.h b/hpvm/install.h new file mode 100644 index 0000000000000000000000000000000000000000..359f3d8837515659def90969bf6a6e4a1bca4579 --- /dev/null +++ b/hpvm/install.h @@ -0,0 +1,3 @@ + +# Run installer script +/bin/bash llvm_installer/llvm_installer.sh diff --git a/hpvm/llvm_installer/llvm_installer.sh b/hpvm/llvm_installer/llvm_installer.sh index 1740e84f8df06f526dd65719a57abc5845e72e52..6912c51f1137fc7ffc49912899691ec89d0441eb 100755 --- a/hpvm/llvm_installer/llvm_installer.sh +++ b/hpvm/llvm_installer/llvm_installer.sh @@ -11,7 +11,7 @@ CURRENT_DIR=`pwd` INSTALL_DIR=`pwd`/install BUILD_DIR=$CURRENT_DIR/$LLVM_SRC/build -NUM_THREADS=12 +NUM_THREADS=16 SUFFIX=".tar.xz" CLANG_SRC="cfe-$VERSION.src" @@ -178,7 +178,5 @@ else fi cd $CURRENT_DIR -echo "Add $INSTALL_DIR/bin to PATH variable." -export PATH="$INSTALL_DIR/bin:$PATH" diff --git a/hpvm/projects/visc-rt/makefile b/hpvm/projects/visc-rt/makefile index 01cc2b7b3fd548f918ea8f43ad5c5bbd4642edc6..e0f7e3b2c0311362f49b7be1915bb54a4f0adf71 100644 --- a/hpvm/projects/visc-rt/makefile +++ b/hpvm/projects/visc-rt/makefile @@ -2,12 +2,14 @@ LLVM_BUILD_ROOT = ${LLVM_SRC_ROOT}/../build/ OPENCL_INC_PATH = /opt/intel/opencl-sdk/include +CUDA_INC_PATH = /software/cuda-9.1/include/CL/ + ifeq ($(NUM_CORES),) NUM_CORES=8 endif -CPP_FLAGS = -I $(LLVM_SRC_ROOT)/include -I $(LLVM_BUILD_ROOT)/include -I $(OPENCL_INC_PATH) -std=c++11 -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS +CPP_FLAGS = -I $(LLVM_SRC_ROOT)/include -I $(LLVM_BUILD_ROOT)/include -I $(OPENCL_INC_PATH) -I $(CUDA_INC_PATH) -std=c++11 -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS TARGET:=visc-rt LLVM_CC:=$(LLVM_BUILD_ROOT)/bin/clang diff --git a/hpvm/test/parboil/benchmarks/sgemm/Makefile b/hpvm/test/parboil/benchmarks/sgemm/Makefile index 9069d99772909f1a7414fd9364bddbe9439988d5..ace9ded22b6ef365c9cd0f6262245dd2e086643d 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/Makefile +++ b/hpvm/test/parboil/benchmarks/sgemm/Makefile @@ -19,7 +19,7 @@ BIN = $(addsuffix -$(VERSION), $(APP)) SRCDIR = src/$(VERSION) BUILDDIR = build/$(VERSION)_$(PLATFORM) -DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP) +DATASET_DIR ?= $(PARBOIL_ROOT)/datasets/$(APP) MATRIX1 = $(DATASET_DIR)/$(TEST)/input/matrix1.txt MATRIX2 = $(DATASET_DIR)/$(TEST)/input/matrix2.txt diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc index 22d9098646f3ca1e8582a319e93e68e9a58e42d8..af9ee76e0fed3ced9e2666193afbd7c0631f1ce8 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc +++ b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc @@ -279,4 +279,3 @@ int main (int argc, char *argv[]) { return 0; } - diff --git a/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp b/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp index 74d2f44495290de30a624a3606d64b56ddc92b63..9ecba96aed5642a4babaea8667576c25c1e4fb1f 100644 --- a/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp +++ b/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp @@ -33,14 +33,41 @@ static int read_data(float *A0, int nx,int ny,int nz,FILE *fp) return 0; } -void naive_kernel(float c0,float c1, float* A0, float *Anext,int nx,int ny,int nz) +typedef struct __attribute__((__packed__)) { + float c0, c1; + float* A0; size_t bytes_A0; + float* Anext; size_t bytes_Anext; + int nx, ny, nz; + size_t dim_X1, dim_Y1, dim_Z1; + size_t dim_X2, dim_Y2, dim_Z2; +} RootIn; + +void naive_kernel(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz) { __visc__hint(visc::DEVICE); __visc__attributes(2, A0, Anext, 1, Anext); - int i = get_global_id(0)+1; - int j = get_global_id(1)+1; - int k = get_global_id(2)+1; + void* thisNode = __visc__getNode(); + void* parentNode = __visc__getParentNode(thisNode); + + int lx = __visc__getNodeInstanceID_x(thisNode); + int ly = __visc__getNodeInstanceID_y(thisNode); + int lz = __visc__getNodeInstanceID_z(thisNode); + + int gx = __visc__getNodeInstanceID_x(parentNode); + int gy = __visc__getNodeInstanceID_y(parentNode); + int gz = __visc__getNodeInstanceID_z(parentNode); + + int gridx = __visc__getNumNodeInstances_x(thisNode); + int gridy = __visc__getNumNodeInstances_y(thisNode); + int gridz = __visc__getNumNodeInstances_z(thisNode); + + int i = gx * gridx + lx + 1; + int j = gy * gridy + ly + 1; + int k = gz * gridz + lz + 1; + if(i<nx-1) { Anext[Index3D (nx, ny, i, j, k)] = c1 * @@ -54,6 +81,74 @@ void naive_kernel(float c0,float c1, float* A0, float *Anext,int nx,int ny,int n } } +void stencilLvl1(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz, + size_t dim_X1, size_t dim_Y1, size_t dim_Z1) +{ + __visc__hint(visc::DEVICE); + __visc__attributes(2, A0, Anext, 1, Anext); + void* stencil_node = __visc__createNodeND(3, naive_kernel, dim_X1, dim_Y1, dim_Z1); + __visc__bindIn(stencil_node, 0, 0, 0); + __visc__bindIn(stencil_node, 1, 1, 0); + __visc__bindIn(stencil_node, 2, 2, 0); + __visc__bindIn(stencil_node, 3, 3, 0); + __visc__bindIn(stencil_node, 4, 4, 0); + __visc__bindIn(stencil_node, 5, 5, 0); + __visc__bindIn(stencil_node, 6, 6, 0); + __visc__bindIn(stencil_node, 7, 7, 0); + __visc__bindIn(stencil_node, 8, 8, 0); +} + +void stencilLvl2(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz, + size_t dim_X1, size_t dim_Y1, size_t dim_Z1, + size_t dim_X2, size_t dim_Y2, size_t dim_Z2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(2, A0, Anext, 1, Anext); + void* stencil_node = __visc__createNodeND(3, stencilLvl1, dim_X2, dim_Y2, dim_Z2); + __visc__bindIn(stencil_node, 0, 0, 0); + __visc__bindIn(stencil_node, 1, 1, 0); + __visc__bindIn(stencil_node, 2, 2, 0); + __visc__bindIn(stencil_node, 3, 3, 0); + __visc__bindIn(stencil_node, 4, 4, 0); + __visc__bindIn(stencil_node, 5, 5, 0); + __visc__bindIn(stencil_node, 6, 6, 0); + __visc__bindIn(stencil_node, 7, 7, 0); + __visc__bindIn(stencil_node, 8, 8, 0); + __visc__bindIn(stencil_node, 9, 9, 0); + __visc__bindIn(stencil_node, 10, 10, 0); + __visc__bindIn(stencil_node, 11, 11, 0); +} + +void stencilLvl3(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz, + size_t dim_X1, size_t dim_Y1, size_t dim_Z1, + size_t dim_X2, size_t dim_Y2, size_t dim_Z2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(2, A0, Anext, 1, Anext); + void* stencil_node = __visc__createNodeND(0, stencilLvl2); + __visc__bindIn(stencil_node, 0, 0, 0); + __visc__bindIn(stencil_node, 1, 1, 0); + __visc__bindIn(stencil_node, 2, 2, 0); + __visc__bindIn(stencil_node, 3, 3, 0); + __visc__bindIn(stencil_node, 4, 4, 0); + __visc__bindIn(stencil_node, 5, 5, 0); + __visc__bindIn(stencil_node, 6, 6, 0); + __visc__bindIn(stencil_node, 7, 7, 0); + __visc__bindIn(stencil_node, 8, 8, 0); + __visc__bindIn(stencil_node, 9, 9, 0); + __visc__bindIn(stencil_node, 10, 10, 0); + __visc__bindIn(stencil_node, 11, 11, 0); + __visc__bindIn(stencil_node, 12, 12, 0); + __visc__bindIn(stencil_node, 13, 13, 0); + __visc__bindIn(stencil_node, 14, 14, 0); +} + int main(int argc, char** argv) { struct pb_TimerSet timers; struct pb_Parameters *parameters; @@ -124,13 +219,13 @@ int main(int argc, char** argv) { //only use 1D thread block - int tx =256; - long block[3] = {tx,1,1}; - long grid[3] = {(nx-2+tx-1)/tx*tx,ny-2,nz-2}; + size_t tx = 256; + size_t block[3] = {tx,1,1}; + size_t grid[3] = {((unsigned)nx-2+tx-1)/tx*tx,(unsigned)ny-2,(unsigned)nz-2}; //size_t grid[3] = {nx-2,ny-2,nz-2}; size_t offset[3] = {1,1,1}; - printf("grid(%d, %d, %d), block(%d, %d, %d)\n", grid[0], grid[1], grid[2], block[0], block[1], block[2]); + printf("grid(%ld, %ld, %ld), block(%ld, %ld, %ld)\n", grid[0], grid[1], grid[2], block[0], block[1], block[2]); //main execution int t; @@ -140,14 +235,25 @@ int main(int argc, char** argv) { for(t=0; t<iteration; t++) { pb_SwitchToTimer(&timers, pb_TimerID_NONE); - void* stencilDFG = __visc__node(naive_kernel, 2, 3, block[0], block[1], block[2], grid[0]/block[0], grid[1]/block[1], grid[2]/block[2], 9, (float)c0, (float)c1, h_A0, bytes, h_Anext, bytes, nx, ny, nz, 0); + + void* root_in = malloc(sizeof(RootIn)); + RootIn root_in_local = { + c0, c1, + h_A0, bytes, + h_Anext, bytes, + nx, ny, nz, + block[0], block[1], block[2], + grid[0]/block[0], grid[1]/block[1], grid[2]/block[2] + }; + *(RootIn*)root_in = root_in_local; + void* stencilDFG = __visc__launch(0, stencilLvl3, root_in); + __visc__wait(stencilDFG); //printf("iteration %d\n",t); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); float* h_temp = h_A0; h_A0 = h_Anext; h_Anext = h_temp; - }