From 6690f9e7e8b46b96aea222d3e85315cd63545953 Mon Sep 17 00:00:00 2001 From: Ben Schreiber <bjschre2@illinois.edu> Date: Sat, 18 Jan 2020 16:07:11 -0600 Subject: [PATCH] Port spmv and lbm --- hpvm/test/parboil/benchmarks/lbm/Makefile | 2 +- .../benchmarks/lbm/src/visc/lbm_macros.h | 2 +- .../parboil/benchmarks/lbm/src/visc/main.cpp | 69 +++++++- hpvm/test/parboil/benchmarks/spmv/Makefile | 2 +- .../parboil/benchmarks/spmv/src/visc/main.cpp | 165 +++++++++++++++--- 5 files changed, 207 insertions(+), 33 deletions(-) diff --git a/hpvm/test/parboil/benchmarks/lbm/Makefile b/hpvm/test/parboil/benchmarks/lbm/Makefile index 5644a7d42c..ee7bcb33b4 100644 --- a/hpvm/test/parboil/benchmarks/lbm/Makefile +++ b/hpvm/test/parboil/benchmarks/lbm/Makefile @@ -1,4 +1,4 @@ -PARBOIL_ROOT = $(LLVM_SRC_ROOT)/test/VISC/parboil +PARBOIL_ROOT = $(LLVM_SRC_ROOT)/../test/parboil APP = lbm ifeq ($(NUM_CORES),) diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/lbm_macros.h b/hpvm/test/parboil/benchmarks/lbm/src/visc/lbm_macros.h index 2f8ba8a09c..d8ceb373df 100644 --- a/hpvm/test/parboil/benchmarks/lbm/src/visc/lbm_macros.h +++ b/hpvm/test/parboil/benchmarks/lbm/src/visc/lbm_macros.h @@ -6,7 +6,7 @@ *cr ***************************************************************************/ -#ifndef _LBM_MARCOS_H +#ifndef _LBM_MACROS_H_ #define _LBM_MACROS_H_ #define OMEGA (1.95f) diff --git a/hpvm/test/parboil/benchmarks/lbm/src/visc/main.cpp b/hpvm/test/parboil/benchmarks/lbm/src/visc/main.cpp index 255036539c..605513847a 100644 --- a/hpvm/test/parboil/benchmarks/lbm/src/visc/main.cpp +++ b/hpvm/test/parboil/benchmarks/lbm/src/visc/main.cpp @@ -82,21 +82,35 @@ void MAIN_printInfo( const MAIN_Param* param ) { } /*############################################################################*/ -void performStreamCollide_kernel( float* srcG, float* dstG ) + +typedef struct __attribute__((__packed__)) { + float* srcG; size_t bytes_srcG; + float* dstG; size_t bytes_dstG; + size_t dim_X1, dim_X2, dim_Y2; +} RootIn; + +void performStreamCollide_kernel( float* srcG, size_t bytes_srcG, float* dstG, size_t bytes_dstG ) { __visc__hint(visc::DEVICE); __visc__attributes(2, srcG, dstG, 1, dstG); + + void* thisNode = __visc__getNode(); + void* parentNode = __visc__getParentNode(thisNode); + srcG += MARGIN; dstG += MARGIN; + int lx = __visc__getNodeInstanceID_x(thisNode); + int gx = __visc__getNodeInstanceID_x(parentNode); + int gy = __visc__getNodeInstanceID_y(parentNode); //Using some predefined macros here. Consider this the declaration // and initialization of the variables SWEEP_X, SWEEP_Y and SWEEP_Z SWEEP_VAR - SWEEP_X = get_local_id(0); - SWEEP_Y = get_group_id(0); - SWEEP_Z = get_group_id(1); + SWEEP_X = lx; // get_local_id(0) + SWEEP_Y = gx; // get_group_id(0) + SWEEP_Z = gy; // get_group_id(1) float temp_swp, tempC, tempN, tempS, tempE, tempW, tempT, tempB; float tempNE, tempNW, tempSE, tempSW, tempNT, tempNB, tempST ; @@ -262,13 +276,58 @@ void performStreamCollide_kernel( float* srcG, float* dstG ) DST_WB( dstG ) = tempWB; } +void lbmLvl1(float* srcG, size_t bytes_srcG, float* dstG, size_t bytes_dstG, size_t dim_X1) +{ + __visc__hint(visc::DEVICE); + __visc__attributes(2, srcG, dstG, 1, dstG); + void* lbm_node = __visc__createNodeND(1, performStreamCollide_kernel, dim_X1); + __visc__bindIn(lbm_node, 0, 0, 0); + __visc__bindIn(lbm_node, 1, 1, 0); + __visc__bindIn(lbm_node, 2, 2, 0); + __visc__bindIn(lbm_node, 3, 3, 0); +} + +void lbmLvl2(float* srcG, size_t bytes_srcG, float* dstG, size_t bytes_dstG, size_t dim_X1, size_t dim_X2, size_t dim_Y2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(2, srcG, dstG, 1, dstG); + void* lbm_node = __visc__createNodeND(2, lbmLvl1, dim_X2, dim_Y2); + __visc__bindIn(lbm_node, 0, 0, 0); + __visc__bindIn(lbm_node, 1, 1, 0); + __visc__bindIn(lbm_node, 2, 2, 0); + __visc__bindIn(lbm_node, 3, 3, 0); + __visc__bindIn(lbm_node, 4, 4, 0); +} + +void lbmLvl3(float* srcG, size_t bytes_srcG, float* dstG, size_t bytes_dstG, size_t dim_X1, size_t dim_X2, size_t dim_Y2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(2, srcG, dstG, 1, dstG); + void* lbm_node = __visc__createNodeND(0, lbmLvl2); + __visc__bindIn(lbm_node, 0, 0, 0); + __visc__bindIn(lbm_node, 1, 1, 0); + __visc__bindIn(lbm_node, 2, 2, 0); + __visc__bindIn(lbm_node, 3, 3, 0); + __visc__bindIn(lbm_node, 4, 4, 0); + __visc__bindIn(lbm_node, 5, 5, 0); + __visc__bindIn(lbm_node, 6, 6, 0); +} __attribute__((noinline)) void MAIN_performStreamCollide( LBM_Grid src, LBM_Grid dst ) { long dimBlock[3] = {SIZE_X,1,1}; long dimGrid[3] = {SIZE_X*SIZE_Y,SIZE_Z,1}; size_t size = TOTAL_PADDED_CELLS*N_CELL_ENTRIES*sizeof( float ); - void* lbmDFG = __visc__node(performStreamCollide_kernel, 2, 3, dimBlock[0], dimBlock[1], dimBlock[2], dimGrid[0]/dimBlock[0], dimGrid[1]/dimBlock[1], dimGrid[2]/dimBlock[2], 4, src-MARGIN, size, dst-MARGIN, size, 0); + + void* root_in = malloc(sizeof(RootIn)); + RootIn root_in_local = { + src - MARGIN, size, + dst - MARGIN, size, + SIZE_X, SIZE_Y, SIZE_Z + }; + *(RootIn*)root_in = root_in_local; + void* lbmDFG = __visc__launch(0, lbmLvl3, root_in); + __visc__wait(lbmDFG); } diff --git a/hpvm/test/parboil/benchmarks/spmv/Makefile b/hpvm/test/parboil/benchmarks/spmv/Makefile index e7ef8ca39c..71e2246343 100644 --- a/hpvm/test/parboil/benchmarks/spmv/Makefile +++ b/hpvm/test/parboil/benchmarks/spmv/Makefile @@ -1,4 +1,4 @@ -PARBOIL_ROOT = $(LLVM_SRC_ROOT)/test/VISC/parboil +PARBOIL_ROOT = $(LLVM_SRC_ROOT)/../test/parboil APP = spmv # Default compile visc diff --git a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.cpp b/hpvm/test/parboil/benchmarks/spmv/src/visc/main.cpp index bd457e6e13..eb4d4d7dbe 100644 --- a/hpvm/test/parboil/benchmarks/spmv/src/visc/main.cpp +++ b/hpvm/test/parboil/benchmarks/spmv/src/visc/main.cpp @@ -31,17 +31,38 @@ static int generate_vector(float *x_vector, int dim) return 0; } -void spmv_jds(float *dst_vector, float *d_data, - int *d_index, int *d_perm, - float *x_vec, int dim, - int *jds_ptr_int, - int *sh_zcnt_int) +typedef struct __attribute__((__packed__)) { + float* dst_vector; size_t bytes_dst_vector; + float* d_data; size_t bytes_d_data; + int* d_index; size_t bytes_d_index; + int* d_perm; size_t bytes_d_perm; + float* x_vec; size_t bytes_x_vec; + int dim; + int* jds_ptr_int; size_t bytes_jds_ptr_int; + int* sh_zcnt_int; size_t bytes_sh_zcnt_int; + size_t dim_X1, dim_X2; +} RootIn; + +void spmv_jds(float* dst_vector, size_t bytes_dst_vector, + float* d_data, size_t bytes_d_data, + int* d_index, size_t bytes_d_index, + int* d_perm, size_t bytes_d_perm, + float* x_vec, size_t bytes_x_vec, + int dim, + int* jds_ptr_int, size_t bytes_jds_ptr_int, + int* sh_zcnt_int, size_t bytes_sh_zcnt_int) { __visc__hint(visc::DEVICE); __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, 1, dst_vector); - int ix = get_global_id(0); + void* thisNode = __visc__getNode(); + void* parentNode = __visc__getParentNode(thisNode); + int lx = __visc__getNodeInstanceID_x(thisNode); + int gx = __visc__getNodeInstanceID_x(parentNode); + int gridx = __visc__getNumNodeInstances_x(thisNode); + + int ix = gx * gridx + lx; int warp_id=ix>>WARP_BITS; if(ix<dim) @@ -99,6 +120,102 @@ void spmv_jds(float *dst_vector, float *d_data, } } +void spmvLvl1(float* dst_vector, size_t bytes_dst_vector, + float* d_data, size_t bytes_d_data, + int* d_index, size_t bytes_d_index, + int* d_perm, size_t bytes_d_perm, + float* x_vec, size_t bytes_x_vec, + int dim, + int* jds_ptr_int, size_t bytes_jds_ptr_int, + int* sh_zcnt_int, size_t bytes_sh_zcnt_int, + size_t dim_X1) +{ + __visc__hint(visc::DEVICE); + __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, + 1, dst_vector); + void* spmv_node = __visc__createNodeND(1, spmv_jds, dim_X1); + __visc__bindIn(spmv_node, 0, 0, 0); + __visc__bindIn(spmv_node, 1, 1, 0); + __visc__bindIn(spmv_node, 2, 2, 0); + __visc__bindIn(spmv_node, 3, 3, 0); + __visc__bindIn(spmv_node, 4, 4, 0); + __visc__bindIn(spmv_node, 5, 5, 0); + __visc__bindIn(spmv_node, 6, 6, 0); + __visc__bindIn(spmv_node, 7, 7, 0); + __visc__bindIn(spmv_node, 8, 8, 0); + __visc__bindIn(spmv_node, 9, 9, 0); + __visc__bindIn(spmv_node, 10, 10, 0); + __visc__bindIn(spmv_node, 11, 11, 0); + __visc__bindIn(spmv_node, 12, 12, 0); + __visc__bindIn(spmv_node, 13, 13, 0); + __visc__bindIn(spmv_node, 14, 14, 0); +} + +void spmvLvl2(float* dst_vector, size_t bytes_dst_vector, + float* d_data, size_t bytes_d_data, + int* d_index, size_t bytes_d_index, + int* d_perm, size_t bytes_d_perm, + float* x_vec, size_t bytes_x_vec, + int dim, + int* jds_ptr_int, size_t bytes_jds_ptr_int, + int* sh_zcnt_int, size_t bytes_sh_zcnt_int, + size_t dim_X1, size_t dim_X2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, + 1, dst_vector); + void* spmv_node = __visc__createNodeND(1, spmvLvl1, dim_X2); + __visc__bindIn(spmv_node, 0, 0, 0); + __visc__bindIn(spmv_node, 1, 1, 0); + __visc__bindIn(spmv_node, 2, 2, 0); + __visc__bindIn(spmv_node, 3, 3, 0); + __visc__bindIn(spmv_node, 4, 4, 0); + __visc__bindIn(spmv_node, 5, 5, 0); + __visc__bindIn(spmv_node, 6, 6, 0); + __visc__bindIn(spmv_node, 7, 7, 0); + __visc__bindIn(spmv_node, 8, 8, 0); + __visc__bindIn(spmv_node, 9, 9, 0); + __visc__bindIn(spmv_node, 10, 10, 0); + __visc__bindIn(spmv_node, 11, 11, 0); + __visc__bindIn(spmv_node, 12, 12, 0); + __visc__bindIn(spmv_node, 13, 13, 0); + __visc__bindIn(spmv_node, 14, 14, 0); + __visc__bindIn(spmv_node, 15, 15, 0); +} + +void spmvLvl3(float* dst_vector, size_t bytes_dst_vector, + float* d_data, size_t bytes_d_data, + int* d_index, size_t bytes_d_index, + int* d_perm, size_t bytes_d_perm, + float* x_vec, size_t bytes_x_vec, + int dim, + int* jds_ptr_int, size_t bytes_jds_ptr_int, + int* sh_zcnt_int, size_t bytes_sh_zcnt_int, + size_t dim_X1, size_t dim_X2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(7, dst_vector, d_data, d_index, d_perm, x_vec, jds_ptr_int, sh_zcnt_int, + 1, dst_vector); + void* spmv_node = __visc__createNodeND(1, spmvLvl2, dim_X2); + __visc__bindIn(spmv_node, 0, 0, 0); + __visc__bindIn(spmv_node, 1, 1, 0); + __visc__bindIn(spmv_node, 2, 2, 0); + __visc__bindIn(spmv_node, 3, 3, 0); + __visc__bindIn(spmv_node, 4, 4, 0); + __visc__bindIn(spmv_node, 5, 5, 0); + __visc__bindIn(spmv_node, 6, 6, 0); + __visc__bindIn(spmv_node, 7, 7, 0); + __visc__bindIn(spmv_node, 8, 8, 0); + __visc__bindIn(spmv_node, 9, 9, 0); + __visc__bindIn(spmv_node, 10, 10, 0); + __visc__bindIn(spmv_node, 11, 11, 0); + __visc__bindIn(spmv_node, 12, 12, 0); + __visc__bindIn(spmv_node, 13, 13, 0); + __visc__bindIn(spmv_node, 14, 14, 0); + __visc__bindIn(spmv_node, 15, 15, 0); + __visc__bindIn(spmv_node, 16, 16, 0); +} + int main(int argc, char** argv) { struct pb_TimerSet timers; struct pb_Parameters *parameters; @@ -185,24 +302,22 @@ int main(int argc, char** argv) { for(i=0; i<50; i++) { pb_SwitchToTimer(&timers, pb_TimerID_NONE); - void* spmvDFG = __visc__node(spmv_jds, 2, 1, block, (grid/block), - 15, - h_Ax_vector, - dim*sizeof(float), - h_data, - len*sizeof(float), - h_indices, - len*sizeof(int), - h_perm, - dim*sizeof(int), - h_x_vector, - dim*sizeof(float), - dim, - h_ptr, - depth*sizeof(int), - h_nzcnt, - nzcnt_len*sizeof(int), - 0); + + void* root_in = malloc(sizeof(RootIn)); + RootIn root_in_local = { + h_Ax_vector, dim * sizeof(float), + h_data, len * sizeof(float), + h_indices, len * sizeof(int), + h_perm, dim * sizeof(int), + h_x_vector, dim * sizeof(float), + dim, + h_ptr, depth * sizeof(int), + h_nzcnt, nzcnt_len * sizeof(int), + block, (grid/block) + }; + *(RootIn*)root_in = root_in_local; + void* spmvDFG = __visc__launch(0, spmvLvl3, root_in); + __visc__wait(spmvDFG); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); @@ -224,7 +339,7 @@ int main(int argc, char** argv) { llvm_visc_request_mem(h_Ax_vector, dim*sizeof(float)); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); + pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); llvm_visc_untrack_mem(h_Ax_vector); llvm_visc_untrack_mem(h_data); -- GitLab