diff --git a/hpvm/test/parboil/benchmarks/sgemm/Makefile b/hpvm/test/parboil/benchmarks/sgemm/Makefile index 9069d99772909f1a7414fd9364bddbe9439988d5..ace9ded22b6ef365c9cd0f6262245dd2e086643d 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/Makefile +++ b/hpvm/test/parboil/benchmarks/sgemm/Makefile @@ -19,7 +19,7 @@ BIN = $(addsuffix -$(VERSION), $(APP)) SRCDIR = src/$(VERSION) BUILDDIR = build/$(VERSION)_$(PLATFORM) -DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP) +DATASET_DIR ?= $(PARBOIL_ROOT)/datasets/$(APP) MATRIX1 = $(DATASET_DIR)/$(TEST)/input/matrix1.txt MATRIX2 = $(DATASET_DIR)/$(TEST)/input/matrix2.txt diff --git a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc index 22d9098646f3ca1e8582a319e93e68e9a58e42d8..af9ee76e0fed3ced9e2666193afbd7c0631f1ce8 100644 --- a/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc +++ b/hpvm/test/parboil/benchmarks/sgemm/src/visc/main.cc @@ -279,4 +279,3 @@ int main (int argc, char *argv[]) { return 0; } - diff --git a/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp b/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp index 74d2f44495290de30a624a3606d64b56ddc92b63..11a02fdd488acbaf9c080cab63e968b3310e180e 100644 --- a/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp +++ b/hpvm/test/parboil/benchmarks/stencil/src/visc/stencil.cpp @@ -33,14 +33,41 @@ static int read_data(float *A0, int nx,int ny,int nz,FILE *fp) return 0; } -void naive_kernel(float c0,float c1, float* A0, float *Anext,int nx,int ny,int nz) +typedef struct __attribute__((__packed__)) { + float c0, c1; + float* A0; size_t bytes_A0; + float* Anext; size_t bytes_Anext; + int nx, ny, nz; + size_t dim_X1, dim_Y1, dim_Z1; + size_t dim_X2, dim_Y2, dim_Z2; +} RootIn; + +void naive_kernel(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz) { __visc__hint(visc::DEVICE); __visc__attributes(2, A0, Anext, 1, Anext); - int i = get_global_id(0)+1; - int j = get_global_id(1)+1; - int k = get_global_id(2)+1; + void* thisNode = __visc__getNode(); + void* parentNode = __visc__getParentNode(thisNode); + + int lx = __visc__getNodeInstanceID_x(thisNode); + int ly = __visc__getNodeInstanceID_y(thisNode); + int lz = __visc__getNodeInstanceID_z(thisNode); + + int gx = __visc__getNodeInstanceID_x(parentNode); + int gy = __visc__getNodeInstanceID_y(parentNode); + int gz = __visc__getNodeInstanceID_z(parentNode); + + int gridx = __visc__getNumNodeInstances_x(thisNode); + int gridy = __visc__getNumNodeInstances_y(thisNode); + int gridz = __visc__getNumNodeInstances_z(thisNode); + + int i = gx * gridx + lx + 1; + int j = gy * gridy + ly + 1; + int k = gz * gridz + lz + 1; + if(i<nx-1) { Anext[Index3D (nx, ny, i, j, k)] = c1 * @@ -54,6 +81,74 @@ void naive_kernel(float c0,float c1, float* A0, float *Anext,int nx,int ny,int n } } +void stencilLvl1(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz, + size_t dim_X1, size_t dim_Y1, size_t dim_Z1) +{ + __visc__hint(visc::DEVICE); + __visc__attributes(2, A0, Anext, 1, Anext); + void* stencil_node = __visc__createNodeND(3, naive_kernel, dim_X1, dim_Y1, dim_Z1); + __visc__bindIn(stencil_node, 0, 0, 0); + __visc__bindIn(stencil_node, 1, 1, 0); + __visc__bindIn(stencil_node, 2, 2, 0); + __visc__bindIn(stencil_node, 3, 3, 0); + __visc__bindIn(stencil_node, 4, 4, 0); + __visc__bindIn(stencil_node, 5, 5, 0); + __visc__bindIn(stencil_node, 6, 6, 0); + __visc__bindIn(stencil_node, 7, 7, 0); + __visc__bindIn(stencil_node, 8, 8, 0); +} + +void stencilLvl2(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz, + size_t dim_X1, size_t dim_Y1, size_t dim_Z1, + size_t dim_X2, size_t dim_Y2, size_t dim_Z2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(2, A0, Anext, 1, Anext); + void* stencil_node = __visc__createNodeND(3, stencilLvl1, dim_X2, dim_Y2, dim_Z2); + __visc__bindIn(stencil_node, 0, 0, 0); + __visc__bindIn(stencil_node, 1, 1, 0); + __visc__bindIn(stencil_node, 2, 2, 0); + __visc__bindIn(stencil_node, 3, 3, 0); + __visc__bindIn(stencil_node, 4, 4, 0); + __visc__bindIn(stencil_node, 5, 5, 0); + __visc__bindIn(stencil_node, 6, 6, 0); + __visc__bindIn(stencil_node, 7, 7, 0); + __visc__bindIn(stencil_node, 8, 8, 0); + __visc__bindIn(stencil_node, 9, 9, 0); + __visc__bindIn(stencil_node, 10, 10, 0); + __visc__bindIn(stencil_node, 11, 11, 0); +} + +void stencilLvl3(float c0, float c1, + float* A0, size_t bytes_A0, float* Anext, size_t bytes_Anext, + int nx, int ny, int nz, + size_t dim_X1, size_t dim_Y1, size_t dim_Z1, + size_t dim_X2, size_t dim_Y2, size_t dim_Z2) +{ + __visc__hint(visc::CPU_TARGET); + __visc__attributes(2, A0, Anext, 1, Anext); + void* stencil_node = __visc__createNodeND(0, stencilLvl2); + __visc__bindIn(stencil_node, 0, 0, 0); + __visc__bindIn(stencil_node, 1, 1, 0); + __visc__bindIn(stencil_node, 2, 2, 0); + __visc__bindIn(stencil_node, 3, 3, 0); + __visc__bindIn(stencil_node, 4, 4, 0); + __visc__bindIn(stencil_node, 5, 5, 0); + __visc__bindIn(stencil_node, 6, 6, 0); + __visc__bindIn(stencil_node, 7, 7, 0); + __visc__bindIn(stencil_node, 8, 8, 0); + __visc__bindIn(stencil_node, 9, 9, 0); + __visc__bindIn(stencil_node, 10, 10, 0); + __visc__bindIn(stencil_node, 11, 11, 0); + __visc__bindIn(stencil_node, 12, 12, 0); + __visc__bindIn(stencil_node, 13, 13, 0); + __visc__bindIn(stencil_node, 14, 14, 0); +} + int main(int argc, char** argv) { struct pb_TimerSet timers; struct pb_Parameters *parameters; @@ -130,7 +225,7 @@ int main(int argc, char** argv) { //size_t grid[3] = {nx-2,ny-2,nz-2}; size_t offset[3] = {1,1,1}; - printf("grid(%d, %d, %d), block(%d, %d, %d)\n", grid[0], grid[1], grid[2], block[0], block[1], block[2]); + printf("grid(%ld, %ld, %ld), block(%ld, %ld, %ld)\n", grid[0], grid[1], grid[2], block[0], block[1], block[2]); //main execution int t; @@ -140,14 +235,25 @@ int main(int argc, char** argv) { for(t=0; t<iteration; t++) { pb_SwitchToTimer(&timers, pb_TimerID_NONE); - void* stencilDFG = __visc__node(naive_kernel, 2, 3, block[0], block[1], block[2], grid[0]/block[0], grid[1]/block[1], grid[2]/block[2], 9, (float)c0, (float)c1, h_A0, bytes, h_Anext, bytes, nx, ny, nz, 0); + + void* root_in = malloc(sizeof(RootIn)); + RootIn root_in_local = { + c0, c1, + h_A0, bytes, + h_Anext, bytes, + nx, ny, nz, + block[0], block[1], block[2], + grid[0]/block[0], grid[1]/block[1], grid[2]/block[2] + }; + *(RootIn*)root_in = root_in_local; + void* stencilDFG = __visc__launch(0, stencilLvl3, root_in); + __visc__wait(stencilDFG); //printf("iteration %d\n",t); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); float* h_temp = h_A0; h_A0 = h_Anext; h_Anext = h_temp; - }