From 877bca7eba4e82d9344248ee28514db113d6cea7 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <prakalps@gmail.com> Date: Tue, 14 Jul 2015 12:43:56 -0500 Subject: [PATCH] Start timers after IO done to increase accuracy of timers. --- llvm/test/VISC/parboil/.ycm_extra_conf.py | 1 + .../benchmarks/lbm/src/opencl_nvidia/main.c | 65 ++++++++++-- .../parboil/benchmarks/lbm/src/visc/main.c | 17 ++-- .../VISC/parboil/benchmarks/nodeSwap/Makefile | 9 +- .../nodeSwap/src/opencl/kernel_offline.cl | 25 +++-- .../benchmarks/nodeSwap/src/opencl/main.cc | 21 ++-- .../benchmarks/sgemm/src/opencl_base/main.cc | 49 +++++----- .../parboil/benchmarks/sgemm/src/visc/main.cc | 33 ++++--- .../benchmarks/spmv/src/opencl_nvidia/main.c | 98 ++++++++++--------- .../parboil/benchmarks/spmv/src/visc/main.c | 36 +++---- .../benchmarks/stencil/src/opencl_base/main.c | 42 ++++---- .../benchmarks/stencil/src/visc/stencil.c | 40 ++++---- llvm/test/VISC/parboil/common/include/visc.h | 2 + llvm/test/VISC/parboil/common/mk/opencl.mk | 2 +- llvm/test/VISC/parboil/common/mk/visc.mk | 7 ++ .../parboil/common/platform/visc.default.mk | 4 +- 16 files changed, 272 insertions(+), 179 deletions(-) diff --git a/llvm/test/VISC/parboil/.ycm_extra_conf.py b/llvm/test/VISC/parboil/.ycm_extra_conf.py index cb5dfd4b78..d8ca60904f 100644 --- a/llvm/test/VISC/parboil/.ycm_extra_conf.py +++ b/llvm/test/VISC/parboil/.ycm_extra_conf.py @@ -42,6 +42,7 @@ flags = [ # harmful '-isystem', '/home/psrivas2/current-test/parboil/common/include', '-I', '/home/psrivas2/current-test/parboil/common/include', + '-I', '/home/psrivas2/current-src/include' '-isystem', '/System/Library/Frameworks/Python.framework/Headers', '-isystem', '/usr/local/include', '-isystem', '/usr/local/include/eigen3', diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c index 7894ea9b70..c58dcc27e4 100644 --- a/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c +++ b/llvm/test/VISC/parboil/benchmarks/lbm/src/opencl_nvidia/main.c @@ -33,20 +33,53 @@ int main( int nArgs, char* arg[] ) { OpenCL_Param prm; - pb_InitializeTimerSet(&timers); struct pb_Parameters* params; params = pb_ReadParameters(&nArgs, arg); - static LBM_GridPtr TEMP_srcGrid; //Setup TEMP datastructures - LBM_allocateGrid( (float**) &TEMP_srcGrid ); MAIN_parseCommandLine( nArgs, arg, ¶m, params ); MAIN_printInfo( ¶m ); + /*MAIN_initialize( ¶m, &prm ); */ // This has been inlined + + static LBM_Grid TEMP_srcGrid, TEMP_dstGrid; + + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + //Setup TEMP datastructures + LBM_allocateGrid( (float**) &TEMP_srcGrid ); + LBM_allocateGrid( (float**) &TEMP_dstGrid ); + LBM_initializeGrid( TEMP_srcGrid ); + LBM_initializeGrid( TEMP_dstGrid ); + + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + if( param.obstacleFilename != NULL ) { + LBM_loadObstacleFile( TEMP_srcGrid, param.obstacleFilename ); + LBM_loadObstacleFile( TEMP_dstGrid, param.obstacleFilename ); + } + + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + LBM_initializeSpecialCellsForLDC( TEMP_srcGrid ); + LBM_initializeSpecialCellsForLDC( TEMP_dstGrid ); + + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + LBM_showGridStatistics( TEMP_srcGrid ); + + pb_InitializeTimerSet(&timers); pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + OpenCL_initialize(&prm); - MAIN_initialize( ¶m, &prm ); + + //Setup DEVICE datastructures + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + OpenCL_LBM_allocateGrid( &prm, &OpenCL_srcGrid ); + OpenCL_LBM_allocateGrid( &prm, &OpenCL_dstGrid ); + + //Initialize DEVICE datastructures + OpenCL_LBM_initializeGrid( &prm, OpenCL_srcGrid, TEMP_srcGrid ); + OpenCL_LBM_initializeGrid( &prm, OpenCL_dstGrid, TEMP_dstGrid ); + + for( t = 1; t <= param.nTimeSteps; t++ ) { pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); @@ -63,13 +96,31 @@ int main( int nArgs, char* arg[] ) { } } - MAIN_finalize( ¶m, &prm ); + /*MAIN_finalize( ¶m, &prm );*/ // inlined - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - LBM_freeGrid( (float**) &TEMP_srcGrid ); + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + OpenCL_LBM_getDeviceGrid(&prm, OpenCL_srcGrid, TEMP_srcGrid); + + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + OpenCL_LBM_freeGrid( OpenCL_srcGrid ); + OpenCL_LBM_freeGrid( OpenCL_dstGrid ); + + clReleaseProgram(prm.clProgram); + clReleaseKernel(prm.clKernel); + clReleaseCommandQueue(prm.clCommandQueue); + clReleaseContext(prm.clContext); pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_PrintTimerSet(&timers); + + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + LBM_showGridStatistics( TEMP_srcGrid ); + LBM_storeVelocityField( TEMP_srcGrid, param.resultFilename, TRUE ); + + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + LBM_freeGrid( (float**) &TEMP_srcGrid ); + LBM_freeGrid( (float**) &TEMP_dstGrid ); + pb_FreeParameters(params); return 0; } diff --git a/llvm/test/VISC/parboil/benchmarks/lbm/src/visc/main.c b/llvm/test/VISC/parboil/benchmarks/lbm/src/visc/main.c index 0a9d87c708..251b10b243 100644 --- a/llvm/test/VISC/parboil/benchmarks/lbm/src/visc/main.c +++ b/llvm/test/VISC/parboil/benchmarks/lbm/src/visc/main.c @@ -32,8 +32,6 @@ int main( int nArgs, char* arg[] ) { MAIN_Param param; int t; - pb_InitializeTimerSet(&timers); - __visc__init(); struct pb_Parameters* params; params = pb_ReadParameters(&nArgs, arg); @@ -46,6 +44,9 @@ int main( int nArgs, char* arg[] ) { MAIN_initialize( ¶m ); + pb_InitializeTimerSet(&timers); + __visc__init(); + size_t size = TOTAL_PADDED_CELLS*N_CELL_ENTRIES*sizeof( float ); pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK); llvm_visc_track_mem(srcGrid-MARGIN, size); @@ -72,18 +73,20 @@ int main( int nArgs, char* arg[] ) { pb_SwitchToTimer(&timers, pb_TimerID_COPY); llvm_visc_request_mem(srcGrid-MARGIN, size); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - MAIN_finalize( ¶m ); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); llvm_visc_untrack_mem(srcGrid-MARGIN); llvm_visc_untrack_mem(dstGrid-MARGIN); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - LBM_freeGrid( (float**) &TEMP_srcGrid ); pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_PrintTimerSet(&timers); __visc__cleanup(); + + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + MAIN_finalize( ¶m ); + + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + LBM_freeGrid( (float**) &TEMP_srcGrid ); + pb_FreeParameters(params); return 0; } diff --git a/llvm/test/VISC/parboil/benchmarks/nodeSwap/Makefile b/llvm/test/VISC/parboil/benchmarks/nodeSwap/Makefile index 1c2cafd3fd..67fea898b6 100644 --- a/llvm/test/VISC/parboil/benchmarks/nodeSwap/Makefile +++ b/llvm/test/VISC/parboil/benchmarks/nodeSwap/Makefile @@ -18,6 +18,7 @@ endif ifeq ($(ROWM),1) APP_CFLAGS=-DROWM APP_CXXFLAGS=-DROWM + APP_ISPCFLAGS=-DROWM endif BIN = $(addsuffix -$(VERSION), $(APP)) @@ -26,17 +27,13 @@ SRCDIR = src/$(VERSION) BUILDDIR = build/$(VERSION)_$(PLATFORM) DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP) -ifneq ($(TEST),small) - TEST=medium -endif - MATRIX1 = $(DATASET_DIR)/$(TEST)/input/matrix1.txt -MATRIX2 = $(DATASET_DIR)/$(TEST)/input/matrix1.txt +MATRIX2 = $(DATASET_DIR)/$(TEST)/input/matrix2.txt REF_OUTPUT = $(DATASET_DIR)/$(TEST)/output/matrix3.txt RUNDIR = run/$(VERSION)/$(TEST) OUTPUT = $(RUNDIR)/matrix3.txt ARGS = -i $(MATRIX1),$(MATRIX2) -o $(OUTPUT) -TOOL = tools/compare-output +TOOL = diff include $(PARBOIL_ROOT)/common/mk/Makefile diff --git a/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/kernel_offline.cl b/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/kernel_offline.cl index 56f16e2068..f689bdaf79 100644 --- a/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/kernel_offline.cl +++ b/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/kernel_offline.cl @@ -10,17 +10,24 @@ * Kernel of dense matrix-matrix multiplication kernel. */ -__kernel void matAdd( __global float *A, __global float *B, __global float* C ) +__kernel void matAdd( __global float *A, __global float *B, __global float* C, int m, int n ) { +//#ifdef ROWM + //int i = get_group_id(0); + //int j = get_local_id(0); + //int dim = get_local_size(0); +//#else + //int j = get_group_id(0); + //int i = get_local_id(0); + //int dim = get_global_size(0)/get_local_size(0); +//#endif #ifdef ROWM - int i = get_group_id(0); - int j = get_local_id(0); - int dim = get_local_size(0); + int i = get_global_id(0); + for(int j=0; j < n; j++) + C[i*n+j] = A[i*n+j] + B[i*n+j]; #else - int j = get_group_id(0); - int i = get_local_id(0); - int dim = get_global_size(0)/get_local_size(0); + int j = get_global_id(0); + for(int i=0; i < m; i++) + C[i*n+j] = A[i*n+j] + B[i*n+j]; #endif - - C[i*dim+j] = A[i*dim+j] + B[i*dim+j]; } diff --git a/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/main.cc b/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/main.cc index 5f76fc9bc5..23d2950c2a 100644 --- a/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/nodeSwap/src/opencl/main.cc @@ -46,22 +46,29 @@ void basicSgemm( int m, int n, cl_mem A, cl_mem B, cl_mem C, cl_kernel clKernel, << "; n should be multiple of " << TILE_SZ << std::endl; } +//#ifdef ROWM + //size_t db = m; + //size_t dg = (m*n); +//#else + //size_t db = n; + //size_t dg = (m*n); +//#endif #ifdef ROWM - size_t db = m; - size_t dg = (m*n); + size_t dg = m; #else - size_t db = n; - size_t dg = (m*n); + size_t dg = n; #endif - cl_int clStatus; - std::cout << "Block dim = " << db << ", Group dim = " << dg/db << "\n"; + //std::cout << "Block dim = " << db << ", Group dim = " << dg/db << "\n"; clStatus = clSetKernelArg(clKernel,0,sizeof(cl_mem),(void*)&A); clStatus = clSetKernelArg(clKernel,1,sizeof(cl_mem),(void*)&B); clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&C); + clStatus = clSetKernelArg(clKernel,3,sizeof(int),(void*)&m); + clStatus = clSetKernelArg(clKernel,4,sizeof(int),(void*)&n); CHECK_ERROR("clSetKernelArg") - clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&dg,&db,0,NULL,NULL); + //clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&dg,&db,0,NULL,NULL); + clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&dg,NULL,0,NULL,NULL); CHECK_ERROR("clEnqueueNDRangeKernel") clStatus = clFinish(clCommandQueue); diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc index 47ac5c4271..34ff5ea5cf 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc @@ -88,7 +88,6 @@ int main (int argc, char *argv[]) { int matBrow, matBcol; std::vector<float> matA, matBT; - pb_InitializeTimerSet(&timers); /* Read command line. Expect 3 inputs: A, B and B^T in column-major layout*/ @@ -102,6 +101,16 @@ int main (int argc, char *argv[]) { exit(-1); } + /* Read in data */ + // load A + readColMajorMatrixFile(params->inpFiles[0], + matArow, matAcol, matA); + // load B^T + readColMajorMatrixFile(params->inpFiles[2], + matBcol, matBrow, matBT); + + pb_InitializeTimerSet(&timers); + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); cl_int clStatus; cl_platform_id clPlatform; @@ -138,16 +147,6 @@ int main (int argc, char *argv[]) { //cl_kernel clKernel = clCreateKernel(clProgram,"mysgemmNT",&clStatus); //CHECK_ERROR("clCreateKernel") - /* Read in data */ - pb_SwitchToTimer(&timers, pb_TimerID_IO); - - // load A - readColMajorMatrixFile(params->inpFiles[0], - matArow, matAcol, matA); - // load B^T - readColMajorMatrixFile(params->inpFiles[2], - matBcol, matBrow, matBT); - pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE ); // copy A to device memory A_sz = matArow*matAcol*sizeof(float); @@ -187,12 +186,24 @@ int main (int argc, char *argv[]) { basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, \ dA, matArow, dB, matBcol, 0.0f, dC, matArow, clKernel, clCommandQueue); + pb_SwitchToTimer( &timers, pb_TimerID_COPY ); + clEnqueueReadBuffer(clCommandQueue,dC,CL_TRUE,0,C_sz,&matC.front(),0,NULL,NULL); + + pb_SwitchToTimer( &timers, visc_TimerID_SETUP); + clStatus = clReleaseKernel(clKernel); + clStatus = clReleaseProgram(clProgram); + clStatus = clReleaseMemObject(dA); + clStatus = clReleaseMemObject(dB); + clStatus = clReleaseMemObject(dC); + clStatus = clReleaseCommandQueue(clCommandQueue); + clStatus = clReleaseContext(clContext); + + pb_PrintTimerSet(&timers); + if (params->outFile) { - pb_SwitchToTimer( &timers, pb_TimerID_COPY ); - clEnqueueReadBuffer(clCommandQueue,dC,CL_TRUE,0,C_sz,&matC.front(),0,NULL,NULL); /* Write C to file */ - pb_SwitchToTimer(&timers, pb_TimerID_IO); + //pb_SwitchToTimer(&timers, pb_TimerID_IO); writeColMajorMatrixFile(params->outFile, matArow, matBcol, matC); } @@ -201,18 +212,10 @@ int main (int argc, char *argv[]) { double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); std::cout<< "GFLOPs = " << 2.* matArow * matBcol * matAcol/GPUtime/1e9 << std::endl; - pb_PrintTimerSet(&timers); pb_FreeParameters(params); //free((void*)clSource[0]); - clStatus = clReleaseKernel(clKernel); - clStatus = clReleaseProgram(clProgram); - clStatus = clReleaseMemObject(dA); - clStatus = clReleaseMemObject(dB); - clStatus = clReleaseMemObject(dC); - clStatus = clReleaseCommandQueue(clCommandQueue); - clStatus = clReleaseContext(clContext); - + return 0; } diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc index 23d89aeb16..04de5d2c97 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/visc/main.cc @@ -39,6 +39,7 @@ extern char* readFile(const char*); void mysgemmNT( float* A, int lda, float* B, int ldb, float* C, int ldc, int k, float alpha, float beta ) { + __visc__hint(visc::GPU_TARGET); __visc__attributes(3, A, B, C, 1, C); float c = 0.0f; int m = get_global_id(0); @@ -87,8 +88,6 @@ int main (int argc, char *argv[]) { int matBrow, matBcol; std::vector<float> matA, matBT; - pb_InitializeTimerSet(&timers); - __visc__init(); /* Read command line. Expect 3 inputs: A, B and B^T in column-major layout*/ @@ -103,8 +102,6 @@ int main (int argc, char *argv[]) { } /* Read in data */ - pb_SwitchToTimer(&timers, pb_TimerID_IO); - // load A readColMajorMatrixFile(params->inpFiles[0], matArow, matAcol, matA); @@ -113,6 +110,9 @@ int main (int argc, char *argv[]) { readColMajorMatrixFile(params->inpFiles[2], matBcol, matBrow, matBT); + pb_InitializeTimerSet(&timers); + __visc__init(); + pb_SwitchToTimer( &timers, pb_TimerID_COMPUTE ); // copy A to device memory A_sz = matArow*matAcol*sizeof(float); @@ -139,15 +139,8 @@ int main (int argc, char *argv[]) { basicSgemm('N', 'T', matArow, matBcol, matAcol, 1.0f, \ &matA.front(), A_sz, matArow, &matBT.front(), B_sz, matBcol, 0.0f, &matC.front(), C_sz, matArow); - if (params->outFile) { - pb_SwitchToTimer( &timers, pb_TimerID_COPY ); - - /* Write C to file */ - llvm_visc_request_mem(&matC.front(), C_sz); - pb_SwitchToTimer(&timers, pb_TimerID_IO); - writeColMajorMatrixFile(params->outFile, - matArow, matBcol, matC); - } + pb_SwitchToTimer( &timers, pb_TimerID_COPY ); + llvm_visc_request_mem(&matC.front(), C_sz); pb_SwitchToTimer( &timers, visc_TimerID_MEM_UNTRACK ); llvm_visc_untrack_mem(&matA.front()); @@ -155,10 +148,20 @@ int main (int argc, char *argv[]) { llvm_visc_untrack_mem(&matC.front()); pb_SwitchToTimer(&timers, pb_TimerID_NONE); - double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); - std::cout<< "GFLOPs = " << 2.* matArow * matBcol * matAcol/GPUtime/1e9 << std::endl; + pb_PrintTimerSet(&timers); __visc__cleanup(); + + if (params->outFile) { + + /* Write C to file */ + //pb_SwitchToTimer(&timers, pb_TimerID_IO); + writeColMajorMatrixFile(params->outFile, + matArow, matBcol, matC); + } + + double GPUtime = pb_GetElapsedTime(&(timers.timers[pb_TimerID_KERNEL])); + std::cout<< "GFLOPs = " << 2.* matArow * matBcol * matAcol/GPUtime/1e9 << std::endl; pb_FreeParameters(params); return 0; diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c index 87088bf98c..b42bf009c9 100644 --- a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c +++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c @@ -43,6 +43,51 @@ int main(int argc, char** argv) { exit(-1); } + //load matrix from files + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + //inputData(parameters->inpFiles[0], &len, &depth, &dim,&nzcnt_len,&pad, + // &h_data, &h_indices, &h_ptr, + // &h_perm, &h_nzcnt); + int col_count; + + //parameters declaration + int len; + int depth; + int dim; + int pad=32; + int nzcnt_len; + + //host memory allocation + //matrix + float *h_data; + int *h_indices; + int *h_ptr; + int *h_perm; + int *h_nzcnt; + + //vector + float *h_Ax_vector; + float *h_x_vector; + + + coo_to_jds( + parameters->inpFiles[0], // bcsstk32.mtx, fidapm05.mtx, jgl009.mtx + 1, // row padding + pad, // warp size + 1, // pack size + 1, // is mirrored? + 0, // binary matrix + 1, // debug level [0:2] + &h_data, &h_ptr, &h_nzcnt, &h_indices, &h_perm, + &col_count, &dim, &len, &nzcnt_len, &depth + ); + + + h_Ax_vector=(float*)malloc(sizeof(float)*dim); + h_x_vector=(float*)malloc(sizeof(float)*dim); + input_vec( parameters->inpFiles[1],h_x_vector,dim); + + pb_InitializeTimerSet(&timers); pb_SwitchToTimer(&timers, visc_TimerID_SETUP); @@ -81,25 +126,6 @@ int main(int argc, char** argv) { pb_CreateAndBuildKernelFromBinary("build/opencl_nvidia_default/kernel_offline.nvptx.s", "spmv_jds", &clContext, &clDevice, &clProgram, &clKernel); pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - //parameters declaration - int len; - int depth; - int dim; - int pad=32; - int nzcnt_len; - - //host memory allocation - //matrix - float *h_data; - int *h_indices; - int *h_ptr; - int *h_perm; - int *h_nzcnt; - - //vector - float *h_Ax_vector; - float *h_x_vector; - //device memory allocation //matrix cl_mem d_data; @@ -112,30 +138,6 @@ int main(int argc, char** argv) { cl_mem jds_ptr_int; cl_mem sh_zcnt_int; - //load matrix from files - pb_SwitchToTimer(&timers, pb_TimerID_IO); - //inputData(parameters->inpFiles[0], &len, &depth, &dim,&nzcnt_len,&pad, - // &h_data, &h_indices, &h_ptr, - // &h_perm, &h_nzcnt); - int col_count; - - coo_to_jds( - parameters->inpFiles[0], // bcsstk32.mtx, fidapm05.mtx, jgl009.mtx - 1, // row padding - pad, // warp size - 1, // pack size - 1, // is mirrored? - 0, // binary matrix - 1, // debug level [0:2] - &h_data, &h_ptr, &h_nzcnt, &h_indices, &h_perm, - &col_count, &dim, &len, &nzcnt_len, &depth - ); - - - h_Ax_vector=(float*)malloc(sizeof(float)*dim); - h_x_vector=(float*)malloc(sizeof(float)*dim); - input_vec( parameters->inpFiles[1],h_x_vector,dim); - pb_SwitchToTimer(&timers, visc_TimerID_SETUP); OpenCLDeviceProp clDeviceProp; @@ -247,12 +249,15 @@ int main(int argc, char** argv) { clStatus = clReleaseCommandQueue(clCommandQueue); clStatus = clReleaseContext(clContext); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); if (parameters->outFile) { - pb_SwitchToTimer(&timers, pb_TimerID_IO); + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ outputData(parameters->outFile,h_Ax_vector,dim); } - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ //free((void*)clSource[0]); @@ -264,9 +269,6 @@ int main(int argc, char** argv) { free (h_Ax_vector); free (h_x_vector); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); pb_FreeParameters(parameters); return 0; diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c index cf153ac4d4..25d81214dc 100644 --- a/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c +++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c @@ -112,9 +112,7 @@ int main(int argc, char** argv) { exit(-1); } - pb_InitializeTimerSet(&timers); - __visc__init(); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ //parameters declaration int len; @@ -136,7 +134,7 @@ int main(int argc, char** argv) { float *h_x_vector; //load matrix from files - pb_SwitchToTimer(&timers, pb_TimerID_IO); + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ //inputData(parameters->inpFiles[0], &len, &depth, &dim,&nzcnt_len,&pad, // &h_data, &h_indices, &h_ptr, // &h_perm, &h_nzcnt); @@ -158,6 +156,9 @@ int main(int argc, char** argv) { h_x_vector=(float*)malloc(sizeof(float)*dim); input_vec( parameters->inpFiles[1],h_x_vector,dim); + pb_InitializeTimerSet(&timers); + __visc__init(); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); memset(h_Ax_vector, 0, dim*sizeof(float)); @@ -218,17 +219,12 @@ int main(int argc, char** argv) { */ } - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - //HtoD memory copy + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + llvm_visc_request_mem(h_Ax_vector, dim*sizeof(float)); - if (parameters->outFile) { - llvm_visc_request_mem(h_Ax_vector, dim*sizeof(float)); - pb_SwitchToTimer(&timers, pb_TimerID_IO); - outputData(parameters->outFile,h_Ax_vector,dim); - } - pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); + pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); llvm_visc_untrack_mem(h_Ax_vector); llvm_visc_untrack_mem(h_data); @@ -237,7 +233,17 @@ int main(int argc, char** argv) { llvm_visc_untrack_mem(h_x_vector); llvm_visc_untrack_mem(h_ptr); llvm_visc_untrack_mem(h_nzcnt); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); + __visc__cleanup(); + + if (parameters->outFile) { + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + outputData(parameters->outFile,h_Ax_vector,dim); + + } + free (h_data); free (h_indices); free (h_ptr); @@ -246,10 +252,6 @@ int main(int argc, char** argv) { free (h_Ax_vector); free (h_x_vector); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); - __visc__cleanup(); pb_FreeParameters(parameters); return 0; diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c index 5238110bae..717efc5341 100644 --- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c @@ -50,8 +50,6 @@ int main(int argc, char** argv) { printf("Author: Li-Wen Chang <lchang20@illinois.edu>\n"); parameters = pb_ReadParameters(&argc, argv); - pb_InitializeTimerSet(&timers); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); //declaration unsigned nx,ny,nz; @@ -83,6 +81,21 @@ int main(int argc, char** argv) { if(iteration<1) return -1; + //host data + float *h_A0; + float *h_Anext; + //load data from files + + size=nx*ny*nz; + + h_A0=(float*)malloc(sizeof(float)*size); + h_Anext=(float*)malloc(sizeof(float)*size); + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + FILE *fp = fopen(parameters->inpFiles[0], "rb"); + read_data(h_A0, nx,ny,nz,fp); + fclose(fp); + + pb_InitializeTimerSet(&timers); pb_SwitchToTimer(&timers, visc_TimerID_SETUP); cl_int clStatus; cl_platform_id clPlatform; @@ -120,24 +133,12 @@ int main(int argc, char** argv) { //CHECK_ERROR("clCreateKernel") pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - //host data - float *h_A0; - float *h_Anext; //device cl_mem d_A0; cl_mem d_Anext; - //load data from files - - size=nx*ny*nz; - - h_A0=(float*)malloc(sizeof(float)*size); - h_Anext=(float*)malloc(sizeof(float)*size); - pb_SwitchToTimer(&timers, pb_TimerID_IO); - FILE *fp = fopen(parameters->inpFiles[0], "rb"); - read_data(h_A0, nx,ny,nz,fp); - fclose(fp); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); memcpy (h_Anext,h_A0,sizeof(float)*size); pb_SwitchToTimer(&timers, visc_TimerID_SETUP); @@ -219,20 +220,21 @@ int main(int argc, char** argv) { clStatus = clReleaseContext(clContext); CHECK_ERROR("clReleaseContext") + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); + if (parameters->outFile) { - pb_SwitchToTimer(&timers, pb_TimerID_IO); + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ outputData(parameters->outFile,h_Anext,nx,ny,nz); } - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ //free((void*)clSource[0]); free(h_A0); free(h_Anext); - pb_SwitchToTimer(&timers, pb_TimerID_NONE); - - pb_PrintTimerSet(&timers); pb_FreeParameters(parameters); return 0; diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c index c2ec412111..865b82d781 100644 --- a/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c @@ -61,9 +61,7 @@ int main(int argc, char** argv) { printf("Author: Li-Wen Chang <lchang20@illinois.edu>\n"); parameters = pb_ReadParameters(&argc, argv); - pb_InitializeTimerSet(&timers); - __visc__init(); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ //declaration int nx,ny,nz; @@ -106,18 +104,24 @@ int main(int argc, char** argv) { h_A0=(float*)malloc(sizeof(float)*size); h_Anext=(float*)malloc(sizeof(float)*size); - pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK); - llvm_visc_track_mem(h_A0, sizeof(float)*size); - llvm_visc_track_mem(h_Anext, sizeof(float)*size); - pb_SwitchToTimer(&timers, pb_TimerID_IO); + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ FILE *fp = fopen(parameters->inpFiles[0], "rb"); read_data(h_A0, nx,ny,nz,fp); fclose(fp); - memcpy (h_Anext,h_A0,sizeof(float)*size); - + + pb_InitializeTimerSet(&timers); + __visc__init(); + + pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK); + llvm_visc_track_mem(h_A0, sizeof(float)*size); + llvm_visc_track_mem(h_Anext, sizeof(float)*size); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + memcpy (h_Anext,h_A0,sizeof(float)*size); + + //only use 1D thread block int tx =256; int block[3] = {tx,1,1}; @@ -149,22 +153,24 @@ int main(int argc, char** argv) { pb_SwitchToTimer(&timers, pb_TimerID_COPY); llvm_visc_request_mem(h_Anext, bytes); - if (parameters->outFile) { - pb_SwitchToTimer(&timers, pb_TimerID_IO); - outputData(parameters->outFile,h_Anext,nx,ny,nz); - - } pb_SwitchToTimer(&timers, visc_TimerID_MEM_UNTRACK); llvm_visc_untrack_mem(h_A0); llvm_visc_untrack_mem(h_Anext); - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - free(h_A0); - free(h_Anext); + pb_SwitchToTimer(&timers, pb_TimerID_NONE); pb_PrintTimerSet(&timers); __visc__cleanup(); + + if (parameters->outFile) { + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + outputData(parameters->outFile,h_Anext,nx,ny,nz); + + } + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + free(h_A0); + free(h_Anext); pb_FreeParameters(parameters); return 0; diff --git a/llvm/test/VISC/parboil/common/include/visc.h b/llvm/test/VISC/parboil/common/include/visc.h index e311c73021..ce3a1f2edb 100644 --- a/llvm/test/VISC/parboil/common/include/visc.h +++ b/llvm/test/VISC/parboil/common/include/visc.h @@ -5,10 +5,12 @@ *cr All Rights Reserved *cr ***************************************************************************/ +#include "llvm/SupportVISC/VISCHint.h" #ifdef __cplusplus extern "C" { void __visc__attributes(unsigned, ...); +void __visc__hint(visc::target); void __visc__wait(unsigned); unsigned __visc__node(...); void __visc__init(); diff --git a/llvm/test/VISC/parboil/common/mk/opencl.mk b/llvm/test/VISC/parboil/common/mk/opencl.mk index 7c3ed00d10..5bd73bb97f 100644 --- a/llvm/test/VISC/parboil/common/mk/opencl.mk +++ b/llvm/test/VISC/parboil/common/mk/opencl.mk @@ -115,7 +115,7 @@ $(BUILDDIR)/%.linked.bc : $(BUILDDIR)/%.ll $(LLVM_LINK) $(LIBCLC)/built_libs/nvptx--nvidiacl.bc $< -o $@ $(BUILDDIR)/%.ll : $(SRCDIR)/%.cl - $(LLVM_CC) -Dcl_clang_storage_class_specifiers -isystem $(LIBCLC)/generic/include -include clc/clc.h -target nvptx--nvidiacl $< -O3 -emit-llvm -S -o $@ + $(LLVM_CC) $(CFLAGS) -Dcl_clang_storage_class_specifiers -isystem $(LIBCLC)/generic/include -include clc/clc.h -target nvptx--nvidiacl $< -O3 -emit-llvm -S -o $@ sed -e "s/ addrspace(.)//g" -i $@ no_opencl: @echo "OPENCL_PATH is not set. Open $(PARBOIL_ROOT)/common/Makefile.conf to set default value." diff --git a/llvm/test/VISC/parboil/common/mk/visc.mk b/llvm/test/VISC/parboil/common/mk/visc.mk index 4445260532..3c7382d1d8 100644 --- a/llvm/test/VISC/parboil/common/mk/visc.mk +++ b/llvm/test/VISC/parboil/common/mk/visc.mk @@ -158,10 +158,14 @@ $(BUILDDIR)/%.ll: $(SRCDIR)/%.c $(BUILDDIR)/%.ll : $(SRCDIR)/%.cc $(CXX) $(CXXFLAGS) -S -emit-llvm $< -o $@ +$(BUILDDIR)/%.ll : $(SRCDIR)/%.cpp + $(CXX) $(CXXFLAGS) -S -emit-llvm $< -o $@ + $(BUILDDIR)/%.visc.ll: $(BUILDDIR)/%.ll $(OPT) $(TESTGEN_OPTFLAGS) $< -S -o $@ cat $(LLVM_SRC_ROOT)/test/VISC/parboil/RUN.parboil.script $@ > $@.tmp mv $@.tmp $(BUILDDIR)/$(APP).visc.ll + #@cp $(VISC_OBJS) $(BUILDDIR)/$(VISC_OBJS) $(BUILDDIR)/%.o : $(SRCDIR)/%.c $(CC) $(CFLAGS) -c $< -o $@ @@ -175,6 +179,9 @@ $(BUILDDIR)/%.o : $(SRCDIR)/%.cc $(BUILDDIR)/%.o : $(SRCDIR)/%.cpp $(CXX) $(CXXFLAGS) -c $< -o $@ +$(BUILDDIR)/%.o : $(SRCDIR)/%.cpp + $(CXX) $(CXXFLAGS) -c $< -o $@ + no_opencl: @echo "OPENCL_PATH is not set. Open $(PARBOIL_ROOT)/common/Makefile.conf to set default value." @echo "You may use $(PLATFORM_MK) if you want a platform specific configurations." diff --git a/llvm/test/VISC/parboil/common/platform/visc.default.mk b/llvm/test/VISC/parboil/common/platform/visc.default.mk index 983c3c971f..a7ab89acb0 100644 --- a/llvm/test/VISC/parboil/common/platform/visc.default.mk +++ b/llvm/test/VISC/parboil/common/platform/visc.default.mk @@ -15,10 +15,10 @@ VISC_BUILD_DIR = Release+Asserts # gcc (default) CC = $(LLVM_SRC_ROOT)/$(VISC_BUILD_DIR)/bin/clang -PLATFORM_CFLAGS = +PLATFORM_CFLAGS = -I$(LLVM_SRC_ROOT)/include CXX = $(LLVM_SRC_ROOT)/$(VISC_BUILD_DIR)/bin/clang++ -PLATFORM_CXXFLAGS = +PLATFORM_CXXFLAGS = -I$(LLVM_SRC_ROOT)/include LINKER = $(LLVM_SRC_ROOT)/$(VISC_BUILD_DIR)/bin/clang++ PLATFORM_LDFLAGS = -lm -lpthread -lOpenCL -- GitLab