diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c index bee09b88c00c325ecd2238c5b521bac9b961b059..661fc7a2c019f3605b4e0680f3ec8988b2487fe9 100644 --- a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c +++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_nvidia/main.c @@ -77,6 +77,7 @@ int main(int argc, char** argv) { cl_kernel clKernel = clCreateKernel(clProgram,"spmv_jds",&clStatus); CHECK_ERROR("clCreateKernel") + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); //parameters declaration int len; int depth; @@ -130,9 +131,9 @@ int main(int argc, char** argv) { h_Ax_vector=(float*)malloc(sizeof(float)*dim); h_x_vector=(float*)malloc(sizeof(float)*dim); - input_vec( parameters->inpFiles[1],h_x_vector,dim); - pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + input_vec( parameters->inpFiles[1],h_x_vector,dim); + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); OpenCLDeviceProp clDeviceProp; clStatus = clGetDeviceInfo(clDevice,CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,sizeof(cl_uint),&(clDeviceProp.major),NULL); @@ -206,14 +207,16 @@ pb_SwitchToTimer(&timers, pb_TimerID_COPY); CHECK_ERROR("clSetKernelArg") //main execution - pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); printf("grid = %lu, block = %lu\n", grid, block); int i; for(i=0; i<50; i++) { + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&grid,&block,0,NULL,NULL); CHECK_ERROR("clEnqueueNDRangeKernel") + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); } clStatus = clFinish(clCommandQueue); @@ -226,6 +229,7 @@ pb_SwitchToTimer(&timers, pb_TimerID_COPY); clStatus = clEnqueueReadBuffer(clCommandQueue,d_Ax_vector,CL_TRUE,0,dim*sizeof(float),h_Ax_vector,0,NULL,NULL); CHECK_ERROR("clEnqueueReadBuffer") + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); clStatus = clReleaseKernel(clKernel); clStatus = clReleaseProgram(clProgram); diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c index d2db5c2e57d091d269c30fd2788511bad406486c..78cef8f7af9fc73944add96b31b2f1af32e3cff6 100644 --- a/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c +++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/visc/main.c @@ -158,16 +158,14 @@ int main(int argc, char** argv) { h_x_vector=(float*)malloc(sizeof(float)*dim); input_vec( parameters->inpFiles[1],h_x_vector,dim); - pb_SwitchToTimer(&timers, pb_TimerID_COPY); - + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); memset(h_Ax_vector, 0, dim*sizeof(float)); - size_t grid; size_t block; - pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); compute_active_thread(&block, &grid, nzcnt_len, pad, 3, 0, 8); + pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK); llvm_visc_track_mem(h_Ax_vector, dim*sizeof(float)); llvm_visc_track_mem(h_data, len*sizeof(float)); @@ -180,12 +178,12 @@ int main(int argc, char** argv) { //main execution - pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); int i; for(i=0; i<50; i++) { - + pb_SwitchToTimer(&timers, pb_TimerID_NONE); unsigned spmvDFG = __visc__node(spmv_jds, 2, 1, (unsigned)block, (unsigned)(grid/block), 15, h_Ax_vector, @@ -205,6 +203,7 @@ int main(int argc, char** argv) { nzcnt_len*sizeof(int), 0); __visc__wait(spmvDFG); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); /******************************* Issues ******************************* * 1. Using OpenCL to compute grid and block dimensions