diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
index ec0aef4ecad00e33521592dbaa28e3f41607f296..542b3283629314f27b70ecfed13ca74aa641cf79 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
@@ -83,7 +83,7 @@ int main(int argc, char** argv) {
     if(iteration<1)
         return -1;
 
-    printf("nx = %d, ny = %d, nz = %d, c0 = %f, c1 = %f\n", nx, ny, nz, c0, c1);
+    pb_SwitchToTimer(&timers, visc_TimerID_SETUP);
     cl_int clStatus;
     cl_platform_id clPlatform;
     clStatus = clGetPlatformIDs(1,&clPlatform,NULL);
@@ -115,6 +115,7 @@ int main(int argc, char** argv) {
     cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus);
     CHECK_ERROR("clCreateKernel")
 
+    pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
     //host data
     float *h_A0;
     float *h_Anext;
@@ -135,7 +136,7 @@ int main(int argc, char** argv) {
     fclose(fp);
     memcpy (h_Anext,h_A0,sizeof(float)*size);
 
-    pb_SwitchToTimer(&timers, pb_TimerID_COPY);
+    pb_SwitchToTimer(&timers, visc_TimerID_SETUP);
 
     //memory allocation
     d_A0 = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus);
@@ -143,6 +144,7 @@ int main(int argc, char** argv) {
     d_Anext = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus);
     CHECK_ERROR("clCreateBuffer")
 
+    pb_SwitchToTimer(&timers, pb_TimerID_COPY);
     //memory copy
     clStatus = clEnqueueWriteBuffer(clCommandQueue,d_A0,CL_FALSE,0,size*sizeof(float),h_A0,0,NULL,NULL);
     CHECK_ERROR("clEnqueueWriteBuffer")
@@ -160,6 +162,7 @@ int main(int argc, char** argv) {
 //  printf("block x is %d and y is %d z \n",block[0],block[1]);
 //  printf("grid x is %d and y is %d\n",grid[0],grid[1]);
 
+    pb_SwitchToTimer(&timers, visc_TimerID_SETUP);
     clStatus = clSetKernelArg(clKernel,0,sizeof(float),(void*)&c0);
     clStatus = clSetKernelArg(clKernel,1,sizeof(float),(void*)&c1);
     clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&d_A0);
@@ -170,20 +173,24 @@ int main(int argc, char** argv) {
     CHECK_ERROR("clSetKernelArg")
 
     //main execution
-    pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
-
+    pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
     int t;
     for(t=0; t<iteration; t++)
     {
+        pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
         clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,grid,block,0,NULL,NULL);
         //printf("iteration %d\n",t)
         CHECK_ERROR("clEnqueueNDRangeKernel")
+        pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
 
         cl_mem d_temp = d_A0;
         d_A0 = d_Anext;
         d_Anext = d_temp;
+
+        pb_SwitchToTimer(&timers, visc_TimerID_SETUP);
         clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&d_A0);
         clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),(void*)&d_Anext);
+        pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
 
     }
 
@@ -199,6 +206,7 @@ int main(int argc, char** argv) {
     clStatus = clEnqueueReadBuffer(clCommandQueue,d_Anext,CL_TRUE,0,size*sizeof(float),h_Anext,0,NULL,NULL);
     CHECK_ERROR("clEnqueueReadBuffer")
 
+    pb_SwitchToTimer(&timers, visc_TimerID_SETUP);
     clStatus = clReleaseMemObject(d_A0);
     clStatus = clReleaseMemObject(d_Anext);
     clStatus = clReleaseKernel(clKernel);
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c
index be4ab753a41f7b1a3ecd2b546618e100368297b4..c2ec412111a4daafe85def4e1305397efdd41b8f 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/visc/stencil.c
@@ -54,7 +54,6 @@ void naive_kernel(float c0,float c1, float* A0, float *Anext,int nx,int ny,int n
 }
 
 int main(int argc, char** argv) {
-    __visc__init();
     struct pb_TimerSet timers;
     struct pb_Parameters *parameters;
 
@@ -63,6 +62,7 @@ int main(int argc, char** argv) {
     parameters = pb_ReadParameters(&argc, argv);
 
     pb_InitializeTimerSet(&timers);
+    __visc__init();
     pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
 
     //declaration
@@ -95,8 +95,6 @@ int main(int argc, char** argv) {
     if(iteration<1)
         return -1;
 
-    printf("nx = %d, ny = %d, nz = %d, c0 = %f, c1 = %f\n", nx, ny, nz, c0, c1);
-    
     //host data
     float *h_A0;
     float *h_Anext;
@@ -106,15 +104,18 @@ int main(int argc, char** argv) {
     size=nx*ny*nz;
 
     h_A0=(float*)malloc(sizeof(float)*size);
-    llvm_visc_track_mem(h_A0, sizeof(float)*size);
     h_Anext=(float*)malloc(sizeof(float)*size);
+
+    pb_SwitchToTimer(&timers, visc_TimerID_MEM_TRACK);
+    llvm_visc_track_mem(h_A0, sizeof(float)*size);
     llvm_visc_track_mem(h_Anext, sizeof(float)*size);
+
     pb_SwitchToTimer(&timers, pb_TimerID_IO);
     FILE *fp = fopen(parameters->inpFiles[0], "rb");
     read_data(h_A0, nx,ny,nz,fp);
     fclose(fp);
     memcpy (h_Anext,h_A0,sizeof(float)*size);
-
+    
     pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
 
     //only use 1D thread block
@@ -125,15 +126,16 @@ int main(int argc, char** argv) {
     size_t offset[3] = {1,1,1};
 
     //main execution
-    pb_SwitchToTimer(&timers, pb_TimerID_NONE);
 
     int t;
     size_t bytes = size*sizeof(float);
     for(t=0; t<iteration; t++)
     {
+        pb_SwitchToTimer(&timers, pb_TimerID_NONE);
         unsigned stencilDFG = __visc__node(naive_kernel, 2, 3, block[0], block[1], block[2], grid[0]/block[0], grid[1]/block[1], grid[2]/block[2], 9, (float)c0, (float)c1, h_A0, bytes, h_Anext, bytes, nx, ny, nz, 0);
         __visc__wait(stencilDFG);
         //printf("iteration %d\n",t);
+        pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
         float* h_temp = h_A0;
         h_A0 = h_Anext;
         h_Anext = h_temp;
@@ -160,8 +162,8 @@ int main(int argc, char** argv) {
     free(h_A0);
     free(h_Anext);
     pb_SwitchToTimer(&timers, pb_TimerID_NONE);
-
     pb_PrintTimerSet(&timers);
+
     __visc__cleanup();
     pb_FreeParameters(parameters);