diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_cpu/Makefile b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_cpu/Makefile
index 36b421ec6f1359114ea0035d21048ab0b95bf30e..d9c2b594b136f56c0903b8bddd2dab2482d89e55 100644
--- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_cpu/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_cpu/Makefile
@@ -5,4 +5,3 @@ SRCDIR_OBJS=main.o io.o #compute_gold.o
 APP_CUDALDFLAGS=-lm -lstdc++
 APP_CFLAGS=-ffast-math -O3
 APP_CXXFLAGS=-ffast-math -O3
-KERNEL_OBJS=kernel_offline.nvptx.s
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/Makefile b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/Makefile
new file mode 100644
index 0000000000000000000000000000000000000000..82f13f063fed479fdf1e88e5a9ab0c842f3ad19a
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/Makefile
@@ -0,0 +1,10 @@
+# (c) 2010 The Board of Trustees of the University of Illinois.
+
+LANGUAGE=opencl
+TOOLS_SRC=common_src/convert-dataset
+SRCDIR_OBJS=main.o gpu_info.o file.o ocl.o
+APP_CUDALDFLAGS=-lm
+APP_CFLAGS=-ffast-math -g3 -O3 -I$(TOOLS_SRC)
+APP_CXXFLAGS=-ffast-math -g3 -O3
+
+include $(TOOLS_SRC)/commontools.mk
\ No newline at end of file
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/file.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/file.c
new file mode 100644
index 0000000000000000000000000000000000000000..43e99f3e049704495160883afc20923bd8d61cd3
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/file.c
@@ -0,0 +1,78 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2007 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+#include <endian.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+#if __BYTE_ORDER != __LITTLE_ENDIAN
+# error "File I/O is not implemented for this system: wrong endianness."
+#endif
+
+void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
+               float** h_data, int** h_indices, int** h_ptr,
+               int** h_perm, int** h_nzcnt)
+{
+  FILE* fid = fopen(fName, "rb");
+
+  if (fid == NULL)
+    {
+      fprintf(stderr, "Cannot open input file\n");
+      exit(-1);
+    }
+
+  fscanf(fid, "%d %d %d %d %d\n",len,depth,nzcnt_len,dim,pad);
+  int _len=len[0];
+  int _depth=depth[0];
+  int _dim=dim[0];
+  int _pad=pad[0];
+  int _nzcnt_len=nzcnt_len[0];
+  
+  *h_data = (float *) malloc(_len * sizeof (float));
+  fread (*h_data, sizeof (float), _len, fid);
+  
+  *h_indices = (int *) malloc(_len * sizeof (int));
+  fread (*h_indices, sizeof (int), _len, fid);
+  
+  *h_ptr = (int *) malloc(_depth * sizeof (int));
+  fread (*h_ptr, sizeof (int), _depth, fid);
+  
+  *h_perm = (int *) malloc(_dim * sizeof (int));
+  fread (*h_perm, sizeof (int), _dim, fid);
+  
+  *h_nzcnt = (int *) malloc(_nzcnt_len * sizeof (int));
+  fread (*h_nzcnt, sizeof (int), _nzcnt_len, fid);
+
+  fclose (fid); 
+}
+
+void input_vec(char *fName,float *h_vec,int dim)
+{
+  FILE* fid = fopen(fName, "rb");
+  fread (h_vec, sizeof (float), dim, fid);
+  fclose(fid);
+  
+}
+
+void outputData(char* fName, float *h_Ax_vector,int dim)
+{
+  FILE* fid = fopen(fName, "w");
+  uint32_t tmp32;
+  if (fid == NULL)
+    {
+      fprintf(stderr, "Cannot open output file\n");
+      exit(-1);
+    }
+  tmp32 = dim;
+  fwrite(&tmp32, sizeof(uint32_t), 1, fid);
+  fwrite(h_Ax_vector, sizeof(float), dim, fid);
+
+  fclose (fid);
+}
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/file.h b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/file.h
new file mode 100644
index 0000000000000000000000000000000000000000..65f80135f8059d570d19366ecf1c6372a12dff62
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/file.h
@@ -0,0 +1,18 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2007 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+#ifndef __FILEH__
+#define __FILEH__
+
+void inputData(char* fName, int* len, int* depth, int* dim,int *nzcnt_len,int *pad,
+               float** h_data, int** h_indices, int** h_ptr,
+               int** h_perm, int** h_nzcnt);
+
+void input_vec(char* fNanme, float *h_vec,int dim);
+void outputData(char* fName, float *h_Ax_vector,int dim);
+
+#endif
\ No newline at end of file
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/gpu_info.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/gpu_info.c
new file mode 100644
index 0000000000000000000000000000000000000000..c0cc9b29aa4260548469735c8a341f2f53ad1efc
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/gpu_info.c
@@ -0,0 +1,55 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+#include <endian.h>
+#include <stdlib.h>
+#include <malloc.h>
+#include <stdio.h>
+#include <inttypes.h>
+
+#include "gpu_info.h"
+
+void compute_active_thread(size_t *thread,
+			   size_t *grid,
+			   int task,
+			   int pad,
+			   int major,
+			   int minor,
+			   int sm)
+{
+	int max_thread;
+	int max_block=8;
+	if(major==1)
+	{
+		if(minor>=2)
+			max_thread=1024;
+		else
+			max_thread=768;
+	}
+	else if(major==2)
+		max_thread=1536;
+	else
+		//newer GPU  //keep using 2.0
+		max_thread=1536;
+	
+	int _grid;
+	int _thread;
+	
+	if(task*pad>sm*max_thread)
+	{
+		_thread=max_thread/max_block;
+		_grid = ((task*pad+_thread-1)/_thread)*_thread;
+	}
+	else
+	{
+		_thread=pad;
+		_grid=task*pad;
+	}
+
+	thread[0]=_thread;
+	grid[0]=_grid;
+}
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/gpu_info.h b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/gpu_info.h
new file mode 100644
index 0000000000000000000000000000000000000000..4219cda933c58332203b27e0ee3f8133bca2c0e8
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/gpu_info.h
@@ -0,0 +1,20 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+#ifndef __GPUINFOH__
+#define __GPUINFOH__
+
+void compute_active_thread(size_t *thread,
+			   size_t *grid,
+			   int task,
+			   int pad,
+			   int major,
+			   int minor,
+			   int sm);
+
+#endif
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/kernel.cl b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/kernel.cl
new file mode 100644
index 0000000000000000000000000000000000000000..db5d147d88c83a5854aa54627b1f41548b8b3047
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/kernel.cl
@@ -0,0 +1,35 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+__kernel void spmv_jds_naive(__global float *dst_vector, __global float *d_data,
+		       	     __global int *d_index, __global int *d_perm,
+		             __global float *x_vec, const int dim, 
+		             __constant int *jds_ptr_int,
+		             __constant int *sh_zcnt_int)
+{
+  	int ix = get_global_id(0);
+
+  	if (ix < dim) {
+    		float sum = 0.0f;
+    		// 32 is warp size
+    		int bound=sh_zcnt_int[ix/32];
+
+	    	for(int k=0;k<bound;k++)
+    		{	  
+      			int j = jds_ptr_int[k] + ix;    
+      			int in = d_index[j]; 
+  
+      			float d = d_data[j];
+      			float t = x_vec[in];
+
+      			sum += d*t; 
+    		}  
+  
+    		dst_vector[d_perm[ix]] = sum; 
+  	}
+}
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/main.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/main.c
new file mode 100644
index 0000000000000000000000000000000000000000..98cc2e1d53a65818f31a4140d0bef14941c2f7ce
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/main.c
@@ -0,0 +1,268 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+#include <CL/cl.h>
+#include <CL/cl_ext.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <parboil.h>
+
+
+
+#include "file.h"
+#include "gpu_info.h"
+#include "ocl.h"
+#include "convert_dataset.h"
+
+static int generate_vector(float *x_vector, int dim) 
+{	
+	srand(54321);
+	int i;
+	for(i=0;i<dim;i++)
+	{
+		x_vector[i] = (rand() / (float) RAND_MAX);
+	}
+	return 0;
+}
+
+int main(int argc, char** argv) {
+	struct pb_TimerSet timers;
+	struct pb_Parameters *parameters;
+	
+	printf("CUDA accelerated sparse matrix vector multiplication****\n");
+	printf("Original version by Li-Wen Chang <lchang20@illinois.edu> and Shengzhao Wu<wu14@illinois.edu>\n");
+	printf("This version maintained by Chris Rodrigues  ***********\n");
+	parameters = pb_ReadParameters(&argc, argv);
+	if ((parameters->inpFiles[0] == NULL) || (parameters->inpFiles[1] == NULL))
+    	{
+      		fprintf(stderr, "Expecting one input filename\n");
+      		exit(-1);
+    	}
+
+	pb_InitializeTimerSet(&timers);
+	pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
+	
+	//parameters declaration
+	cl_int clStatus;
+
+        cl_uint numPlatforms;
+	clStatus = clGetPlatformIDs(0, NULL, &numPlatforms);
+	CHECK_ERROR("clGetPlatformIDs")
+	
+          cl_platform_id clPlatform[numPlatforms];
+	clStatus = clGetPlatformIDs(numPlatforms, clPlatform, NULL);
+	CHECK_ERROR("clGetPlatformIDs")
+
+	cl_context_properties clCps[3] = {CL_CONTEXT_PLATFORM,(cl_context_properties)clPlatform[1],0};
+	
+	cl_device_id clDevice;
+	clStatus = clGetDeviceIDs(clPlatform[1],CL_DEVICE_TYPE_CPU,1,&clDevice,NULL);
+	CHECK_ERROR("clGetDeviceIDs")
+
+	cl_context clContext = clCreateContextFromType(clCps,CL_DEVICE_TYPE_CPU,NULL,NULL,&clStatus);
+	CHECK_ERROR("clCreateContextFromType")
+
+	cl_command_queue clCommandQueue = clCreateCommandQueue(clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus);
+	CHECK_ERROR("clCreateCommandQueue")
+
+  	pb_SetOpenCL(&clContext, &clCommandQueue);
+	
+	const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
+	cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
+	CHECK_ERROR("clCreateProgramWithSource")
+
+	char clOptions[50];
+	sprintf(clOptions,"");
+	clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
+	CHECK_ERROR("clBuildProgram")
+
+	cl_kernel clKernel = clCreateKernel(clProgram,"spmv_jds_naive",&clStatus);
+	CHECK_ERROR("clCreateKernel")
+
+	int len;
+	int depth;
+	int dim;
+	int pad=32;
+	int nzcnt_len;
+	
+	//host memory allocation
+	//matrix
+	float *h_data;
+	int *h_indices;
+	int *h_ptr;
+	int *h_perm;
+	int *h_nzcnt;
+	//vector
+	float *h_Ax_vector;
+	float *h_x_vector;
+	
+	//device memory allocation
+	//matrix
+	cl_mem d_data;
+	cl_mem d_indices;
+	cl_mem d_ptr;
+	cl_mem d_perm;
+	cl_mem d_nzcnt;
+
+	//vector
+	cl_mem d_Ax_vector;
+	cl_mem d_x_vector;
+	
+	cl_mem jds_ptr_int;
+	cl_mem sh_zcnt_int;
+
+    	//load matrix from files
+	pb_SwitchToTimer(&timers, pb_TimerID_IO);
+	//inputData(parameters->inpFiles[0], &len, &depth, &dim,&nzcnt_len,&pad,
+	//    &h_data, &h_indices, &h_ptr,
+	//    &h_perm, &h_nzcnt);
+	int col_count;
+	coo_to_jds(
+		parameters->inpFiles[0], // bcsstk32.mtx, fidapm05.mtx, jgl009.mtx
+		1, // row padding
+		pad, // warp size
+		1, // pack size
+		1, // is mirrored?
+		0, // binary matrix
+		1, // debug level [0:2]
+		&h_data, &h_ptr, &h_nzcnt, &h_indices, &h_perm,
+		&col_count, &dim, &len, &nzcnt_len, &depth
+	);
+	
+//	pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
+	h_Ax_vector=(float*)malloc(sizeof(float)*dim);	
+	h_x_vector=(float*)malloc(sizeof(float)*dim);	
+	
+  input_vec( parameters->inpFiles[1],h_x_vector,dim);
+
+	 pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
+    	OpenCLDeviceProp clDeviceProp;
+//	clStatus = clGetDeviceInfo(clDevice,CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV,sizeof(cl_uint),&(clDeviceProp.major),NULL);
+	//CHECK_ERROR("clGetDeviceInfo")
+//	clStatus = clGetDeviceInfo(clDevice,CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV,sizeof(cl_uint),&(clDeviceProp.minor),NULL);
+  //      CHECK_ERROR("clGetDeviceInfo")
+	clStatus = clGetDeviceInfo(clDevice,CL_DEVICE_MAX_COMPUTE_UNITS,sizeof(cl_uint),&(clDeviceProp.multiProcessorCount),NULL);
+        CHECK_ERROR("clGetDeviceInfo")
+	
+	pb_SwitchToTimer(&timers, pb_TimerID_COPY);
+	//memory allocation
+	d_data = clCreateBuffer(clContext,CL_MEM_READ_ONLY,len*sizeof(float),NULL,&clStatus);
+	CHECK_ERROR("clCreateBuffer")
+	d_indices = clCreateBuffer(clContext,CL_MEM_READ_ONLY,len*sizeof(int),NULL,&clStatus);
+	CHECK_ERROR("clCreateBuffer")
+	d_perm = clCreateBuffer(clContext,CL_MEM_READ_ONLY,dim*sizeof(int),NULL,&clStatus);
+	CHECK_ERROR("clCreateBuffer")
+	d_x_vector = clCreateBuffer(clContext,CL_MEM_READ_ONLY,dim*sizeof(float),NULL,&clStatus);
+	CHECK_ERROR("clCreateBuffer")
+	d_Ax_vector = clCreateBuffer(clContext,CL_MEM_WRITE_ONLY,dim*sizeof(float),NULL,&clStatus);
+	CHECK_ERROR("clCreateBuffer")
+
+	jds_ptr_int = clCreateBuffer(clContext,CL_MEM_READ_ONLY,5000*sizeof(int),NULL,&clStatus);
+	CHECK_ERROR("clCreateBuffer")
+	sh_zcnt_int = clCreateBuffer(clContext,CL_MEM_READ_ONLY,5000*sizeof(int),NULL,&clStatus);
+	CHECK_ERROR("clCreateBuffer")
+
+	clMemSet(clCommandQueue,d_Ax_vector,0,dim*sizeof(float));
+	
+	//memory copy
+	clStatus = clEnqueueWriteBuffer(clCommandQueue,d_data,CL_FALSE,0,len*sizeof(float),h_data,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueWriteBuffer")
+	clStatus = clEnqueueWriteBuffer(clCommandQueue,d_indices,CL_FALSE,0,len*sizeof(int),h_indices,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueWriteBuffer")
+	clStatus = clEnqueueWriteBuffer(clCommandQueue,d_perm,CL_FALSE,0,dim*sizeof(int),h_perm,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueWriteBuffer")
+	clStatus = clEnqueueWriteBuffer(clCommandQueue,d_x_vector,CL_FALSE,0,dim*sizeof(int),h_x_vector,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueWriteBuffer")
+
+	clStatus = clEnqueueWriteBuffer(clCommandQueue,jds_ptr_int,CL_FALSE,0,depth*sizeof(int),h_ptr,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueWriteBuffer")
+	clStatus = clEnqueueWriteBuffer(clCommandQueue,sh_zcnt_int,CL_TRUE,0,nzcnt_len*sizeof(int),h_nzcnt,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueWriteBuffer")
+	
+	pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
+	
+	size_t grid;
+	size_t block;
+
+	compute_active_thread(&block,&grid,nzcnt_len,pad,clDeviceProp.major,clDeviceProp.minor,clDeviceProp.multiProcessorCount);
+//  printf("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!grid is %d and block is %d=\n",grid,block);
+//  printf("!!! dim is %d\n",dim);
+
+	clStatus = clSetKernelArg(clKernel,0,sizeof(cl_mem),&d_Ax_vector);
+	CHECK_ERROR("clSetKernelArg")
+	clStatus = clSetKernelArg(clKernel,1,sizeof(cl_mem),&d_data);
+	CHECK_ERROR("clSetKernelArg")
+	clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),&d_indices);
+	CHECK_ERROR("clSetKernelArg")
+	clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),&d_perm);
+	CHECK_ERROR("clSetKernelArg")
+	clStatus = clSetKernelArg(clKernel,4,sizeof(cl_mem),&d_x_vector);
+	CHECK_ERROR("clSetKernelArg")
+	clStatus = clSetKernelArg(clKernel,5,sizeof(int),&dim);
+	CHECK_ERROR("clSetKernelArg")
+
+	clStatus = clSetKernelArg(clKernel,6,sizeof(cl_mem),&jds_ptr_int);
+	CHECK_ERROR("clSetKernelArg")
+	clStatus = clSetKernelArg(clKernel,7,sizeof(cl_mem),&sh_zcnt_int);
+        CHECK_ERROR("clSetKernelArg")
+
+	//main execution
+	pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
+
+	int i;
+	for (i=0; i<50; i++)
+	{
+		clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,1,NULL,&grid,&block,0,NULL,NULL);
+		CHECK_ERROR("clEnqueueNDRangeKernel")
+	}
+
+	clStatus = clFinish(clCommandQueue);
+	CHECK_ERROR("clFinish")
+	
+	pb_SwitchToTimer(&timers, pb_TimerID_COPY);
+	//HtoD memory copy
+	clStatus = clEnqueueReadBuffer(clCommandQueue,d_Ax_vector,CL_TRUE,0,dim*sizeof(float),h_Ax_vector,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueReadBuffer")	
+
+	clStatus = clReleaseKernel(clKernel);
+	clStatus = clReleaseProgram(clProgram);
+
+	clStatus = clReleaseMemObject(d_data);
+	clStatus = clReleaseMemObject(d_indices);
+        clStatus = clReleaseMemObject(d_perm);
+	clStatus = clReleaseMemObject(d_nzcnt);
+        clStatus = clReleaseMemObject(d_x_vector);
+	clStatus = clReleaseMemObject(d_Ax_vector);
+	CHECK_ERROR("clReleaseMemObject")
+
+	clStatus = clReleaseCommandQueue(clCommandQueue);
+	clStatus = clReleaseContext(clContext);	
+	
+	if (parameters->outFile) {
+		pb_SwitchToTimer(&timers, pb_TimerID_IO);
+		outputData(parameters->outFile,h_Ax_vector,dim);
+	}
+
+	pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
+	
+	free((void*)clSource[0]);
+	
+	free (h_data);
+	free (h_indices);
+	free (h_ptr);
+	free (h_perm);
+	free (h_nzcnt);
+	free (h_Ax_vector);
+	free (h_x_vector);
+	pb_SwitchToTimer(&timers, pb_TimerID_NONE);
+
+	pb_PrintTimerSet(&timers);
+	pb_FreeParameters(parameters);
+
+	return 0;
+}
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/ocl.c b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/ocl.c
new file mode 100644
index 0000000000000000000000000000000000000000..7e48e7e65426d1ba4b2edf89f6f3ae1fe33c12c7
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/ocl.c
@@ -0,0 +1,49 @@
+#include <CL/cl.h>
+#include <stdio.h>
+#include <string.h>
+#include "ocl.h"
+
+char* readFile(const char* fileName)
+{
+        FILE* fp;
+        fp = fopen(fileName,"r");
+        if(fp == NULL)
+        {
+                printf("Error 1!\n");
+                exit(1);
+        }
+
+        fseek(fp,0,SEEK_END);
+        long size = ftell(fp);
+        rewind(fp);
+
+        char* buffer = (char*)malloc(sizeof(char)*(size+1));
+        if(buffer  == NULL)
+        {
+                printf("Error 2!\n");
+                fclose(fp);
+                exit(1);
+        }
+
+        size_t res = fread(buffer,1,size,fp);
+        if(res != size)
+        {
+                printf("Error 3!\n");
+                fclose(fp);
+                exit(1);
+        }
+
+	buffer[size] = 0;
+        fclose(fp);
+        return buffer;
+}
+
+void clMemSet(cl_command_queue clCommandQueue, cl_mem buf, int val, size_t size)
+{
+	cl_int clStatus;
+	char* temp = (char*)malloc(size);
+	memset(temp,val,size);
+	clStatus = clEnqueueWriteBuffer(clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
+	CHECK_ERROR("clEnqueueWriteBuffer")
+	free(temp);
+}
diff --git a/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/ocl.h b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/ocl.h
new file mode 100644
index 0000000000000000000000000000000000000000..8840a8682b6e383d82bc886f448a601780b81e22
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/spmv/src/opencl_cpu/ocl.h
@@ -0,0 +1,21 @@
+#ifndef __OCLH__
+#define __OCLH__
+
+typedef struct {
+	cl_uint major;
+	cl_uint minor;
+	cl_uint multiProcessorCount;
+} OpenCLDeviceProp;
+
+void clMemSet(cl_command_queue, cl_mem, int, size_t);
+char* readFile(const char*);
+
+#define CHECK_ERROR(errorMessage)           \
+  if(clStatus != CL_SUCCESS)                \
+  {                                         \
+     printf("Error: %s!\n",errorMessage);   \
+     printf("Line: %d\n",__LINE__);         \
+     exit(1);                               \
+  }
+
+#endif
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/Makefile b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/Makefile
index d5f6d9708d0589222d80f654586af06ef491a12c..141a47f74153bf464f6e50d2b8567ab6ad9549da 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/Makefile
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/Makefile
@@ -5,4 +5,3 @@ SRCDIR_OBJS=main.o file.o
 APP_CUDALDFLAGS=-lm
 APP_CFLAGS=-ffast-math -g3 -O3
 APP_CXXFLAGS=-ffast-math -g3 -O3
-KERNEL_OBJS=kernel_offline.nvptx.s