diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.cl b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.cl
new file mode 100644
index 0000000000000000000000000000000000000000..4c5d1263db5948e4e61ea2baa27261613cd0ea06
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.cl
@@ -0,0 +1,28 @@
+/***************************************************************************
+ *cr
+ *cr            (C) Copyright 2010 The Board of Trustees of the
+ *cr                        University of Illinois
+ *cr                         All Rights Reserved
+ *cr
+ ***************************************************************************/
+
+#include "common.h"
+
+__kernel void naive_kernel(float c0,float c1,__global float* A0,__global float *Anext,int nx,int ny,int nz)
+{
+    	int i = get_global_id(0)+1;
+    	int j = get_global_id(1)+1;
+    	int k = get_global_id(2)+1;
+
+if(i<nx-1)
+{
+		Anext[Index3D (nx, ny, i, j, k)] = c1 *
+		( A0[Index3D (nx, ny, i, j, k + 1)] +
+		  A0[Index3D (nx, ny, i, j, k - 1)] +
+		  A0[Index3D (nx, ny, i, j + 1, k)] +
+		  A0[Index3D (nx, ny, i, j - 1, k)] +
+	 	  A0[Index3D (nx, ny, i + 1, j, k)] +
+		  A0[Index3D (nx, ny, i - 1, j, k)] )
+		- A0[Index3D (nx, ny, i, j, k)] * c0;
+}
+}
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s
new file mode 100644
index 0000000000000000000000000000000000000000..b925d6459752a5b890ed71e90e466ceafb876494
--- /dev/null
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s
@@ -0,0 +1,93 @@
+//
+// Generated by LLVM NVPTX Back-End
+//
+
+.version 3.1
+.target sm_20, texmode_independent
+.address_size 32
+
+	// .globl	naive_kernel
+
+.entry naive_kernel(
+	.param .f32 naive_kernel_param_0,
+	.param .f32 naive_kernel_param_1,
+	.param .u32 .ptr .global .align 4 naive_kernel_param_2,
+	.param .u32 .ptr .global .align 4 naive_kernel_param_3,
+	.param .u32 naive_kernel_param_4,
+	.param .u32 naive_kernel_param_5,
+	.param .u32 naive_kernel_param_6
+)
+{
+	.reg .pred 	%p<2>;
+	.reg .f32 	%f<18>;
+	.reg .s32 	%r<46>;
+
+	mov.u32	%r10, %ctaid.x;
+	mov.u32	%r11, %ntid.x;
+	mov.u32	%r12, %tid.x;
+	mad.lo.s32 	%r1, %r11, %r10, %r12;
+	ld.param.u32 	%r8, [naive_kernel_param_4];
+	add.s32 	%r2, %r1, 1;
+	add.s32 	%r19, %r8, -1;
+	setp.ge.s32 	%p1, %r2, %r19;
+	@%p1 bra 	BB0_2;
+	ld.param.f32 	%f1, [naive_kernel_param_0];
+	ld.param.f32 	%f2, [naive_kernel_param_1];
+	ld.param.u32 	%r6, [naive_kernel_param_2];
+	ld.param.u32 	%r7, [naive_kernel_param_3];
+	ld.param.u32 	%r9, [naive_kernel_param_5];
+	mov.u32	%r13, %ctaid.y;
+	mov.u32	%r14, %ntid.y;
+	mov.u32	%r15, %tid.y;
+	mad.lo.s32 	%r3, %r14, %r13, %r15;
+	add.s32 	%r4, %r3, 1;
+	mov.u32	%r16, %ctaid.z;
+	mov.u32	%r17, %ntid.z;
+	mov.u32	%r18, %tid.z;
+	mad.lo.s32 	%r5, %r17, %r16, %r18;
+	add.s32 	%r20, %r5, 1;
+	add.s32 	%r21, %r5, 2;
+	mad.lo.s32 	%r22, %r21, %r9, %r4;
+	mad.lo.s32 	%r23, %r22, %r8, %r2;
+	shl.b32 	%r24, %r23, 2;
+	add.s32 	%r25, %r6, %r24;
+	ld.global.f32 	%f3, [%r25];
+	mad.lo.s32 	%r26, %r5, %r9, %r4;
+	mad.lo.s32 	%r27, %r26, %r8, %r2;
+	shl.b32 	%r28, %r27, 2;
+	add.s32 	%r29, %r6, %r28;
+	ld.global.f32 	%f4, [%r29];
+	add.f32 	%f5, %f3, %f4;
+	mad.lo.s32 	%r30, %r20, %r9, %r3;
+	add.s32 	%r31, %r30, 2;
+	mad.lo.s32 	%r32, %r31, %r8, %r2;
+	shl.b32 	%r33, %r32, 2;
+	add.s32 	%r34, %r6, %r33;
+	ld.global.f32 	%f6, [%r34];
+	add.f32 	%f7, %f5, %f6;
+	mad.lo.s32 	%r35, %r30, %r8, %r2;
+	shl.b32 	%r36, %r35, 2;
+	add.s32 	%r37, %r6, %r36;
+	ld.global.f32 	%f8, [%r37];
+	add.f32 	%f9, %f7, %f8;
+	mad.lo.s32 	%r38, %r20, %r9, %r4;
+	mad.lo.s32 	%r39, %r38, %r8, %r1;
+	shl.b32 	%r40, %r39, 2;
+	add.s32 	%r41, %r6, %r40;
+	ld.global.f32 	%f10, [%r41+8];
+	add.f32 	%f11, %f9, %f10;
+	ld.global.f32 	%f12, [%r41];
+	add.f32 	%f13, %f11, %f12;
+	mad.lo.s32 	%r42, %r38, %r8, %r2;
+	shl.b32 	%r43, %r42, 2;
+	add.s32 	%r44, %r6, %r43;
+	ld.global.f32 	%f14, [%r44];
+	mul.f32 	%f15, %f2, %f13;
+	neg.f32 	%f16, %f14;
+	fma.rn.f32 	%f17, %f16, %f1, %f15;
+	add.s32 	%r45, %r7, %r43;
+	st.global.f32 	[%r45], %f17;
+BB0_2:
+	ret;
+}
+
diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
index 542b3283629314f27b70ecfed13ca74aa641cf79..427fab549b4b8d34ac24fb44ce63d9418bd6d1ee 100644
--- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
+++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c
@@ -103,17 +103,21 @@ int main(int argc, char** argv) {
 
     pb_SetOpenCL(&clContext, &clCommandQueue);
 
-    const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
-    cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
-    CHECK_ERROR("clCreateProgramWithSource")
+    cl_program clProgram;
+    cl_kernel clKernel;
 
-    char clOptions[50];
-    sprintf(clOptions,"-I src/opencl_base");
-    clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
-    CHECK_ERROR("clBuildProgram")
+    pb_CreateAndBuildKernelFromBinary("src/opencl_base/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel);
+    //const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
+    //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
+    //CHECK_ERROR("clCreateProgramWithSource")
 
-    cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus);
-    CHECK_ERROR("clCreateKernel")
+    //char clOptions[50];
+    //sprintf(clOptions,"-I src/opencl_base");
+    //clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
+    //CHECK_ERROR("clBuildProgram")
+
+    //cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus);
+    //CHECK_ERROR("clCreateKernel")
 
     pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
     //host data
@@ -222,7 +226,7 @@ int main(int argc, char** argv) {
     }
     pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
 
-    free((void*)clSource[0]);
+    //free((void*)clSource[0]);
 
     free(h_A0);
     free(h_Anext);