diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.cl b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.cl new file mode 100644 index 0000000000000000000000000000000000000000..4c5d1263db5948e4e61ea2baa27261613cd0ea06 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.cl @@ -0,0 +1,28 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include "common.h" + +__kernel void naive_kernel(float c0,float c1,__global float* A0,__global float *Anext,int nx,int ny,int nz) +{ + int i = get_global_id(0)+1; + int j = get_global_id(1)+1; + int k = get_global_id(2)+1; + +if(i<nx-1) +{ + Anext[Index3D (nx, ny, i, j, k)] = c1 * + ( A0[Index3D (nx, ny, i, j, k + 1)] + + A0[Index3D (nx, ny, i, j, k - 1)] + + A0[Index3D (nx, ny, i, j + 1, k)] + + A0[Index3D (nx, ny, i, j - 1, k)] + + A0[Index3D (nx, ny, i + 1, j, k)] + + A0[Index3D (nx, ny, i - 1, j, k)] ) + - A0[Index3D (nx, ny, i, j, k)] * c0; +} +} diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s new file mode 100644 index 0000000000000000000000000000000000000000..b925d6459752a5b890ed71e90e466ceafb876494 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/kernel_offline.nvptx.s @@ -0,0 +1,93 @@ +// +// Generated by LLVM NVPTX Back-End +// + +.version 3.1 +.target sm_20, texmode_independent +.address_size 32 + + // .globl naive_kernel + +.entry naive_kernel( + .param .f32 naive_kernel_param_0, + .param .f32 naive_kernel_param_1, + .param .u32 .ptr .global .align 4 naive_kernel_param_2, + .param .u32 .ptr .global .align 4 naive_kernel_param_3, + .param .u32 naive_kernel_param_4, + .param .u32 naive_kernel_param_5, + .param .u32 naive_kernel_param_6 +) +{ + .reg .pred %p<2>; + .reg .f32 %f<18>; + .reg .s32 %r<46>; + + mov.u32 %r10, %ctaid.x; + mov.u32 %r11, %ntid.x; + mov.u32 %r12, %tid.x; + mad.lo.s32 %r1, %r11, %r10, %r12; + ld.param.u32 %r8, [naive_kernel_param_4]; + add.s32 %r2, %r1, 1; + add.s32 %r19, %r8, -1; + setp.ge.s32 %p1, %r2, %r19; + @%p1 bra BB0_2; + ld.param.f32 %f1, [naive_kernel_param_0]; + ld.param.f32 %f2, [naive_kernel_param_1]; + ld.param.u32 %r6, [naive_kernel_param_2]; + ld.param.u32 %r7, [naive_kernel_param_3]; + ld.param.u32 %r9, [naive_kernel_param_5]; + mov.u32 %r13, %ctaid.y; + mov.u32 %r14, %ntid.y; + mov.u32 %r15, %tid.y; + mad.lo.s32 %r3, %r14, %r13, %r15; + add.s32 %r4, %r3, 1; + mov.u32 %r16, %ctaid.z; + mov.u32 %r17, %ntid.z; + mov.u32 %r18, %tid.z; + mad.lo.s32 %r5, %r17, %r16, %r18; + add.s32 %r20, %r5, 1; + add.s32 %r21, %r5, 2; + mad.lo.s32 %r22, %r21, %r9, %r4; + mad.lo.s32 %r23, %r22, %r8, %r2; + shl.b32 %r24, %r23, 2; + add.s32 %r25, %r6, %r24; + ld.global.f32 %f3, [%r25]; + mad.lo.s32 %r26, %r5, %r9, %r4; + mad.lo.s32 %r27, %r26, %r8, %r2; + shl.b32 %r28, %r27, 2; + add.s32 %r29, %r6, %r28; + ld.global.f32 %f4, [%r29]; + add.f32 %f5, %f3, %f4; + mad.lo.s32 %r30, %r20, %r9, %r3; + add.s32 %r31, %r30, 2; + mad.lo.s32 %r32, %r31, %r8, %r2; + shl.b32 %r33, %r32, 2; + add.s32 %r34, %r6, %r33; + ld.global.f32 %f6, [%r34]; + add.f32 %f7, %f5, %f6; + mad.lo.s32 %r35, %r30, %r8, %r2; + shl.b32 %r36, %r35, 2; + add.s32 %r37, %r6, %r36; + ld.global.f32 %f8, [%r37]; + add.f32 %f9, %f7, %f8; + mad.lo.s32 %r38, %r20, %r9, %r4; + mad.lo.s32 %r39, %r38, %r8, %r1; + shl.b32 %r40, %r39, 2; + add.s32 %r41, %r6, %r40; + ld.global.f32 %f10, [%r41+8]; + add.f32 %f11, %f9, %f10; + ld.global.f32 %f12, [%r41]; + add.f32 %f13, %f11, %f12; + mad.lo.s32 %r42, %r38, %r8, %r2; + shl.b32 %r43, %r42, 2; + add.s32 %r44, %r6, %r43; + ld.global.f32 %f14, [%r44]; + mul.f32 %f15, %f2, %f13; + neg.f32 %f16, %f14; + fma.rn.f32 %f17, %f16, %f1, %f15; + add.s32 %r45, %r7, %r43; + st.global.f32 [%r45], %f17; +BB0_2: + ret; +} + diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c index 542b3283629314f27b70ecfed13ca74aa641cf79..427fab549b4b8d34ac24fb44ce63d9418bd6d1ee 100644 --- a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_base/main.c @@ -103,17 +103,21 @@ int main(int argc, char** argv) { pb_SetOpenCL(&clContext, &clCommandQueue); - const char* clSource[] = {readFile("src/opencl_base/kernel.cl")}; - cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); - CHECK_ERROR("clCreateProgramWithSource") + cl_program clProgram; + cl_kernel clKernel; - char clOptions[50]; - sprintf(clOptions,"-I src/opencl_base"); - clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL); - CHECK_ERROR("clBuildProgram") + pb_CreateAndBuildKernelFromBinary("src/opencl_base/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel); + //const char* clSource[] = {readFile("src/opencl_base/kernel.cl")}; + //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); + //CHECK_ERROR("clCreateProgramWithSource") - cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus); - CHECK_ERROR("clCreateKernel") + //char clOptions[50]; + //sprintf(clOptions,"-I src/opencl_base"); + //clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL); + //CHECK_ERROR("clBuildProgram") + + //cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus); + //CHECK_ERROR("clCreateKernel") pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); //host data @@ -222,7 +226,7 @@ int main(int argc, char** argv) { } pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); - free((void*)clSource[0]); + //free((void*)clSource[0]); free(h_A0); free(h_Anext);