Skip to content
Snippets Groups Projects
Commit dbd5168a authored by Prakalp Srivastava
Browse files

(1) Modified stencil to use a pre-compiled PTX binary

parent e96af743
No related branches found
No related tags found
No related merge requests found
/***************************************************************************
*cr
*cr (C) Copyright 2010 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include "common.h"
/*
 * naive_kernel: one work-item updates one cell of a 7-point 3D stencil.
 *
 *   c0, c1 : stencil coefficients (c0 scales the centre cell, c1 scales the
 *            sum of the six face neighbours)
 *   A0     : input grid, read only
 *   Anext  : output grid, one element written per work-item
 *   nx, ny : grid extents forwarded to Index3D (presumably defined in
 *            common.h, which is not shown here)
 *   nz     : accepted but not used by this kernel
 *
 * Each global id is shifted by +1 on every axis, so index 0 of each axis is
 * never written. Only the x coordinate is range-checked; y and z are used
 * exactly as the launch geometry provides them.
 */
__kernel void naive_kernel(float c0,float c1,__global float* A0,__global float *Anext,int nx,int ny,int nz)
{
    const int x = get_global_id(0) + 1;
    const int y = get_global_id(1) + 1;
    const int z = get_global_id(2) + 1;

    /* Work-items at or beyond the last x column have nothing to do. */
    if (x >= nx - 1)
        return;

    /* Neighbour sum, accumulated in the order z+1, z-1, y+1, y-1, x+1, x-1. */
    const float neighbours =
        A0[Index3D (nx, ny, x, y, z + 1)] +
        A0[Index3D (nx, ny, x, y, z - 1)] +
        A0[Index3D (nx, ny, x, y + 1, z)] +
        A0[Index3D (nx, ny, x, y - 1, z)] +
        A0[Index3D (nx, ny, x + 1, y, z)] +
        A0[Index3D (nx, ny, x - 1, y, z)];

    /* Kept as a single expression: c1 * sum minus the centre cell times c0. */
    Anext[Index3D (nx, ny, x, y, z)] =
        c1 * neighbours - A0[Index3D (nx, ny, x, y, z)] * c0;
}
//
// Generated by LLVM NVPTX Back-End
//
// Entry point naive_kernel: each thread reads seven f32 values from the
// buffer in param_2 and writes one f32 result to the buffer in param_3.
// NOTE(review): this appears to be the compiled form of the OpenCL kernel of
// the same name; the names c0/c1/A0/Anext/nx/ny/nz used below are taken from
// that presumed source - confirm against the build that produced this file.
//
// Parameters as used in this body:
//   param_0 (c0)    f32, multiplied with the centre element
//   param_1 (c1)    f32, multiplied with the sum of the six neighbours
//   param_2 (A0)    32-bit global pointer, base address of every load
//   param_3 (Anext) 32-bit global pointer, base address of the single store
//   param_4 (nx)    u32, x extent; also the multiplier of the (z*ny + y) term
//   param_5 (ny)    u32, multiplier of the z term
//   param_6 (nz)    declared but never loaded
//
// Notation used in the comments below:
//   gx, gy, gz   = ntid * ctaid + tid on the x, y and z axes
//   x, y, z      = gx + 1, gy + 1, gz + 1
//   idx(a, b, c) = (c * ny + b) * nx + a      (element index)
//   Every element index is shifted left by 2 (multiplied by 4, the size of
//   an f32) before being added to a base pointer.
//
.version 3.1
.target sm_20, texmode_independent
.address_size 32
// .globl naive_kernel
.entry naive_kernel(
.param .f32 naive_kernel_param_0,
.param .f32 naive_kernel_param_1,
.param .u32 .ptr .global .align 4 naive_kernel_param_2,
.param .u32 .ptr .global .align 4 naive_kernel_param_3,
.param .u32 naive_kernel_param_4,
.param .u32 naive_kernel_param_5,
.param .u32 naive_kernel_param_6
)
{
// Virtual register declarations: predicates, f32 values, 32-bit integers.
.reg .pred %p<2>;
.reg .f32 %f<18>;
.reg .s32 %r<46>;
// %r1 = gx = ntid.x * ctaid.x + tid.x
mov.u32 %r10, %ctaid.x;
mov.u32 %r11, %ntid.x;
mov.u32 %r12, %tid.x;
mad.lo.s32 %r1, %r11, %r10, %r12;
// %r8 = nx, %r2 = x = gx + 1, %r19 = nx - 1
ld.param.u32 %r8, [naive_kernel_param_4];
add.s32 %r2, %r1, 1;
add.s32 %r19, %r8, -1;
// Signed compare: threads with x >= nx - 1 skip straight to the return.
// No equivalent test is made on y or z.
setp.ge.s32 %p1, %r2, %r19;
@%p1 bra BB0_2;
// Remaining parameters are loaded only on the working path:
// %f1 = c0, %f2 = c1, %r6 = A0 base, %r7 = Anext base, %r9 = ny
ld.param.f32 %f1, [naive_kernel_param_0];
ld.param.f32 %f2, [naive_kernel_param_1];
ld.param.u32 %r6, [naive_kernel_param_2];
ld.param.u32 %r7, [naive_kernel_param_3];
ld.param.u32 %r9, [naive_kernel_param_5];
// %r3 = gy (which equals y - 1), %r4 = y
mov.u32 %r13, %ctaid.y;
mov.u32 %r14, %ntid.y;
mov.u32 %r15, %tid.y;
mad.lo.s32 %r3, %r14, %r13, %r15;
add.s32 %r4, %r3, 1;
// %r5 = gz (which equals z - 1), %r20 = z, %r21 = z + 1
mov.u32 %r16, %ctaid.z;
mov.u32 %r17, %ntid.z;
mov.u32 %r18, %tid.z;
mad.lo.s32 %r5, %r17, %r16, %r18;
add.s32 %r20, %r5, 1;
add.s32 %r21, %r5, 2;
// %f3 = A0[idx(x, y, z + 1)]
mad.lo.s32 %r22, %r21, %r9, %r4;
mad.lo.s32 %r23, %r22, %r8, %r2;
shl.b32 %r24, %r23, 2;
add.s32 %r25, %r6, %r24;
ld.global.f32 %f3, [%r25];
// %f4 = A0[idx(x, y, z - 1)], using gz directly as z - 1
mad.lo.s32 %r26, %r5, %r9, %r4;
mad.lo.s32 %r27, %r26, %r8, %r2;
shl.b32 %r28, %r27, 2;
add.s32 %r29, %r6, %r28;
ld.global.f32 %f4, [%r29];
// Running sum starts with the two z neighbours.
add.f32 %f5, %f3, %f4;
// %r30 = z * ny + (y - 1); adding 2 gives z * ny + (y + 1).
// %f6 = A0[idx(x, y + 1, z)]
mad.lo.s32 %r30, %r20, %r9, %r3;
add.s32 %r31, %r30, 2;
mad.lo.s32 %r32, %r31, %r8, %r2;
shl.b32 %r33, %r32, 2;
add.s32 %r34, %r6, %r33;
ld.global.f32 %f6, [%r34];
add.f32 %f7, %f5, %f6;
// %f8 = A0[idx(x, y - 1, z)], reusing %r30
mad.lo.s32 %r35, %r30, %r8, %r2;
shl.b32 %r36, %r35, 2;
add.s32 %r37, %r6, %r36;
ld.global.f32 %f8, [%r37];
add.f32 %f9, %f7, %f8;
// %r38 = z * ny + y. %r41 is the address of A0[idx(x - 1, y, z)], built
// from gx (= x - 1); the x + 1 neighbour sits two floats (8 bytes) further.
mad.lo.s32 %r38, %r20, %r9, %r4;
mad.lo.s32 %r39, %r38, %r8, %r1;
shl.b32 %r40, %r39, 2;
add.s32 %r41, %r6, %r40;
// %f10 = A0[idx(x + 1, y, z)]
ld.global.f32 %f10, [%r41+8];
add.f32 %f11, %f9, %f10;
// %f12 = A0[idx(x - 1, y, z)]; %f13 is the complete six-neighbour sum.
ld.global.f32 %f12, [%r41];
add.f32 %f13, %f11, %f12;
// %r43 = byte offset of the centre element idx(x, y, z); it is used for
// both the centre load here and the output store below.
mad.lo.s32 %r42, %r38, %r8, %r2;
shl.b32 %r43, %r42, 2;
add.s32 %r44, %r6, %r43;
// %f14 = A0[idx(x, y, z)]
ld.global.f32 %f14, [%r44];
// %f15 = c1 * sum; %f17 = fma(-centre, c0, c1 * sum) = c1 * sum - centre * c0.
// The final multiply and subtract are fused into a single rounding step.
mul.f32 %f15, %f2, %f13;
neg.f32 %f16, %f14;
fma.rn.f32 %f17, %f16, %f1, %f15;
// Store the result to Anext at the centre element's byte offset.
add.s32 %r45, %r7, %r43;
st.global.f32 [%r45], %f17;
// Common exit, also reached by threads that failed the x-range test.
BB0_2:
ret;
}
......@@ -103,17 +103,21 @@ int main(int argc, char** argv) {
pb_SetOpenCL(&clContext, &clCommandQueue);
const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
CHECK_ERROR("clCreateProgramWithSource")
cl_program clProgram;
cl_kernel clKernel;
char clOptions[50];
sprintf(clOptions,"-I src/opencl_base");
clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
CHECK_ERROR("clBuildProgram")
pb_CreateAndBuildKernelFromBinary("src/opencl_base/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel);
//const char* clSource[] = {readFile("src/opencl_base/kernel.cl")};
//cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus);
//CHECK_ERROR("clCreateProgramWithSource")
cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus);
CHECK_ERROR("clCreateKernel")
//char clOptions[50];
//sprintf(clOptions,"-I src/opencl_base");
//clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL);
//CHECK_ERROR("clBuildProgram")
//cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus);
//CHECK_ERROR("clCreateKernel")
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
//host data
......@@ -222,7 +226,7 @@ int main(int argc, char** argv) {
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
free((void*)clSource[0]);
//free((void*)clSource[0]);
free(h_A0);
free(h_Anext);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment