diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.cl b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.cl new file mode 100644 index 0000000000000000000000000000000000000000..f376a27d90003e3c7c18dafb9f64a8b459a40029 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.cl @@ -0,0 +1,25 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * Kernel of dense matrix-matrix multiplication kernel. + */ + +__kernel void mysgemmNT( __global const float *A, int lda, __global const float *B, int ldb, __global float* C, int ldc, int k, float alpha, float beta ) +{ + float c = 0.0f; + int m = get_global_id(0); + int n = get_global_id(1); + + for (int i = 0; i < k; ++i) { + float a = A[m + i * lda]; + float b = B[n + i * ldb]; + c += a * b; + } + C[m+n*ldc] = C[m+n*ldc] * beta + alpha * c; +} diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.nvptx.s b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.nvptx.s new file mode 100644 index 0000000000000000000000000000000000000000..13cfd2d24547432388d9d3de6ef92eca164fc822 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/kernel_offline.nvptx.s @@ -0,0 +1,79 @@ +// +// Generated by LLVM NVPTX Back-End +// + +.version 3.1 +.target sm_20, texmode_independent +.address_size 32 + + // .globl mysgemmNT + +.entry mysgemmNT( + .param .u32 .ptr .global .align 4 mysgemmNT_param_0, + .param .u32 mysgemmNT_param_1, + .param .u32 .ptr .global .align 4 mysgemmNT_param_2, + .param .u32 mysgemmNT_param_3, + .param .u32 .ptr .global .align 4 mysgemmNT_param_4, + .param .u32 mysgemmNT_param_5, + .param .u32 mysgemmNT_param_6, + .param .f32 mysgemmNT_param_7, + .param .f32 mysgemmNT_param_8 +) +{ + .reg .pred %p<3>; + .reg .f32 %f<14>; + .reg .s32 %r<36>; + + mov.u32 %r1, %ctaid.x; + mov.u32 %r2, %ntid.x; + mov.u32 %r3, %tid.x; + mad.lo.s32 %r4, %r2, %r1, %r3; + ld.param.u32 %r23, [mysgemmNT_param_4]; + mov.u32 %r5, %ctaid.y; + ld.param.u32 %r24, [mysgemmNT_param_5]; + mov.u32 %r6, %ntid.y; + ld.param.u32 %r25, [mysgemmNT_param_6]; + ld.param.f32 %f4, [mysgemmNT_param_7]; + mov.u32 %r7, %tid.y; + ld.param.f32 %f5, [mysgemmNT_param_8]; + mad.lo.s32 %r8, %r6, %r5, %r7; + setp.lt.s32 %p1, %r25, 1; + @%p1 bra BB0_1; + ld.param.u32 %r19, [mysgemmNT_param_0]; + ld.param.u32 %r20, [mysgemmNT_param_1]; + ld.param.u32 %r21, [mysgemmNT_param_2]; + ld.param.u32 %r22, [mysgemmNT_param_3]; + shl.b32 %r27, %r8, 2; + add.s32 %r9, %r21, %r27; + shl.b32 %r10, %r22, 2; + shl.b32 %r29, %r4, 2; + add.s32 %r11, %r19, %r29; + shl.b32 %r12, %r20, 2; + mov.f32 %f7, 0f00000000; +BB0_3: + ld.global.f32 %f8, [%r11]; + ld.global.f32 %f9, [%r9]; + fma.rn.f32 %f2, %f8, %f9, %f7; + add.s32 %r16, %r25, -1; + add.s32 %r17, %r9, %r10; + add.s32 %r18, %r11, %r12; + setp.ne.s32 %p2, %r16, 0; + mov.u32 %r11, %r18; + mov.u32 %r9, %r17; + mov.u32 %r25, %r16; + mov.f32 %f7, %f2; + @%p2 bra BB0_3; + bra.uni BB0_4; +BB0_1: + mov.f32 %f7, 0f00000000; +BB0_4: + mad.lo.s32 %r30, %r8, %r24, %r4; + shl.b32 %r31, %r30, 2; + add.s32 %r32, %r23, %r31; + ld.global.f32 %f10, [%r32]; + mul.f32 %f11, %f7, %f4; + fma.rn.f32 %f12, %f10, %f5, %f11; + st.global.f32 [%r32], %f12; + ret; +} + diff --git a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc index 4d199149e29953b834253881faa60202fa29a7fb..020b1256ee09c63a2d7f16d230cb9a5120cca270 100644 --- a/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc +++ b/llvm/test/VISC/parboil/benchmarks/sgemm/src/opencl_base/main.cc @@ -32,7 +32,7 @@ extern char* readFile(const char*); #define CHECK_ERROR(errorMessage) \ if(clStatus != CL_SUCCESS) \ { \ - std::cout<<errorMessage<<" Error!\n"; \ + std::cout<< errorMessage <<": "<< clStatus <<" Error!\n"; \ std::cout<<"Line: "<<__LINE__<<"\n"; \ exit(1); \ } @@ -121,18 +121,23 @@ int main (int argc, char *argv[]) { pb_SetOpenCL(&clContext, &clCommandQueue); - const char* clSource[] = {readFile("src/opencl_base/kernel.cl")}; - cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); - CHECK_ERROR("clCreateProgramWithSource") + // const char* clSource[] = {readFile("src/opencl_base/kernel.cl")}; + const char* clSource[] = {readFile("src/opencl_base/kernel_offline.nvptx.s")}; + // cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); + cl_kernel clKernel; + cl_program clProgram; + pb_CreateAndBuildKernelFromBinary("src/opencl_base/kernel_offline.nvptx.s", "mysgemmNT", &clContext, &clDevice, &clProgram, &clKernel); + //cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); + //CHECK_ERROR("clCreateProgramWithSource") - char clOptions[50]; - sprintf(clOptions,""); + //char clOptions[50]; + //sprintf(clOptions,""); - clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL); - CHECK_ERROR("clBuildProgram") + //clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL); + //CHECK_ERROR("clBuildProgram") - cl_kernel clKernel = clCreateKernel(clProgram,"mysgemmNT",&clStatus); - CHECK_ERROR("clCreateKernel") + //cl_kernel clKernel = clCreateKernel(clProgram,"mysgemmNT",&clStatus); + //CHECK_ERROR("clCreateKernel") /* Read in data */ pb_SwitchToTimer(&timers, pb_TimerID_IO); diff --git a/llvm/test/VISC/parboil/common/include/parboil.h b/llvm/test/VISC/parboil/common/include/parboil.h index 691e5759093db115d1f92bc7a8e666ec0e9a08b6..41f78a99c07cea79e013eee86bc746f223517fdc 100644 --- a/llvm/test/VISC/parboil/common/include/parboil.h +++ b/llvm/test/VISC/parboil/common/include/parboil.h @@ -188,6 +188,9 @@ pb_DestroyTimerSet(struct pb_TimerSet * timers); void pb_SetOpenCL(void *clContextPtr, void *clCommandQueuePtr); +void +pb_CreateAndBuildKernelFromBinary(const char* file, const char* kernel, void* clContextPtr, void* clDevicePtr, void* clProgramPtr, void* clKerenlPtr); + #ifdef __cplusplus } #endif diff --git a/llvm/test/VISC/parboil/common/src/parboil_opencl.c b/llvm/test/VISC/parboil/common/src/parboil_opencl.c index c1200a2479a28791356f2e3e0b786b8e2cabbb1d..1b038efc0525b511cfa3a6884e14cbdcbef47e92 100644 --- a/llvm/test/VISC/parboil/common/src/parboil_opencl.c +++ b/llvm/test/VISC/parboil/common/src/parboil_opencl.c @@ -593,6 +593,95 @@ void pb_SetOpenCL(void *p_clContextPtr, void *p_clCommandQueuePtr) { clCommandQueuePtr = ((cl_command_queue *)p_clCommandQueuePtr); } +static char* LoadProgSource(const char* Filename, size_t* szFinalLength) +{ + // locals + FILE* pFileStream = NULL; + size_t szSourceLength; + + // open the OpenCL source code file + pFileStream = fopen(Filename, "rb"); + if(pFileStream == 0) + { + return NULL; + } + + // get the length of the source code + fseek(pFileStream, 0, SEEK_END); + szSourceLength = ftell(pFileStream); + fseek(pFileStream, 0, SEEK_SET); + + // allocate a buffer for the source code string and read it in + char* cSourceString = (char *)malloc(szSourceLength + 1); + if (fread((cSourceString), szSourceLength, 1, pFileStream) != 1) + { + fclose(pFileStream); + free(cSourceString); + return 0; + } + + // close the file and return the total length of the combined (preamble + source) string + fclose(pFileStream); + if(szFinalLength != 0) + { + *szFinalLength = szSourceLength; + } + cSourceString[szSourceLength] = '\0'; + + return cSourceString; +} + +static inline void checkErr(cl_int err, cl_int success, const char * name) { + if (err != success) { + printf("ERROR: %s\n", name); + exit(EXIT_FAILURE); + } +} + +void pb_CreateAndBuildKernelFromBinary(const char* file, const char* kernel, void* clContextPtr, void* clDevicePtr, void* clProgramPtr, void* clKernelPtr) { + size_t kernelLength; + char *programSource = LoadProgSource(file, &kernelLength); + checkErr(programSource != NULL, 1 /*bool true*/, "Failure to load Program Binary"); + + cl_int binaryStatus; + cl_int errcode; + cl_device_id clDevice = *(cl_device_id*) clDevicePtr; + cl_context clContext = *(cl_context*) clContextPtr; + cl_program clProgram = clCreateProgramWithBinary(clContext, 1, &clDevice, + &kernelLength, + (const unsigned char **)&programSource, + &binaryStatus, &errcode); + checkErr(errcode, CL_SUCCESS, "Failure to create program from binary"); + + // printf("Building kernel - %s, from file %s\n", kernel, file); + errcode = clBuildProgram(clProgram, 0, NULL, NULL, NULL, NULL); + // If build fails, get build log from device + if(errcode != CL_SUCCESS) { + printf("ERROR: Failure to build program\n"); + size_t len = 0; + errcode = clGetProgramBuildInfo(clProgram, clDevice , CL_PROGRAM_BUILD_LOG, 0, + NULL, &len); + printf("LOG LENGTH: %lu\n", len); + checkErr(errcode, CL_SUCCESS, "Failure to collect program build log length"); + char *log = (char*) malloc(len*sizeof(char)); + errcode = clGetProgramBuildInfo(clProgram, clDevice, CL_PROGRAM_BUILD_LOG, len, + log, NULL); + checkErr(errcode, CL_SUCCESS, "Failure to collect program build log"); + + printf("Device Build Log: %s\n", log); + free(log); + exit(EXIT_FAILURE); + } + + cl_kernel clKernel = clCreateKernel(clProgram, kernel, &errcode); + checkErr(errcode, CL_SUCCESS, "Failure to create kernel"); + + *(cl_program*) clProgramPtr = clProgram; + *(cl_kernel*)clKernelPtr = clKernel; + + free(programSource); +} + void pb_AddSubTimer(struct pb_TimerSet *timers, char *label, enum pb_TimerID pb_Category) {