diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/Makefile b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..d5f6d9708d0589222d80f654586af06ef491a12c --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/Makefile @@ -0,0 +1,8 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. + +LANGUAGE=opencl +SRCDIR_OBJS=main.o file.o +APP_CUDALDFLAGS=-lm +APP_CFLAGS=-ffast-math -g3 -O3 +APP_CXXFLAGS=-ffast-math -g3 -O3 +KERNEL_OBJS=kernel_offline.nvptx.s diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/common.h b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/common.h new file mode 100644 index 0000000000000000000000000000000000000000..33bb06d5bd7e02e009565688882ed4e0ef2d52d4 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/common.h @@ -0,0 +1,12 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifndef _COMMON_H_ +#define _COMMON_H_ +#define Index3D(_nx,_ny,_i,_j,_k) ((_i)+_nx*((_j)+_ny*(_k))) +#endif diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/file.cc b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/file.cc new file mode 100644 index 0000000000000000000000000000000000000000..4c57469f7a4b1886f14be77a373750e1a7635cbe --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/file.cc @@ -0,0 +1,87 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include <endian.h> +#include <stdlib.h> +#include <malloc.h> +#include <stdio.h> +#include <inttypes.h> + +#if __BYTE_ORDER != __LITTLE_ENDIAN +# error "File I/O is not implemented for this system: wrong endianness." +#endif + +extern "C" +void inputData(char* fName, int* nx, int* ny, int* nz) +{ + FILE* fid = fopen(fName, "r"); + + if (fid == NULL) + { + fprintf(stderr, "Cannot open input file\n"); + exit(-1); + } + + fread(nx, sizeof(int ),1,fid); + fread(ny, sizeof(int ),1,fid); + fread(nz, sizeof(int ),1,fid); + fclose (fid); +} + +extern "C" +void outputData(char* fName, float *h_A0,int nx,int ny,int nz) +{ + FILE* fid = fopen(fName, "w"); + uint32_t tmp32; + if (fid == NULL) + { + fprintf(stderr, "Cannot open output file\n"); + exit(-1); + } + tmp32 = nx*ny*nz; + fwrite(&tmp32, sizeof(uint32_t), 1, fid); + fwrite(h_A0, sizeof(float), tmp32, fid); + + fclose (fid); +} + +extern "C" +char* readFile(const char* fileName) + { + FILE* fp; + fp = fopen(fileName,"r"); + if(fp == NULL) + { + printf("Error 1!\n"); + exit(1); + } + + fseek(fp,0,SEEK_END); + long size = ftell(fp); + rewind(fp); + + char* buffer = (char*)malloc(sizeof(char)*(size+1)); + if(buffer == NULL) + { + printf("Error 2!\n"); + fclose(fp); + exit(1); + } + + size_t res = fread(buffer,1,size,fp); + if(res != size) + { + printf("Error 3!\n"); + fclose(fp); + exit(1); + } + + buffer[size] = 0; + fclose(fp); + return buffer; +} diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/file.h b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/file.h new file mode 100644 index 0000000000000000000000000000000000000000..40c69734802ba06297418a3895d6eebd7af7b29b --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/file.h @@ -0,0 +1,23 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ +#ifndef __FILEH__ +#define __FILEH__ + +#ifdef __cplusplus +extern "C" { +#endif + +void inputData(char* fName, int* nx, int* ny, int* nz); +void outputData(char* fName, float *h_A0,int nx,int ny,int nz); +char* readFile(const char* fileName); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/kernel.cl b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/kernel.cl new file mode 100644 index 0000000000000000000000000000000000000000..4c5d1263db5948e4e61ea2baa27261613cd0ea06 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/kernel.cl @@ -0,0 +1,28 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include "common.h" + +__kernel void naive_kernel(float c0,float c1,__global float* A0,__global float *Anext,int nx,int ny,int nz) +{ + int i = get_global_id(0)+1; + int j = get_global_id(1)+1; + int k = get_global_id(2)+1; + +if(i<nx-1) +{ + Anext[Index3D (nx, ny, i, j, k)] = c1 * + ( A0[Index3D (nx, ny, i, j, k + 1)] + + A0[Index3D (nx, ny, i, j, k - 1)] + + A0[Index3D (nx, ny, i, j + 1, k)] + + A0[Index3D (nx, ny, i, j - 1, k)] + + A0[Index3D (nx, ny, i + 1, j, k)] + + A0[Index3D (nx, ny, i - 1, j, k)] ) + - A0[Index3D (nx, ny, i, j, k)] * c0; +} +} diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/kernel_offline.cl b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/kernel_offline.cl new file mode 100644 index 0000000000000000000000000000000000000000..4c5d1263db5948e4e61ea2baa27261613cd0ea06 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/kernel_offline.cl @@ -0,0 +1,28 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include "common.h" + +__kernel void naive_kernel(float c0,float c1,__global float* A0,__global float *Anext,int nx,int ny,int nz) +{ + int i = get_global_id(0)+1; + int j = get_global_id(1)+1; + int k = get_global_id(2)+1; + +if(i<nx-1) +{ + Anext[Index3D (nx, ny, i, j, k)] = c1 * + ( A0[Index3D (nx, ny, i, j, k + 1)] + + A0[Index3D (nx, ny, i, j, k - 1)] + + A0[Index3D (nx, ny, i, j + 1, k)] + + A0[Index3D (nx, ny, i, j - 1, k)] + + A0[Index3D (nx, ny, i + 1, j, k)] + + A0[Index3D (nx, ny, i - 1, j, k)] ) + - A0[Index3D (nx, ny, i, j, k)] * c0; +} +} diff --git a/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/main.c b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/main.c new file mode 100644 index 0000000000000000000000000000000000000000..60c75d6b4c26c652550cbdf4f421a190cc264809 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/stencil/src/opencl_cpu/main.c @@ -0,0 +1,246 @@ + +/*************************************************************************** + *cr + *cr (C) Copyright 2010 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include <CL/cl.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <parboil.h> + +#include "file.h" +#include "common.h" + +#define CHECK_ERROR(errorMessage) \ + if(clStatus != CL_SUCCESS) \ + { \ + printf("Error: %s!\n",errorMessage); \ + printf("Line: %d\n",__LINE__); \ + exit(1); \ + } + +static int read_data(float *A0, int nx,int ny,int nz,FILE *fp) +{ + int s=0; + int i,j,k; + for(i=0; i<nz; i++) + { + for(j=0; j<ny; j++) + { + for(k=0; k<nx; k++) + { + fread(A0+s,sizeof(float),1,fp); + s++; + } + } + } + return 0; +} + +int main(int argc, char** argv) { + struct pb_TimerSet timers; + struct pb_Parameters *parameters; + + printf("OpenCL accelerated 7 points stencil codes****\n"); + printf("Author: Li-Wen Chang <lchang20@illinois.edu>\n"); + parameters = pb_ReadParameters(&argc, argv); + + + //declaration + unsigned nx,ny,nz; + unsigned size; + int iteration; + float c0=1.0f/6.0f; + float c1=1.0f/6.0f/6.0f; + + if (argc<5) + { + printf("Usage: probe nx ny nz t\n" + "nx: the grid size x\n" + "ny: the grid size y\n" + "nz: the grid size z\n" + "t: the iteration time\n"); + return -1; + } + + nx = atoi(argv[1]); + if (nx<1) + return -1; + ny = atoi(argv[2]); + if (ny<1) + return -1; + nz = atoi(argv[3]); + if (nz<1) + return -1; + iteration = atoi(argv[4]); + if(iteration<1) + return -1; + + //host data + float *h_A0; + float *h_Anext; + //load data from files + + size=nx*ny*nz; + + h_A0=(float*)malloc(sizeof(float)*size); + h_Anext=(float*)malloc(sizeof(float)*size); + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + FILE *fp = fopen(parameters->inpFiles[0], "rb"); + read_data(h_A0, nx,ny,nz,fp); + fclose(fp); + + pb_InitializeTimerSet(&timers); + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + cl_int clStatus; + + cl_uint numPlatforms; + clStatus = clGetPlatformIDs(0, NULL, &numPlatforms); + CHECK_ERROR("clGetPlaformIDs") + + cl_platform_id clPlatform[numPlatforms]; + clStatus = clGetPlatformIDs(2,clPlatform,NULL); + CHECK_ERROR("clGetPlaformIDs") + + cl_context_properties clCps[3] = {CL_CONTEXT_PLATFORM,(cl_context_properties)clPlatform[1],0}; + + cl_device_id clDevice; + clStatus = clGetDeviceIDs(clPlatform[1],CL_DEVICE_TYPE_CPU,1,&clDevice,NULL); + CHECK_ERROR("clGetDeviceIDs") + + cl_context clContext = clCreateContextFromType(clCps,CL_DEVICE_TYPE_CPU,NULL,NULL,&clStatus); + CHECK_ERROR("clCreateContextFromType") + + cl_command_queue clCommandQueue = clCreateCommandQueue(clContext,clDevice,CL_QUEUE_PROFILING_ENABLE,&clStatus); + CHECK_ERROR("clCreateCommandQueue") + + pb_SetOpenCL(&clContext, &clCommandQueue); + + /*cl_program clProgram;*/ + /*cl_kernel clKernel;*/ + + /*pb_CreateAndBuildKernelFromBinary("build/opencl_base_default/kernel_offline.nvptx.s", "naive_kernel", &clContext, &clDevice, &clProgram, &clKernel);*/ + const char* clSource[] = {readFile("src/opencl_base/kernel_offline.cl")}; + cl_program clProgram = clCreateProgramWithSource(clContext,1,clSource,NULL,&clStatus); + CHECK_ERROR("clCreateProgramWithSource") + + char clOptions[50]; + sprintf(clOptions,"-I src/opencl_base"); + clStatus = clBuildProgram(clProgram,1,&clDevice,clOptions,NULL,NULL); + CHECK_ERROR("clBuildProgram") + + cl_kernel clKernel = clCreateKernel(clProgram,"naive_kernel",&clStatus); + CHECK_ERROR("clCreateKernel") + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + //device + cl_mem d_A0; + cl_mem d_Anext; + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + memcpy (h_Anext,h_A0,sizeof(float)*size); + + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + + //memory allocation + d_A0 = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + d_Anext = clCreateBuffer(clContext,CL_MEM_READ_WRITE,size*sizeof(float),NULL,&clStatus); + CHECK_ERROR("clCreateBuffer") + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + //memory copy + clStatus = clEnqueueWriteBuffer(clCommandQueue,d_A0,CL_FALSE,0,size*sizeof(float),h_A0,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + clStatus = clEnqueueWriteBuffer(clCommandQueue,d_Anext,CL_TRUE,0,size*sizeof(float),h_Anext,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + //only use 1D thread block + unsigned tx =256; + size_t block[3] = {tx,1,1}; + size_t grid[3] = {(nx-2+tx-1)/tx*tx,ny-2,nz-2}; + //size_t grid[3] = {nx-2,ny-2,nz-2}; + size_t offset[3] = {1,1,1}; +// printf("block x is %d and y is %d z \n",block[0],block[1]); +// printf("grid x is %d and y is %d\n",grid[0],grid[1]); + + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + clStatus = clSetKernelArg(clKernel,0,sizeof(float),(void*)&c0); + clStatus = clSetKernelArg(clKernel,1,sizeof(float),(void*)&c1); + clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&d_A0); + clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),(void*)&d_Anext); + clStatus = clSetKernelArg(clKernel,4,sizeof(int),(void*)&nx); + clStatus = clSetKernelArg(clKernel,5,sizeof(int),(void*)&ny); + clStatus = clSetKernelArg(clKernel,6,sizeof(int),(void*)&nz); + CHECK_ERROR("clSetKernelArg") + + //main execution + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + int t; + for(t=0; t<iteration; t++) + { + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + clStatus = clEnqueueNDRangeKernel(clCommandQueue,clKernel,3,NULL,grid,block,0,NULL,NULL); + //printf("iteration %d\n",t) + CHECK_ERROR("clEnqueueNDRangeKernel") + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + cl_mem d_temp = d_A0; + d_A0 = d_Anext; + d_Anext = d_temp; + + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + clStatus = clSetKernelArg(clKernel,2,sizeof(cl_mem),(void*)&d_A0); + clStatus = clSetKernelArg(clKernel,3,sizeof(cl_mem),(void*)&d_Anext); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + } + + + cl_mem d_temp = d_A0; + d_A0 = d_Anext; + d_Anext = d_temp; + + clStatus = clFinish(clCommandQueue); + CHECK_ERROR("clFinish") + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + clStatus = clEnqueueReadBuffer(clCommandQueue,d_Anext,CL_TRUE,0,size*sizeof(float),h_Anext,0,NULL,NULL); + CHECK_ERROR("clEnqueueReadBuffer") + + pb_SwitchToTimer(&timers, visc_TimerID_SETUP); + clStatus = clReleaseMemObject(d_A0); + clStatus = clReleaseMemObject(d_Anext); + clStatus = clReleaseKernel(clKernel); + clStatus = clReleaseProgram(clProgram); + clStatus = clReleaseCommandQueue(clCommandQueue); + clStatus = clReleaseContext(clContext); + CHECK_ERROR("clReleaseContext") + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + + pb_PrintTimerSet(&timers); + + if (parameters->outFile) { + /*pb_SwitchToTimer(&timers, pb_TimerID_IO);*/ + outputData(parameters->outFile,h_Anext,nx,ny,nz); + + } + /*pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);*/ + + //free((void*)clSource[0]); + + free(h_A0); + free(h_Anext); + pb_FreeParameters(parameters); + + return 0; +}