From 78714353cf2fab033ec5d3d185c1d9097ed463a8 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <psrivas2@illinois.edu> Date: Fri, 21 Nov 2014 09:26:43 +0000 Subject: [PATCH] visc version added. under construction ... --- .../benchmarks/mri-q/src/visc/Makefile | 7 + .../benchmarks/mri-q/src/visc/computeQ.c | 88 ++++++++++ .../benchmarks/mri-q/src/visc/computeQ.h | 14 ++ .../parboil/benchmarks/mri-q/src/visc/file.cc | 78 +++++++++ .../parboil/benchmarks/mri-q/src/visc/file.h | 22 +++ .../benchmarks/mri-q/src/visc/kernels.cl | 68 ++++++++ .../benchmarks/mri-q/src/visc/macros.h | 21 +++ .../parboil/benchmarks/mri-q/src/visc/main.c | 162 ++++++++++++++++++ .../parboil/benchmarks/mri-q/src/visc/ocl.c | 49 ++++++ .../parboil/benchmarks/mri-q/src/visc/ocl.h | 21 +++ 10 files changed, 530 insertions(+) create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/Makefile create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.c create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.h create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.cc create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.h create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/kernels.cl create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/macros.h create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/main.c create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.c create mode 100644 llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.h diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/Makefile b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/Makefile new file mode 100644 index 0000000000..d896069f4b --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/Makefile @@ -0,0 +1,7 @@ +# (c) 2010 The Board of Trustees of the University of Illinois. + +LANGUAGE=opencl +SRCDIR_OBJS=main.o file.o computeQ.o ocl.o +APP_CUDALDFLAGS=-lm +APP_CFLAGS=-ffast-math -g3 -O3 +APP_CXXFLAGS=-ffast-math -g3 -O3 diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.c b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.c new file mode 100644 index 0000000000..fe58267d66 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.c @@ -0,0 +1,88 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include <stdio.h> +#include <malloc.h> +#include <CL/cl.h> +#include "ocl.h" +#include "macros.h" +#include "computeQ.h" + +#define NC 4 + +void computePhiMag_GPU(int numK, float* phiR, float* phiI, float* phiMag) +{ + int phiMagBlocks = numK / KERNEL_PHI_MAG_THREADS_PER_BLOCK; + if (numK % KERNEL_PHI_MAG_THREADS_PER_BLOCK) + phiMagBlocks++; + + size_t DimPhiMagBlock = KERNEL_PHI_MAG_THREADS_PER_BLOCK; + size_t DimPhiMagGrid = phiMagBlocks*KERNEL_PHI_MAG_THREADS_PER_BLOCK; + + ComputePhiMag_GPU(phiR, phiI, phiMag, numK); +} + +void computeQ_GPU (int numK,int numX, + float* x, float* y, float* z, + struct kValues* kVals, + float* Qr, float* Qi + ) +{ + int QGrids = numK / KERNEL_Q_K_ELEMS_PER_GRID; + if (numK % KERNEL_Q_K_ELEMS_PER_GRID) + QGrids++; + int QBlocks = numX / KERNEL_Q_THREADS_PER_BLOCK; + if (numX % KERNEL_Q_THREADS_PER_BLOCK) + QBlocks++; + + size_t DimQBlock = KERNEL_Q_THREADS_PER_BLOCK/NC; + size_t DimQGrid = QBlocks*KERNEL_Q_THREADS_PER_BLOCK/NC; + + cl_int clStatus; + cl_mem ck; + ck = clCreateBuffer(clPrm->clContext,CL_MEM_READ_WRITE,KERNEL_Q_K_ELEMS_PER_GRID*sizeof(struct kValues),NULL,&clStatus); + + int QGrid; + for (QGrid = 0; QGrid < QGrids; QGrid++) { + // Put the tile of K values into constant mem + int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID; + struct kValues* kValsTile = kVals + QGridBase; + int numElems = MIN(KERNEL_Q_K_ELEMS_PER_GRID, numK - QGridBase); + + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,ck,CL_TRUE,0,numElems*sizeof(struct kValues),kValsTile,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + + ComputeQ_GPU(numK, QGridBase, x, y, z, Qr, Qi, kValsTile); + + clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(int),&numK); + clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(int),&QGridBase); + clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&x_d); + clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(cl_mem),&y_d); + clStatus = clSetKernelArg(clPrm->clKernel,4,sizeof(cl_mem),&z_d); + clStatus = clSetKernelArg(clPrm->clKernel,5,sizeof(cl_mem),&Qr_d); + clStatus = clSetKernelArg(clPrm->clKernel,6,sizeof(cl_mem),&Qi_d); + clStatus = clSetKernelArg(clPrm->clKernel,7,sizeof(cl_mem),&ck); + CHECK_ERROR("clSetKernelArg") + + + + printf ("Grid: %d, Block: %d\n", DimQGrid, DimQBlock); + + clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,NULL); + CHECK_ERROR("clEnqueueNDRangeKernel") + } +} + +void createDataStructsCPU(int numK, int numX, float** phiMag, + float** Qr, float** Qi) +{ + *phiMag = (float* ) memalign(16, numK * sizeof(float)); + *Qr = (float*) memalign(16, numX * sizeof (float)); + *Qi = (float*) memalign(16, numX * sizeof (float)); +} + diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.h b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.h new file mode 100644 index 0000000000..ea237e0155 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/computeQ.h @@ -0,0 +1,14 @@ +#ifndef __COMPUTEQ__ +#define __COMPUTEQ__ + +void computePhiMag_GPU(int numK, float* phiR, float* phiI, float* phiMag); +void computeQ_GPU (int numK,int numX, + float* x, float* y, float* z, + struct kValues* kVals, + float* Qr, float* Qi + ); + +void createDataStructsCPU(int numK, int numX, float** phiMag, + float** Qr, float** Qi); + +#endif diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.cc b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.cc new file mode 100644 index 0000000000..cbfdb32937 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.cc @@ -0,0 +1,78 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#include <endian.h> +#include <stdlib.h> +#include <malloc.h> +#include <stdio.h> +#include <inttypes.h> + +#include "file.h" + +#if __BYTE_ORDER != __LITTLE_ENDIAN +# error "File I/O is not implemented for this system: wrong endianness." +#endif + +extern "C" +void inputData(char* fName, int* _numK, int* _numX, + float** kx, float** ky, float** kz, + float** x, float** y, float** z, + float** phiR, float** phiI) +{ + int numK, numX; + FILE* fid = fopen(fName, "r"); + + if (fid == NULL) + { + fprintf(stderr, "Cannot open input file\n"); + exit(-1); + } + fread (&numK, sizeof (int), 1, fid); + *_numK = numK; + fread (&numX, sizeof (int), 1, fid); + *_numX = numX; + *kx = (float *) memalign(16, numK * sizeof (float)); + fread (*kx, sizeof (float), numK, fid); + *ky = (float *) memalign(16, numK * sizeof (float)); + fread (*ky, sizeof (float), numK, fid); + *kz = (float *) memalign(16, numK * sizeof (float)); + fread (*kz, sizeof (float), numK, fid); + *x = (float *) memalign(16, numX * sizeof (float)); + fread (*x, sizeof (float), numX, fid); + *y = (float *) memalign(16, numX * sizeof (float)); + fread (*y, sizeof (float), numX, fid); + *z = (float *) memalign(16, numX * sizeof (float)); + fread (*z, sizeof (float), numX, fid); + *phiR = (float *) memalign(16, numK * sizeof (float)); + fread (*phiR, sizeof (float), numK, fid); + *phiI = (float *) memalign(16, numK * sizeof (float)); + fread (*phiI, sizeof (float), numK, fid); + fclose (fid); +} + +extern "C" +void outputData(char* fName, float* outR, float* outI, int numX) +{ + FILE* fid = fopen(fName, "w"); + uint32_t tmp32; + + if (fid == NULL) + { + fprintf(stderr, "Cannot open output file\n"); + exit(-1); + } + + /* Write the data size */ + tmp32 = numX; + fwrite(&tmp32, sizeof(uint32_t), 1, fid); + + /* Write the reconstructed data */ + fwrite (outR, sizeof (float), numX, fid); + fwrite (outI, sizeof (float), numX, fid); + fclose (fid); +} diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.h b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.h new file mode 100644 index 0000000000..c6a61ef4cd --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/file.h @@ -0,0 +1,22 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +#ifdef __cplusplus +extern "C" { +#endif + +void inputData(char* fName, int* _numK, int* _numX, + float** kx, float** ky, float** kz, + float** x, float** y, float** z, + float** phiR, float** phiI); + +void outputData(char* fName, float* outR, float* outI, int numX); + +#ifdef __cplusplus +} +#endif diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/kernels.cl b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/kernels.cl new file mode 100644 index 0000000000..37455d5843 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/kernels.cl @@ -0,0 +1,68 @@ +#include "macros.h" + +#define NC 4 +#define COARSE_GENERAL +// #define COARSE_SPEC NC + +__kernel void +ComputePhiMag_GPU(__global float* phiR, __global float* phiI, __global float* phiMag, int numK) { + int indexK = get_global_id(0); + if (indexK < numK) { + float real = phiR[indexK]; + float imag = phiI[indexK]; + phiMag[indexK] = real*real + imag*imag; + } +} + +__kernel void +ComputeQ_GPU(int numK, int kGlobalIndex, + __global float* x, __global float* y, __global float* z, + __global float* Qr, __global float* Qi, __global struct kValues* ck) +{ + + float sX[NC]; + float sY[NC]; + float sZ[NC]; + float sQr[NC]; + float sQi[NC]; + + #pragma unroll + for (int tx = 0; tx < NC; tx++) { + int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0) + tx; + + sX[tx] = x[xIndex]; + sY[tx] = y[xIndex]; + sZ[tx] = z[xIndex]; + sQr[tx] = Qr[xIndex]; + sQi[tx] = Qi[xIndex]; + } + + // Loop over all elements of K in constant mem to compute a partial value + // for X. + int kIndex = 0; + for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK); + kIndex ++, kGlobalIndex ++) { + float kx = ck[kIndex].Kx; + float ky = ck[kIndex].Ky; + float kz = ck[kIndex].Kz; + float pm = ck[kIndex].PhiMag; + + #pragma unroll + for (int tx = 0; tx < NC; tx++) { + float expArg = PIx2 * + (kx * sX[tx] + + ky * sY[tx] + + kz * sZ[tx]); + sQr[tx] += pm * cos(expArg); + sQi[tx] += pm * sin(expArg); + } + } + + #pragma unroll + for (int tx = 0; tx < NC; tx++) { + int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0) + tx; + Qr[xIndex] = sQr[tx]; + Qi[xIndex] = sQi[tx]; + } + +} diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/macros.h b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/macros.h new file mode 100644 index 0000000000..9ba2c5a3be --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/macros.h @@ -0,0 +1,21 @@ +#ifndef __MACROS__ +#define __MACROS__ + +#define PI 3.1415926535897932384626433832795029f +#define PIx2 6.2831853071795864769252867665590058f + +#define MIN(X,Y) ((X) < (Y) ? (X) : (Y)) +#define K_ELEMS_PER_GRID 2048 + +#define KERNEL_PHI_MAG_THREADS_PER_BLOCK 256 /* 512 */ +#define KERNEL_Q_THREADS_PER_BLOCK 256 +#define KERNEL_Q_K_ELEMS_PER_GRID 1024 + +struct kValues { + float Kx; + float Ky; + float Kz; + float PhiMag; +}; + +#endif diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/main.c b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/main.c new file mode 100644 index 0000000000..03e388bb62 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/main.c @@ -0,0 +1,162 @@ +/*************************************************************************** + *cr + *cr (C) Copyright 2007 The Board of Trustees of the + *cr University of Illinois + *cr All Rights Reserved + *cr + ***************************************************************************/ + +/* + * C code for creating the Q data structure for fast convolution-based + * Hessian multiplication for arbitrary k-space trajectories. + * + * Inputs: + * kx - VECTOR of kx values, same length as ky and kz + * ky - VECTOR of ky values, same length as kx and kz + * kz - VECTOR of kz values, same length as kx and ky + * x - VECTOR of x values, same length as y and z + * y - VECTOR of y values, same length as x and z + * z - VECTOR of z values, same length as x and y + * phi - VECTOR of the Fourier transform of the spatial basis + * function, evaluated at [kx, ky, kz]. Same length as kx, ky, and kz. + * + * recommended g++ options: + * -O3 -lm -ffast-math -funroll-all-loops + */ + +#include <stdio.h> +#include <sys/time.h> +#include <parboil.h> +#include <CL/cl.h> + +#include "ocl.h" +#include "file.h" +#include "macros.h" +#include "computeQ.h" + +int +main (int argc, char *argv[]) { + int numX, numK; /* Number of X and K values */ + int original_numK; /* Number of K values in input file */ + float *kx, *ky, *kz; /* K trajectory (3D vectors) */ + float *x, *y, *z; /* X coordinates (3D vectors) */ + float *phiR, *phiI; /* Phi values (complex) */ + float *phiMag; /* Magnitude of Phi */ + float *Qr, *Qi; /* Q signal (complex) */ + + struct kValues* kVals; + + struct pb_Parameters *params; + struct pb_TimerSet timers; + + pb_InitializeTimerSet(&timers); + + /* Read command line */ + params = pb_ReadParameters(&argc, argv); + if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL)) + { + fprintf(stderr, "Expecting one input filename\n"); + exit(-1); + } + + /* Read in data */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + inputData(params->inpFiles[0], + &original_numK, &numX, + &kx, &ky, &kz, + &x, &y, &z, + &phiR, &phiI); + + /* Reduce the number of k-space samples if a number is given + * on the command line */ + if (argc < 2) + numK = original_numK; + else + { + int inputK; + char *end; + inputK = strtol(argv[1], &end, 10); + if (end == argv[1]) + { + fprintf(stderr, "Expecting an integer parameter\n"); + exit(-1); + } + + numK = MIN(inputK, original_numK); + } + + printf("%d pixels in output; %d samples in trajectory; using %d samples\n", + numX, original_numK, numK); + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + /* Create CPU data structures */ + createDataStructsCPU(numK, numX, &phiMag, &Qr, &Qi); + + /* GPU section 1 (precompute PhiMag) */ + { + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + + computePhiMag_GPU(numK, phiR, phiI, phiMag); + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + } + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + kVals = (struct kValues*)calloc(numK, sizeof (struct kValues)); + + int k; + for (k = 0; k < numK; k++) { + kVals[k].Kx = kx[k]; + kVals[k].Ky = ky[k]; + kVals[k].Kz = kz[k]; + kVals[k].PhiMag = phiMag[k]; + } + + free(phiMag); + + /* GPU section 2 */ + { + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + pb_SwitchToTimer(&timers, pb_TimerID_KERNEL); + + computeQ_GPU(numK, numX, x, y, z, kVals, Qr, Qi); + + pb_SwitchToTimer(&timers, pb_TimerID_COPY); + + } + + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + + if (params->outFile) + { + /* Write Q to file */ + pb_SwitchToTimer(&timers, pb_TimerID_IO); + outputData(params->outFile, Qr, Qi, numX); + pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE); + } + + free (kx); + free (ky); + free (kz); + free (x); + free (y); + free (z); + free (phiR); + free (phiI); + free (kVals); + free (Qr); + free (Qi); + + pb_SwitchToTimer(&timers, pb_TimerID_NONE); + pb_PrintTimerSet(&timers); + + pb_FreeParameters(params); + + return 0; +} diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.c b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.c new file mode 100644 index 0000000000..2e6b21b908 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.c @@ -0,0 +1,49 @@ +#include <CL/cl.h> +#include <stdio.h> +#include <string.h> +#include "ocl.h" + +char* readFile(const char* fileName) +{ + FILE* fp; + fp = fopen(fileName,"r"); + if(fp == NULL) + { + printf("Error 1!\n"); + exit(1); + } + + fseek(fp,0,SEEK_END); + long size = ftell(fp); + rewind(fp); + + char* buffer = (char*)malloc(sizeof(char)*(size+1)); + if(buffer == NULL) + { + printf("Error 2!\n"); + fclose(fp); + exit(1); + } + + size_t res = fread(buffer,1,size,fp); + if(res != size) + { + printf("Error 3!\n"); + fclose(fp); + exit(1); + } + + buffer[size] = 0; + fclose(fp); + return buffer; +} + +void clMemSet(clPrmtr* clPrm, cl_mem buf, int val, size_t size) +{ + cl_int clStatus; + char* temp = (char*)malloc(size); + memset(temp,val,size); + clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL); + CHECK_ERROR("clEnqueueWriteBuffer") + free(temp); +} diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.h b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.h new file mode 100644 index 0000000000..51f152ccc0 --- /dev/null +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/visc/ocl.h @@ -0,0 +1,21 @@ +#ifndef __OCLH__ +#define __OCLH__ + +typedef struct { + cl_context clContext; + cl_command_queue clCommandQueue; + cl_kernel clKernel; +} clPrmtr; + +void clMemSet(clPrmtr*, cl_mem, int, size_t); +char* readFile(const char*); + +#define CHECK_ERROR(errorMessage) \ + if(clStatus != CL_SUCCESS) \ + { \ + printf("Error: %s!\n",errorMessage); \ + printf("Line: %d\n",__LINE__); \ + exit(1); \ + } + +#endif -- GitLab