Skip to content
Snippets Groups Projects
Commit 78714353 authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

visc version added. under construction ...

parent 0198cffc
No related branches found
No related tags found
No related merge requests found
Showing with 530 additions and 0 deletions
# (c) 2010 The Board of Trustees of the University of Illinois.
LANGUAGE=opencl
SRCDIR_OBJS=main.o file.o computeQ.o ocl.o
APP_CUDALDFLAGS=-lm
APP_CFLAGS=-ffast-math -g3 -O3
APP_CXXFLAGS=-ffast-math -g3 -O3
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <stdio.h>
#include <malloc.h>
#include <CL/cl.h>
#include "ocl.h"
#include "macros.h"
#include "computeQ.h"
#define NC 4
void computePhiMag_GPU(int numK, float* phiR, float* phiI, float* phiMag)
{
int phiMagBlocks = numK / KERNEL_PHI_MAG_THREADS_PER_BLOCK;
if (numK % KERNEL_PHI_MAG_THREADS_PER_BLOCK)
phiMagBlocks++;
size_t DimPhiMagBlock = KERNEL_PHI_MAG_THREADS_PER_BLOCK;
size_t DimPhiMagGrid = phiMagBlocks*KERNEL_PHI_MAG_THREADS_PER_BLOCK;
ComputePhiMag_GPU(phiR, phiI, phiMag, numK);
}
void computeQ_GPU (int numK,int numX,
float* x, float* y, float* z,
struct kValues* kVals,
float* Qr, float* Qi
)
{
int QGrids = numK / KERNEL_Q_K_ELEMS_PER_GRID;
if (numK % KERNEL_Q_K_ELEMS_PER_GRID)
QGrids++;
int QBlocks = numX / KERNEL_Q_THREADS_PER_BLOCK;
if (numX % KERNEL_Q_THREADS_PER_BLOCK)
QBlocks++;
size_t DimQBlock = KERNEL_Q_THREADS_PER_BLOCK/NC;
size_t DimQGrid = QBlocks*KERNEL_Q_THREADS_PER_BLOCK/NC;
cl_int clStatus;
cl_mem ck;
ck = clCreateBuffer(clPrm->clContext,CL_MEM_READ_WRITE,KERNEL_Q_K_ELEMS_PER_GRID*sizeof(struct kValues),NULL,&clStatus);
int QGrid;
for (QGrid = 0; QGrid < QGrids; QGrid++) {
// Put the tile of K values into constant mem
int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID;
struct kValues* kValsTile = kVals + QGridBase;
int numElems = MIN(KERNEL_Q_K_ELEMS_PER_GRID, numK - QGridBase);
clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,ck,CL_TRUE,0,numElems*sizeof(struct kValues),kValsTile,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
ComputeQ_GPU(numK, QGridBase, x, y, z, Qr, Qi, kValsTile);
clStatus = clSetKernelArg(clPrm->clKernel,0,sizeof(int),&numK);
clStatus = clSetKernelArg(clPrm->clKernel,1,sizeof(int),&QGridBase);
clStatus = clSetKernelArg(clPrm->clKernel,2,sizeof(cl_mem),&x_d);
clStatus = clSetKernelArg(clPrm->clKernel,3,sizeof(cl_mem),&y_d);
clStatus = clSetKernelArg(clPrm->clKernel,4,sizeof(cl_mem),&z_d);
clStatus = clSetKernelArg(clPrm->clKernel,5,sizeof(cl_mem),&Qr_d);
clStatus = clSetKernelArg(clPrm->clKernel,6,sizeof(cl_mem),&Qi_d);
clStatus = clSetKernelArg(clPrm->clKernel,7,sizeof(cl_mem),&ck);
CHECK_ERROR("clSetKernelArg")
printf ("Grid: %d, Block: %d\n", DimQGrid, DimQBlock);
clStatus = clEnqueueNDRangeKernel(clPrm->clCommandQueue,clPrm->clKernel,1,NULL,&DimQGrid,&DimQBlock,0,NULL,NULL);
CHECK_ERROR("clEnqueueNDRangeKernel")
}
}
void createDataStructsCPU(int numK, int numX, float** phiMag,
float** Qr, float** Qi)
{
*phiMag = (float* ) memalign(16, numK * sizeof(float));
*Qr = (float*) memalign(16, numX * sizeof (float));
*Qi = (float*) memalign(16, numX * sizeof (float));
}
#ifndef __COMPUTEQ__
#define __COMPUTEQ__
void computePhiMag_GPU(int numK, float* phiR, float* phiI, float* phiMag);
void computeQ_GPU (int numK,int numX,
float* x, float* y, float* z,
struct kValues* kVals,
float* Qr, float* Qi
);
void createDataStructsCPU(int numK, int numX, float** phiMag,
float** Qr, float** Qi);
#endif
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#include <endian.h>
#include <stdlib.h>
#include <malloc.h>
#include <stdio.h>
#include <inttypes.h>
#include "file.h"
#if __BYTE_ORDER != __LITTLE_ENDIAN
# error "File I/O is not implemented for this system: wrong endianness."
#endif
extern "C"
void inputData(char* fName, int* _numK, int* _numX,
float** kx, float** ky, float** kz,
float** x, float** y, float** z,
float** phiR, float** phiI)
{
int numK, numX;
FILE* fid = fopen(fName, "r");
if (fid == NULL)
{
fprintf(stderr, "Cannot open input file\n");
exit(-1);
}
fread (&numK, sizeof (int), 1, fid);
*_numK = numK;
fread (&numX, sizeof (int), 1, fid);
*_numX = numX;
*kx = (float *) memalign(16, numK * sizeof (float));
fread (*kx, sizeof (float), numK, fid);
*ky = (float *) memalign(16, numK * sizeof (float));
fread (*ky, sizeof (float), numK, fid);
*kz = (float *) memalign(16, numK * sizeof (float));
fread (*kz, sizeof (float), numK, fid);
*x = (float *) memalign(16, numX * sizeof (float));
fread (*x, sizeof (float), numX, fid);
*y = (float *) memalign(16, numX * sizeof (float));
fread (*y, sizeof (float), numX, fid);
*z = (float *) memalign(16, numX * sizeof (float));
fread (*z, sizeof (float), numX, fid);
*phiR = (float *) memalign(16, numK * sizeof (float));
fread (*phiR, sizeof (float), numK, fid);
*phiI = (float *) memalign(16, numK * sizeof (float));
fread (*phiI, sizeof (float), numK, fid);
fclose (fid);
}
extern "C"
void outputData(char* fName, float* outR, float* outI, int numX)
{
FILE* fid = fopen(fName, "w");
uint32_t tmp32;
if (fid == NULL)
{
fprintf(stderr, "Cannot open output file\n");
exit(-1);
}
/* Write the data size */
tmp32 = numX;
fwrite(&tmp32, sizeof(uint32_t), 1, fid);
/* Write the reconstructed data */
fwrite (outR, sizeof (float), numX, fid);
fwrite (outI, sizeof (float), numX, fid);
fclose (fid);
}
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
#ifdef __cplusplus
extern "C" {
#endif
void inputData(char* fName, int* _numK, int* _numX,
float** kx, float** ky, float** kz,
float** x, float** y, float** z,
float** phiR, float** phiI);
void outputData(char* fName, float* outR, float* outI, int numX);
#ifdef __cplusplus
}
#endif
#include "macros.h"
#define NC 4
#define COARSE_GENERAL
// #define COARSE_SPEC NC
__kernel void
ComputePhiMag_GPU(__global float* phiR, __global float* phiI, __global float* phiMag, int numK) {
int indexK = get_global_id(0);
if (indexK < numK) {
float real = phiR[indexK];
float imag = phiI[indexK];
phiMag[indexK] = real*real + imag*imag;
}
}
__kernel void
ComputeQ_GPU(int numK, int kGlobalIndex,
__global float* x, __global float* y, __global float* z,
__global float* Qr, __global float* Qi, __global struct kValues* ck)
{
float sX[NC];
float sY[NC];
float sZ[NC];
float sQr[NC];
float sQi[NC];
#pragma unroll
for (int tx = 0; tx < NC; tx++) {
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0) + tx;
sX[tx] = x[xIndex];
sY[tx] = y[xIndex];
sZ[tx] = z[xIndex];
sQr[tx] = Qr[xIndex];
sQi[tx] = Qi[xIndex];
}
// Loop over all elements of K in constant mem to compute a partial value
// for X.
int kIndex = 0;
for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK);
kIndex ++, kGlobalIndex ++) {
float kx = ck[kIndex].Kx;
float ky = ck[kIndex].Ky;
float kz = ck[kIndex].Kz;
float pm = ck[kIndex].PhiMag;
#pragma unroll
for (int tx = 0; tx < NC; tx++) {
float expArg = PIx2 *
(kx * sX[tx] +
ky * sY[tx] +
kz * sZ[tx]);
sQr[tx] += pm * cos(expArg);
sQi[tx] += pm * sin(expArg);
}
}
#pragma unroll
for (int tx = 0; tx < NC; tx++) {
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0) + tx;
Qr[xIndex] = sQr[tx];
Qi[xIndex] = sQi[tx];
}
}
#ifndef __MACROS__
#define __MACROS__
#define PI 3.1415926535897932384626433832795029f
#define PIx2 6.2831853071795864769252867665590058f
#define MIN(X,Y) ((X) < (Y) ? (X) : (Y))
#define K_ELEMS_PER_GRID 2048
#define KERNEL_PHI_MAG_THREADS_PER_BLOCK 256 /* 512 */
#define KERNEL_Q_THREADS_PER_BLOCK 256
#define KERNEL_Q_K_ELEMS_PER_GRID 1024
struct kValues {
float Kx;
float Ky;
float Kz;
float PhiMag;
};
#endif
/***************************************************************************
*cr
*cr (C) Copyright 2007 The Board of Trustees of the
*cr University of Illinois
*cr All Rights Reserved
*cr
***************************************************************************/
/*
* C code for creating the Q data structure for fast convolution-based
* Hessian multiplication for arbitrary k-space trajectories.
*
* Inputs:
* kx - VECTOR of kx values, same length as ky and kz
* ky - VECTOR of ky values, same length as kx and kz
* kz - VECTOR of kz values, same length as kx and ky
* x - VECTOR of x values, same length as y and z
* y - VECTOR of y values, same length as x and z
* z - VECTOR of z values, same length as x and y
* phi - VECTOR of the Fourier transform of the spatial basis
* function, evaluated at [kx, ky, kz]. Same length as kx, ky, and kz.
*
* recommended g++ options:
* -O3 -lm -ffast-math -funroll-all-loops
*/
#include <stdio.h>
#include <sys/time.h>
#include <parboil.h>
#include <CL/cl.h>
#include "ocl.h"
#include "file.h"
#include "macros.h"
#include "computeQ.h"
int
main (int argc, char *argv[]) {
int numX, numK; /* Number of X and K values */
int original_numK; /* Number of K values in input file */
float *kx, *ky, *kz; /* K trajectory (3D vectors) */
float *x, *y, *z; /* X coordinates (3D vectors) */
float *phiR, *phiI; /* Phi values (complex) */
float *phiMag; /* Magnitude of Phi */
float *Qr, *Qi; /* Q signal (complex) */
struct kValues* kVals;
struct pb_Parameters *params;
struct pb_TimerSet timers;
pb_InitializeTimerSet(&timers);
/* Read command line */
params = pb_ReadParameters(&argc, argv);
if ((params->inpFiles[0] == NULL) || (params->inpFiles[1] != NULL))
{
fprintf(stderr, "Expecting one input filename\n");
exit(-1);
}
/* Read in data */
pb_SwitchToTimer(&timers, pb_TimerID_IO);
inputData(params->inpFiles[0],
&original_numK, &numX,
&kx, &ky, &kz,
&x, &y, &z,
&phiR, &phiI);
/* Reduce the number of k-space samples if a number is given
* on the command line */
if (argc < 2)
numK = original_numK;
else
{
int inputK;
char *end;
inputK = strtol(argv[1], &end, 10);
if (end == argv[1])
{
fprintf(stderr, "Expecting an integer parameter\n");
exit(-1);
}
numK = MIN(inputK, original_numK);
}
printf("%d pixels in output; %d samples in trajectory; using %d samples\n",
numX, original_numK, numK);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
/* Create CPU data structures */
createDataStructsCPU(numK, numX, &phiMag, &Qr, &Qi);
/* GPU section 1 (precompute PhiMag) */
{
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
computePhiMag_GPU(numK, phiR, phiI, phiMag);
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
kVals = (struct kValues*)calloc(numK, sizeof (struct kValues));
int k;
for (k = 0; k < numK; k++) {
kVals[k].Kx = kx[k];
kVals[k].Ky = ky[k];
kVals[k].Kz = kz[k];
kVals[k].PhiMag = phiMag[k];
}
free(phiMag);
/* GPU section 2 */
{
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
pb_SwitchToTimer(&timers, pb_TimerID_KERNEL);
computeQ_GPU(numK, numX, x, y, z, kVals, Qr, Qi);
pb_SwitchToTimer(&timers, pb_TimerID_COPY);
}
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
if (params->outFile)
{
/* Write Q to file */
pb_SwitchToTimer(&timers, pb_TimerID_IO);
outputData(params->outFile, Qr, Qi, numX);
pb_SwitchToTimer(&timers, pb_TimerID_COMPUTE);
}
free (kx);
free (ky);
free (kz);
free (x);
free (y);
free (z);
free (phiR);
free (phiI);
free (kVals);
free (Qr);
free (Qi);
pb_SwitchToTimer(&timers, pb_TimerID_NONE);
pb_PrintTimerSet(&timers);
pb_FreeParameters(params);
return 0;
}
#include <CL/cl.h>
#include <stdio.h>
#include <string.h>
#include "ocl.h"
char* readFile(const char* fileName)
{
FILE* fp;
fp = fopen(fileName,"r");
if(fp == NULL)
{
printf("Error 1!\n");
exit(1);
}
fseek(fp,0,SEEK_END);
long size = ftell(fp);
rewind(fp);
char* buffer = (char*)malloc(sizeof(char)*(size+1));
if(buffer == NULL)
{
printf("Error 2!\n");
fclose(fp);
exit(1);
}
size_t res = fread(buffer,1,size,fp);
if(res != size)
{
printf("Error 3!\n");
fclose(fp);
exit(1);
}
buffer[size] = 0;
fclose(fp);
return buffer;
}
void clMemSet(clPrmtr* clPrm, cl_mem buf, int val, size_t size)
{
cl_int clStatus;
char* temp = (char*)malloc(size);
memset(temp,val,size);
clStatus = clEnqueueWriteBuffer(clPrm->clCommandQueue,buf,CL_TRUE,0,size,temp,0,NULL,NULL);
CHECK_ERROR("clEnqueueWriteBuffer")
free(temp);
}
#ifndef __OCLH__
#define __OCLH__
typedef struct {
cl_context clContext;
cl_command_queue clCommandQueue;
cl_kernel clKernel;
} clPrmtr;
void clMemSet(clPrmtr*, cl_mem, int, size_t);
char* readFile(const char*);
#define CHECK_ERROR(errorMessage) \
if(clStatus != CL_SUCCESS) \
{ \
printf("Error: %s!\n",errorMessage); \
printf("Line: %d\n",__LINE__); \
exit(1); \
}
#endif
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment