Skip to content
Snippets Groups Projects
Commit 0198cffc authored by Prakalp Srivastava's avatar Prakalp Srivastava
Browse files

Preparing visc mri-q benchmark

parent 1aa1ffb0
No related branches found
No related tags found
No related merge requests found
PARBOIL_ROOT = /home/psrivas2/current-test/parboil
BIN = mri-q
SRCDIR = src/opencl
BUILDDIR = build/opencl
DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(BIN)
APP = mri-q
# Default compile visc
ifeq ($(VERSION),)
VERSION = visc
endif
# Default use small test case
ifeq ($(TEST),)
TEST = small
endif
BIN = $(addsuffix -$(VERSION), $(APP))
SRCDIR = src/$(VERSION)
BUILDDIR = build/$(VERSION)
DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP)
ifeq ($(TEST),small)
INPUT = $(DATASET_DIR)/small/input/32_32_32_dataset.bin
REF_OUTPUT = $(DATASET_DIR)/small/output/32_32_32_dataset.out
OUTPUT = run/small/32_32_32_dataset.out
RUNDIR = run/small
OUTPUT = $(RUNDIR)/32_32_32_dataset.out
else
INPUT = $(DATASET_DIR)/large/input/64_64_64_dataset.bin
REF_OUTPUT = $(DATASET_DIR)/large/output/64_64_64_dataset.out
OUTPUT = run/large/64_64_64_dataset.out
RUNDIR = run/large
OUTPUT = $(RUNDIR)/64_64_64_dataset.out
endif
ARGS = -i $(INPUT) -o $(OUTPUT)
......
......@@ -73,6 +73,7 @@ void computeQ_GPU (int numK,int numX,
int QGrid;
for (QGrid = 0; QGrid < QGrids; QGrid++) {
printf("Kernel Q call %d\n", QGrid);
// Put the tile of K values into constant mem
int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID;
struct kValues* kValsTile = kVals + QGridBase;
......
......@@ -19,7 +19,6 @@ ComputeQ_GPU(int numK, int kGlobalIndex,
__global float* x, __global float* y, __global float* z,
__global float* Qr, __global float* Qi, __global struct kValues* ck)
{
#ifdef COARSE_GENERAL
float sX[NC];
float sY[NC];
......@@ -66,202 +65,4 @@ ComputeQ_GPU(int numK, int kGlobalIndex,
Qi[xIndex] = sQi[tx];
}
#elif (COARSE_SPEC==2)
float2 sX;
float2 sY;
float2 sZ;
float2 sQr;
float2 sQi;
{
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC*get_local_id(0);
sX = *(((__global float2*)(x + xIndex)));
sY = *(((__global float2*)(y + xIndex)));
sZ = *(((__global float2*)(z + xIndex)));
sQr = *(((__global float2*)(Qr + xIndex)));
sQi = *(((__global float2*)(Qi + xIndex)));
}
// Loop over all elements of K in constant mem to compute a partial value
// for X.
int kIndex = 0;
for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK);
kIndex += 1, kGlobalIndex += 1) {
float kx = ck[kIndex].Kx;
float ky = ck[kIndex].Ky;
float kz = ck[kIndex].Kz;
float pm = ck[kIndex].PhiMag;
// #pragma unroll
float expArg;
expArg = PIx2 *
(kx * sX.x +
ky * sY.x +
kz * sZ.x);
sQr.x += pm * cos(expArg);
sQi.x += pm * sin(expArg);
expArg = PIx2 *
(kx * sX.y +
ky * sY.y +
kz * sZ.y);
sQr.y += pm * cos(expArg);
sQi.y += pm * sin(expArg);
}
{
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0);
*((__global float2*)(Qr + xIndex)) = sQr;
*((__global float2*)(Qi + xIndex)) = sQi;
}
#elif (COARSE_SPEC==4)
float4 sX;
float4 sY;
float4 sZ;
float4 sQr;
float4 sQi;
{
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC*get_local_id(0);
sX = *((__global float4*)(x + xIndex));
sY = *((__global float4*)(y + xIndex));
sZ = *((__global float4*)(z + xIndex));
sQr = *((__global float4*)(Qr + xIndex));
sQi = *((__global float4*)(Qi + xIndex));
}
// Loop over all elements of K in constant mem to compute a partial value
// for X.
int kIndex = 0;
for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK);
kIndex += 1, kGlobalIndex += 1) {
float kx = ck[kIndex].Kx;
float ky = ck[kIndex].Ky;
float kz = ck[kIndex].Kz;
float pm = ck[kIndex].PhiMag;
// #pragma unroll
float4 expArg;
expArg.x = PIx2 *
(kx * sX.x +
ky * sY.x +
kz * sZ.x);
sQr.x += pm * cos(expArg.x);
sQi.x += pm * sin(expArg.x);
expArg.y = PIx2 *
(kx * sX.y +
ky * sY.y +
kz * sZ.y);
sQr.y += pm * cos(expArg.y);
sQi.y += pm * sin(expArg.y);
expArg.z = PIx2 *
(kx * sX.z +
ky * sY.z +
kz * sZ.z);
sQr.z += pm * cos(expArg.z);
sQi.z += pm * sin(expArg.z);
expArg.w = PIx2 *
(kx * sX.w +
ky * sY.w +
kz * sZ.w);
sQr.w += pm * cos(expArg.w);
sQi.w += pm * sin(expArg.w);
}
{
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0);
*((__global float4*)(Qr + xIndex)) = sQr;
*((__global float4*)(Qi + xIndex)) = sQi;
}
#else
// Uncoarse
#ifdef UNROLL_2X
float sX;
float sY;
float sZ;
float sQr;
float sQi;
// Determine the element of the X arrays computed by this thread
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + get_local_id(0);
// Read block's X values from global mem to shared mem
sX = x[xIndex];
sY = y[xIndex];
sZ = z[xIndex];
sQr = Qr[xIndex];
sQi = Qi[xIndex];
// Loop over all elements of K in constant mem to compute a partial value
// for X.
int kIndex = 0;
if (numK % 2) {
float expArg = PIx2 * (ck[0].Kx * sX + ck[0].Ky * sY + ck[0].Kz * sZ);
sQr += ck[0].PhiMag * cos(expArg);
sQi += ck[0].PhiMag * sin(expArg);
kIndex++;
kGlobalIndex++;
}
for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK);
kIndex += 2, kGlobalIndex += 2) {
float expArg = PIx2 * (ck[kIndex].Kx * sX +
ck[kIndex].Ky * sY +
ck[kIndex].Kz * sZ);
sQr += ck[kIndex].PhiMag * cos(expArg);
sQi += ck[kIndex].PhiMag * sin(expArg);
int kIndex1 = kIndex + 1;
float expArg1 = PIx2 * (ck[kIndex1].Kx * sX +
ck[kIndex1].Ky * sY +
ck[kIndex1].Kz * sZ);
sQr += ck[kIndex1].PhiMag * cos(expArg1);
sQi += ck[kIndex1].PhiMag * sin(expArg1);
}
Qr[xIndex] = sQr;
Qi[xIndex] = sQi;
#else
float sX;
float sY;
float sZ;
float sQr;
float sQi;
// Determine the element of the X arrays computed by this thread
int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + get_local_id(0);
// Read block's X values from global mem to shared mem
sX = x[xIndex];
sY = y[xIndex];
sZ = z[xIndex];
sQr = Qr[xIndex];
sQi = Qi[xIndex];
int kIndex = 0;
for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK);
kIndex ++, kGlobalIndex ++) {
float expArg = PIx2 * (ck[kIndex].Kx * sX +
ck[kIndex].Ky * sY +
ck[kIndex].Kz * sZ);
sQr += ck[kIndex].PhiMag * cos(expArg);
sQi += ck[kIndex].PhiMag * sin(expArg);
}
Qr[xIndex] = sQr;
Qi[xIndex] = sQi;
#endif /* UNROLL_2X */
#endif
}
......@@ -62,7 +62,7 @@ endif
default: $(FAILSAFE) $(BUILDDIR) $(BIN)
run:
run : $(RUNDIR)
@echo "Resolving OpenCL library..."
@$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ldd ./$(BIN) | grep OpenCL
@$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ./$(BIN) $(ARGS)
......@@ -81,6 +81,9 @@ clean :
$(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o
$(CXX) $^ -o $@ $(LDFLAGS)
$(RUNDIR) :
mkdir -p $(RUNDIR)
$(BUILDDIR) :
mkdir -p $(BUILDDIR)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment