diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/Makefile b/llvm/test/VISC/parboil/benchmarks/mri-q/Makefile index 96b40e00f70e4bddb31f940f80a7c67e35b698b5..1023c6d35c00f711d3791a42cd4bb5839c23967c 100644 --- a/llvm/test/VISC/parboil/benchmarks/mri-q/Makefile +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/Makefile @@ -1,21 +1,32 @@ PARBOIL_ROOT = /home/psrivas2/current-test/parboil -BIN = mri-q -SRCDIR = src/opencl -BUILDDIR = build/opencl -DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(BIN) +APP = mri-q +# Default compile visc +ifeq ($(VERSION),) + VERSION = visc +endif + +# Default use small test case ifeq ($(TEST),) TEST = small endif +BIN = $(addsuffix -$(VERSION), $(APP)) + +SRCDIR = src/$(VERSION) +BUILDDIR = build/$(VERSION) +DATASET_DIR = $(PARBOIL_ROOT)/datasets/$(APP) + ifeq ($(TEST),small) INPUT = $(DATASET_DIR)/small/input/32_32_32_dataset.bin REF_OUTPUT = $(DATASET_DIR)/small/output/32_32_32_dataset.out - OUTPUT = run/small/32_32_32_dataset.out + RUNDIR = run/small + OUTPUT = $(RUNDIR)/32_32_32_dataset.out else INPUT = $(DATASET_DIR)/large/input/64_64_64_dataset.bin REF_OUTPUT = $(DATASET_DIR)/large/output/64_64_64_dataset.out - OUTPUT = run/large/64_64_64_dataset.out + RUNDIR = run/large + OUTPUT = $(RUNDIR)/64_64_64_dataset.out endif ARGS = -i $(INPUT) -o $(OUTPUT) diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/computeQ.c b/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/computeQ.c index 64c2b508c607af52e5def54ff63a415f47bee965..cf38fe40898d179bafc929432dbe6c8fbf1c6b0f 100644 --- a/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/computeQ.c +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/computeQ.c @@ -73,6 +73,7 @@ void computeQ_GPU (int numK,int numX, int QGrid; for (QGrid = 0; QGrid < QGrids; QGrid++) { + printf("Kernel Q call %d\n", QGrid); // Put the tile of K values into constant mem int QGridBase = QGrid * KERNEL_Q_K_ELEMS_PER_GRID; struct kValues* kValsTile = kVals + QGridBase; diff --git a/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/kernels.cl b/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/kernels.cl index 1835060cff7ab79e2becdb319b20dcb37c2018e6..37455d5843c863b4d21c556c03cfbc6821a30b98 100644 --- a/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/kernels.cl +++ b/llvm/test/VISC/parboil/benchmarks/mri-q/src/opencl/kernels.cl @@ -19,7 +19,6 @@ ComputeQ_GPU(int numK, int kGlobalIndex, __global float* x, __global float* y, __global float* z, __global float* Qr, __global float* Qi, __global struct kValues* ck) { -#ifdef COARSE_GENERAL float sX[NC]; float sY[NC]; @@ -66,202 +65,4 @@ ComputeQ_GPU(int numK, int kGlobalIndex, Qi[xIndex] = sQi[tx]; } -#elif (COARSE_SPEC==2) - - float2 sX; - float2 sY; - float2 sZ; - float2 sQr; - float2 sQi; - - { - int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC*get_local_id(0); - - sX = *(((__global float2*)(x + xIndex))); - sY = *(((__global float2*)(y + xIndex))); - sZ = *(((__global float2*)(z + xIndex))); - sQr = *(((__global float2*)(Qr + xIndex))); - sQi = *(((__global float2*)(Qi + xIndex))); - } - - // Loop over all elements of K in constant mem to compute a partial value - // for X. - int kIndex = 0; - for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK); - kIndex += 1, kGlobalIndex += 1) { - float kx = ck[kIndex].Kx; - float ky = ck[kIndex].Ky; - float kz = ck[kIndex].Kz; - float pm = ck[kIndex].PhiMag; - - // #pragma unroll - float expArg; - expArg = PIx2 * - (kx * sX.x + - ky * sY.x + - kz * sZ.x); - sQr.x += pm * cos(expArg); - sQi.x += pm * sin(expArg); - expArg = PIx2 * - (kx * sX.y + - ky * sY.y + - kz * sZ.y); - sQr.y += pm * cos(expArg); - sQi.y += pm * sin(expArg); - } - - { - int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0); - *((__global float2*)(Qr + xIndex)) = sQr; - *((__global float2*)(Qi + xIndex)) = sQi; - } - -#elif (COARSE_SPEC==4) - - float4 sX; - float4 sY; - float4 sZ; - float4 sQr; - float4 sQi; - - { - int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC*get_local_id(0); - - sX = *((__global float4*)(x + xIndex)); - sY = *((__global float4*)(y + xIndex)); - sZ = *((__global float4*)(z + xIndex)); - sQr = *((__global float4*)(Qr + xIndex)); - sQi = *((__global float4*)(Qi + xIndex)); - } - - // Loop over all elements of K in constant mem to compute a partial value - // for X. - int kIndex = 0; - for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK); - kIndex += 1, kGlobalIndex += 1) { - float kx = ck[kIndex].Kx; - float ky = ck[kIndex].Ky; - float kz = ck[kIndex].Kz; - float pm = ck[kIndex].PhiMag; - - // #pragma unroll - float4 expArg; - expArg.x = PIx2 * - (kx * sX.x + - ky * sY.x + - kz * sZ.x); - sQr.x += pm * cos(expArg.x); - sQi.x += pm * sin(expArg.x); - expArg.y = PIx2 * - (kx * sX.y + - ky * sY.y + - kz * sZ.y); - sQr.y += pm * cos(expArg.y); - sQi.y += pm * sin(expArg.y); - expArg.z = PIx2 * - (kx * sX.z + - ky * sY.z + - kz * sZ.z); - sQr.z += pm * cos(expArg.z); - sQi.z += pm * sin(expArg.z); - expArg.w = PIx2 * - (kx * sX.w + - ky * sY.w + - kz * sZ.w); - sQr.w += pm * cos(expArg.w); - sQi.w += pm * sin(expArg.w); - } - - { - int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + NC * get_local_id(0); - *((__global float4*)(Qr + xIndex)) = sQr; - *((__global float4*)(Qi + xIndex)) = sQi; - } - -#else - -// Uncoarse - -#ifdef UNROLL_2X - - float sX; - float sY; - float sZ; - float sQr; - float sQi; - - // Determine the element of the X arrays computed by this thread - int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + get_local_id(0); - - // Read block's X values from global mem to shared mem - sX = x[xIndex]; - sY = y[xIndex]; - sZ = z[xIndex]; - sQr = Qr[xIndex]; - sQi = Qi[xIndex]; - - // Loop over all elements of K in constant mem to compute a partial value - // for X. - int kIndex = 0; - if (numK % 2) { - float expArg = PIx2 * (ck[0].Kx * sX + ck[0].Ky * sY + ck[0].Kz * sZ); - sQr += ck[0].PhiMag * cos(expArg); - sQi += ck[0].PhiMag * sin(expArg); - kIndex++; - kGlobalIndex++; - } - - for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK); - kIndex += 2, kGlobalIndex += 2) { - float expArg = PIx2 * (ck[kIndex].Kx * sX + - ck[kIndex].Ky * sY + - ck[kIndex].Kz * sZ); - sQr += ck[kIndex].PhiMag * cos(expArg); - sQi += ck[kIndex].PhiMag * sin(expArg); - - int kIndex1 = kIndex + 1; - float expArg1 = PIx2 * (ck[kIndex1].Kx * sX + - ck[kIndex1].Ky * sY + - ck[kIndex1].Kz * sZ); - sQr += ck[kIndex1].PhiMag * cos(expArg1); - sQi += ck[kIndex1].PhiMag * sin(expArg1); - } - - Qr[xIndex] = sQr; - Qi[xIndex] = sQi; - -#else - - float sX; - float sY; - float sZ; - float sQr; - float sQi; - - // Determine the element of the X arrays computed by this thread - int xIndex = get_group_id(0)*KERNEL_Q_THREADS_PER_BLOCK + get_local_id(0); - - // Read block's X values from global mem to shared mem - sX = x[xIndex]; - sY = y[xIndex]; - sZ = z[xIndex]; - sQr = Qr[xIndex]; - sQi = Qi[xIndex]; - - int kIndex = 0; - for (; (kIndex < KERNEL_Q_K_ELEMS_PER_GRID) && (kGlobalIndex < numK); - kIndex ++, kGlobalIndex ++) { - float expArg = PIx2 * (ck[kIndex].Kx * sX + - ck[kIndex].Ky * sY + - ck[kIndex].Kz * sZ); - sQr += ck[kIndex].PhiMag * cos(expArg); - sQi += ck[kIndex].PhiMag * sin(expArg); - } - - Qr[xIndex] = sQr; - Qi[xIndex] = sQi; - -#endif /* UNROLL_2X */ - -#endif } diff --git a/llvm/test/VISC/parboil/common/mk/opencl.mk b/llvm/test/VISC/parboil/common/mk/opencl.mk index ba7d9cabfd2c3dc1b3cfe6d9271427e3f54b42be..3c701f0e2ad0fbfab48c50390ed90651274b49d7 100644 --- a/llvm/test/VISC/parboil/common/mk/opencl.mk +++ b/llvm/test/VISC/parboil/common/mk/opencl.mk @@ -62,7 +62,7 @@ endif default: $(FAILSAFE) $(BUILDDIR) $(BIN) -run: +run : $(RUNDIR) @echo "Resolving OpenCL library..." @$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ldd ./$(BIN) | grep OpenCL @$(shell echo $(RUNTIME_ENV)) LD_LIBRARY_PATH=$(OPENCL_LIB_PATH) ./$(BIN) $(ARGS) @@ -81,6 +81,9 @@ clean : $(BIN) : $(OBJS) $(BUILDDIR)/parboil_opencl.o $(CXX) $^ -o $@ $(LDFLAGS) +$(RUNDIR) : + mkdir -p $(RUNDIR) + $(BUILDDIR) : mkdir -p $(BUILDDIR)