From 96fcb4fa0585aa32f5b6d8ac2818cb3042a45085 Mon Sep 17 00:00:00 2001 From: Prakalp Srivastava <psrivas2@illinois.edu> Date: Fri, 5 Dec 2014 05:49:08 +0000 Subject: [PATCH] visc_gemm_ptx fail fixed. It was due to a problem with runtime and with PTX pass generating multiplication of local and global dim statements even in case of 1 level node, when local dim is null M llvm/projects/visc-rt/visc-rt.cpp M llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp --- .../lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp | 12 +++++++++--- llvm/projects/visc-rt/visc-rt.cpp | 2 +- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 869ca5d592..47b7e18856 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -1337,12 +1337,18 @@ static void getExecuteNodeParams(Value* &workDim, Value* &LocalWGPtr, Value* if(isa<Argument>(kernel->globalWGSize[i])) kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]]; } + // For OpenCL, global work group size is the total bumber of instances in each // dimension. So, multiply local and global dim limits. std::vector<Value*> globalWGSizeInsts; - for (unsigned i = 0; i < kernel->gridDim; i++) { - BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB); - globalWGSizeInsts.push_back(MulInst); + if(kernel->hasLocalWG()) { + for (unsigned i = 0; i < kernel->gridDim; i++) { + BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB); + globalWGSizeInsts.push_back(MulInst); + } + } + else { + globalWGSizeInsts = kernel->globalWGSize; } GlobalWGPtr = genWorkGroupPtr(globalWGSizeInsts, VMap, IB, "GlobalWGSize"); DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n"); diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index 7186008fe8..c18e2e21cf 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -182,7 +182,7 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t* } cl_int errcode = clEnqueueNDRangeKernel(Context->clCommandQue, - Context->clKernel, workDim, NULL, GlobalWG, LocalWG, 0, NULL, NULL); + Context->clKernel, workDim, NULL, GlobalWG, (localWorkSize == NULL)? NULL : LocalWG, 0, NULL, NULL); DEBUG(cout << "Enqueued kernel\n"); checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel"); return event; -- GitLab