diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 869ca5d5927ffb59db62f1056f77ea357daa77d0..47b7e18856406a3f1853550587fd662a267a3e47 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -1337,12 +1337,18 @@ static void getExecuteNodeParams(Value* &workDim, Value* &LocalWGPtr, Value* if(isa<Argument>(kernel->globalWGSize[i])) kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]]; } + // For OpenCL, global work group size is the total bumber of instances in each // dimension. So, multiply local and global dim limits. std::vector<Value*> globalWGSizeInsts; - for (unsigned i = 0; i < kernel->gridDim; i++) { - BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB); - globalWGSizeInsts.push_back(MulInst); + if(kernel->hasLocalWG()) { + for (unsigned i = 0; i < kernel->gridDim; i++) { + BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB); + globalWGSizeInsts.push_back(MulInst); + } + } + else { + globalWGSizeInsts = kernel->globalWGSize; } GlobalWGPtr = genWorkGroupPtr(globalWGSizeInsts, VMap, IB, "GlobalWGSize"); DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n"); diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index 7186008fe895e8f025603bc9949ee5e46f7bcb48..c18e2e21cf0b98069081c1f538a0cf24b9e2f739 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -182,7 +182,7 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t* } cl_int errcode = clEnqueueNDRangeKernel(Context->clCommandQue, - Context->clKernel, workDim, NULL, GlobalWG, LocalWG, 0, NULL, NULL); + Context->clKernel, workDim, NULL, GlobalWG, (localWorkSize == NULL)? NULL : LocalWG, 0, NULL, NULL); DEBUG(cout << "Enqueued kernel\n"); checkErr(errcode, CL_SUCCESS, "Failure to enqueue kernel"); return event;