diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index c402848cbd1328ba5a0e790586e4030ac9ca6f2c..7df93bd099e837335656eb6f5f30aa34e4cb0f41 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -55,7 +55,7 @@ public:
 class Kernel {
 public:
   Kernel(Function* _KF, std::vector<unsigned> _inArgMap =
-         std::vector<unsigned>(), unsigned _gridDim = 0, std::vector<Value*> 
+         std::vector<unsigned>(), unsigned _gridDim = 0, std::vector<Value*>
          _globalWGSize = std::vector<Value*>(),
          unsigned _blockDim = 0,
          std::vector<Value*> _localWGSize = std::vector<Value*>())
@@ -79,11 +79,19 @@ public:
   std::vector<unsigned> getInArgMap() {
     return inArgMap;
   }
+
+  // Returns true when the kernel has a local work group size, i.e. blockDim
+  // is non-zero.
+  bool hasLocalWG() {
+    return blockDim != 0;
+  }
 };
 
 // Helper function declarations
 static void getExecuteNodeParams(Value* &, Value* &, Value* &, Kernel*,
                                  ValueToValueMapTy&, Instruction*);
+static Value* genWorkGroupPtr(std::vector<Value*>&, ValueToValueMapTy&,
+                              Instruction*, const Twine& WGName = "WGSize");
 static bool hasAttribute(Function*, unsigned, Attribute::AttrKind);
 static std::string getPTXFilename(const Module&);
 static std::string getFilenameFromModule(const Module& M);
@@ -723,6 +731,10 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
   int pLevel = PNode->getLevel();
   int pReplFactor = PNode->getNumOfDim();
 
+  // Choose parent node as kernel launch if:
+  // (1) Parent is the top level node i.e., Root of DFG
+  // OR
+  // (2) Parent does not have multiple instances
   if (!pLevel || !pReplFactor) {
     KernelLaunchNode = PNode;
     kernel = new Kernel(NULL, N->getInArgMap(), N->getNumOfDim(), N->getDimLimits());
@@ -742,7 +754,8 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
     //}
   }
   else {
-    errs() << "*************** Entering else part **************\n";
+    // Converting a 2-level DFG to opencl kernel
+    errs() << "*************** Kernel Gen: 2-Level Hierarchy **************\n";
     KernelLaunchNode = PNode->getParent();
     assert((PNode->getNumOfDim() == N->getNumOfDim()) && "Dimension number must match");
     // Contains the instructions generating the kernel configuration parameters
@@ -1272,53 +1285,72 @@ void CodeGenTraversal::transformFunctionToVoid(Function* F) {
 static void getExecuteNodeParams(Value* &workDim, Value* &LocalWGPtr, Value*
     &GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) {
 
-  // Get int64_t and or ease of use
-  Type* Int64Ty = Type::getInt64Ty(getGlobalContext());
-
   // Assign number of dimenstions a constant value
   workDim = ConstantInt::get(Type::getInt32Ty(getGlobalContext()), kernel->gridDim);
 
-  // For now, local work group size if null
-  LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(getGlobalContext()));
+  // Pass a null local work group size unless the kernel specifies one
+  if(!kernel->hasLocalWG()) {
+    LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(getGlobalContext()));
+  }
+  else {
+    LocalWGPtr = genWorkGroupPtr(kernel->localWGSize, VMap, IB, "LocalWGSize");
+  }
 
-  // Global Work Group type is [#dim x i64]
-  Type* GlobalWGTy = ArrayType::get(Int64Ty, kernel->gridDim);
+  GlobalWGPtr = genWorkGroupPtr(kernel->globalWGSize, VMap, IB, "GlobalWGSize");
+  DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
+}
+
+// Emit, before IB, a stack array [WGSize.size() x i64] named WGName, store
+// each work group dimension size into it (cast to i64 when it has another
+// integer type), and return an i64* to its first element. Sizes are looked up
+// through VMap. Note: entries of WGSize that need a cast are overwritten with
+// the generated cast instruction (see FIXME below).
+static Value* genWorkGroupPtr(std::vector<Value*>& WGSize, ValueToValueMapTy& VMap, Instruction* IB, const Twine& WGName) {
+  Value* WGPtr;
+  // Get int64_t for ease of use
+  Type* Int64Ty = Type::getInt64Ty(getGlobalContext());
+
+  // Work Group type is [#dim x i64]
+  Type* WGTy = ArrayType::get(Int64Ty, WGSize.size());
   // Allocate space of Global work group data on stack and get pointer to
   // first element.
-  AllocaInst* GlobalWG = new AllocaInst(GlobalWGTy, "GlobalWGSize", IB);
-  GlobalWGPtr = BitCastInst::CreatePointerCast(GlobalWG, Int64Ty->getPointerTo(), GlobalWG->getName()+".0", IB);
-  Value* nextDim = GlobalWGPtr;
-  DEBUG(errs() << *GlobalWGPtr << "\n");
+  AllocaInst* WG = new AllocaInst(WGTy, WGName, IB);
+  WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB);
+  Value* nextDim = WGPtr;
+  DEBUG(errs() << *WGPtr << "\n");
 
   // Iterate over the number of dimensions and store the global work group
   // size in that dimension
-  for(unsigned i=0; i < kernel->gridDim; i++) {
-    assert(kernel->globalWGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
-    if(kernel->globalWGSize[i]->getType() != Int64Ty) {
+  for(unsigned i=0; i < WGSize.size(); i++) {
+    assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
+    if(WGSize[i]->getType() != Int64Ty) {
       // If number of dimensions are mentioned in any other integer format,
       // generate code to extend it to i64. We need to use the mapped value in
       // the new generated function, hence the use of VMap
-      kernel->globalWGSize[i] = BitCastInst::CreateIntegerCast(VMap[kernel->globalWGSize[i]], Int64Ty, true, "", IB);
-      StoreInst* SI = new StoreInst(kernel->globalWGSize[i], nextDim, IB);
+      // FIXME: Why are we changing the kernel WGSize vector here?
+      // NOTE(review): isSigned=true requests sign extension although the debug
+      // message below says "Zero extended" -- confirm which is intended.
+      WGSize[i] = BitCastInst::CreateIntegerCast(VMap[WGSize[i]], Int64Ty, true, "", IB);
+      StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB);
       DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n");
     } else {
       // Store the value representing work group size in ith dimension on
       // stack
-      StoreInst* SI = new StoreInst(VMap[kernel->globalWGSize[i]], nextDim, IB);
+      StoreInst* SI = new StoreInst(VMap[WGSize[i]], nextDim, IB);
       DEBUG(errs() << "\t Work group size: " << *SI << "\n");
     }
-    if(i+1 < kernel->gridDim) {
+    if(i+1 < WGSize.size()) {
       // Move to next dimension
       GetElementPtrInst* GEP = GetElementPtrInst::Create(nextDim,
           ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)),
-          GlobalWG->getName()+"."+Twine(i+1),
+          WG->getName()+"."+Twine(i+1),
           IB);
       DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n");
       nextDim = GEP;
     }
   }
+  return WGPtr;
 
-  DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
 }
 
 // Find if argument has the given attribute