diff --git a/llvm/include/llvm/IR/DFGraph.h b/llvm/include/llvm/IR/DFGraph.h index e5a448afb407a4fe31d7b4b26262083ceaf18d61..e379ca4296f8fe4754152ef6757483622736b049 100644 --- a/llvm/include/llvm/IR/DFGraph.h +++ b/llvm/include/llvm/IR/DFGraph.h @@ -368,7 +368,7 @@ public: DFEdge* getInDFEdgeAt(unsigned inPort); DFEdge* getOutDFEdgeAt(unsigned outPort); std::map<unsigned, unsigned> getInArgMap(); - std::map<unsigned, Value*> getSharedInArgMap(); + std::map<unsigned, std::pair<Value*, unsigned> > getSharedInArgMap(); std::vector<unsigned> getOutArgMap(); int getAncestorHops(DFNode* N); bool hasSideEffects(); @@ -654,14 +654,14 @@ std::map<unsigned, unsigned> DFNode::getInArgMap() { return map; } -// Only Allocation Nodes -std::map<unsigned, Value*> DFNode::getSharedInArgMap() { - std::map<unsigned, Value*> map; +// Only Allocation Nodes - only detect relevant indices +std::map<unsigned, std::pair<Value*, unsigned> > DFNode::getSharedInArgMap() { + std::map<unsigned, std::pair<Value*, unsigned> > map; for (unsigned i = 0; i < InDFEdges.size(); i++) { DFEdge* E = getInDFEdgeAt(i); if (!E->getSourceDF()->isAllocationNode()) continue; - map[i] = NULL; + map[i] = std::pair<Value *, unsigned>(NULL,0); } return map; } diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 942b79f6861921e7aee6a637b5b6ed3b70948322..cc21b5611d8732eba99d8e3bbbb7bf51a036337e 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -60,12 +60,12 @@ public: class Kernel { public: Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::map<unsigned, unsigned> _inArgMap = - std::map<unsigned, unsigned>(), std::map<unsigned, Value*> _sharedInArgMap = - std::map<unsigned, Value*>(), std::vector<unsigned> _outArgMap = - std::vector<unsigned>(), unsigned _gridDim = 0, std::vector<Value*> - _globalWGSize = std::vector<Value*>(), - unsigned _blockDim = 0, - 
std::vector<Value*> _localWGSize = std::vector<Value*>()) + std::map<unsigned, unsigned>(), + std::map<unsigned, std::pair<Value*, unsigned> > _sharedInArgMap = + std::map<unsigned, std::pair<Value*, unsigned> >(), + std::vector<unsigned> _outArgMap = std::vector<unsigned>(), + unsigned _gridDim = 0, std::vector<Value*> _globalWGSize = std::vector<Value*>(), + unsigned _blockDim = 0, std::vector<Value*> _localWGSize = std::vector<Value*>()) : KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap), sharedInArgMap(_sharedInArgMap), outArgMap(_outArgMap), gridDim(_gridDim), globalWGSize(_globalWGSize), blockDim(_blockDim), localWGSize(_localWGSize) { @@ -80,9 +80,10 @@ public: DFLeafNode* KernelLeafNode; std::map<unsigned, unsigned> inArgMap; // Map for shared memory arguments - std::map<unsigned, Value*> sharedInArgMap; + std::map<unsigned, std::pair<Value*, unsigned> > sharedInArgMap; // Fields for (potential) allocation node DFLeafNode* AllocationNode; + Function* AllocationFunction; std::map<unsigned, unsigned> allocInArgMap; std::vector<unsigned> outArgMap; @@ -99,10 +100,10 @@ public: inArgMap = map; } - std::map<unsigned, Value*> getSharedInArgMap() { + std::map<unsigned, std::pair<Value*, unsigned> > getSharedInArgMap() { return sharedInArgMap; } - void setSharedInArgMap(std::map<unsigned, Value*> map) { + void setSharedInArgMap(std::map<unsigned, std::pair<Value*, unsigned> > map) { sharedInArgMap = map; } @@ -132,7 +133,7 @@ static std::string getFilenameFromModule(const Module& M); static void changeDataLayout(Module &); static void changeTargetTriple(Module &); static void findReturnInst(Function *, std::vector<ReturnInst *> &); - +static void findIntrinsicInst(Function *, Intrinsic::ID, std::vector<IntrinsicInst *> &); // DFG2LLVM_NVPTX - The first implementation. 
struct DFG2LLVM_NVPTX : public DFG2LLVM { @@ -508,51 +509,126 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi } DEBUG(errs() << "Setup output edges of node and insert visc api\n"); - std::map<unsigned, Value*> kernelSharedInArgMap = K->getSharedInArgMap(); - - for(std::map<unsigned, Value*>::iterator ib = kernelSharedInArgMap.begin(), - ie = kernelSharedInArgMap.end(); ib != ie; ++ib) { - unsigned i = ib->first; - Value* inputVal = ib->second; + std::map<unsigned, std::pair<Value*, unsigned> > kernelSharedInArgMap = + K->getSharedInArgMap(); - DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); + bool constSizes = true; + for (std::map<unsigned, std::pair<Value*, unsigned> >::iterator + ib = kernelSharedInArgMap.begin(), + ie = kernelSharedInArgMap.end(); ib != ie && constSizes; ++ib) { + Value* sizeVal = ib->second.first; + constSizes = isa<Constant>(sizeVal); + } - // input value has been obtained. - // inputVal is a scalar value - if (i % 2 == 0) { // Sharem memory ptr argument - scalar at size position - switchToTimer(visc_TimerID_COPY_SCALAR, RI); + if (constSizes) { + for (std::map<unsigned, std::pair<Value*, unsigned> >::iterator + ib = kernelSharedInArgMap.begin(), + ie = kernelSharedInArgMap.end(); ib != ie; ++ib) { + unsigned i = ib->first; + Value* inputVal = ib->second.first; + + DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); + + // input value has been obtained. 
+ // inputVal is a scalar value + if (i % 2 == 0) { // Shared memory ptr argument - scalar at size position + switchToTimer(visc_TimerID_COPY_SCALAR, RI); + + assert(isa<Constant>(inputVal) && "Only constant shared memory size is supported"); + + Value* setInputArgs[] = {GraphID, + ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext())), + ConstantInt::get(Type::getInt32Ty(M.getContext()),i), + inputVal + }; + CallInst::Create(llvm_visc_ocl_argument_scalar, + ArrayRef<Value*>(setInputArgs, 4), "", RI); + + } else { // Shared memory size argument - scalar at address position + switchToTimer(visc_TimerID_COPY_SCALAR, RI); + // Store the scalar value on stack and then pass the pointer to its + // location + AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), + kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".ptr", RI); + StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); + + Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, + Type::getInt8PtrTy(M.getContext()), + kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".i8ptr", + RI); + + Value* setInputArgs[] = {GraphID, + inputValI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()),i), + ConstantExpr::getSizeOf(inputVal->getType()) + }; + CallInst::Create(llvm_visc_ocl_argument_scalar, + ArrayRef<Value*>(setInputArgs, 4), "", RI); + } + } + } else { - assert(isa<Constant>(inputVal) && "Only constant shared memory size is supported"); + Function *F_alloc = K->AllocationFunction; + StructType *FAllocRetTy = dyn_cast<StructType>(F_alloc->getReturnType()); + assert(FAllocRetTy && "Allocation node with no struct return type"); - Value* setInputArgs[] = {GraphID, - ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext())), - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - inputVal - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); - - } else { // Sharem memory size argument - scalar at address position - 
switchToTimer(visc_TimerID_COPY_SCALAR, RI); - // Store the scalar value on stack and then pass the pointer to its - // location - AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".ptr", RI); - StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); + std::vector<Value *> AllocInputArgs; + for (unsigned i = 0; i < K->allocInArgMap.size(); i++) { + AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i))); + } - Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, - Type::getInt8PtrTy(M.getContext()), - kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".i8ptr", - RI); + CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI); + std::vector<ExtractValueInst *> ExtractValueInstVec; + for (unsigned i = 1; i < FAllocRetTy->getNumElements(); i += 2) { + ExtractValueInst *EI = ExtractValueInst::Create(CI, i, "", RI); + ExtractValueInstVec.push_back(EI); + } - Value* setInputArgs[] = {GraphID, - inputValI8Ptr, - ConstantInt::get(Type::getInt32Ty(M.getContext()),i), - ConstantExpr::getSizeOf(inputVal->getType()) - }; - CallInst::Create(llvm_visc_ocl_argument_scalar, - ArrayRef<Value*>(setInputArgs, 4), "", RI); + for (std::map<unsigned, std::pair<Value*, unsigned> >::iterator + ib = kernelSharedInArgMap.begin(), + ie = kernelSharedInArgMap.end(); ib != ie; ++ib) { + unsigned i = ib->first; + Value* inputVal = ExtractValueInstVec[ib->second.second/2]; + + DEBUG(errs() << "\tArgument "<< i<< " = " << *inputVal << "\n"); + + // input value has been obtained. 
+ // inputVal is a scalar value + if (i % 2 == 0) { // Shared memory ptr argument - scalar at size position + switchToTimer(visc_TimerID_COPY_SCALAR, RI); + + Value* setInputArgs[] = {GraphID, + ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext())), + ConstantInt::get(Type::getInt32Ty(M.getContext()),i), + inputVal + }; + CallInst::Create(llvm_visc_ocl_argument_scalar, + ArrayRef<Value*>(setInputArgs, 4), "", RI); + + } else { // Shared memory size argument - scalar at address position + switchToTimer(visc_TimerID_COPY_SCALAR, RI); + // Store the scalar value on stack and then pass the pointer to its + // location + AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), + kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".ptr", RI); + StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI); + + Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr, + Type::getInt8PtrTy(M.getContext()), + kernel->KernelFunction->getName()+".sharedMem."+Twine(i)+".i8ptr", + RI); + + Value* setInputArgs[] = {GraphID, + inputValI8Ptr, + ConstantInt::get(Type::getInt32Ty(M.getContext()),i), + ConstantExpr::getSizeOf(inputVal->getType()) + }; + CallInst::Create(llvm_visc_ocl_argument_scalar, + ArrayRef<Value*>(setInputArgs, 4), "", RI); + } } } + DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n"); // Set output if struct is not an empty struct @@ -927,14 +1003,42 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // If no allocation node was found, SharedMemArgs is empty if (kernel->AllocationNode) { - std::map<unsigned, Value*> sharedInMap = kernel->getSharedInArgMap(); + + ValueToValueMapTy VMap; + Function *F_alloc = CloneFunction(kernel->AllocationNode->getFuncPointer(), VMap, true); + // Insert the cloned function into the kernels module + M.getFunctionList().push_back(F_alloc); + + std::vector<IntrinsicInst *> ViscMallocInstVec; + findIntrinsicInst(F_alloc, Intrinsic::visc_malloc, ViscMallocInstVec); + + for (unsigned i = 0; i < 
ViscMallocInstVec.size(); i++) { + IntrinsicInst *II = ViscMallocInstVec[i]; + assert(II->hasOneUse() && "visc_malloc result is used more than once"); + II->replaceAllUsesWith(ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext()))); + II->eraseFromParent(); + } + kernel->AllocationFunction = F_alloc; + + // This could be used to check that the allocation node has the appropriate + // number of fields in its return struct +/* + ReturnInst *RI = ReturnInstVec[0]; + Value *RetVal = RI->getReturnValue(); + Type *RetTy = RetVal->getType(); + StructType *RetStructTy = dyn_cast<StructType>(RetTy); + assert(RetStructTy && "Allocation node does not return a struct type"); + unsigned numFields = RetStructTy->getNumElements(); +*/ + std::map<unsigned, std::pair<Value*, unsigned> > sharedInMap = kernel->getSharedInArgMap(); AllocationNodeProperty* APN = (AllocationNodeProperty*) kernel->AllocationNode->getProperty(DFNode::Allocation); for (auto& AllocPair: APN->getAllocationList()) { unsigned destPos = AllocPair.first->getDestPosition(); + unsigned srcPos = AllocPair.first->getSourcePosition(); SharedMemArgs.push_back(destPos); - sharedInMap[destPos] = AllocPair.second; - sharedInMap[destPos+1] = AllocPair.second; + sharedInMap[destPos] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); + sharedInMap[destPos+1] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1); } kernel->setSharedInArgMap(sharedInMap); } @@ -1583,6 +1687,17 @@ static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVe } } +// Helper function, populate a vector with all IntrinsicID intrinsics in a function +static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vector<IntrinsicInst *> & IntrinsicInstVec) { + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { + Instruction *I = &(*i); + IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); + if (II && II->getIntrinsicID() == IntrinsicID) { + IntrinsicInstVec.push_back(II); + } + } +} 
+ } // End of namespace char DFG2LLVM_NVPTX::ID = 0;