diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 9c0476ff52841e16f5c43de35d395e16c3b97aac..1660493aa014db11e8674cfeb5a599c579c5b6ca 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -11,6 +11,7 @@ #define TARGET_PTX 32 #define GENERIC_ADDRSPACE 0 #define GLOBAL_ADDRSPACE 1 +#define CONSTANT_ADDRSPACE 4 #define SHARED_ADDRSPACE 3 #define DEBUG_TYPE "DFG2LLVM_NVPTX" @@ -124,6 +125,7 @@ public: }; // Helper function declarations +static bool canBePromoted(Argument* arg, Function* F); static void getExecuteNodeParams(Value* &, Value* &, Value* &, Kernel*, ValueToValueMapTy&, Instruction*); static Value* genWorkGroupPtr(std::vector<Value*>, ValueToValueMapTy&, @@ -173,6 +175,7 @@ private: //Functions std::string getKernelsModuleName(Module &M); void fixValueAddrspace(Value* V, unsigned addrspace); + std::vector<unsigned> globalToConstantMemoryOpt(std::vector<unsigned>*, Function*); void changeArgAddrspace(Function* F, unsigned i); void changeArgAddrspace(Function* F, std::vector<unsigned> &Ags, unsigned i); void addCLMetadata(Function* F); @@ -1066,7 +1069,12 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { /* At this point, we assume that chescks for the fact that SharedMemArgs only contains pointer arguments to GLOBAL_ADDRSPACE have been performed by the analysis pass */ + // Optimization: Global memory arguments, which are not modified and whose + // loads are not dependent on node id of current node, should be moved to + // constant memory, subject to size of course + std::vector<unsigned> ConstantMemArgs = globalToConstantMemoryOpt(&GlobalMemArgs, F_nvptx); + changeArgAddrspace(F_nvptx, ConstantMemArgs, CONSTANT_ADDRSPACE); changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE); changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE); @@ -1501,6 +1509,25 @@ void CGT_NVPTX::fixValueAddrspace(Value* V, 
unsigned addrspace) { } } + /* globalToConstantMemoryOpt: splits the promotable arguments out of the global-memory argument list. For every argument of F whose argument number is present in *GlobalMemArgs and for which canBePromoted() returns true, the argument number is appended to the returned vector and erased from *GlobalMemArgs, so GlobalMemArgs is modified in place. The iterator pos is looked up afresh for each argument, so no iterator is used after the erase. Returns the argument numbers that were moved out of *GlobalMemArgs. */ +std::vector<unsigned> CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned>* GlobalMemArgs, Function* F) { + std::vector<unsigned> ConstantMemArgs; + for(auto& arg: F->getArgumentList()) { + std::vector<unsigned>::iterator pos = std::find(GlobalMemArgs->begin(), + GlobalMemArgs->end(), arg.getArgNo()); + // It has to be a global memory argument to be promotable + if(pos == GlobalMemArgs->end()) + continue; + + // Check if it can/should be promoted + if(canBePromoted(&arg, F)) { + ConstantMemArgs.push_back(arg.getArgNo()); + GlobalMemArgs->erase(pos); + } + } + return ConstantMemArgs; +} + void CGT_NVPTX::changeArgAddrspace(Function* F, unsigned addrspace) { std::vector<Type*> ArgTypes; for(auto& arg: F->getArgumentList()) { @@ -1662,6 +1689,98 @@ void CGT_NVPTX::transformFunctionToVoid(Function* F) { /****************************************************************************** * Helper functions * ******************************************************************************/ +// Check if argument arg can be promoted to constant memory in Function F +// Condition: +// 1. No stores +// 2. 
Loads not dependent on getNodeInstanceID itrinsic + +static bool findLoadStoreUses(Value* V, std::vector<Value*>*UseList) { + for(Value::use_iterator ui = V->use_begin(), ue = V->use_end(); + ui != ue; ++ui) { + Value* I = *ui; + DEBUG(errs() << "\t" << *I << "\n"); + if(isa<LoadInst>(I)) { + DEBUG(errs() << "\tFound load instruction: " << *I << "\n"); + DEBUG(errs() << "\tAdd to use list: " << *V << "\n"); + UseList->push_back(V); + } + else if(isa<StoreInst>(I)) { + // found a store in use chain + DEBUG(errs() << "Found store instruction: " << *I << "\n"); + return true; + } + else { + DEBUG(errs() << "\tTraverse use chain of: " << *I << "\n"); + if(findLoadStoreUses(I, UseList)) + return true; + } + } + return false; +} + +static bool isDependentOnNodeInstanceID(Value* V, std::vector<Value*>*DependenceList) { + if(std::find(DependenceList->begin(), DependenceList->end(), V) != DependenceList->end()) { + DEBUG(errs() << "\tAlready visited value: " << *V << "\n"); + return false; + } + DependenceList->push_back(V); + // If not an instruction, then not dependent on node instance id + if(!isa<Instruction>(V) || isa<Constant>(V)) { + DEBUG(errs() << "\tStop\n"); + return false; + } + + Instruction* I = cast<Instruction>(V); + for(unsigned i = 0; i < I->getNumOperands(); i++) { + Value* operand = I->getOperand(i); + if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(operand)) { + if((II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_x + || II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_y + || II->getIntrinsicID() == Intrinsic::visc_getNodeInstanceID_z)) { + Value* Node = II->getArgOperand(0); + IntrinsicInst* GN = dyn_cast<IntrinsicInst>(Node); + assert(GN && "NodeInstanceID operande should be node/parent node intrinsic\n"); + if(GN->getIntrinsicID() == Intrinsic::visc_getNode) { + DEBUG(errs() << "\tDependency found on Node instance ID: " << *II << "\n"); + return true; + } + } + } + if(CmpInst* CI = dyn_cast<CmpInst>(operand)) { + DEBUG(errs() << 
"Found compare instruction: "<< *CI<<"\nNot following its dependency list\n"); + continue; + } + DEBUG( errs() << "\tTraverse the operand chain of: " << *operand << "\n"); + if(isDependentOnNodeInstanceID(operand, DependenceList)) { + return true; + } + } + return false; +} + +// Function to check if argument arg can be changed to a constant memory pointer +static bool canBePromoted(Argument* arg, Function* F) { + errs() << "OPT: Check if Argument " << *arg << " can be changed to constant memory\n"; + std::vector<Value*> UseList; + // recursively traverse use chain + // if find a store instruction return false, everything fails, cannot be + // promoted + // if find a load instruction as use, add the GEP instruction to list + bool foundStore = findLoadStoreUses(arg, &UseList); + if(foundStore == true) + return false; + // See that the GEP instructions are not dependent on getNodeInstanceID + // intrinsic + DEBUG(errs() << foundStore << "\tNo Store Instruction found. Check dependence on node instance ID\n"); + std::vector<Value*>DependenceList; + for(auto U: UseList) { + if(isDependentOnNodeInstanceID(U, &DependenceList)) + return false; + } + errs() << "\tYes, Promotable to Constant Memory\n"; + return true; +} + // Calculate execute node parameters which include, number of diemnsions for // dynamic instances of the kernel, local and global work group sizes.