diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 97e0f3411b7d74e97b1b6aca3d9f7d09f0650d72..642202bf055adcf534b7f1b44b3ab71858347ecb 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -953,8 +953,13 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // (2) Parent does not have multiple instances errs() << "pLevel = " << pLevel << "\n"; errs() << "pReplFactor = " << pReplFactor << "\n"; - if (!pLevel || !pReplFactor) { + assert((pLevel > 0) && "Root not allowed to be chosen as Kernel Node."); + + // Only these options are supported + enum XLevelHierarchy{ONE_LEVEL, TWO_LEVEL} SelectedHierarchy; + if(pLevel == 1 || !pReplFactor) { errs() << "*************** Kernel Gen: 1-Level Hierarchy **************\n"; + SelectedHierarchy = ONE_LEVEL; KernelLaunchNode = PNode; kernel = new Kernel(NULL, N, @@ -967,6 +972,8 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { else { // Converting a 2-level DFG to opencl kernel errs() << "*************** Kernel Gen: 2-Level Hierarchy **************\n"; + assert((pLevel >= 2) && "Selected node not nested deep enough to be Kernel Node."); + SelectedHierarchy = TWO_LEVEL; KernelLaunchNode = PNode->getParent(); assert((PNode->getNumOfDim() == N->getNumOfDim()) && "Dimension number must match"); // Contains the instructions generating the kernel configuration parameters @@ -982,7 +989,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { } - std::vector<IntrinsicInst *> IItoRemove; + std::vector<Instruction *> IItoRemove; BuildDFG::HandleToDFNode Leaf_HandleToDFNodeMap; // Get the function associated with the dataflow node @@ -1127,6 +1134,33 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { F_nvptx = changeArgAddrspace(F_nvptx, SharedMemArgs, SHARED_ADDRSPACE); F_nvptx = changeArgAddrspace(F_nvptx, GlobalMemArgs, GLOBAL_ADDRSPACE); +// Function to replace call instructions to functions in the kernel + 
std::map<Function *, Function *> OrgToClonedFuncMap; + std::vector<Function *> FuncToBeRemoved; + auto CloneAndReplaceCall = [&] (CallInst *CI, Function *OrgFunc) { + Function* NewFunc; + // Check if the called function has already been cloned before. + auto It = OrgToClonedFuncMap.find(OrgFunc); + if(It == OrgToClonedFuncMap.end()) { + ValueToValueMapTy VMap; + NewFunc = CloneFunction(OrgFunc, VMap); + OrgToClonedFuncMap[OrgFunc] = NewFunc; + FuncToBeRemoved.push_back(NewFunc); + } else { + NewFunc = (*It).second; + } + // Replace the calls to this function + std::vector<Value*> args; + for(unsigned i=0; i < CI->getNumArgOperands(); i++) { + args.push_back(CI->getArgOperand(i)); + } + CallInst* Inst = CallInst::Create(NewFunc, args, + OrgFunc->getReturnType()->isVoidTy()? "" : CI->getName(), CI); + CI->replaceAllUsesWith(Inst); + IItoRemove.push_back(CI); + return NewFunc; + }; + // Go through all the instructions for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; ++i) { @@ -1212,16 +1246,12 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // The following is to find which function to call Function * OpenCLFunction; - int parentLevel = N->getParent()->getLevel(); - int parentReplFactor = N->getParent()->getNumOfDim(); - DEBUG(errs() << "Parent Level = " << parentLevel << "\n"); - DEBUG(errs() << "Parent Repl factor = " << parentReplFactor << "\n"); FunctionType* FT = FunctionType::get(Type::getInt64Ty(KernelM->getContext()), Type::getInt32Ty(KernelM->getContext()), false); - if ((!parentLevel || !parentReplFactor) && ArgDFNode == N) { + if (SelectedHierarchy == ONE_LEVEL && ArgDFNode == N) { // We only have one level in the hierarchy or the parent node is not // replicated. This indicates that the parent node is the kernel // launch, so we need to specify a global id. 
@@ -1296,14 +1326,12 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // The following is to find which function to call Function * OpenCLFunction; - int parentLevel = ParentDFNode->getLevel(); - int parentReplFactor = ParentDFNode->getNumOfDim(); FunctionType* FT = FunctionType::get(Type::getInt64Ty(KernelM->getContext()), Type::getInt32Ty(KernelM->getContext()), false); - if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) { + if (N == ArgDFNode && SelectedHierarchy == ONE_LEVEL) { // We only have one level in the hierarchy or the parent node is not // replicated. This indicates that the parent node is the kernel // launch, so the instances are global_size (gridDim x blockDim) @@ -1412,9 +1440,9 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { && "Only handling sin(float) and cos(float)!"); std::string name; if(II->getIntrinsicID() == Intrinsic::sin) - name = "_Z3sinf"; + name = "sin"; else - name = "_Z3cosf"; + name = "cos"; FunctionType* SinCosFT = FunctionType::get(II->getType(), Type::getFloatTy(KernelM->getContext()), @@ -1465,11 +1493,16 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { } else { - // Clone the function - ValueToValueMapTy VMap; - Function* newCalleeF = CloneFunction(calleeF, VMap); - newCalleeF->removeFromParent(); //TODO: MARIA check - KernelM->getFunctionList().push_back(newCalleeF); + // Check if the called function has already been cloned before. + Function *NewFunc = CloneAndReplaceCall(CI, calleeF); + // Iterate over the new function to see if it calls any other functions + // in the module. 
+ for(inst_iterator i = inst_begin(NewFunc), e = inst_end(NewFunc); i != e; ++i) { + if(auto *Call = dyn_cast<CallInst>(&*i)) { + Function *CalledFunc = cast<Function>(Call->getCalledValue()->stripPointerCasts()); + CloneAndReplaceCall(Call, CalledFunc); + } + } } //TODO: how to handle address space qualifiers in load/store } @@ -1480,10 +1513,15 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { // have assumed theworst memory behaviour for these function calls // Traverse the vector backwards, otherwise definitions are deleted while // their subsequent uses are still around - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) { - DEBUG(errs() << "Erasing: " << **ri << "\n"); - (*ri)->eraseFromParent(); + for (auto *I : IItoRemove) { + DEBUG(errs() << "Erasing: " << *I << "\n"); + I->eraseFromParent(); + } + + // Move the cloned functions out of the parent module and into the kernel module + for(auto *F : FuncToBeRemoved) { + F->removeFromParent(); //TODO: MARIA check + KernelM->getFunctionList().push_back(F); } addCLMetadata(F_nvptx); @@ -1936,7 +1974,7 @@ static Value* genWorkGroupPtr(Module &M, std::vector<Value*> WGSize, ValueToValu // Get generated PTX binary name static std::string getPTXFilename(const Module& M) { std::string moduleID = M.getModuleIdentifier(); - moduleID.append(".nvptx.s"); + moduleID.append(".kernels.cl"); return moduleID; }