diff --git a/llvm/include/llvm/SupportVISC/DFG2LLVM.h b/llvm/include/llvm/SupportVISC/DFG2LLVM.h
index 3011d7ab7055b42ca0770805660b84e0758738ee..e57f8c1a0a4687fdebe2bce5b391553d505fd17d 100644
--- a/llvm/include/llvm/SupportVISC/DFG2LLVM.h
+++ b/llvm/include/llvm/SupportVISC/DFG2LLVM.h
@@ -115,7 +115,7 @@ public:
 
   static bool checkPreferredTarget(DFNode* N, visc::Target T);
   static bool preferredTargetIncludes(DFNode* N, visc::Target T);
-
+  visc::Target getPreferredTarget(DFNode *N);
 
   virtual void visit(DFInternalNode* N) {
     // If code has already been generated for this internal node, skip the
@@ -173,7 +173,46 @@ bool CodeGenTraversal::checkPreferredTarget(DFNode* N, visc::Target T) {
   return false;
 }
 
+/// Return the target recorded for node N's function in the module-level
+/// "visc_hint_*" named metadata, or visc::None when no hint list names it.
+///
+/// The lists are searched in a fixed order (cpu, gpu, spir, cpu_gpu, cpu_spir)
+/// and the first list that names the function decides the result.
+visc::Target CodeGenTraversal::getPreferredTarget(DFNode *N) {
+  // Named-metadata list -> target it stands for, in lookup order.
+  static const struct {
+    const char *Name;
+    visc::Target Tag;
+  } HintKinds[] = {
+      {"visc_hint_cpu", visc::CPU_TARGET},
+      {"visc_hint_gpu", visc::GPU_TARGET},
+      {"visc_hint_spir", visc::SPIR_TARGET},
+      {"visc_hint_cpu_gpu", visc::CPU_OR_GPU_TARGET},
+      {"visc_hint_cpu_spir", visc::CPU_OR_SPIR_TARGET},
+  };
+
+  Function *F = N->getFuncPointer();
+  Module *M = F->getParent();
+
+  for (const auto &Kind : HintKinds) {
+    // getOrInsert never returns null; a list the module lacks is just empty.
+    NamedMDNode *HintNode = M->getOrInsertNamedMetadata(Kind.Name);
+    for (unsigned i = 0; i < HintNode->getNumOperands(); i++) {
+      MDNode *MetaNode = HintNode->getOperand(i);
+      // The cast yields null for an operand that is missing or is not a
+      // value wrapper; skip such entries instead of dereferencing null.
+      auto *FHint =
+          dyn_cast_or_null<ValueAsMetadata>(MetaNode->getOperand(0).get());
+      if (FHint && FHint->getValue() == F)
+        return Kind.Tag;
+    }
+  }
+
+  return visc::None;
+}
+
 bool CodeGenTraversal::preferredTargetIncludes(DFNode* N, visc::Target T) {
+
   Function* F = N->getFuncPointer();
   Module* M = F->getParent();
   std::vector<NamedMDNode *> HintNode;
diff --git a/llvm/include/llvm/SupportVISC/VISCUtils.h b/llvm/include/llvm/SupportVISC/VISCUtils.h
index 0eb494623d13a593dd08ab03b353d98430ef946c..7a8e6cb6731df61a7d77bbd1aaf5d57422de90c2 100644
--- a/llvm/include/llvm/SupportVISC/VISCUtils.h
+++ b/llvm/include/llvm/SupportVISC/VISCUtils.h
@@ -152,27 +152,27 @@ void fixHintMetadata(Module &M, Function* F, Function* G) {
   NamedMDNode* HintNode = M.getOrInsertNamedMetadata("visc_hint_gpu");
   for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
     if(HintNode->getOperand(i) == MDT_F)
-      HintNode->setOperand(0, MDT_G);
+      HintNode->setOperand(i, MDT_G);
   }
   HintNode = M.getOrInsertNamedMetadata("visc_hint_spir");
   for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
     if(HintNode->getOperand(i) == MDT_F)
-      HintNode->setOperand(0, MDT_G);
+      HintNode->setOperand(i, MDT_G);
   }
   HintNode = M.getOrInsertNamedMetadata("visc_hint_cpu");
   for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
     if(HintNode->getOperand(i) == MDT_F)
-      HintNode->setOperand(0, MDT_G);
+      HintNode->setOperand(i, MDT_G);
   }
   HintNode = M.getOrInsertNamedMetadata("visc_hint_cpu_gpu");
   for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
     if(HintNode->getOperand(i) == MDT_F)
-      HintNode->setOperand(0, MDT_G);
+      HintNode->setOperand(i, MDT_G);
   }
   HintNode = M.getOrInsertNamedMetadata("visc_hint_cpu_spir");
   for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
     if(HintNode->getOperand(i) == MDT_F)
-      HintNode->setOperand(0, MDT_G);
+      HintNode->setOperand(i, MDT_G);
   }
 }
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 845e5a7f9f50fcd9cf5eaa8455b5d913ab401a1c..47f7a299130f98ed8319e021cae3842f06f3b9bf 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -377,6 +377,9 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
   //Add the generated function info to DFNode
   // N->setGenFunc(F_X86, visc::CPU_TARGET);
   N->addGenFunc(F_X86, visc::GPU_TARGET, true);
+  errs() << "Added GPUGenFunc: " << F_X86->getName() << " for node "
+         << N->getFuncPointer()->getName() << "\n";
+
   // Loop over the arguments, to create the VMap
   dest_iterator = F_X86->arg_begin();
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
index 4621391b73d4548d73c0eed50af19d65ed1a07a4..c076c3ac1e84156c33afbd8f414ceec99994eb9a 100644
--- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
@@ -97,8 +97,7 @@ private:
   Value* addLoop(Instruction* I, Value* limit, const Twine& indexName = "");
   void addDoWhileLoop(Instruction*, Instruction*, Value*);
   void addWhileLoop(Instruction*, Instruction*, Instruction*, Value*);
-  void addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *,
-                           Instruction *);
+  Instruction *addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *);
   Argument* getArgumentFromEnd(Function* F, unsigned offset);
   Value* getInValueAt(DFNode* Child, unsigned i, Function* ParentF_X86,
                       Instruction* InsertBefore);
@@ -315,8 +314,8 @@ void CGT_X86::addWhileLoop(Instruction* CondBlockStart, Instruction* BodyStart,
 
 }
 
-void CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
-                                  BasicBlock *Body, Instruction *Cnt) {
+Instruction* CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
+                                          BasicBlock *Body) {
   Module *M = Entry->getParent()->getParent();
   Type *Int64Ty = Type::getInt64Ty(M->getContext());
 
@@ -336,7 +335,7 @@ void CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
   CounterPhi->addIncoming(CounterIncr, Body);
 
-  // Return the pointer to the created PHI node in the corresponding argument
-  Cnt = CounterPhi;
+  // Hand the created PHI node (the loop counter) back to the caller
+  return CounterPhi;
 }
 
 /* Add Loop around the instruction I
@@ -1231,7 +1230,7 @@ Function* CGT_X86::createFunctionFilter(DFNode* C) {
   // argument to the generated function, that is the iteration number, and then
   // use it as an argument to the policy_getVersion call
   if (GetPolicyCI) {
-    addWhileLoopCounter(EntryBB, CondBB, BodyBB, CntI);
+    CntI = addWhileLoopCounter(EntryBB, CondBB, BodyBB);
     assert(CntI && "Counter instruction not found\n");
 
     // Create new function type (with additional argument for iteration number)
@@ -1261,6 +1260,7 @@ Function* CGT_X86::createFunctionFilter(DFNode* C) {
 
     // Set second operand of the policy_getVersion call to the last function
     // argument
+    GetPolicyCI = get_llvm_visc_policy_getVersion_call(NewCGenF);
     GetPolicyCI->setArgOperand(1, CntArg);
   }
 
@@ -1454,7 +1454,16 @@ void CGT_X86::codeGen(DFInternalNode* N) {
 
   errs() << "hasx86GenFuncForSPIR : " << SFx86 << "\n";
 
-  if (viscUtils::isSingleTargetTag(N->getTag())) {
+  if (N->getTag() == visc::None) {
+    // No code is available for this node. This (usually) means that this
+    // node is a node that
+    // - from the accelerator backends has been mapped to an intermediate
+    //   node, and thus they have not produced a genFunc
+    // - a child node had no CPU hint, thus no code gen for CPU could
+    //   take place
+    errs() << "No GenFunc - Skipping CPU code generation for node "
+           << N->getFuncPointer()->getName() << "\n";
+  } else if (viscUtils::isSingleTargetTag(N->getTag())) {
     // There is a single version for this node according to code gen hints.
     // Therefore, we do not need to check the policy, we simply use the
     // available implementation, whichever target it is for.