diff --git a/llvm/include/llvm/IR/DFGraph.h b/llvm/include/llvm/IR/DFGraph.h
index b6471a4cc1420458be65467df814780ea28196e4..a1ac2cb45e216acb643cab0cd29b0b5e26d2d5e3 100644
--- a/llvm/include/llvm/IR/DFGraph.h
+++ b/llvm/include/llvm/IR/DFGraph.h
@@ -335,7 +335,9 @@ public:
   bool isEntryNode();
   bool isExitNode();
   DFEdge* getInDFEdgeAt(unsigned inPort);
+  DFEdge* getOutDFEdgeAt(unsigned outPort);
   std::vector<unsigned> getInArgMap();
+  std::vector<unsigned> getOutArgMap();
   int getAncestorHops(DFNode* N);
 
   virtual void applyDFNodeVisitor(DFNodeVisitor &V) = 0;
@@ -567,6 +569,21 @@ DFEdge* DFNode::getInDFEdgeAt(unsigned inPort) {
   return NULL;
 }
 
+// Return the first outgoing edge whose source position equals outPort, or
+// NULL when no outgoing edge starts at that port.
+DFEdge* DFNode::getOutDFEdgeAt(unsigned outPort) {
+
+  // Cannot perform check for the number of outputs here,
+  // it depends on the node's return type
+
+  for(outdfedge_iterator i = outdfedge_begin(), e = outdfedge_end(); i != e; ++i) {
+    DFEdge* E = *i;
+    if(outPort == E->getSourcePosition())
+      return E;
+  }
+  return NULL;
+}
+
 std::vector<unsigned> DFNode::getInArgMap() {
   std::vector<unsigned> map(InDFEdges.size());
   for (unsigned i = 0; i < InDFEdges.size(); i++) {
@@ -577,6 +594,25 @@ std::vector<unsigned> DFNode::getInArgMap() {
   return map;
 }
 
+// Build a map from the destination position of each outgoing edge to the
+// output port of this node that feeds it (map[destPos] = sourcePort).
+// Ports without an outgoing edge are skipped; unset entries keep the value 0.
+std::vector<unsigned> DFNode::getOutArgMap() {
+  std::vector<unsigned> map(OutDFEdges.size());
+  for (unsigned i = 0; i < OutDFEdges.size(); i++) {
+    DFEdge* E = getOutDFEdgeAt(i);
+    // No edge leaves port i, e.g. when several edges share one output port
+    if (!E)
+      continue;
+    unsigned pos = E->getDestPosition();
+    // Destination positions are not bounded by the number of outgoing edges
+    if (pos >= map.size())
+      map.resize(pos + 1);
+    map[pos] = i;
+  }
+  return map;
+}
+
 int DFNode::getAncestorHops(DFNode* N) {
   DFNode* temp = this->getParent();
   int hops = 1;
diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 47b7e18856406a3f1853550587fd662a267a3e47..e8c027686aba7fa19dfb0d3344f208e5ba2302b0 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -54,13 +54,14 @@ public:
 // calls
 class Kernel {
 public:
-  Kernel(Function* _KF, DFLeafNode* _KLeafNode,std::vector<unsigned> _inArgMap =
+  Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::vector<unsigned> _inArgMap =
+         std::vector<unsigned>(), std::vector<unsigned> _outArgMap =
          std::vector<unsigned>(), unsigned _gridDim = 0,
          std::vector<Value*> _globalWGSize = std::vector<Value*>(),
          unsigned _blockDim = 0,
          std::vector<Value*> _localWGSize = std::vector<Value*>()) :
     KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap),
-    gridDim(_gridDim), globalWGSize(_globalWGSize),
+    outArgMap(_outArgMap), gridDim(_gridDim), globalWGSize(_globalWGSize),
     blockDim(_blockDim), localWGSize(_localWGSize) {
 
     assert(gridDim == globalWGSize.size()
@@ -72,6 +73,7 @@ public:
   Function* KernelFunction;
   DFLeafNode* KernelLeafNode;
   std::vector<unsigned> inArgMap;
+  std::vector<unsigned> outArgMap;
   unsigned gridDim;
   unsigned blockDim;
   std::vector<Value*> globalWGSize;
@@ -85,6 +87,14 @@ public:
     inArgMap = map;
   }
 
+  // Output argument mapping: destination position -> kernel output index
+  std::vector<unsigned> getOutArgMap() {
+    return outArgMap;
+  }
+  void setOutArgMap(std::vector<unsigned> map) {
+    outArgMap = map;
+  }
+
   void setLocalWGSize(std::vector<Value*> V) {
     localWGSize = V;
   }
@@ -648,6 +658,8 @@ void CodeGenTraversal::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Tw
   // Get OutputType of this node
   StructType* OutTy = N->getOutputType();
   Value *retVal = UndefValue::get(F_X86->getReturnType());
+  // Find the kernel's output arg map, to use instead of the bindings
+  std::vector<unsigned> outArgMap = kernel->getOutArgMap();
   // Find all the input edges to exit node
   for (unsigned i=0; i < OutTy->getNumElements(); i++) {
     DEBUG(errs() << "Output Edge " << i << "\n");
@@ -682,7 +694,12 @@ void CodeGenTraversal::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Tw
 
     // Extract element at source position from this call instruction
     std::vector<unsigned> IndexList;
-    IndexList.push_back(E->getSourcePosition());
+    // i is the destination of DFEdge E
+    // Use the mapping instead of the bindings
+//    IndexList.push_back(E->getSourcePosition());
+    assert(i < outArgMap.size()
+           && "Kernel output arg map has no entry for this output");
+    IndexList.push_back(outArgMap[i]);
     DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n");
     ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList,
                                                     "",RI);
@@ -722,14 +739,36 @@ void CodeGenTraversal::codeGen(DFInternalNode* N) {
   } else {
     DEBUG(errs() << "Found intermediate node. Getting size parameters.\n");
     // Keep track of the arguments order.
-    std::vector<unsigned> map1 = N->getInArgMap();
-    std::vector<unsigned> map2 = kernel->getInArgMap();
+    std::vector<unsigned> inmap1 = N->getInArgMap();
+    std::vector<unsigned> inmap2 = kernel->getInArgMap();
 
-    // The limit is the size of map2, because this is the number of kernel arguments
-    for (unsigned i = 0; i < map2.size(); i++) {
-      map2[i] = map1[map2[i]];
+    // TODO: Verify when we have incoming edges from more than one node
+    // The limit is the size of inmap2, because this is the number of kernel arguments
+    for (unsigned i = 0; i < inmap2.size(); i++) {
+      inmap2[i] = inmap1[inmap2[i]];
+    }
+    kernel->setInArgMap(inmap2);
+
+    // Keep track of the output arguments order.
+    std::vector<unsigned> outmap1 = N->getOutArgMap();
+    std::vector<unsigned> outmap2 = kernel->getOutArgMap();
+
+    // TODO: Change when we have incoming edges to the dummy exit node from more
+    // than one node. In this case, the number of bindings is the same, but
+    // their destination position, thus the index in outmap1, is not
+    // 0 ... outmap2.size()-1
+    // The limit is the size of outmap2, because this is the number of kernel
+    // output arguments for which the mapping matters
+    // For now, it is reasonable to assume that all the kernel arguments are returned,
+    // maybe plus some others from other nodes, thus outmap2.size() <= outmap1.size()
+    assert(outmap2.size() <= outmap1.size()
+           && "Kernel returns more values than this node forwards");
+    for (unsigned i = 0; i < outmap2.size(); i++) {
+      assert(outmap1[i] < outmap2.size()
+             && "Output mapping refers to a missing kernel output");
+      outmap1[i] = outmap2[outmap1[i]];
     }
-    kernel->setInArgMap(map2);
+    kernel->setOutArgMap(outmap1);
 
     // Track the source of local dimlimits for the kernel
     // Dimension limit can either be a constant or an argument of parent
@@ -779,7 +818,12 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
   // (2) Parent does not have multiple instances
   if (!pLevel || !pReplFactor) {
     KernelLaunchNode = PNode;
-    kernel = new Kernel(NULL, N, N->getInArgMap(), N->getNumOfDim(), N->getDimLimits());
+    kernel = new Kernel(NULL,
+                        N,
+                        N->getInArgMap(),
+                        N->getOutArgMap(),
+                        N->getNumOfDim(),
+                        N->getDimLimits());
   }
   else {
     // Converting a 2-level DFG to opencl kernel
@@ -790,6 +834,7 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
     kernel = new Kernel(NULL,                 // kernel function
                         N,                    // kernel leaf node
                         N->getInArgMap(),     // kenel argument mapping
+                        N->getOutArgMap(),    // kernel output mapping from the leaf to the intermediate node
                         PNode->getNumOfDim(), // gridDim
                         PNode->getDimLimits(),// grid size
                         N->getNumOfDim(),     // blockDim