diff --git a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp index a8c807704a89853d54cc6b7c01efbc98deef78f2..a21bba52177a803018df501ccd5d7476f8155c15 100644 --- a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp +++ b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp @@ -32,20 +32,20 @@ using namespace llvm; namespace builddfg { bool BuildDFG::runOnModule(Module &M) { - DEBUG(errs() << "\nBUILDDFG PASS\n"); - DEBUG(errs() << "-------- Searching for launch sites ----------\n"); + //DEBUG(errs() << "\nBUILDDFG PASS\n"); + //DEBUG(errs() << "-------- Searching for launch sites ----------\n"); IntrinsicInst *II; // Iterate over all functions in the module for (auto &Func : M) { Function *F = &Func; - DEBUG(errs() << "Function: " << F->getName() << "\n"); + //DEBUG(errs() << "Function: " << F->getName() << "\n"); for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { Instruction *I = &*i; // Grab pointer to Instruction if (isHPVMLaunchIntrinsic(I)) { - DEBUG(errs() << "------------ Found launch site --------------\n"); + // DEBUG(errs() << "------------ Found launch site --------------\n"); II = cast<IntrinsicInst>(I); assert(II && "Launch intrinsic not recognized."); @@ -53,8 +53,8 @@ bool BuildDFG::runOnModule(Module &M) { // Intrinsic Instruction has been initialized from this point on. 
Function *F = cast<Function>(II->getOperand(0)->stripPointerCasts()); Root = DFInternalNode::Create(II, F, hpvmUtils::getPreferredTarget(F)); - errs() << "INTRINSIC: " << II << "\n"; - errs() << "ROOT NODE" << Root << "\n"; + //errs() << "INTRINSIC: " << II << "\n"; + //errs() << "ROOT NODE" << Root << "\n"; Roots.push_back(Root); BuildGraph(Root, F); @@ -62,14 +62,14 @@ bool BuildDFG::runOnModule(Module &M) { e = Root->getChildGraph()->end(); i != e; i++) { DFNode *N = *i; - DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n"); + //DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n"); } Root->getChildGraph()->sortChildren(); for (DFGraph::children_iterator i = Root->getChildGraph()->begin(), e = Root->getChildGraph()->end(); i != e; i++) { DFNode *N = *i; - DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n"); + //DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n"); } viewDFGraph(Root->getChildGraph()); } @@ -176,9 +176,9 @@ bool BuildDFG::isTypeCongruent(Type *L, Type *R) { // Handles all the createNodeXX hpvm intrinsics. 
void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { - errs() << "************ HANDLE CREATE NODE *********\n"; - II->print(errs()); - errs() << "\n"; + //errs() << "************ HANDLE CREATE NODE *********\n"; + //II->print(errs()); + //errs() << "\n"; bool isInternalNode = false; Function *F = cast<Function>((II->getOperand(0))->stripPointerCasts()); @@ -211,7 +211,7 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { // dataflow graph DFInternalNode *childDFNode = DFInternalNode::Create( II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits); - errs() << "INTERNAL NODE: " << childDFNode << "\n"; + //errs() << "INTERNAL NODE: " << childDFNode << "\n"; N->addChildToDFGraph(childDFNode); HandleToDFNodeMap[II] = childDFNode; BuildGraph(childDFNode, F); @@ -219,26 +219,26 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { // Create Leaf DFnode and add it to the map. DFLeafNode *childDFNode = DFLeafNode::Create( II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits); - errs() << "LEAF NODE: " << childDFNode << "\n"; + //errs() << "LEAF NODE: " << childDFNode << "\n"; N->addChildToDFGraph(childDFNode); HandleToDFNodeMap[II] = childDFNode; } } void BuildDFG::handleCreateEdge(DFInternalNode *N, IntrinsicInst *II) { - errs() << "************ HANDLE CREATE EDGE *********\n"; - II->print(errs()); - errs() << "\n"; + //errs() << "************ HANDLE CREATE EDGE *********\n"; + //II->print(errs()); + //errs() << "\n"; // The DFNode structures must be in the map before the edge is processed HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0)); assert(DFI != HandleToDFNodeMap.end()); DFI = HandleToDFNodeMap.find(II->getOperand(1)); assert(DFI != HandleToDFNodeMap.end()); - errs() << "NODE TO MAP OPERAND 0: " << II->getOperand(0) << "\n"; - errs() << "NODE TO MAP OPERAND 1: " << II->getOperand(1) << "\n"; - errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << 
"\n"; - errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(1)] << "\n"; + //errs() << "NODE TO MAP OPERAND 0: " << II->getOperand(0) << "\n"; + //errs() << "NODE TO MAP OPERAND 1: " << II->getOperand(1) << "\n"; + //errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; + //errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(1)] << "\n"; DFNode *SrcDF = HandleToDFNodeMap[II->getOperand(0)]; DFNode *DestDF = HandleToDFNodeMap[II->getOperand(1)]; @@ -272,23 +272,23 @@ void BuildDFG::handleCreateEdge(DFInternalNode *N, IntrinsicInst *II) { DestPosition, DestTy, isStreaming); HandleToDFEdgeMap[II] = newDFEdge; - errs() << "NEW EDGE: " << newDFEdge << "\n"; + //errs() << "NEW EDGE: " << newDFEdge << "\n"; // Add Edge to the dataflow graph associated with the parent node N->addEdgeToDFGraph(newDFEdge); } void BuildDFG::handleBindInput(DFInternalNode *N, IntrinsicInst *II) { - errs() << "************ HANDLE BIND INPUT *********\n"; - II->print(errs()); - errs() << "\n"; + //errs() << "************ HANDLE BIND INPUT *********\n"; + //II->print(errs()); + //errs() << "\n"; // The DFNode structures must be in the map before the edge is processed HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0)); assert(DFI != HandleToDFNodeMap.end()); - errs() << "NODE TP MAP: " << II->getOperand(0) << "\n"; - errs() << "SRC NODE: " << N->getChildGraph()->getEntry() << "\n"; - errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; + //errs() << "NODE TP MAP: " << II->getOperand(0) << "\n"; + //errs() << "SRC NODE: " << N->getChildGraph()->getEntry() << "\n"; + //errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; DFNode *SrcDF = N->getChildGraph()->getEntry(); DFNode *DestDF = HandleToDFNodeMap[II->getOperand(0)]; @@ -319,23 +319,23 @@ void BuildDFG::handleBindInput(DFInternalNode *N, IntrinsicInst *II) { DestPosition, DestTy, isStreaming); HandleToDFEdgeMap[II] = newDFEdge; - errs() << "NEW EDGE: 
" << newDFEdge << "\n"; + // errs() << "NEW EDGE: " << newDFEdge << "\n"; // Add Edge to the dataflow graph associated with the parent node N->addEdgeToDFGraph(newDFEdge); } void BuildDFG::handleBindOutput(DFInternalNode *N, IntrinsicInst *II) { - errs() << "************ HANDLE BIND OUTPUT *********\n"; - II->print(errs()); - errs() << "\n"; + //errs() << "************ HANDLE BIND OUTPUT *********\n"; + //II->print(errs()); + //errs() << "\n"; // The DFNode structures must be in the map before the edge is processed HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0)); assert(DFI != HandleToDFNodeMap.end()); - errs() << "NODE TP MAP: " << II->getOperand(0) << "\n"; - errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; - errs() << "DEST NODE: " << N->getChildGraph()->getExit() << "\n"; + //errs() << "NODE TP MAP: " << II->getOperand(0) << "\n"; + //errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; + //errs() << "DEST NODE: " << N->getChildGraph()->getExit() << "\n"; DFNode *SrcDF = HandleToDFNodeMap[II->getOperand(0)]; DFNode *DestDF = N->getChildGraph()->getExit(); @@ -366,14 +366,14 @@ void BuildDFG::handleBindOutput(DFInternalNode *N, IntrinsicInst *II) { DestPosition, DestTy, isStreaming); HandleToDFEdgeMap[II] = newDFEdge; - errs() << "NEW EDGE: " << newDFEdge << "\n"; + //errs() << "NEW EDGE: " << newDFEdge << "\n"; // Add Edge to the dataflow graph associated with the parent node N->addEdgeToDFGraph(newDFEdge); } void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) { - DEBUG(errs() << "FUNCTION: " << F->getName() << "\n"); + //DEBUG(errs() << "FUNCTION: " << F->getName() << "\n"); // TODO: Place checks for valid hpvm functions. For example one of the // check can be that any function that contains hpvm dataflow graph // construction intrinsics should not have other llvm IR statements. @@ -382,10 +382,10 @@ void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) { // intrinsics. 
for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { Instruction *I = &*i; // Grab pointer to Instruction - DEBUG(errs() << *I << "\n"); + // DEBUG(errs() << *I << "\n"); if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { - DEBUG(errs() << "IntrinsicID = " << II->getIntrinsicID() << ": " - << II->getCalledFunction()->getName() << "\n"); + // DEBUG(errs() << "IntrinsicID = " << II->getIntrinsicID() << ": " + // << II->getCalledFunction()->getName() << "\n"); switch (II->getIntrinsicID()) { case Intrinsic::hpvm_createNode: case Intrinsic::hpvm_createNode1D: @@ -406,15 +406,15 @@ void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) { // TODO: Reconsider launch within a dataflow graph (recursion?) case Intrinsic::hpvm_wait: case Intrinsic::hpvm_launch: - DEBUG(errs() - << "Error: Launch/wait intrinsic used within a dataflow graph\n\t" - << *II << "\n"); + // DEBUG(errs() + // << "Error: Launch/wait intrinsic used within a dataflow graph\n\t" + // << *II << "\n"); break; default: - DEBUG( - errs() << "Error: Invalid HPVM Intrinsic inside Internal node!\n\t" - << *II << "\n"); + //DEBUG( + // errs() << "Error: Invalid HPVM Intrinsic inside Internal node!\n\t" + // << *II << "\n"); break; } continue; diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp index de9c025c0e7e996b6abfaa8748adf6688d04d10d..349614f01948db52911c6760e1e99117c06e1bf5 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp @@ -13,7 +13,9 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "DFG2LLVM_CPU" + #include "SupportHPVM/DFG2LLVM.h" + #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" @@ -1426,14 +1428,56 @@ void CGT_CPU::codeGen(DFLeafNode *N) { << " : skipping it\n"); switch (N->getTag()) { - case hpvm::GPU_TARGET: - // A leaf node should not have an 
cpu function for GPU - // by design of DFG2LLVM_OpenCL backend - assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) && + case hpvm::GPU_TARGET: + { + // A leaf node should not have an cpu function for GPU + // by design of DFG2LLVM_OpenCL backend + assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) && "Leaf node not expected to have GPU GenFunc"); - break; - default: - break; + break; + } + case hpvm::CUDNN_TARGET: + { + errs() << "CUDNN hint found. Store CUDNN function as CPU function.\n"; + // Make sure there is a generated CPU function for cudnn + assert(N->getGenFuncForTarget(hpvm::CUDNN_TARGET) && ""); + assert(N->hasCPUGenFuncForTarget(hpvm::CUDNN_TARGET) && ""); + // Store the CUDNN x86 function as the CPU generated function + Function *Ftmp = N->getGenFuncForTarget(N->getTag()); + // after adding the required number of arguments + if (!N->getParent()->isChildGraphStreaming()) { + Ftmp = addIdxDimArgs(Ftmp); + } + + N->removeGenFuncForTarget(hpvm::CUDNN_TARGET); + N->setTag(hpvm::None); + N->addGenFunc(Ftmp, hpvm::CPU_TARGET, true); + N->setTag(hpvm::CPU_TARGET); + break; + } + case hpvm::PROMISE_TARGET: + { + errs() << "Promise hint found. 
Store PROMISE function as CPU function.\n"; + // Make sure there is a generated x86 function for promise + assert(N->getGenFuncForTarget(hpvm::PROMISE_TARGET) && ""); + assert(N->hasCPUGenFuncForTarget(hpvm::PROMISE_TARGET) && ""); + // Store the PROMISE x86 function as the CPU generated function + Function *Ftmp = N->getGenFuncForTarget(N->getTag()); + // after adding the required number of arguments + if (!N->getParent()->isChildGraphStreaming()) { + Ftmp = addIdxDimArgs(Ftmp); + } + + N->setTag(hpvm::None); + N->removeGenFuncForTarget(hpvm::PROMISE_TARGET); + N->addGenFunc(Ftmp, hpvm::CPU_TARGET, true); + N->setTag(hpvm::CPU_TARGET); + break; + } + default: + { + break; + } } return; diff --git a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp index dce9f25319f6fd75d72c16cd847e98fe44b5a793..8b2570fdad9f43be73fa6682d4954413375a5041 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp @@ -143,7 +143,7 @@ void CGT_CUDNN::initRuntimeAPI() { // FIXME: set correct path Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; + Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); if(runtimeModule == nullptr) DEBUG(errs() << Err.getMessage()); diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp index aa310291a13acafdbad38a4269a848470c892bc2..294f9ac5742d561e79b8b81b3d88c22d3b156bcd 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp @@ -40,7 +40,7 @@ namespace { cl::opt<std::string> QuantizationInputsFilename( "quantization-levels-filename", - cl::desc("<CPU quantization levels input file 
(path)>"), + cl::desc("<PROMISE quantization levels input file (path)>"), cl::value_desc("filename"), cl::Required); @@ -280,6 +280,7 @@ public: void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "INITIAL STATE\n"; switch (II->getIntrinsicID()) { case Intrinsic::hpvm_tensor_convolution: { @@ -288,6 +289,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { Mch->addArgument(II->getOperand(1)); // conv kernel Mch->setCurrent(new ConvolutionLayer_1()); + errs() << "TO CONVOLUTION LAYER 1\n"; } break; case Intrinsic::hpvm_tensor_mul: @@ -297,6 +299,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { Mch->addArgument(II->getOperand(1)); // 2nd gemm input Mch->setCurrent(new FullyConnectedLayer_1()); + errs() << "TO FULLY CONNECTED LAYER 1\n"; } break; @@ -321,6 +324,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { Mch->addIntrinsicToRemove(II); Mch->setCurrent(new InitialState()); + errs() << "TO INIT STATE\n"; } break; @@ -328,24 +332,30 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { { Mch->addIntrinsicInst(II); Mch->setCurrent(new SingleTensorOperation()); + errs() << "TO SINGLE OP\n"; } break; } delete this; - } // else {} // No HPVM intrinsic received. Remain at initial + } // else {} // No HPVM intrinsic received. 
Remain at initial + errs() << "TO NO CHANGE\n"; } void SingleTensorOperation::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "SINGLE TENSOR OP\n"; Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; delete this; } + errs() << "NO CHANGE\n"; } void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "FULLY CONNECTED LAYER 1\n"; switch (II->getIntrinsicID()) { case Intrinsic::hpvm_tensor_add: { @@ -357,14 +367,17 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, Mch->addArgument(II->getOperand(1)); // bias Mch->setCurrent(new FullyConnectedLayer_2()); + errs() << "TO FULLY CONNECTED LAYER 2\n"; } break; default: Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; break; } } else { Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; } delete this; } @@ -372,6 +385,7 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "FULLY CONNECTED LAYER 2\n"; switch (II->getIntrinsicID()) { case Intrinsic::hpvm_tensor_tanh: { @@ -382,6 +396,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new FullyConnectedLayer_3()); + errs() << "TO FULLY CONNECTED LAYER 3\n"; } break; case Intrinsic::hpvm_tensor_relu: @@ -393,6 +408,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new FullyConnectedLayer_3()); + errs() << "TO FULLY CONNECTED LAYER 3\n"; } break; case Intrinsic::hpvm_tensor_clipped_relu: @@ -404,10 +420,12 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new FullyConnectedLayer_3()); + errs() << "TO FULLY CONNECTED LAYER 3\n"; } break; 
default: // No activation, but HPVM intrinsic Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; break; } } else { // End of instruction stream @@ -416,6 +434,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, Type::getInt32Ty(Mch->getModule()->getContext()), -1)); Mch->setCurrent(new FullyConnectedLayer()); + errs() << "TO FULLY CONNECTED LAYER\n"; } delete this; } @@ -423,9 +442,12 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (!II) { // End of instruction stream + errs() << "FULLY CONNECTED LAYER 3\n"; Mch->setCurrent(new FullyConnectedLayer()); + errs() << "TO FULLY CONNECTED LAYER\n"; } else { Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; } delete this; } @@ -433,14 +455,18 @@ void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch, void FullyConnectedLayer::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "FULLY CONNECTED LAYER\n"; Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; delete this; } + errs() << "TO NO CHANGE\n"; } void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "CONVOLUTION LAYER 1\n"; switch (II->getIntrinsicID()) { case Intrinsic::hpvm_tensor_add: { @@ -457,10 +483,12 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv Mch->setCurrent(new ConvolutionLayer_2()); + errs() << "TO CONVOLUTION LAYER 2\n"; } break; default: Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; break; } } else { @@ -497,6 +525,7 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, Type::getInt32Ty(Mch->getModule()->getContext()), -1)); Mch->setCurrent(new ConvolutionLayer()); + errs() << "TO CONVOLUTION LAYER\n"; } delete this; } @@ 
-504,6 +533,7 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "CONVOLUTION LAYER 2\n"; switch (II->getIntrinsicID()) { case Intrinsic::hpvm_tensor_tanh: { @@ -513,6 +543,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_3()); + errs() << "TO CONVOLUTION LAYER 3\n"; } break; case Intrinsic::hpvm_tensor_relu: @@ -523,6 +554,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_3()); + errs() << "TO CONVOLUTION LAYER 3\n"; } break; case Intrinsic::hpvm_tensor_clipped_relu: @@ -533,6 +565,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_3()); + errs() << "TO CONVOLUTION LAYER 3\n"; } break; case Intrinsic::hpvm_tensor_pool_max: @@ -551,6 +584,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; } break; case Intrinsic::hpvm_tensor_pool_min: @@ -569,6 +603,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; } break; case Intrinsic::hpvm_tensor_pool_mean: @@ -587,10 +622,12 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; } break; default: // No activation, No pooling, but HPVM intrinsic Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; break; } } else { // End of instruction stream @@ -607,6 +644,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, 
Type::getInt32Ty(Mch->getModule()->getContext()), -1)); Mch->setCurrent(new ConvolutionLayer()); + errs() << "TO CONVOLUTION LAYER\n"; } delete this; } @@ -614,6 +652,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream + errs() << "CONVOLUTION LAYER 3\n"; switch (II->getIntrinsicID()) { case Intrinsic::hpvm_tensor_pool_max: { @@ -643,6 +682,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, } Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; } break; case Intrinsic::hpvm_tensor_pool_min: @@ -674,6 +714,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, } Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; } break; case Intrinsic::hpvm_tensor_pool_mean: @@ -704,10 +745,12 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, } Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; } break; default: // No pooling, but HPVM intrinsic Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; break; } } else { // End of instruction stream @@ -736,6 +779,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, } Mch->setCurrent(new ConvolutionLayer()); + errs() << "TO CONVOLUTION LAYER\n"; } delete this; } @@ -743,9 +787,12 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (!II) { // End of instruction stream + errs() << "CONVOLUTION LAYER 4\n"; Mch->setCurrent(new ConvolutionLayer()); + errs() << "TO CONVOLUTION LAYER\n"; } else { Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; } delete this; } @@ -753,9 +800,12 @@ void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch, void ConvolutionLayer::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { 
// Not end of instruction stream + errs() << "CONVOLUTION LAYER\n"; Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; delete this; } + errs() << "NO CHANGE\n"; } void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {} @@ -772,6 +822,7 @@ void CodeGenStateMachine::transition(IntrinsicInst *II) { void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRef, InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) { + errs() << "TRANSITIONED TO: " << std::to_string(current->getStateID()) << "\n"; assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER) || (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) && @@ -1174,7 +1225,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe default: - llvm_unreachable("Unknown VISC Intrinsic!"); + llvm_unreachable("Unknown HPVM Intrinsic!"); break; } @@ -1280,7 +1331,7 @@ void CGT_WrapperAPI::initRuntimeAPI() { // FIXME: set correct path Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; + Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); if(runtimeModule == nullptr) DEBUG(errs() << Err.getMessage()); @@ -1363,6 +1414,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { // Increment the node ID, for current node. ++nodeID; + errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n"; // Get the function associated with the dataflow node Function *F = N->getFuncPointer(); @@ -1370,7 +1422,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { // Look up if we have visited this function before. If we have, then just // get the cloned function pointer from DFNode. Otherwise, create the cloned // function and add it to the DFNode GenFunc. 
- Function *F_wrapper_api = N->getGenFuncForTarget(hpvm::CPU_TARGET); + Function *F_wrapper_api = N->getGenFuncForTarget(hpvm::PROMISE_TARGET); assert((F_wrapper_api == NULL) && "Error: Visiting a node for which code already generated"); @@ -1385,7 +1437,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { F_wrapper_api->removeFromParent(); M.getFunctionList().push_back(F_wrapper_api); - N->addGenFunc(F_wrapper_api, hpvm::CPU_TARGET, true); + N->addGenFunc(F_wrapper_api, hpvm::PROMISE_TARGET, true); /* Removing HPVM in/out/inout function attributes */ for(Function::arg_iterator ai = F_wrapper_api->arg_begin(), ae = F_wrapper_api->arg_end(); @@ -1429,10 +1481,11 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { for (inst_iterator i = inst_begin(F_wrapper_api), e = inst_end(F_wrapper_api); i != e; ++i) { Instruction *I = &(*i); + errs() << "PRINT INST: " << *I << "\n"; CGM.transition(dyn_cast<IntrinsicInst>(I)); } - - errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n"; + errs() << "CLONED FUNCTION: " << *F_wrapper_api << "\n"; + // errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n"; //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP); CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP); diff --git a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp index 2ddc77ff4bcbfa1bd03ab5fc8fba0d35202980f2..e0032eaaf4cb073a99912a5eef08f1433f942234 100644 --- a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp +++ b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp @@ -823,18 +823,19 @@ void FindFusionTargetsTraversal::codeGen(DFInternalNode *N) { void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { DEBUG(errs() << "Inside leaf node: " << N->getFuncPointer()->getName() << "\n"); - + errs() << "FUSE TARGETS AT LEAF NODE\n"; // Skip fusion check if it is a dummy node if(N->isDummyNode()) { 
DEBUG(errs() << "Skipping dummy node\n"); return; } - - - if(!preferredTargetIncludes(N, hpvm::CPU_TARGET)) { + errs() << "THIS IS NOT A DUMMY NODE\n"; + errs() << "INTRINSIC: " << *isValidHPVMTensorNode(N) << "\n"; + if(!preferredTargetIncludes(N, hpvm::PROMISE_TARGET)) { // Only fuse if we plan to target PROMISE/Layers API // The CUDNN backend would be able to generate calls for the fused node, // but not the other way around + errs() << "NO PROMISE HINT. SKIPPING NODE.\n"; DEBUG(errs() << "No PROMISE hint. Skipping node: " << N->getFuncPointer()->getName() << "\n"); return; @@ -857,23 +858,29 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { */ case Intrinsic::hpvm_tensor_convolution: - { // Found beginning of pattern conv-bias-activation-pooling. + { errs() << "INSTRUCTION: " << *II << "\n"; + + // Found beginning of pattern conv-bias-activation-pooling. // Look for the rest CurrentNodeSequence.push_back(N->getInstruction()); // Look for bias DFNode *SN = findNextNodeInSequence(N); if (!SN) { + errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n"; return; // Did not find a node sequence starting at N. Simpy return. } if (getPreferredTarget(SN) != StartNodePreferredTarget) { + errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n"; return; // Node in sequence has different hint. Simpy return. } IntrinsicInst *SII = isValidHPVMTensorNode(SN); if (SII->getIntrinsicID() != Intrinsic::hpvm_tensor_add) { + errs() << "SUCCESSOR IS NOT A BIAS OPERATION\n"; // Successor is not the bias operation, thus does not fit the pattern. 
return; } + errs() << "SUCCESSOR IS A BIAS OPERATION\n"; // Otherwise, push this node to the current sequence CurrentNodeSequence.push_back(SN->getInstruction()); @@ -882,12 +889,15 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { // Continue with next node, looking for activation (relu, clipped relu, tanh) SN = findNextNodeInSequence(SN); if (!SN) { + errs() << "DID NOT FIND POOLING AND ACTIVATION NODE SEQUENCE\n"; // Did not find a node sequence starting at N.Use current sequence. break; } if (getPreferredTarget(SN) != StartNodePreferredTarget) { + errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n"; break; // Node in sequence has different hint. Use current sequence. } + errs() << "SUCCESSOR IS A ACTIVATION OR POOLING OPERATION\n"; SII = isValidHPVMTensorNode(SN); if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) || @@ -895,13 +905,15 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh)) { // Successor is activation. Push this node to the current sequence. CurrentNodeSequence.push_back(SN->getInstruction()); - + errs() << "SUCCESSOR IS AN ACTIVATION OPERATION\n"; // Will continue, looking for pooling in the next node SN = findNextNodeInSequence(SN); if (!SN) { + errs() << "DID NOT FIND POOLING NODE SEQUENCE\n"; break; // No node in sequence. Use currently found sequence. } if (getPreferredTarget(SN) != StartNodePreferredTarget) { + errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n"; break; // Node in sequence has different hint. Use current sequence. } SII = isValidHPVMTensorNode(SN); @@ -910,6 +922,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) || (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) || (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean)) { + errs() << "SUCCESSOR IS A POOLING OPERATION\n"; // Successor is a pool operation. Use currently found sequence. 
CurrentNodeSequence.push_back(SN->getInstruction()); } @@ -921,16 +934,20 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { // Look for bias DFNode *SN = findNextNodeInSequence(N); if (!SN) { + errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n"; return; // Did not find a node sequence starting at N. Simpy return. } if (getPreferredTarget(SN) != StartNodePreferredTarget) { + errs() << "HINT DOES NOT MATCH IN NODE SEQUENCE\n"; return; // Node in sequence has different hint. Simpy return. } IntrinsicInst *SII = isValidHPVMTensorNode(SN); if (SII->getIntrinsicID() != Intrinsic::hpvm_tensor_add) { + errs() << "SUCCESSOR IS NOT A BIAS OPERATION\n"; // Successor is not the bias operation, thus does not fit the pattern. return; } + errs() << "SUCCESSOR IS BIAS OPERATION\n"; // Otherwise, push this node to the current sequence CurrentNodeSequence.push_back(SN->getInstruction()); // This is a possible fuse target, gemm-add. @@ -946,6 +963,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) || (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) || (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh)) { + errs() << "SUCCESSOR IS ACTIVATION OPERATION\n"; // We found activation in sequence. Push in vector as well. 
CurrentNodeSequence.push_back(SN->getInstruction()); } diff --git a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp index 00063d6dc85c2de2ec609fce29024da037d84f12..cac468a704990746806684d5bad0b3054ccfcc9d 100644 --- a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp +++ b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp @@ -381,7 +381,7 @@ bool GenHPVM::runOnModule(Module &M) { assert(isa<ConstantInt>(CI->getArgOperand(0)) && "Argument to hint must be constant integer!"); ConstantInt *hint = cast<ConstantInt>(CI->getArgOperand(0)); - + errs() << "HINT INSTRUCTION: " << *I << "\n"; hpvm::Target t = (hpvm::Target)hint->getZExtValue(); addHint(CI->getParent()->getParent(), t); DEBUG(errs() << "Found hpvm hint call: " << *CI << "\n");