From 49ae0d441b03cdc502b14e3b1e887bdef117289e Mon Sep 17 00:00:00 2001 From: Akash Kothari <akashk4@tyler.cs.illinois.edu> Date: Mon, 21 Dec 2020 12:12:45 -0600 Subject: [PATCH] Add LLVM-9-ported InPlaceDFG analysis pass --- hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp | 35 +++- hpvm/lib/Transforms/CMakeLists.txt | 1 + .../DFG2LLVM_WrapperAPI/CMakeLists.txt | 4 +- .../DFG2LLVM_WrapperAPI.cpp | 163 +++++++++--------- .../DFG2LLVM_WrapperAPI.exports | 2 + hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp | 97 ++++++++--- .../InPlaceDFG/InPlaceDFGAnalysis.cpp | 7 +- 7 files changed, 195 insertions(+), 114 deletions(-) diff --git a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp index 3177f86005..a8c807704a 100644 --- a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp +++ b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp @@ -53,6 +53,8 @@ bool BuildDFG::runOnModule(Module &M) { // Intrinsic Instruction has been initialized from this point on. Function *F = cast<Function>(II->getOperand(0)->stripPointerCasts()); Root = DFInternalNode::Create(II, F, hpvmUtils::getPreferredTarget(F)); + errs() << "INTRINSIC: " << II << "\n"; + errs() << "ROOT NODE" << Root << "\n"; Roots.push_back(Root); BuildGraph(Root, F); @@ -174,6 +176,9 @@ bool BuildDFG::isTypeCongruent(Type *L, Type *R) { // Handles all the createNodeXX hpvm intrinsics. void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { + errs() << "************ HANDLE CREATE NODE *********\n"; + II->print(errs()); + errs() << "\n"; bool isInternalNode = false; Function *F = cast<Function>((II->getOperand(0))->stripPointerCasts()); @@ -206,6 +211,7 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { // dataflow graph DFInternalNode *childDFNode = DFInternalNode::Create( II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits); + errs() << "INTERNAL NODE: " << childDFNode << "\n"; N->addChildToDFGraph(childDFNode); HandleToDFNodeMap[II] = childDFNode; BuildGraph(childDFNode, F); @@ -213,18 +219,26 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) { // Create Leaf DFnode and add it to the map. 
DFLeafNode *childDFNode = DFLeafNode::Create( II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits); + errs() << "LEAF NODE: " << childDFNode << "\n"; N->addChildToDFGraph(childDFNode); HandleToDFNodeMap[II] = childDFNode; } } void BuildDFG::handleCreateEdge(DFInternalNode *N, IntrinsicInst *II) { + errs() << "************ HANDLE CREATE EDGE *********\n"; + II->print(errs()); + errs() << "\n"; // The DFNode structures must be in the map before the edge is processed HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0)); assert(DFI != HandleToDFNodeMap.end()); DFI = HandleToDFNodeMap.find(II->getOperand(1)); assert(DFI != HandleToDFNodeMap.end()); - + + errs() << "NODE TO MAP OPERAND 0: " << II->getOperand(0) << "\n"; + errs() << "NODE TO MAP OPERAND 1: " << II->getOperand(1) << "\n"; + errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; + errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(1)] << "\n"; DFNode *SrcDF = HandleToDFNodeMap[II->getOperand(0)]; DFNode *DestDF = HandleToDFNodeMap[II->getOperand(1)]; @@ -258,16 +272,23 @@ void BuildDFG::handleCreateEdge(DFInternalNode *N, IntrinsicInst *II) { DestPosition, DestTy, isStreaming); HandleToDFEdgeMap[II] = newDFEdge; + errs() << "NEW EDGE: " << newDFEdge << "\n"; // Add Edge to the dataflow graph associated with the parent node N->addEdgeToDFGraph(newDFEdge); } void BuildDFG::handleBindInput(DFInternalNode *N, IntrinsicInst *II) { + errs() << "************ HANDLE BIND INPUT *********\n"; + II->print(errs()); + errs() << "\n"; // The DFNode structures must be in the map before the edge is processed HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0)); assert(DFI != HandleToDFNodeMap.end()); - + + errs() << "NODE TO MAP: " << II->getOperand(0) << "\n"; + errs() << "SRC NODE: " << N->getChildGraph()->getEntry() << "\n"; + errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; DFNode *SrcDF = N->getChildGraph()->getEntry(); DFNode *DestDF = HandleToDFNodeMap[II->getOperand(0)]; @@ -298,16 +319,23 @@ void BuildDFG::handleBindInput(DFInternalNode *N, IntrinsicInst *II) { DestPosition, DestTy, isStreaming); HandleToDFEdgeMap[II] = newDFEdge; + errs() << "NEW EDGE: " << newDFEdge << "\n"; // Add Edge to the dataflow graph associated with the parent node N->addEdgeToDFGraph(newDFEdge); } void BuildDFG::handleBindOutput(DFInternalNode *N, IntrinsicInst *II) { + errs() << "************ HANDLE BIND OUTPUT *********\n"; + II->print(errs()); + errs() << "\n"; // The DFNode structures must be in the map before the edge is processed HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0)); assert(DFI != HandleToDFNodeMap.end()); - + + errs() << "NODE TO MAP: " << II->getOperand(0) << "\n"; + errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n"; + errs() << "DEST NODE: " << N->getChildGraph()->getExit() << "\n"; DFNode *SrcDF = HandleToDFNodeMap[II->getOperand(0)]; DFNode *DestDF = N->getChildGraph()->getExit(); @@ -338,6 +366,7 @@ void BuildDFG::handleBindOutput(DFInternalNode *N, IntrinsicInst *II) { DestPosition, DestTy, isStreaming); HandleToDFEdgeMap[II] = newDFEdge; + errs() << "NEW EDGE: " << newDFEdge << "\n"; // Add Edge to the dataflow graph associated with the parent node N->addEdgeToDFGraph(newDFEdge); diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt index 5421808ecd..af8c711d33 100644 --- a/hpvm/lib/Transforms/CMakeLists.txt +++ b/hpvm/lib/Transforms/CMakeLists.txt @@ -4,6 +4,7
@@ add_subdirectory(DFG2LLVM_OpenCL) add_subdirectory(DFG2LLVM_CPU) add_subdirectory(GenHPVM) add_subdirectory(LocalMem) +add_subdirectory(HPVM2NGRAPH) add_subdirectory(DFG2LLVM_WrapperAPI) add_subdirectory(ReplaceIntrinsics) add_subdirectory(DFG2LLVM_CUDNN) diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt index 22c219d0a1..d643e595da 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt +++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt @@ -2,7 +2,8 @@ if(WIN32 OR CYGWIN) set(LLVM_LINK_COMPONENTS Core Support) endif() -add_llvm_loadable_module( LLVMDFG2LLVM_WrapperAPI +add_llvm_library( LLVMDFG2LLVM_WrapperAPI + MODULE DFG2LLVM_WrapperAPI.cpp DEPENDS @@ -10,3 +11,4 @@ add_llvm_loadable_module( LLVMDFG2LLVM_WrapperAPI PLUGIN_TOOL opt ) + diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp index ecec258dfe..e8a3f59ffa 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp @@ -22,10 +22,11 @@ #include "llvm/Support/SourceMgr.h" #include "llvm/Support/FileSystem.h" #include "llvm/IR/Attributes.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm-c/Core.h" -#include "llvm/SupportVISC/VISCTimer.h" -#include "llvm/SupportVISC/DFG2LLVM.h" -#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" + +#include "SupportHPVM/DFG2LLVM.h" +#include "InPlaceDFG/InPlaceDFGAnalysis.h" #include <sstream> #include <fstream> @@ -280,7 +281,7 @@ public: void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_convolution: + case Intrinsic::hpvm_tensor_convolution: { Mch->addIntrinsicInst(II); Mch->addArgument(II->getOperand(0)); // conv input @@ -289,7 +290,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { Mch->setCurrent(new ConvolutionLayer_1()); } break; - case Intrinsic::visc_tensor_mul: + case Intrinsic::hpvm_tensor_mul: { Mch->addIntrinsicInst(II); Mch->addArgument(II->getOperand(0)); // 1st gemm input @@ -299,10 +300,10 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { } break; - case Intrinsic::visc_node_id: + case Intrinsic::hpvm_node_id: { - DEBUG(errs() << "\t: Handling __visc_node_id \n"); + DEBUG(errs() << "\t: Handling __hpvm_node_id \n"); // Get uint32 node ID Value *Op = II->getOperand(0); @@ -312,11 +313,11 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { Module *M = Mch->getModule(); Module *RtM = Mch->getRtModule(); - Constant* visc_node_id_call = + FunctionCallee hpvm_node_id_call = M->getOrInsertFunction(StringRef("tensor_set_node_id"), RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType()); - CallInst::Create(visc_node_id_call, Args, "", II); + CallInst::Create(hpvm_node_id_call, Args, "", II); Mch->addIntrinsicToRemove(II); Mch->setCurrent(new InitialState()); @@ -346,7 +347,7 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: + case Intrinsic::hpvm_tensor_add: { IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0); assert((MulII == II->getOperand(0)) && @@ -372,7 +373,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, 
IntrinsicInst *II) { if (II) { // Not end of instruction stream switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: + case Intrinsic::hpvm_tensor_tanh: { // Type of activation : TanH Mch->addArgument(ConstantInt::get( @@ -383,7 +384,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new FullyConnectedLayer_3()); } break; - case Intrinsic::visc_tensor_relu: + case Intrinsic::hpvm_tensor_relu: { // Type of activation : ReLU Mch->addArgument(ConstantInt::get( @@ -394,7 +395,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new FullyConnectedLayer_3()); } break; - case Intrinsic::visc_tensor_clipped_relu: + case Intrinsic::hpvm_tensor_clipped_relu: { // Type of activation : Clipped ReLU Mch->addArgument(ConstantInt::get( @@ -441,7 +442,7 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: + case Intrinsic::hpvm_tensor_add: { IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); assert((ConvII == II->getOperand(0)) && @@ -504,7 +505,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: + case Intrinsic::hpvm_tensor_tanh: { // Type of activation : TanH // Mch->addArgument(ConstantInt::get( @@ -514,7 +515,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new ConvolutionLayer_3()); } break; - case Intrinsic::visc_tensor_relu: + case Intrinsic::hpvm_tensor_relu: { // Type of activation : ReLU // Mch->addArgument(ConstantInt::get( @@ -524,7 +525,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new ConvolutionLayer_3()); } break; - case Intrinsic::visc_tensor_clipped_relu: + case Intrinsic::hpvm_tensor_clipped_relu: { // Type of activation : Clipped ReLU // Mch->addArgument(ConstantInt::get( @@ -534,7 +535,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new ConvolutionLayer_3()); } break; - case Intrinsic::visc_tensor_pool_max: + case Intrinsic::hpvm_tensor_pool_max: { // pool max Mch->addArgument(ConstantInt::get( @@ -552,7 +553,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new ConvolutionLayer_4()); } break; - case Intrinsic::visc_tensor_pool_min: + case Intrinsic::hpvm_tensor_pool_min: { // pool min FIXME: 2: supported? 
Mch->addArgument(ConstantInt::get( @@ -570,7 +571,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new ConvolutionLayer_4()); } break; - case Intrinsic::visc_tensor_pool_mean: + case Intrinsic::hpvm_tensor_pool_mean: { // pool mean Mch->addArgument(ConstantInt::get( @@ -614,7 +615,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_pool_max: + case Intrinsic::hpvm_tensor_pool_max: { // pool max Mch->addArgument(ConstantInt::get( @@ -630,13 +631,13 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { + if (ActIID == Intrinsic::hpvm_tensor_tanh) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu + } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 2)); } @@ -644,7 +645,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new ConvolutionLayer_4()); } break; - case Intrinsic::visc_tensor_pool_min: + case Intrinsic::hpvm_tensor_pool_min: { // pool min FIXME: 2: supported? Mch->addArgument(ConstantInt::get( @@ -661,13 +662,13 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { + if (ActIID == Intrinsic::hpvm_tensor_tanh) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu + } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 2)); } @@ -675,7 +676,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, Mch->setCurrent(new ConvolutionLayer_4()); } break; - case Intrinsic::visc_tensor_pool_mean: + case Intrinsic::hpvm_tensor_pool_mean: { // pool max Mch->addArgument(ConstantInt::get( @@ -691,13 +692,13 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { + if (ActIID == Intrinsic::hpvm_tensor_tanh) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu + } else { 
//ActIID == Intrinsic::hpvm_tensor_clipped_relu Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 2)); } @@ -723,13 +724,13 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { + if (ActIID == Intrinsic::hpvm_tensor_tanh) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu + } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 2)); } @@ -800,11 +801,10 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe switch (current->getStateID()) { case AbstractState::ID::CONVOLUTION_LAYER: { - Constant* wrapper_ConvLayer2 = + FunctionCallee wrapper_ConvLayer2 = M->getOrInsertFunction(StringRef("wrapper_ConvLayer2"), RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType()); - DEBUG(errs() << *wrapper_ConvLayer2); // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0 Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); @@ -835,10 +835,9 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe break; case AbstractState::ID::FULLY_CONNECTED_LAYER: { - Constant* wrapper_FCLayer = + FunctionCallee wrapper_FCLayer = M->getOrInsertFunction(StringRef("wrapper_FCLayer"), RtM->getFunction(StringRef("wrapper_FCLayer"))->getFunctionType()); - DEBUG(errs() << *wrapper_FCLayer); // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0 Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); @@ -888,7 +887,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe errs() << "TensorII: " << *TensorII << "\n"; switch (TensorII->getIntrinsicID()) { - case Intrinsic::visc_tensor_group_convolution: + case Intrinsic::hpvm_tensor_group_convolution: { /* llvm.hpvm.tensor.group.conv */ // Tensor group conv is not in place. DEBUG(errs() << F->getName() << "\t: Handling tensor group convolution \n"); @@ -922,7 +921,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Args.push_back(TensorII->getOperand(7)); // Create wrapper API runtime function call - Constant* wrapper_tensorGroupConvolution = + FunctionCallee wrapper_tensorGroupConvolution = M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"), RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType()); CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution, @@ -932,7 +931,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe } break; - case Intrinsic::visc_tensor_batchnorm: + case Intrinsic::hpvm_tensor_batchnorm: { /* llvm.hpvm.tensor.batchnorm */ // Tensor batchnorm is not in place. 
@@ -963,7 +962,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Args.push_back(TensorII->getOperand(5)); // Create wrapper API runtime function call - Constant* wrapper_tensorBatchNorm = + FunctionCallee wrapper_tensorBatchNorm = M->getOrInsertFunction(StringRef("wrapper_tensorBatchNorm"), RtM->getFunction(StringRef("wrapper_tensorBatchNorm"))->getFunctionType()); CallInst* CI = CallInst::Create(wrapper_tensorBatchNorm, @@ -973,7 +972,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe } break; - case Intrinsic::visc_tensor_add: + case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */ DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n"); @@ -1009,7 +1008,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Args.push_back(TensorII->getOperand(1)); // Create wrapper API runtime function call - Constant* wrapper_tensorAdd = + FunctionCallee wrapper_tensorAdd = M->getOrInsertFunction(StringRef("wrapper_tensorAdd"), RtM->getFunction(StringRef("wrapper_tensorAdd"))->getFunctionType()); CallInst::Create(wrapper_tensorAdd, Args, "", TensorII); @@ -1019,9 +1018,9 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe } break; - case Intrinsic::visc_tensor_pool_max: - case Intrinsic::visc_tensor_pool_mean: - case Intrinsic::visc_tensor_pool_min: + case Intrinsic::hpvm_tensor_pool_max: + case Intrinsic::hpvm_tensor_pool_mean: + case Intrinsic::hpvm_tensor_pool_min: { DEBUG(errs() << F->getName() << "\t: Handling tensor pooling functions\n"); @@ -1046,13 +1045,13 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Args.push_back(TensorII->getOperand(0)); int pool_type = 0; - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_max) { + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) { pool_type = 0; } - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean) { + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) { pool_type = 1; } - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_min) { + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) { pool_type = 2; } @@ -1068,10 +1067,9 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Args.push_back(TensorII->getOperand(6)); // Create wrapper API runtime function call - Constant* wrapper_tensorPooling = + FunctionCallee wrapper_tensorPooling = M->getOrInsertFunction(StringRef("wrapper_tensorPooling"), RtM->getFunction(StringRef("wrapper_tensorPooling"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorPooling); CallInst* CI = CallInst::Create(wrapper_tensorPooling, Args, "", TensorII); // Replacing intrinsic result uses with the result of the tensor runtime operation @@ -1079,9 +1077,9 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe } break; - case Intrinsic::visc_tensor_relu: - case Intrinsic::visc_tensor_clipped_relu: - case Intrinsic::visc_tensor_tanh: + case Intrinsic::hpvm_tensor_relu: + case Intrinsic::hpvm_tensor_clipped_relu: + case Intrinsic::hpvm_tensor_tanh: { DEBUG(errs() << F->getName() << "\t: Handling tensor activation functions\n"); @@ -1111,28 +1109,25 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Args.push_back(TensorII->getOperand(0)); - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_relu) { + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) { // 
Create wrapper API runtime function call - Constant* wrapper_tensorRelu = + FunctionCallee wrapper_tensorRelu = M->getOrInsertFunction(StringRef("wrapper_tensorRelu"), RtM->getFunction(StringRef("wrapper_tensorRelu"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorRelu); CallInst::Create(wrapper_tensorRelu, Args, "", TensorII); } - else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) { + else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) { // Create wrapper API runtime function call - Constant* wrapper_tensorClippedRelu = + FunctionCallee wrapper_tensorClippedRelu = M->getOrInsertFunction(StringRef("wrapper_tensorClippedRelu"), RtM->getFunction(StringRef("wrapper_tensorClippedRelu"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorClippedRelu); CallInst::Create(wrapper_tensorClippedRelu, Args, "", TensorII); } - else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_tanh) { + else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) { // Create wrapper API runtime function call - Constant* wrapper_tensorTanh = + FunctionCallee wrapper_tensorTanh = M->getOrInsertFunction(StringRef("wrapper_tensorTanh"), RtM->getFunction(StringRef("wrapper_tensorTanh"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorTanh); CallInst::Create(wrapper_tensorTanh, Args, "", TensorII); } @@ -1143,8 +1138,8 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe } break; - case Intrinsic::visc_tensor_softmax: - { /* llvm.visc.tensor.softmax */ + case Intrinsic::hpvm_tensor_softmax: + { /* llvm.hpvm.tensor.softmax */ DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n"); // Tensor softmax(a) is in place for argument a. @@ -1167,10 +1162,9 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Args.push_back(TensorII->getOperand(0)); // Create wrapper API runtime function call - Constant* wrapper_tensorSoftmax = + FunctionCallee wrapper_tensorSoftmax = M->getOrInsertFunction(StringRef("wrapper_tensorSoftmax"), RtM->getFunction(StringRef("wrapper_tensorSoftmax"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorSoftmax); CallInst::Create(wrapper_tensorSoftmax, Args, "", TensorII); // We can replace the call to hpvm.tensor.softmax with the 1st argument // that, due to in place operation, now contains the result @@ -1238,13 +1232,13 @@ private: InPlaceDFGAnalysis::InPlaceDFGParameter *IPP; - // VISC Runtime API and Tensor runtime API - Constant* llvm_hpvm_initApproxhpvmRt; - Constant* llvm_hpvm_cleanupApproxhpvmRt; - Constant* hpvm_request_tensor; + // HPVM Runtime API and Tensor runtime API + FunctionCallee llvm_hpvm_initApproxhpvmRt; + FunctionCallee llvm_hpvm_cleanupApproxhpvmRt; + FunctionCallee hpvm_request_tensor; - Constant* llvm_hpvm_initializeRuntimeController; - Constant* llvm_hpvm_clearRuntimeController; + FunctionCallee llvm_hpvm_initializeRuntimeController; + FunctionCallee llvm_hpvm_clearRuntimeController; // Functions @@ -1304,11 +1298,11 @@ void CGT_WrapperAPI::initRuntimeAPI() { DECLARE(llvm_hpvm_initializeRuntimeController); DECLARE(llvm_hpvm_clearRuntimeController); - // Find visc.init and visc.cleanup calls, and add placeholder methods + // Find hpvm.init and hpvm.cleanup calls, and add placeholder methods // for initialization and cleanup of the hpvm tensor runtime - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n"); + Function* VI = M.getFunction("llvm.hpvm.init"); +
assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n"); InitCall = cast<Instruction>(*VI->user_begin()); CallInst::Create(llvm_hpvm_initApproxhpvmRt, ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), @@ -1339,8 +1333,8 @@ void CGT_WrapperAPI::initRuntimeAPI() { ArrayRef<Value*> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst}; CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "", InitCall); - Function* VC = M.getFunction("llvm.visc.cleanup"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n"); + Function* VC = M.getFunction("llvm.hpvm.cleanup"); + assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n"); CleanupCall = cast<Instruction>(*VC->user_begin()); CallInst::Create(llvm_hpvm_cleanupApproxhpvmRt, ArrayRef<Value*>(), "", CleanupCall); CallInst::Create(llvm_hpvm_clearRuntimeController, ArrayRef<Value*>(), "", CleanupCall); @@ -1376,7 +1370,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { // Look up if we have visited this function before. If we have, then just // get the cloned function pointer from DFNode. Otherwise, create the cloned // function and add it to the DFNode GenFunc. - Function *F_wrapper_api = N->getGenFuncForTarget(visc::PROMISE_TARGET); + Function *F_wrapper_api = N->getGenFuncForTarget(hpvm::PROMISE_TARGET); assert((F_wrapper_api == NULL) && "Error: Visiting a node for which code already generated"); @@ -1391,7 +1385,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { F_wrapper_api->removeFromParent(); M.getFunctionList().push_back(F_wrapper_api); - N->addGenFunc(F_wrapper_api, visc::PROMISE_TARGET, true); + N->addGenFunc(F_wrapper_api, hpvm::PROMISE_TARGET, true); /* Removing HPVM in/out/inout function attributes */ for(Function::arg_iterator ai = F_wrapper_api->arg_begin(), ae = F_wrapper_api->arg_end(); @@ -1407,9 +1401,9 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { // Adding nounwind to generated function : FIXME: needed? DEBUG(errs() << "Adding nounwind to generated function\n"); - F_wrapper_api->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); + F_wrapper_api->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); - // Add llvm_visc_requestTensor calls for every pointer argument of the function + // Add llvm_hpvm_requestTensor calls for every pointer argument of the function // (they are all expected to be tensors), at the beginning of the function. 
// This is the first instruction of the function, insert them before this Instruction* FI = &*(F_wrapper_api->getEntryBlock().begin()); @@ -1530,3 +1524,4 @@ static RegisterPass<DFG2LLVM_WrapperAPI> X("dfg2llvm-wrapperapi", true /* transformation, * * not just analysis */); + diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports index e69de29bb2..139597f9cb 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports +++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports @@ -0,0 +1,2 @@ + + diff --git a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp index 6c3dcd75f6..2d62ffda0e 100644 --- a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp +++ b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp @@ -169,6 +169,21 @@ IS_HPVM_CALL(requestMemory) IS_HPVM_CALL(attributes) IS_HPVM_CALL(hint) +// Tensor Operators +IS_HPVM_CALL(tensor_mul) +IS_HPVM_CALL(tensor_convolution) +IS_HPVM_CALL(tensor_group_convolution) +IS_HPVM_CALL(tensor_batchnorm) +IS_HPVM_CALL(tensor_add) +IS_HPVM_CALL(tensor_pool_max) +IS_HPVM_CALL(tensor_pool_min) +IS_HPVM_CALL(tensor_pool_mean) +IS_HPVM_CALL(tensor_relu) +IS_HPVM_CALL(tensor_clipped_relu) +IS_HPVM_CALL(tensor_tanh) +IS_HPVM_CALL(tensor_sigmoid) +IS_HPVM_CALL(tensor_softmax) + // Return the constant integer represented by value V static unsigned getNumericValue(Value *V) { assert(isa<ConstantInt>(V) && @@ -264,32 +279,32 @@ bool GenHPVM::runOnModule(Module &M) { // Load Runtime API Module SMDiagnostic Err; - std::string runtimeAPI = std::string(LLVM_BUILD_DIR_STR) + - "/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc"; + //std::string runtimeAPI = std::string(LLVM_BUILD_DIR_STR) + + // "/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc"; - std::unique_ptr<Module> runtimeModule = - parseIRFile(runtimeAPI, Err, M.getContext()); + //std::unique_ptr<Module> runtimeModule = + // parseIRFile(runtimeAPI, Err, M.getContext()); - if (runtimeModule == NULL) { - DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n"); - assert(false && "couldn't parse runtime"); - } else - DEBUG(errs() << "Successfully loaded hpvm-rt API module\n"); + //if (runtimeModule == NULL) { + //DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n"); + // assert(false && "couldn't parse runtime"); + //} else + // DEBUG(errs() << "Successfully loaded hpvm-rt API module\n"); - llvm_hpvm_initializeTimerSet = M.getOrInsertFunction( - "llvm_hpvm_initializeTimerSet", - runtimeModule->getFunction("llvm_hpvm_initializeTimerSet") - ->getFunctionType()); + //llvm_hpvm_initializeTimerSet = M.getOrInsertFunction( + // "llvm_hpvm_initializeTimerSet", + // runtimeModule->getFunction("llvm_hpvm_initializeTimerSet") + // ->getFunctionType()); // DEBUG(errs() << *llvm_hpvm_initializeTimerSet); - llvm_hpvm_switchToTimer = M.getOrInsertFunction( - "llvm_hpvm_switchToTimer", - runtimeModule->getFunction("llvm_hpvm_switchToTimer")->getFunctionType()); + //llvm_hpvm_switchToTimer = M.getOrInsertFunction( + // "llvm_hpvm_switchToTimer", + // runtimeModule->getFunction("llvm_hpvm_switchToTimer")->getFunctionType()); // DEBUG(errs() << *llvm_hpvm_switchToTimer); - llvm_hpvm_printTimerSet = M.getOrInsertFunction( - "llvm_hpvm_printTimerSet", - runtimeModule->getFunction("llvm_hpvm_printTimerSet")->getFunctionType()); + //llvm_hpvm_printTimerSet = M.getOrInsertFunction( + // "llvm_hpvm_printTimerSet", + // runtimeModule->getFunction("llvm_hpvm_printTimerSet")->getFunctionType()); // 
DEBUG(errs() << *llvm_hpvm_printTimerSet); // Insert init context in main @@ -298,16 +313,16 @@ bool GenHPVM::runOnModule(Module &M) { assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once"); Instruction *I = cast<Instruction>(*VI->user_begin()); - DEBUG(errs() << "Initialize Timer Set\n"); - initializeTimerSet(I); - switchToTimer(hpvm_TimerID_NONE, I); + //DEBUG(errs() << "Initialize Timer Set\n"); + //initializeTimerSet(I); + //switchToTimer(hpvm_TimerID_NONE, I); // Insert print instruction at hpvm exit DEBUG(errs() << "Locate __hpvm__cleanup()\n"); Function *VC = M.getFunction("__hpvm__cleanup"); assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once"); I = cast<Instruction>(*VC->user_begin()); - printTimerSet(I); + //printTimerSet(I); DEBUG(errs() << "-------- Searching for launch sites ----------\n"); @@ -711,6 +726,42 @@ bool GenHPVM::runOnModule(Module &M) { if (isHPVMCall_cos(I)) { ReplaceCallWithIntrinsic(I, Intrinsic::cos, &toBeErased); } + if (isHPVMCall_tensor_convolution(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_convolution, &toBeErased); + } + if (isHPVMCall_tensor_group_convolution(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_group_convolution, &toBeErased); + } + if (isHPVMCall_tensor_add(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_add, &toBeErased); + } + if (isHPVMCall_tensor_batchnorm(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_batchnorm, &toBeErased); + } + if (isHPVMCall_tensor_mul(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_mul, &toBeErased); + } + if (isHPVMCall_tensor_pool_max(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_pool_max, &toBeErased); + } + if (isHPVMCall_tensor_pool_min(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_pool_min, &toBeErased); + } + if (isHPVMCall_tensor_pool_mean(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_pool_mean, &toBeErased); + } + if (isHPVMCall_tensor_relu(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_relu, &toBeErased); + } + if (isHPVMCall_tensor_tanh(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_tanh, &toBeErased); + } + if (isHPVMCall_tensor_clipped_relu(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_clipped_relu, &toBeErased); + } + if (isHPVMCall_tensor_softmax(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_softmax, &toBeErased); + } } // Erase the __hpvm__node calls diff --git a/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp b/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp index a45e6e3645..db5a1f5fe0 100644 --- a/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp +++ b/hpvm/lib/Transforms/InPlaceDFG/InPlaceDFGAnalysis.cpp @@ -17,8 +17,8 @@ #define DEBUG_TYPE "InPlaceDFGAnalysis" #include "llvm/Support/SourceMgr.h" -#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" -#include "llvm/SupportVISC/DFG2LLVM.h" +#include "InPlaceDFG/InPlaceDFGAnalysis.h" +#include "SupportHPVM/DFG2LLVM.h" using namespace llvm; using namespace builddfg; @@ -279,7 +279,7 @@ void AT_OCL::codeGen(DFLeafNode* N) { CallInst *CI = dyn_cast<CallInst>(OutValues[i]); assert(CI && "Expected return value to be the result of a call instruction\n"); - assert ((CI->getCalledFunction()->getName()).startswith("llvm.visc.tensor") && + assert ((CI->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") && "Node output must be the result of an HPVM tensor intrinsic\n"); } } @@ -316,3 +316,4 @@ static RegisterPass<InPlaceDFGAnalysisWrapper> 
X("inplace", } // End of namespace + -- GitLab