From 91d350a2e573c2f62825ab3baef84a3fbc14e549 Mon Sep 17 00:00:00 2001 From: Akash Kothari <akashk4@tyler.cs.illinois.edu> Date: Tue, 8 Dec 2020 12:17:37 -0600 Subject: [PATCH] Add support for node ID intrinsic --- llvm/include/llvm/IR/IntrinsicsVISC.td | 3 + llvm/lib/Transforms/ClearDFG/ClearDFG.cpp | 23 +- .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp | 46 +++- .../DFG2LLVM_WrapperAPI.cpp | 198 ++++++++---------- .../FuseHPVMTensorNodes.cpp | 76 +++++-- llvm/lib/Transforms/GenVISC/GenVISC.cpp | 98 +-------- 6 files changed, 217 insertions(+), 227 deletions(-) diff --git a/llvm/include/llvm/IR/IntrinsicsVISC.td b/llvm/include/llvm/IR/IntrinsicsVISC.td index ab22372d80..404903648f 100644 --- a/llvm/include/llvm/IR/IntrinsicsVISC.td +++ b/llvm/include/llvm/IR/IntrinsicsVISC.td @@ -325,4 +325,7 @@ let TargetPrefix = "visc" in { llvm_i32_ty, llvm_i32_ty], []>; + def int_visc_node_id : Intrinsic<[llvm_ptr_ty], [llvm_i32_ty], []>; + + } diff --git a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp index 37f74325a7..84f9bec04f 100644 --- a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp +++ b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp @@ -59,6 +59,7 @@ private: //Functions void deleteNode(DFNode* N); + public: // Constructor TreeTraversal(Module &_M, BuildDFG &_DFG) : M(_M), DFG(_DFG) { } @@ -88,6 +89,7 @@ public: }; bool ClearDFG::runOnModule(Module &M) { + errs() << "\nCLEARDFG PASS\n"; // Get the BuildDFG Analysis Results: // - Dataflow graph @@ -118,6 +120,19 @@ bool ClearDFG::runOnModule(Module &M) { VC->replaceAllUsesWith(UndefValue::get(VC->getType())); VC->eraseFromParent(); + + Function* VN = M.getFunction("llvm.visc.node.id"); + if (VN != NULL){ // Delete visc.node.id intrinsic calls if they exist + for(Value::user_iterator ui = VN->user_begin(), ue = VN->user_end(); ui != ue; ui++) { + Instruction* I = dyn_cast<Instruction>(*ui); + I->eraseFromParent(); + } + + VN->replaceAllUsesWith(UndefValue::get(VN->getType())); + VN->eraseFromParent(); + } + + // Visitor for Code Generation Graph Traversal TreeTraversal *Visitor = new TreeTraversal(M, DFG); @@ -125,7 +140,9 @@ bool ClearDFG::runOnModule(Module &M) { for (auto rootNode: Roots) { Visitor->visit(rootNode); } + delete Visitor; + return true; } @@ -149,7 +166,7 @@ void TreeTraversal::deleteNode(DFNode* N) { char ClearDFG::ID = 0; static RegisterPass<ClearDFG> X("clearDFG", - "Delete all DFG functions for which code has been generated", - false /* does not modify the CFG */, - true /* transformation, not just analysis */); + "Delete all DFG functions for which code has been generated", + false /* does not modify the CFG */, + true /* transformation, not just analysis */); diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp index abc4e9ef89..f18325588c 100644 --- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp @@ -145,7 +145,7 @@ void CGT_CUDNN::initRuntimeAPI() { runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); if(runtimeModule == nullptr) DEBUG(errs() << Err.getMessage()); - else + else DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); // Get or insert Global declarations for @@ -158,7 +158,15 @@ void CGT_CUDNN::initRuntimeAPI() { // Find visc.init and visc.cleanup calls, and add placeholder methods // for initialization and cleanup of the hpvm tensor runtime - + /* + LLVMContext &C = M.getContext(); + auto *FuncType = FunctionType::get(Type::getVoidTy(C), ArrayRef<Type *>({Type::getInt32Ty(C)}), false); + llvm_hpvm_initTensorRt = M.getOrInsertFunction(StringRef("llvm_hpvm_initTensorRt"), FuncType); + FuncType = FunctionType::get(Type::getVoidTy(C), ArrayRef<Type *>({}), false); + llvm_hpvm_cleanupTensorRt = M.getOrInsertFunction(StringRef("llvm_hpvm_cleanupTensorRt"), FuncType); + FuncType = FunctionType::get(Type::getVoidTy(C), ArrayRef<Type *>({Type::getInt8PtrTy(C), Type::getInt32Ty(C)}), false); + hpvm_request_tensor = M.getOrInsertFunction(StringRef("hpvm_request_tensor"), FuncType); +*/ Function* VI = M.getFunction("llvm.visc.init"); assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n"); InitCall = cast<Instruction>(*VI->user_begin()); @@ -166,6 +174,7 @@ void CGT_CUDNN::initRuntimeAPI() { ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), "", InitCall); + Function* VC = M.getFunction("llvm.visc.cleanup"); assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n"); CleanupCall = cast<Instruction>(*VC->user_begin()); @@ -264,9 +273,13 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { if (BuildDFG::isViscIntrinsic(I)) { IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") - && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + //assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") + // && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + //if (!(II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")){ + //continue; // skip non-tensor ops + //} + /********************* Handle VISC Tensor intrinsics ********************/ switch (II->getIntrinsicID()) { @@ -427,7 +440,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { { /* llvm.visc.tensor.relu */ DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n"); - // Argument list - tensorPooling(input, poolFunction, window_height, window_width, vertical_pad, horizontal_pad, + // Argument list - tensorPooling(input, poolFunction, window_height, + // window_width, vertical_pad, horizontal_pad, // vertical_stride, horizontal_stride); std::vector<Value*> Args; Args.push_back(II->getOperand(0)); @@ -540,6 +554,28 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { IItoRemove.push_back(II); } break; + + case Intrinsic::visc_node_id: + { /* llvm.visc.node.id */ + DEBUG(errs() << F_cudnn->getName() << "\t: Handling Node ID Intrinsic \n"); + // Get uint32 argument + Value *Op = II->getOperand(0); + + // Argument list for the runtime call + std::vector<Value*> Args; + Args.push_back(II->getOperand(0)); + + // Create hpvm-tensor-rt function call + Constant* tensor_set_node_id; + DECLARE(tensor_set_node_id); + CallInst::Create(tensor_set_node_id, Args, "", II); + + // Mark to remove at the end + IItoRemove.push_back(II); + } + break; + + default: llvm_unreachable("Unknown VISC Intrinsic!"); break; diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp index ac5fa450c0..ecec258dfe 100644 --- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp @@ -73,6 +73,7 @@ private: std::vector<Value*> Args; std::vector<IntrinsicInst*> IIs; + std::vector<IntrinsicInst*> IIs_remove; // Intrinsics to remove AbstractState *current; public: @@ -88,6 +89,10 @@ public: return M; } + Module *getRtModule() { + return RtM; + } + void addArgument(Value *Arg) { Args.push_back(Arg); } @@ -96,6 +101,10 @@ public: IIs.push_back(II); } + void addIntrinsicToRemove(IntrinsicInst *II) { + IIs_remove.push_back(II); + } + IntrinsicInst *getIntrinsicInstAt(unsigned idx) { return IIs[idx]; } @@ -267,6 +276,7 @@ public: void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; }; + void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream switch (II->getIntrinsicID()) { @@ -288,6 +298,31 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { Mch->setCurrent(new FullyConnectedLayer_1()); } break; + + case Intrinsic::visc_node_id: + { + + DEBUG(errs() << "\t: Handling __visc_node_id \n"); + // Get uint32 node ID + Value *Op = II->getOperand(0); + + std::vector<Value*> Args; + Args.push_back(Op); + + Module *M = Mch->getModule(); + Module *RtM = Mch->getRtModule(); + + Constant* visc_node_id_call = + M->getOrInsertFunction(StringRef("tensor_set_node_id"), + RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType()); + + CallInst::Create(visc_node_id_call, Args, "", II); + + Mch->addIntrinsicToRemove(II); + Mch->setCurrent(new InitialState()); + } + break; + default: // Other HPVM intrinsic { Mch->addIntrinsicInst(II); @@ -438,14 +473,15 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); // No pooling // 0 for unused pool arguments: @@ -470,9 +506,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, switch (II->getIntrinsicID()) { case Intrinsic::visc_tensor_tanh: { - // Type of activation : TanH -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Type of activation : TanH + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_3()); @@ -480,9 +516,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, break; case Intrinsic::visc_tensor_relu: { - // Type of activation : ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + // Type of activation : ReLU + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), 1)); Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_3()); @@ -490,9 +526,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, break; case Intrinsic::visc_tensor_clipped_relu: { - // Type of activation : Clipped ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + // Type of activation : Clipped ReLU + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), 2)); Mch->addIntrinsicInst(II); Mch->setCurrent(new ConvolutionLayer_3()); @@ -613,6 +649,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, // pool min FIXME: 2: supported? Mch->addArgument(ConstantInt::get( Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + // pool_size_v, pool_size_h, pool pad_v, // pool_pad_h, pool_stride_v, pool_stride_h for (int i = 1; i < 7; i++) { @@ -737,10 +774,11 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER) || (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) && - "Unsupported instruction sequence for the Wrapper API.\n" ); + "Unsupported instruction sequence for the Wrapper API.\n" ); if ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)) { + // Layer Operation. DEBUG(errs() << "Layer Instruction Sequence. Validating ...\n"); // We have a valid instruction sequence. @@ -765,6 +803,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe Constant* wrapper_ConvLayer2 = M->getOrInsertFunction(StringRef("wrapper_ConvLayer2"), RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType()); + DEBUG(errs() << *wrapper_ConvLayer2); // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0 @@ -839,12 +878,14 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe CI->insertBefore(IIlast); IIlast->replaceAllUsesWith(CI); - } else { // SINGLE_TENSOR_OPERATION + } + else { // SINGLE_TENSOR_OPERATION assert((IIs.size() == 1) && "Unexpected size of intrinsics vector in code gen state machine.\n"); assert(Args.empty() && "Unexpected arguments found in coge gen state machine.\n"); IntrinsicInst *TensorII = IIs[0]; -errs() << "TensorII: " << *TensorII << "\n"; + + errs() << "TensorII: " << *TensorII << "\n"; switch (TensorII->getIntrinsicID()) { case Intrinsic::visc_tensor_group_convolution: @@ -893,8 +934,9 @@ errs() << "TensorII: " << *TensorII << "\n"; case Intrinsic::visc_tensor_batchnorm: { /* llvm.hpvm.tensor.batchnorm */ + // Tensor batchnorm is not in place. - // FIXME: Add Check for InPlace Analysis + // FIXME: Add Check for InPlace Analysis DEBUG(errs() << F->getName() << "\t: Handling tensor batch normalization \n"); // Argument list for the runtime call @@ -933,18 +975,18 @@ errs() << "TensorII: " << *TensorII << "\n"; case Intrinsic::visc_tensor_add: { /* llvm.hpvm.tensor.add */ - DEBUG(errs() << F->getName() << "\t: Handling tensor add\n"); - // Tensor add(a,b) is in place for argument a. -// Value *Op = TensorII->getOperand(0); + DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n"); + // Tensor add(a,b) is in place for argument a. + // Value *Op = TensorII->getOperand(0); // Test the intrinsic operand for in place operation. -// bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); + // bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); // Code generation will not continue if this is false, because the target // may provide an in place operation(safe choice) // FIXME: remove this comment - must check for in-place -// assert(inplace && -// "Operand not valid for in place operation. Code gen aborted.\n"); + // assert(inplace && + // "Operand not valid for in place operation. Code gen aborted.\n"); // Argument list for the runtime call @@ -1047,13 +1089,11 @@ errs() << "TensorII: " << *TensorII << "\n"; Value *Op = TensorII->getOperand(0); // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); + //-- bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); // Code generation will not continue if this is false, because the target // may provide an in place operation(safe choice) - assert(inplace && - "Operand not valid for in place operation. Code gen aborted.\n"); - - // Argument list for the runtime call + //-- assert(inplace && + //-- "Operand not valid for in place operation. Code gen aborted.\n"); // Create string for node name, as first argument for wrapper API call Constant *ConstArray = ConstantDataArray::getString(M->getContext(), @@ -1110,15 +1150,6 @@ errs() << "TensorII: " << *TensorII << "\n"; // Tensor softmax(a) is in place for argument a. Value *Op = TensorII->getOperand(0); - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); - // Code generation will not continue if this is false, because the target - // may provide an in place operation(safe choice) - assert(inplace && - "Operand not valid for in place operation. Code gen aborted.\n"); - - // Argument list for the runtime call - // Create string for node name, as first argument for wrapper API call Constant *ConstArray = ConstantDataArray::getString(M->getContext(), strRef, true); @@ -1146,62 +1177,8 @@ errs() << "TensorII: " << *TensorII << "\n"; TensorII->replaceAllUsesWith(TensorII->getOperand(0)); } break; -/* - case Intrinsic::visc_image_fft_transform: - { // llvm.hpvm.image.fft.transform - Or another image intrinsic - // All will be treated as not in place - DEBUG(errs() << F->getName() << "\t: Handling fft transform \n"); - - // Create argument list for the runtime call - stored in Args - // All interfaces will have a string as first argument, which will be - // used to identify the dataflow node at runtime - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - // Here, use you will access the appropriate arruments of the intrinsic - // and push_back, in order to create the argument list of runtime call - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - - Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); - Args.push_back(conv_mode); - - Args.push_back(TensorII->getOperand(7)); - - // Done with argument list. - - // Create wrapper API runtime function call - // Appropriately set the name of the function of the runtime that you - // want to call - // Note: the Constant * is what we need to pass to the callInst. - // This name does not have to match, but does so for similarity. - Constant* wrapper_tensorGroupConvolution; - M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"), - RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType()); - CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution, - Args, "", TensorII); - // We can replace the call to hpvm.tensor.xxx with the runtime call - TensorII->replaceAllUsesWith(CI); - } - break; - -*/ + default: llvm_unreachable("Unknown VISC Intrinsic!"); break; @@ -1219,6 +1196,13 @@ errs() << "TensorII: " << *TensorII << "\n"; (*ri)->eraseFromParent(); } + + for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs_remove.rbegin(), + re = IIs_remove.rend(); ri != re; ++ri) { + DEBUG(errs() << "Erasing: " << **ri << "\n"); + (*ri)->eraseFromParent(); + } + } // DFG2LLVM_WrapperAPI - The first implementation. @@ -1226,6 +1210,8 @@ errs() << "TensorII: " << *TensorII << "\n"; struct DFG2LLVM_WrapperAPI : public DFG2LLVM { static char ID; // Pass identification, replacement for typeid DFG2LLVM_WrapperAPI() : DFG2LLVM(ID) {} + + private: public: @@ -1380,20 +1366,13 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { return; } -// For wrapper API, we generate code for every leaf node. -// No need to check for hints from frontend -// // Generate code only if it has the right hint -// if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) { -// errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; -// return; -// } // Increment the node ID, for current node. ++nodeID; // Get the function associated with the dataflow node Function *F = N->getFuncPointer(); -errs() << "Node Function: " << *F << "\n"; + errs() << "Node Function: " << *F << "\n"; // Look up if we have visited this function before. If we have, then just // get the cloned function pointer from DFNode. Otherwise, create the cloned // function and add it to the DFNode GenFunc. @@ -1405,6 +1384,8 @@ errs() << "Node Function: " << *F << "\n"; // Clone the function ValueToValueMapTy VMap; std::string FName(F->getName().data());//Twine FName = F->getName(); + + F_wrapper_api = CloneFunction(F, VMap); F_wrapper_api->setName(FName+"_wrapper_api"); F_wrapper_api->removeFromParent(); @@ -1461,15 +1442,12 @@ errs() << "Node Function: " << *F << "\n"; //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP); CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP); -//errs() << "-----------------------------------\n"; -//errs() << *F_wrapper_api << "\n"; - return; } bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_WrapperAPI PASS\n"; + errs() << "\nDFG2LLVM_WrapperAPI PASS\n"; // Get the BuildDFG Analysis Results: // - Dataflow graph BuildDFG &DFG = getAnalysis<BuildDFG>(); @@ -1477,9 +1455,8 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) { // Get the In Place Analysis Results InPlaceDFGAnalysis::InPlaceDFGParameter IPP = (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); - // Print results -// printInPlaceDFGParameter(IPP); + std::vector<DFInternalNode*> Roots = DFG.getRoots(); // Visitor for Code Generation Graph Traversal @@ -1496,6 +1473,7 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) { //TODO: Edit module epilogue to remove the VISC intrinsic declarations delete CGTVisitor; + return true; } diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp index d9a3c588b5..541efe4e1d 100644 --- a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp +++ b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp @@ -78,17 +78,35 @@ static bool isIncomingEdgeArgument(unsigned argno, return false; } + // Check that this is a valid HPVM Tensor Node (starts with an HPVM intrinsic) // Return the node intrinsic function static IntrinsicInst *isValidHPVMTensorNode(DFNode *N) { + Function *F = N->getFuncPointer(); - IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*(inst_begin(F))); - assert(II && - "HPVM tensor intrinsic expected as first instruction of HPVM tensor node\n"); - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") && - "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + //IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*(inst_begin(F))); + + IntrinsicInst *II; + for (auto I = inst_begin(F), E = inst_end(F); I != E; I++){ + + if(dyn_cast<IntrinsicInst>(&*I)){ + II = dyn_cast<IntrinsicInst>(&*I); + if ((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")){ + errs()<<"** Tensor Intrinsic = " << *II << "\n"; + } + + } + } + + //assert(II && + // "HPVM tensor intrinsic expected as first instruction of HPVM tensor node\n"); + + //assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") && + // "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + return II; } + // Returns the next node in a node sequence, or NULL if it does not exist. // We consider two nodes a sequence if SrcN has a single successor, DstN, @@ -340,9 +358,9 @@ Function* FuseHPVMTensorNodes::createEmptyDFNodeFunction(IntrinsicInst* II1, the body of the fused function instead * * OutVs: This maps the output struct field index to the stored value */ void FuseHPVMTensorNodes::inlineFirstNodeFunction(Module &M, Function *F1, - Function *Ffused, - ValueMap<Value*, Value*> &VMap, - std::vector<Value*> &OutVs) { + Function *Ffused, + ValueMap<Value*, Value*> &VMap, + std::vector<Value*> &OutVs) { ReturnInst *RI = cast<ReturnInst>(Ffused->getEntryBlock().getTerminator()); @@ -356,8 +374,9 @@ void FuseHPVMTensorNodes::inlineFirstNodeFunction(Module &M, Function *F1, } IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") - && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + assert ( ((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") + || (II->getCalledFunction()->getName()).startswith("llvm.visc.node.id") ) + && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); std::vector<Value*> Args; for(unsigned i = 0; i < II->getNumArgOperands(); i++) { @@ -370,6 +389,7 @@ void FuseHPVMTensorNodes::inlineFirstNodeFunction(Module &M, Function *F1, Args.push_back(VMap[V]); } } + Function *F = Intrinsic::getDeclaration(&M, II->getIntrinsicID()); CallInst* CI = CallInst::Create(F, Args, @@ -409,9 +429,14 @@ void FuseHPVMTensorNodes::inlineSecondNodeFunction(Module &M, Function *F2, Instruction *I = &(*f2_i); if ((BuildDFG::isViscIntrinsic(I))) { IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") + assert( ((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") + || (II->getCalledFunction()->getName()).startswith("llvm.visc.node.id")) && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + if ( (II->getCalledFunction()->getName()).startswith("llvm.visc.node.id")) { + continue; // Skip adding visc.node.id calls in nodes other than first node + } + std::vector<Value*> Args; for(unsigned i = 0; i < II->getNumArgOperands(); i++) { Value *V = II->getArgOperand(i); @@ -506,10 +531,11 @@ Function* FuseHPVMTensorNodes::createLeafDFNodeFunction(IntrinsicInst* II1, ++fused_arg_it; } -// for(const auto& v: FusedValueMap) { -// errs() << "key = " << *(v.first) << "\t"; -// errs() << "value = " << *(v.second) << "\n"; -// } + + // for(const auto& v: FusedValueMap) { + // errs() << "key = " << *(v.first) << "\t"; + // errs() << "value = " << *(v.second) << "\n"; + // } // Invoke function that inlines F1 into Ffused, using and updating mappings inlineFirstNodeFunction(M, F1, Ffused, FusedValueMap, OutValues); @@ -670,6 +696,7 @@ void FuseHPVMTensorNodes::updateParentNodeFunction(IntrinsicInst* II1, DEBUG(errs() << "Erasing: " << **ib << "\n"); (*ib)->eraseFromParent(); } + II2->replaceAllUsesWith(IInew); II2->eraseFromParent(); @@ -792,6 +819,7 @@ void FindFusionTargetsTraversal::codeGen(DFInternalNode *N) { return; } + void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { DEBUG(errs() << "Inside leaf node: " << N->getFuncPointer()->getName() << "\n"); @@ -802,9 +830,9 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { return; } -// if(N->getTargetHint() != visc::PROMISE_TARGET) { + if(!preferredTargetIncludes(N, visc::PROMISE_TARGET)) { - // Only fuse if we plan to target PROMISE + // Only fuse if we plan to target PROMISE/Layers API // The CUDNN backend would be able to generate calls for the fused node, // but not the other way around DEBUG(errs() << "No PROMISE hint. Skipping node: " @@ -820,6 +848,14 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { std::vector<IntrinsicInst*> CurrentNodeSequence; switch(II->getIntrinsicID()) { + + /*case Intrinsic::visc_node_id: + { // Found beginning of pattern conv-bias-activation-pooling. + + } + break; + */ + case Intrinsic::visc_tensor_convolution: { // Found beginning of pattern conv-bias-activation-pooling. // Look for the rest @@ -931,9 +967,9 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { } bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) { - errs() << "\nFUSE HPVM TENSOR NODES PASS\n"; -// Get the BuildDFG Analysis Results: + errs() << "\nFUSE HPVM TENSOR NODES PASS\n"; + // Get the BuildDFG Analysis Results: // - Dataflow graph BuildDFG &DFG = getAnalysis<BuildDFG>(); @@ -952,7 +988,7 @@ bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) { FuseHPVMTensorNodes::FusionTargets &FTs = FTTVisitor->getFusionTargets(); FuseHPVMTensorNodes Fuse; -// Fuse.printFusionTargets(FTs); + // Fuse.printFusionTargets(FTs); Fuse.run(M, FTs); diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp index faab312087..a4d9f2c2a4 100644 --- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp +++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp @@ -27,21 +27,14 @@ #include "llvm/SupportVISC/VISCUtils.h" -#define TIMER(X) do { if (VISCTimer) { X; } } while (0) - using namespace llvm; using namespace viscUtils; -// VISC Command line option to use timer or not -static cl::opt<bool> -VISCTimer("visc-timers-gen", cl::desc("Enable GenVISC timer")); - namespace genvisc { // Helper Functions -static inline ConstantInt* getTimerID(Module&, enum visc_TimerID); static Function* transformReturnTypeToStruct(Function* F); static Type* getReturnTypeFromReturnInst(Function* F); @@ -178,6 +171,9 @@ IS_VISC_CALL(tensor_tanh) IS_VISC_CALL(tensor_sigmoid) IS_VISC_CALL(tensor_softmax) +IS_VISC_CALL(node_id) + + // Return the constant integer represented by value V static unsigned getNumericValue(Value* V) { assert(isa<ConstantInt>(V) @@ -816,53 +812,17 @@ bool GenVISC::runOnModule(Module &M) { // Load Runtime API Module SMDiagnostic Err; - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && - "Define LLVM_SRC_ROOT environment variable!"); - - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/../build/projects/visc-rt/visc-rt.ll"; - errs() << llvmSrcRoot << "\n"; - - std::unique_ptr<Module> runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - // Module* runtimeModule = - // ParseIRFile("/home/psrivas2/current-src/projects/visc-rt/visc-rt.ll", - // Err, M.getContext()); - - if(runtimeModule == NULL) - DEBUG(errs() << Err.getMessage()); - else - DEBUG(errs() << "Successfully loaded visc-rt API module\n"); - - llvm_visc_initializeTimerSet = M.getOrInsertFunction("llvm_visc_initializeTimerSet", - runtimeModule->getFunction("llvm_visc_initializeTimerSet")->getFunctionType()); - DEBUG(errs() << *llvm_visc_initializeTimerSet); - - llvm_visc_switchToTimer = M.getOrInsertFunction("llvm_visc_switchToTimer", - runtimeModule->getFunction("llvm_visc_switchToTimer")->getFunctionType()); - DEBUG(errs() << *llvm_visc_switchToTimer); - - llvm_visc_printTimerSet = M.getOrInsertFunction("llvm_visc_printTimerSet", - runtimeModule->getFunction("llvm_visc_printTimerSet")->getFunctionType()); - DEBUG(errs() << *llvm_visc_printTimerSet); - // Insert init context in main DEBUG(errs() << "Locate __visc__init()\n"); Function* VI = M.getFunction("__visc__init"); assert(VI->getNumUses() == 1 && "__visc__init should only be used once"); Instruction* I = cast<Instruction>(*VI->user_begin()); - DEBUG(errs() << "Initialize Timer Set\n"); - initializeTimerSet(I); - switchToTimer(visc_TimerID_NONE, I); - // Insert print instruction at visc exit DEBUG(errs() << "Locate __visc__cleanup()\n"); Function* VC = M.getFunction("__visc__cleanup"); assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once"); I = cast<Instruction>(*VC->user_begin()); - printTimerSet(I); - DEBUG(errs() << "-------- Searching for launch sites ----------\n"); @@ -1308,6 +1268,12 @@ bool GenVISC::runOnModule(Module &M) { if (isVISCCall_tensor_softmax(I)) { ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_softmax, &toBeErased); } + + // New Intrinsic to set Node ID + if (isVISCCall_node_id(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::visc_node_id, &toBeErased); + } + } // Erase the __visc__node calls @@ -1530,7 +1496,6 @@ void GenVISC::genHost(CallInst* CI, Function* KernelF, unsigned levels, unsigned StructType* ArgStructTy = StructType::create(ArgList, "struct.arg", true); DEBUG(errs() << *ArgStructTy << "\n"); - switchToTimer(visc_TimerID_ARG_PACK, CI); // Insert alloca inst for this argument struct type AllocaInst* AI = new AllocaInst(ArgStructTy, "in.addr", CI); @@ -1544,8 +1509,6 @@ void GenVISC::genHost(CallInst* CI, Function* KernelF, unsigned levels, unsigned "args", CI); - switchToTimer(visc_TimerID_NONE, CI); - // Bitcast Root function to i8* Constant* Root_i8ptr = ConstantExpr::getPointerCast(Root, Type::getInt8PtrTy(Ctx)); // Replace CI with launch call to a Root function @@ -1575,49 +1538,6 @@ void GenVISC::genHost(CallInst* CI, Function* KernelF, unsigned levels, unsigned // Get result (optional) } -void GenVISC::initializeTimerSet(Instruction* InsertBefore) { - Value* TimerSetAddr; - StoreInst* SI; - TIMER(TimerSet = new GlobalVariable(*M, - Type::getInt8PtrTy(M->getContext()), - false, - GlobalValue::CommonLinkage, - Constant::getNullValue(Type::getInt8PtrTy(M->getContext())), - "viscTimerSet_GenVISC")); - DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet << "\n"); - DEBUG(errs() << "Inserting call to: " << *llvm_visc_initializeTimerSet << "\n"); - - TIMER(TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet, - None, - "", - InsertBefore)); - DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n"); - TIMER(SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore)); - DEBUG(errs() << "Store Timer Address in Global variable: " << *SI << "\n"); -} - -void GenVISC::switchToTimer(enum visc_TimerID timer, Instruction* InsertBefore) { - Value* switchArgs[] = {TimerSet, getTimerID(*M, timer)}; - TIMER(CallInst::Create(llvm_visc_switchToTimer, - ArrayRef<Value*>(switchArgs, 2), - "", - InsertBefore)); -} - -void GenVISC::printTimerSet(Instruction* InsertBefore) { - Value* TimerName; - TIMER(TimerName = getStringPointer("GenVISC_Timer", InsertBefore)); - Value* printArgs[] = {TimerSet, TimerName}; - TIMER(CallInst::Create(llvm_visc_printTimerSet, - ArrayRef<Value*>(printArgs, 2), - "", - InsertBefore)); -} - -static inline ConstantInt* getTimerID(Module& M, enum visc_TimerID timer) { - return ConstantInt::get(Type::getInt32Ty(M.getContext()), timer); -} - static Function* transformReturnTypeToStruct(Function* F) { // Currently only works for void return types DEBUG(errs() << "Transforming return type of function to Struct: " << F->getName() << "\n"); -- GitLab