From 41411aad55677796c566b12990be7608bec6d2d0 Mon Sep 17 00:00:00 2001 From: Yifan Zhao <yifanz16@illinois.edu> Date: Wed, 13 Jan 2021 05:55:07 -0600 Subject: [PATCH] Use config file to get path to tensor_runtime.ll (started in 750ab0620) --- .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp | 344 ++-- .../DFG2LLVM_WrapperAPI.cpp | 1671 ++++++++--------- .../ReplaceIntrinsics/ReplaceIntrinsics.cpp | 244 ++- 3 files changed, 1075 insertions(+), 1184 deletions(-) diff --git a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp index 8b2570fdad..bd26a92fd3 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp @@ -27,6 +27,7 @@ #include "SupportHPVM/DFG2LLVM.h" #include "InPlaceDFG/InPlaceDFGAnalysis.h" +#include "Config.h" #include <sstream> @@ -44,10 +45,9 @@ namespace { struct DFG2LLVM_CUDNN : public DFG2LLVM { static char ID; // Pass identification, replacement for typeid DFG2LLVM_CUDNN() : DFG2LLVM(ID) {} -private: +private: public: - void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<BuildDFG>(); AU.addRequired<InPlaceDFGAnalysisWrapper>(); @@ -62,7 +62,7 @@ public: class CGT_CUDNN : public CodeGenTraversal { private: - //Member variables + // Member variables InPlaceDFGAnalysis::InPlaceDFGParameter *IPP; // VISC Runtime API and Tensor runtime API @@ -73,32 +73,28 @@ private: // Functions bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N); - - // Virtual Functions void init(); void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); + void codeGen(DFInternalNode *N); + void codeGen(DFLeafNode *N); public: - // Constructor - CGT_CUDNN(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP) - : CodeGenTraversal(_M, _DFG), IPP(&_IPP) { + CGT_CUDNN(Module &_M, BuildDFG &_DFG, + InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP) + : CodeGenTraversal(_M, _DFG), IPP(&_IPP) { initRuntimeAPI(); } - }; -bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op, - Function *Fgen, +bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N) { if (Argument *Arg = dyn_cast<Argument>(Op)) { DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n"); assert((Arg->getParent() == Fgen) && - "Extra Parameter in body of Function\n"); + "Extra Parameter in body of Function\n"); // Candidae parameter is a function argument // In this case, consult the result of in place analysis // Find position in arg list @@ -112,11 +108,10 @@ bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op, DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n"); return false; } - } - else { + } else { // If it is not an argument, then it needs to be the result of // another intrinsic. These are new objects that are allocated, - // and consumed by next intrinsic. + // and consumed by next intrinsic. DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n"); if (dyn_cast<IntrinsicInst>(Op)) { DEBUG(errs() << *Arg << "\t: local, suitable for in place\n"); @@ -128,24 +123,15 @@ bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op, } } - -void CGT_CUDNN::init() { -} +void CGT_CUDNN::init() {} // Initialize the VISC runtime API. This makes it easier to insert these calls void CGT_CUDNN::initRuntimeAPI() { // Load Runtime API Module SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n"); - - // FIXME: set correct path - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) + runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext()); + if (runtimeModule == nullptr) DEBUG(errs() << Err.getMessage()); else DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); @@ -161,59 +147,60 @@ void CGT_CUDNN::initRuntimeAPI() { // Find hpvm.init and visc.cleanup calls, and add placeholder methods // for initialization and cleanup of the hpvm tensor runtime - Function* VI = M.getFunction("llvm.hpvm.init"); + Function *VI = M.getFunction("llvm.hpvm.init"); assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n"); InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create(llvm_hpvm_initTensorRt, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); + CallInst::Create( + llvm_hpvm_initTensorRt, + ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), + "", InitCall); - Function* VC = M.getFunction("llvm.hpvm.cleanup"); + Function *VC = M.getFunction("llvm.hpvm.cleanup"); assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n"); CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall); - + CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value *>(), "", + CleanupCall); } -void CGT_CUDNN::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs () << "Skipping internal node\n"; +void CGT_CUDNN::codeGen(DFInternalNode *N) { + errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n"; + errs() << "Skipping internal node\n"; } - -void CGT_CUDNN::codeGen(DFLeafNode* N) { +void CGT_CUDNN::codeGen(DFLeafNode *N) { // Skip code generation if it is a dummy node - if(N->isDummyNode()) { + if (N->isDummyNode()) { DEBUG(errs() << "Skipping dummy node\n"); return; } // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { + if (N->isAllocationNode()) { assert(false && "Allocation Node not expected in ApproxHPVM"); return; } // Generate code only if it has the right hint if (!checkPreferredTarget(N, hpvm::CUDNN_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; + errs() << "Skipping node: " << N->getFuncPointer()->getName() << "\n"; return; } // Get the function associated with the dataflow node Function *F = N->getFuncPointer(); - errs()<<"function name = "<< F->getName()<<"\n"; + errs() << "function name = " << F->getName() << "\n"; /* Removing HPVM in/out/inout function attributes */ - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ai++){ + for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; + ai++) { Argument *Arg = &*ai; - if(Arg->hasAttribute(Attribute::In)) + if (Arg->hasAttribute(Attribute::In)) Arg->removeAttr(Attribute::In); - if(Arg->hasAttribute(Attribute::Out)) + if (Arg->hasAttribute(Attribute::Out)) Arg->removeAttr(Attribute::Out); - if(Arg->hasAttribute(Attribute::InOut)) - Arg->removeAttr(Attribute::InOut); + if (Arg->hasAttribute(Attribute::InOut)) + Arg->removeAttr(Attribute::InOut); } // Look up if we have visited this function before. If we have, then just @@ -223,14 +210,14 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { assert((F_cudnn == NULL) && "Error: Visiting a node for which code already generated"); - + // Clone the function ValueToValueMapTy VMap; std::string FName(F->getName().data()); F_cudnn = CloneFunction(F, VMap); F_cudnn->setName(FName + "_cudnn"); - errs()<<"Cloned function name2 = "<<F_cudnn->getName()<<"\n"; - F_cudnn->removeFromParent(); + errs() << "Cloned function name2 = " << F_cudnn->getName() << "\n"; + F_cudnn->removeFromParent(); M.getFunctionList().push_back(F_cudnn); N->addGenFunc(F_cudnn, hpvm::CUDNN_TARGET, true); @@ -239,165 +226,161 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { DEBUG(errs() << "Adding nounwind to generated function\n"); F_cudnn->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); - // Add llvm_hpvm_requestTensor calls for every pointer argument of the function - // (they are all expected to be tensors), at the beginning of the function. - // This is the first instruction of the function, insert them before this - Instruction* FI = &*(F_cudnn->getEntryBlock().begin()); + // Add llvm_hpvm_requestTensor calls for every pointer argument of the + // function (they are all expected to be tensors), at the beginning of the + // function. This is the first instruction of the function, insert them before + // this + Instruction *FI = &*(F_cudnn->getEntryBlock().begin()); // In this backend, the target device is GPU, represented by i32 1. ConstantInt *TargetDeviceID = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); + ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); for (Function::arg_iterator ai = F_cudnn->arg_begin(), - ae = F_cudnn->arg_end(); ai != ae; ++ai) { - Argument* Arg = &*ai; + ae = F_cudnn->arg_end(); + ai != ae; ++ai) { + Argument *Arg = &*ai; if (Arg->getType()->isPointerTy()) { Value *Args[] = {Arg, TargetDeviceID}; - CallInst::Create(hpvm_request_tensor, - ArrayRef<Value*>(Args, 2), - "", FI); + CallInst::Create(hpvm_request_tensor, ArrayRef<Value *>(Args, 2), "", FI); } } std::vector<IntrinsicInst *> IItoRemove; - for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) { + for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; + ++i) { Instruction *I = &(*i); if (BuildDFG::isHPVMIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - //assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + // assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") // && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); - //if (!(II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")){ - //continue; // skip non-tensor ops + // if + // (!(II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")){ + // continue; // skip non-tensor ops //} - + /********************* Handle VISC Tensor intrinsics ********************/ switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_convolution: - { /* llvm.hpvm.tensor.mul */ + case Intrinsic::hpvm_tensor_convolution: { /* llvm.hpvm.tensor.mul */ // Tensor mul is not in place. - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n"); + DEBUG(errs() << F_cudnn->getName() + << "\t: Handling tensor convolution \n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); + Args.push_back(II->getOperand(2)); Args.push_back(II->getOperand(3)); Args.push_back(II->getOperand(4)); Args.push_back(II->getOperand(5)); - Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); - Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); + Constant *conv_mode = + ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); + Constant *conv_precision = + ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); Args.push_back(conv_mode); Args.push_back(conv_precision); - + // Create cudnn runtime function call FunctionCallee tensorConvolution; DECLARE(tensorConvolution); - - CallInst* CI = CallInst::Create(tensorConvolution, - Args, "", II); + + CallInst *CI = CallInst::Create(tensorConvolution, Args, "", II); // We can replace the call to hpvm.tensor.mul with the runtime call II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - case Intrinsic::hpvm_tensor_group_convolution: - { /* llvm.hpvm.tensor.mul */ + case Intrinsic::hpvm_tensor_group_convolution: { /* llvm.hpvm.tensor.mul + */ // Tensor mul is not in place. - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n"); + DEBUG(errs() << F_cudnn->getName() + << "\t: Handling tensor convolution \n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); + Args.push_back(II->getOperand(2)); Args.push_back(II->getOperand(3)); Args.push_back(II->getOperand(4)); Args.push_back(II->getOperand(5)); - Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); + Constant *conv_mode = + ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); Args.push_back(conv_mode); Args.push_back(II->getOperand(7)); - + // Create cudnn runtime function call FunctionCallee tensorConvolution; DECLARE(tensorConvolution); - - CallInst* CI = CallInst::Create(tensorConvolution, - Args, "", II); + + CallInst *CI = CallInst::Create(tensorConvolution, Args, "", II); // We can replace the call to hpvm.tensor.mul with the runtime call II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - case Intrinsic::hpvm_tensor_batchnorm: - { /* llvm.hpvm.tensor.batchnorm */ + case Intrinsic::hpvm_tensor_batchnorm: { /* llvm.hpvm.tensor.batchnorm */ // Tensor batchnorm is in place. - // FIXME: Add Check for InPlace Analysis - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor batch normalization \n"); + // FIXME: Add Check for InPlace Analysis + DEBUG(errs() << F_cudnn->getName() + << "\t: Handling tensor batch normalization \n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); + Args.push_back(II->getOperand(2)); Args.push_back(II->getOperand(3)); Args.push_back(II->getOperand(4)); Args.push_back(II->getOperand(5)); - + // Create cudnn runtime function call FunctionCallee tensorBatchNorm; DECLARE(tensorBatchNorm); - - CallInst* CI = CallInst::Create(tensorBatchNorm, - Args, "", II); - // We can replace the call to hpvm.tensor.batchnorm with the TensorRT call + + CallInst *CI = CallInst::Create(tensorBatchNorm, Args, "", II); + // We can replace the call to hpvm.tensor.batchnorm with the TensorRT + // call II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - - case Intrinsic::hpvm_tensor_mul: - { /* llvm.hpvm.tensor.mul */ + case Intrinsic::hpvm_tensor_mul: { /* llvm.hpvm.tensor.mul */ // Tensor mul is not in place. DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); // Create cudnn runtime function call FunctionCallee tensorGemmGPU; DECLARE(tensorGemmGPU); - - CallInst* CI = CallInst::Create(tensorGemmGPU, - Args, "", II); + + CallInst *CI = CallInst::Create(tensorGemmGPU, Args, "", II); // We can replace the call to hpvm.tensor.mul with the runtime call II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - } - break; - case Intrinsic::hpvm_tensor_add: - { /* llvm.hpvm.tensor.add */ + } break; + case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */ DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n"); // Tensor add(a,b) is in place for argument a. Value *Op = II->getOperand(0); @@ -407,12 +390,13 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { // Code generation cannot continue if this is false, because the target // only provides an in place operation - // FIXME: remove this comment - must check for in-place - //assert(inplace && - // "Operand not valid for in place operation. Code gen aborted.\n"); + // FIXME: remove this comment - must check for in-place + // assert(inplace && + // "Operand not valid for in place operation. Code gen + // aborted.\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); @@ -426,54 +410,55 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; case Intrinsic::hpvm_tensor_pool_max: - case Intrinsic::hpvm_tensor_pool_mean: - { /* llvm.hpvm.tensor.relu */ + case Intrinsic::hpvm_tensor_pool_mean: { /* llvm.hpvm.tensor.relu */ DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n"); // Argument list - tensorPooling(input, poolFunction, window_height, - // window_width, vertical_pad, horizontal_pad, - // vertical_stride, horizontal_stride); - std::vector<Value*> Args; + // window_width, vertical_pad, + // horizontal_pad, vertical_stride, + // horizontal_stride); + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); - int pool_type = 0; - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max){ + int pool_type = 0; + if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) { pool_type = 0; - } - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean){ + } + if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) { pool_type = 1; - } - - Constant* constPoolType = ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type); - Args.push_back(constPoolType); // ID for max pool. Min/Avg have different IDs (non-zero) - Args.push_back(II->getOperand(1)); + } + + Constant *constPoolType = + ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type); + Args.push_back(constPoolType); // ID for max pool. Min/Avg have + // different IDs (non-zero) + Args.push_back(II->getOperand(1)); Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); + Args.push_back(II->getOperand(3)); Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - Args.push_back(II->getOperand(6)); + Args.push_back(II->getOperand(5)); + Args.push_back(II->getOperand(6)); // Create cudnn runtime function call FunctionCallee tensorPooling; DECLARE(tensorPooling); - CallInst* CI = CallInst::Create(tensorPooling, Args, "", II); + CallInst *CI = CallInst::Create(tensorPooling, Args, "", II); - // Replacing intrinsic result uses with the result of the tensor runtime operation + // Replacing intrinsic result uses with the result of the tensor runtime + // operation II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - } - break; - + } break; + case Intrinsic::hpvm_tensor_relu: case Intrinsic::hpvm_tensor_clipped_relu: - case Intrinsic::hpvm_tensor_tanh: - { /* llvm.hpvm.tensor.relu */ - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation functions \n"); + case Intrinsic::hpvm_tensor_tanh: { /* llvm.hpvm.tensor.relu */ + DEBUG(errs() << F_cudnn->getName() + << "\t: Handling tensor activation functions \n"); // Tensor relu(a) is in place for argument a. Value *Op = II->getOperand(0); @@ -485,41 +470,38 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { "Operand not valid for in place operation. Code gen aborted.\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu){ + if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) { // Create cudnn runtime function call FunctionCallee tensorRelu; DECLARE(tensorRelu); CallInst::Create(tensorRelu, Args, "", II); - } - else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu){ + } else if (II->getIntrinsicID() == + Intrinsic::hpvm_tensor_clipped_relu) { // Create cudnn runtime function call //-- FunctionCallee tensorClippedRelu; - FunctionCallee tensorRelu2; + FunctionCallee tensorRelu2; DECLARE(tensorRelu2); CallInst::Create(tensorRelu2, Args, "", II); - } - else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh){ + } else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) { // Create cudnn runtime function call FunctionCallee tensorTanh; - errs()<<"tensorTanh Call = \n\n"; + errs() << "tensorTanh Call = \n\n"; DECLARE(tensorTanh); - //errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l"; + // errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l"; CallInst::Create(tensorTanh, Args, "", II); - } - + } + // We can replace the call to hpvm.tensor.relu with the 1st argument // that, due to in place operation, now contains the result II->replaceAllUsesWith(II->getOperand(0)); // Mark to remove at the end IItoRemove.push_back(II); - } - break; - case Intrinsic::hpvm_tensor_softmax: - { /* llvm.hpvm.tensor.softmax */ + } break; + case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */ DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor softmax\n"); // Tensor relu(a) is in place for argument a. Value *Op = II->getOperand(0); @@ -532,7 +514,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { "Operand not valid for in place operation. Code gen aborted.\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); // Create cudnn runtime function call @@ -545,17 +527,16 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - case Intrinsic::hpvm_node_id: - { /* llvm.hpvm.node.id */ - DEBUG(errs() << F_cudnn->getName() << "\t: Handling Node ID Intrinsic \n"); + case Intrinsic::hpvm_node_id: { /* llvm.hpvm.node.id */ + DEBUG(errs() << F_cudnn->getName() + << "\t: Handling Node ID Intrinsic \n"); // Get uint32 argument Value *Op = II->getOperand(0); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); // Create hpvm-tensor-rt function call @@ -565,10 +546,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - default: llvm_unreachable("Unknown VISC Intrinsic!"); break; @@ -582,7 +561,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) { // Traverse the vector backwards, otherwise definitions are deleted while // their subsequent uses are still around. for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) { + re = IItoRemove.rend(); + ri != re; ++ri) { DEBUG(errs() << "Erasing: " << **ri << "\n"); errs() << "Erasing: " << **ri << "\n"; (*ri)->eraseFromParent(); @@ -600,33 +580,31 @@ bool DFG2LLVM_CUDNN::runOnModule(Module &M) { // Get the In Place Analysis Results InPlaceDFGAnalysis::InPlaceDFGParameter IPP = - (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); + (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); // Print results printInPlaceDFGParameter(IPP); - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - + std::vector<DFInternalNode *> Roots = DFG.getRoots(); + // Visitor for Code Generation Graph Traversal CGT_CUDNN *CGTVisitor = new CGT_CUDNN(M, DFG, IPP); // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { + for (auto rootNode : Roots) { // Initiate code generation for root DFNode CGTVisitor->visit(rootNode); } - //TODO: Edit module epilogue to remove the VISC intrinsic declarations + // TODO: Edit module epilogue to remove the VISC intrinsic declarations delete CGTVisitor; return true; } - /****************************************************************************** * Helper functions * ******************************************************************************/ - } // End of namespace char DFG2LLVM_CUDNN::ID = 0; @@ -635,5 +613,3 @@ static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn", false /* does not modify the CFG */, true /* transformation, * * not just analysis */); - - diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp index 294f9ac574..d9dcc7c876 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp @@ -39,20 +39,18 @@ using namespace inplacedfg; namespace { cl::opt<std::string> QuantizationInputsFilename( - "quantization-levels-filename", - cl::desc("<PROMISE quantization levels input file (path)>"), - cl::value_desc("filename"), - cl::Required); + "quantization-levels-filename", + cl::desc("<PROMISE quantization levels input file (path)>"), + cl::value_desc("filename"), cl::Required); cl::opt<std::string> ConfigurationInputsFilename( - "configuration-inputs-filename", - cl::desc("<Autotuner configurations input file (path)>"), - cl::value_desc("filename"), - cl::Required); + "configuration-inputs-filename", + cl::desc("<Autotuner configurations input file (path)>"), + cl::value_desc("filename"), cl::Required); // Helper function declarations -bool isValidOperandForInPlaceOperation(Value *, Function *, DFNode *, - InPlaceDFGAnalysis::InPlaceDFGParameter &); +bool isValidOperandForInPlaceOperation( + Value *, Function *, DFNode *, InPlaceDFGAnalysis::InPlaceDFGParameter &); // Helper class declarations @@ -72,53 +70,37 @@ private: Module *M; Module *RtM; - std::vector<Value*> Args; - std::vector<IntrinsicInst*> IIs; - std::vector<IntrinsicInst*> IIs_remove; // Intrinsics to remove + std::vector<Value *> Args; + std::vector<IntrinsicInst *> IIs; + std::vector<IntrinsicInst *> IIs_remove; // Intrinsics to remove AbstractState *current; public: CodeGenStateMachine(Module *, Module *); - void setCurrent(AbstractState *s) { - current = s; - } + void setCurrent(AbstractState *s) { current = s; } void transition(IntrinsicInst *II); - Module *getModule() { - return M; - } + Module *getModule() { return M; } - Module *getRtModule() { - return RtM; - } + Module *getRtModule() { return RtM; } - void addArgument(Value *Arg) { - Args.push_back(Arg); - } + void addArgument(Value *Arg) { Args.push_back(Arg); } - void addIntrinsicInst(IntrinsicInst *II) { - IIs.push_back(II); - } + void addIntrinsicInst(IntrinsicInst *II) { IIs.push_back(II); } - void addIntrinsicToRemove(IntrinsicInst *II) { - IIs_remove.push_back(II); - } + void addIntrinsicToRemove(IntrinsicInst *II) { IIs_remove.push_back(II); } - IntrinsicInst *getIntrinsicInstAt(unsigned idx) { - return IIs[idx]; - } + IntrinsicInst *getIntrinsicInstAt(unsigned idx) { return IIs[idx]; } - void codeGen(DFNode *, Function * , const StringRef &, + void codeGen(DFNode *, Function *, const StringRef &, InPlaceDFGAnalysis::InPlaceDFGParameter &); - }; class AbstractState { public: - enum ID - { + enum ID { INITIAL_STATE, FULLY_CONNECTED_LAYER_1, FULLY_CONNECTED_LAYER_2, @@ -137,9 +119,7 @@ protected: enum ID StateID; public: - enum ID getStateID() { - return StateID; - } + enum ID getStateID() { return StateID; } virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0; virtual ~AbstractState() {} @@ -277,68 +257,60 @@ public: void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; }; - void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream errs() << "INITIAL STATE\n"; switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_convolution: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // conv input - Mch->addArgument(II->getOperand(1)); // conv kernel - - Mch->setCurrent(new ConvolutionLayer_1()); - errs() << "TO CONVOLUTION LAYER 1\n"; - } - break; - case Intrinsic::hpvm_tensor_mul: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // 1st gemm input - Mch->addArgument(II->getOperand(1)); // 2nd gemm input - - Mch->setCurrent(new FullyConnectedLayer_1()); - errs() << "TO FULLY CONNECTED LAYER 1\n"; - } - break; - - case Intrinsic::hpvm_node_id: - { - - DEBUG(errs() << "\t: Handling __hpvm_node_id \n"); - // Get uint32 node ID - Value *Op = II->getOperand(0); - - std::vector<Value*> Args; - Args.push_back(Op); - - Module *M = Mch->getModule(); - Module *RtM = Mch->getRtModule(); - - FunctionCallee hpvm_node_id_call = - M->getOrInsertFunction(StringRef("tensor_set_node_id"), - RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType()); - - CallInst::Create(hpvm_node_id_call, Args, "", II); - - Mch->addIntrinsicToRemove(II); - Mch->setCurrent(new InitialState()); - errs() << "TO INIT STATE\n"; - } - break; - - default: // Other HPVM intrinsic - { - Mch->addIntrinsicInst(II); - Mch->setCurrent(new SingleTensorOperation()); - errs() << "TO SINGLE OP\n"; - } - break; + case Intrinsic::hpvm_tensor_convolution: { + Mch->addIntrinsicInst(II); + Mch->addArgument(II->getOperand(0)); // conv input + Mch->addArgument(II->getOperand(1)); // conv kernel + + Mch->setCurrent(new ConvolutionLayer_1()); + errs() << "TO CONVOLUTION LAYER 1\n"; + } break; + case Intrinsic::hpvm_tensor_mul: { + Mch->addIntrinsicInst(II); + Mch->addArgument(II->getOperand(0)); // 1st gemm input + Mch->addArgument(II->getOperand(1)); // 2nd gemm input + + Mch->setCurrent(new FullyConnectedLayer_1()); + errs() << "TO FULLY CONNECTED LAYER 1\n"; + } break; + + case Intrinsic::hpvm_node_id: { + + DEBUG(errs() << "\t: Handling __hpvm_node_id \n"); + // Get uint32 node ID + Value *Op = II->getOperand(0); + + std::vector<Value *> Args; + Args.push_back(Op); + + Module *M = Mch->getModule(); + Module *RtM = Mch->getRtModule(); + + FunctionCallee hpvm_node_id_call = M->getOrInsertFunction( + StringRef("tensor_set_node_id"), + RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType()); + + CallInst::Create(hpvm_node_id_call, Args, "", II); + + Mch->addIntrinsicToRemove(II); + Mch->setCurrent(new InitialState()); + errs() << "TO INIT STATE\n"; + } break; + + default: // Other HPVM intrinsic + { + Mch->addIntrinsicInst(II); + Mch->setCurrent(new SingleTensorOperation()); + errs() << "TO SINGLE OP\n"; + } break; } delete this; } // else {} // No HPVM intrinsic received. Remain at initial - errs() << "TO NO CHANGE\n"; + errs() << "TO NO CHANGE\n"; } void SingleTensorOperation::transition(CodeGenStateMachine *Mch, @@ -357,23 +329,21 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, if (II) { // Not end of instruction stream errs() << "FULLY CONNECTED LAYER 1\n"; switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_add: - { - IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0); - assert((MulII == II->getOperand(0)) && - "Output of mul must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - - Mch->setCurrent(new FullyConnectedLayer_2()); - errs() << "TO FULLY CONNECTED LAYER 2\n"; - } - break; - default: - Mch->setCurrent(new NoPattern()); - errs() << "TO NO PATERN\n"; - break; + case Intrinsic::hpvm_tensor_add: { + IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0); + assert((MulII == II->getOperand(0)) && + "Output of mul must be used as 1st operand of add"); + Mch->addIntrinsicInst(II); + + Mch->addArgument(II->getOperand(1)); // bias + + Mch->setCurrent(new FullyConnectedLayer_2()); + errs() << "TO FULLY CONNECTED LAYER 2\n"; + } break; + default: + Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATERN\n"; + break; } } else { Mch->setCurrent(new NoPattern()); @@ -387,51 +357,45 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, if (II) { // Not end of instruction stream errs() << "FULLY CONNECTED LAYER 2\n"; switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_tanh: - { - // Type of activation : TanH - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + case Intrinsic::hpvm_tensor_tanh: { + // Type of activation : TanH + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - Mch->addIntrinsicInst(II); + Mch->addIntrinsicInst(II); - Mch->setCurrent(new FullyConnectedLayer_3()); - errs() << "TO FULLY CONNECTED LAYER 3\n"; - } - break; - case Intrinsic::hpvm_tensor_relu: - { - // Type of activation : ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + Mch->setCurrent(new FullyConnectedLayer_3()); + errs() << "TO FULLY CONNECTED LAYER 3\n"; + } break; + case Intrinsic::hpvm_tensor_relu: { + // Type of activation : ReLU + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - Mch->addIntrinsicInst(II); + Mch->addIntrinsicInst(II); - Mch->setCurrent(new FullyConnectedLayer_3()); - errs() << "TO FULLY CONNECTED LAYER 3\n"; - } - break; - case Intrinsic::hpvm_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - errs() << "TO FULLY CONNECTED LAYER 3\n"; - } - break; - default: // No activation, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - errs() << "TO NO PATTERN\n"; - break; + Mch->setCurrent(new FullyConnectedLayer_3()); + errs() << "TO FULLY CONNECTED LAYER 3\n"; + } break; + case Intrinsic::hpvm_tensor_clipped_relu: { + // Type of activation : Clipped ReLU + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + + Mch->addIntrinsicInst(II); + + Mch->setCurrent(new FullyConnectedLayer_3()); + errs() << "TO FULLY CONNECTED LAYER 3\n"; + } break; + default: // No activation, but HPVM intrinsic + Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; + break; } } else { // End of instruction stream // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); + Mch->addArgument( + ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1)); Mch->setCurrent(new FullyConnectedLayer()); errs() << "TO FULLY CONNECTED LAYER\n"; @@ -457,10 +421,10 @@ void FullyConnectedLayer::transition(CodeGenStateMachine *Mch, if (II) { // Not end of instruction stream errs() << "FULLY CONNECTED LAYER\n"; Mch->setCurrent(new NoPattern()); - errs() << "TO NO PATTERN\n"; + errs() << "TO NO PATTERN\n"; delete this; } - errs() << "TO NO CHANGE\n"; + errs() << "TO NO CHANGE\n"; } void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, @@ -468,33 +432,31 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, if (II) { // Not end of instruction stream errs() << "CONVOLUTION LAYER 1\n"; switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_add: - { - IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); - assert((ConvII == II->getOperand(0)) && - "Output of conv must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - - Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv - Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv - Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv - Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv - - Mch->setCurrent(new ConvolutionLayer_2()); - errs() << "TO CONVOLUTION LAYER 2\n"; - } - break; - default: - Mch->setCurrent(new NoPattern()); - errs() << "TO NO PATTERN\n"; - break; + case Intrinsic::hpvm_tensor_add: { + IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); + assert((ConvII == II->getOperand(0)) && + "Output of conv must be used as 1st operand of add"); + Mch->addIntrinsicInst(II); + + Mch->addArgument(II->getOperand(1)); // bias + + Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv + Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv + Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv + Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv + + Mch->setCurrent(new ConvolutionLayer_2()); + errs() << "TO CONVOLUTION LAYER 2\n"; + } break; + default: + Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; + break; } } else { // No addition Mch->addArgument(ConstantPointerNull::get( - Type::getInt8PtrTy(Mch->getModule()->getContext()))); + Type::getInt8PtrTy(Mch->getModule()->getContext()))); // Zero for all convolution numeric arguments FIXME??? IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); @@ -504,28 +466,32 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv // Mch->addArgument(ConstantInt::get( - // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Type::getInt32Ty(Mch->getModule()->getContext()), + // 0)); // Mch->addArgument(ConstantInt::get( - // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Type::getInt32Ty(Mch->getModule()->getContext()), + // 0)); // Mch->addArgument(ConstantInt::get( - // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Type::getInt32Ty(Mch->getModule()->getContext()), + // 0)); // Mch->addArgument(ConstantInt::get( - // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // Type::getInt32Ty(Mch->getModule()->getContext()), + // 0)); // No pooling // 0 for unused pool arguments: // pool_id, pool_size_v, pool_size_h, pool pad_v, // pool_pad_h, pool_stride_v, pool_stride_h for (int i = 0; i < 7; i++) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); } // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); + Mch->addArgument( + ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1)); Mch->setCurrent(new ConvolutionLayer()); - errs() << "TO CONVOLUTION LAYER\n"; + errs() << "TO CONVOLUTION LAYER\n"; } delete this; } @@ -535,100 +501,91 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, if (II) { // Not end of instruction stream errs() << "CONVOLUTION LAYER 2\n"; switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_tanh: - { - // Type of activation : TanH - // Mch->addArgument(ConstantInt::get( - // Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - errs() << "TO CONVOLUTION LAYER 3\n"; - } - break; - case Intrinsic::hpvm_tensor_relu: - { - // Type of activation : ReLU - // Mch->addArgument(ConstantInt::get( - // Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - errs() << "TO CONVOLUTION LAYER 3\n"; - } - break; - case Intrinsic::hpvm_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU - // Mch->addArgument(ConstantInt::get( - // Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - errs() << "TO CONVOLUTION LAYER 3\n"; - } - break; - case Intrinsic::hpvm_tensor_pool_max: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // pool_size_v, pool_size_h, pool pad_v, - // pool_pad_h, pool_stride_v, pool_stride_h - for (int i = 1; i < 7; i++) { - Mch->addArgument(II->getOperand(i)); - } - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_4()); - errs() << "TO CONVOLUTION LAYER 4\n"; - } - break; - case Intrinsic::hpvm_tensor_pool_min: - { - // pool min FIXME: 2: supported? - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - // pool_size_v, pool_size_h, pool pad_v, - // pool_pad_h, pool_stride_v, pool_stride_h - for (int i = 1; i < 7; i++) { - Mch->addArgument(II->getOperand(i)); - } - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_4()); - errs() << "TO CONVOLUTION LAYER 4\n"; - } - break; - case Intrinsic::hpvm_tensor_pool_mean: - { - // pool mean - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // pool_size_v, pool_size_h, pool pad_v, - // pool_pad_h, pool_stride_v, pool_stride_h - for (int i = 1; i < 7; i++) { - Mch->addArgument(II->getOperand(i)); - } - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_4()); - errs() << "TO CONVOLUTION LAYER 4\n"; - } - break; - default: // No activation, No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - errs() << "TO NO PATTERN\n"; - break; + case Intrinsic::hpvm_tensor_tanh: { + // Type of activation : TanH + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), + // 0)); + Mch->addIntrinsicInst(II); + + Mch->setCurrent(new ConvolutionLayer_3()); + errs() << "TO CONVOLUTION LAYER 3\n"; + } break; + case Intrinsic::hpvm_tensor_relu: { + // Type of activation : ReLU + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), + // 1)); + Mch->addIntrinsicInst(II); + + Mch->setCurrent(new ConvolutionLayer_3()); + errs() << "TO CONVOLUTION LAYER 3\n"; + } break; + case Intrinsic::hpvm_tensor_clipped_relu: { + // Type of activation : Clipped ReLU + // Mch->addArgument(ConstantInt::get( + // Type::getInt32Ty(Mch->getModule()->getContext()), + // 2)); + Mch->addIntrinsicInst(II); + + Mch->setCurrent(new ConvolutionLayer_3()); + errs() << "TO CONVOLUTION LAYER 3\n"; + } break; + case Intrinsic::hpvm_tensor_pool_max: { + // pool max + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // pool_size_v, pool_size_h, pool pad_v, + // pool_pad_h, pool_stride_v, pool_stride_h + for (int i = 1; i < 7; i++) { + Mch->addArgument(II->getOperand(i)); + } + // No activation + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), -1)); + Mch->addIntrinsicInst(II); + + Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; + } break; + case Intrinsic::hpvm_tensor_pool_min: { + // pool min FIXME: 2: supported? + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + // pool_size_v, pool_size_h, pool pad_v, + // pool_pad_h, pool_stride_v, pool_stride_h + for (int i = 1; i < 7; i++) { + Mch->addArgument(II->getOperand(i)); + } + // No activation + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), -1)); + Mch->addIntrinsicInst(II); + + Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; + } break; + case Intrinsic::hpvm_tensor_pool_mean: { + // pool mean + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + // pool_size_v, pool_size_h, pool pad_v, + // pool_pad_h, pool_stride_v, pool_stride_h + for (int i = 1; i < 7; i++) { + Mch->addArgument(II->getOperand(i)); + } + // No activation + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), -1)); + Mch->addIntrinsicInst(II); + + Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; + } break; + default: // No activation, No pooling, but HPVM intrinsic + Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; + break; } } else { // End of instruction stream // No pooling @@ -636,12 +593,12 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, // pool_id, pool_size_v, pool_size_h, pool pad_v, // pool_pad_h, pool_stride_v, pool_stride_h for (int i = 0; i < 7; i++) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); } // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); + Mch->addArgument( + ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1)); Mch->setCurrent(new ConvolutionLayer()); errs() << "TO CONVOLUTION LAYER\n"; @@ -654,104 +611,98 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, if (II) { // Not end of instruction stream errs() << "CONVOLUTION LAYER 3\n"; switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_pool_max: - { - // pool max + case Intrinsic::hpvm_tensor_pool_max: { + // pool max + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + // pool_size_v, pool_size_h, pool pad_v, + // pool_pad_h, pool_stride_v, pool_stride_h + for (int i = 1; i < 7; i++) { + Mch->addArgument(II->getOperand(i)); + } + Mch->addIntrinsicInst(II); + + // Revisit last intrinsic, to add argument for activation operation + IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); + // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU + Intrinsic::ID ActIID = ActII->getIntrinsicID(); + if (ActIID == Intrinsic::hpvm_tensor_tanh) { + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + } + + Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; + } break; + case Intrinsic::hpvm_tensor_pool_min: { + // pool min FIXME: 2: supported? + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + + // pool_size_v, pool_size_h, pool pad_v, + // pool_pad_h, pool_stride_v, pool_stride_h + for (int i = 1; i < 7; i++) { + Mch->addArgument(II->getOperand(i)); + } + Mch->addIntrinsicInst(II); + + // Revisit last intrinsic, to add argument for activation operation + IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); + // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU + Intrinsic::ID ActIID = ActII->getIntrinsicID(); + if (ActIID == Intrinsic::hpvm_tensor_tanh) { Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // pool_size_v, pool_size_h, pool pad_v, - // pool_pad_h, pool_stride_v, pool_stride_h - for (int i = 1; i < 7; i++) { - Mch->addArgument(II->getOperand(i)); - } - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::hpvm_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::hpvm_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - Mch->setCurrent(new ConvolutionLayer_4()); - errs() << "TO CONVOLUTION LAYER 4\n"; - } - break; - case Intrinsic::hpvm_tensor_pool_min: - { - // pool min FIXME: 2: supported? + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + } + + Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; + } break; + case Intrinsic::hpvm_tensor_pool_mean: { + // pool max + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + // pool_size_v, pool_size_h, pool pad_v, + // pool_pad_h, pool_stride_v, pool_stride_h + for (int i = 1; i < 7; i++) { + Mch->addArgument(II->getOperand(i)); + } + Mch->addIntrinsicInst(II); + + // Revisit last intrinsic, to add argument for activation operation + IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); + // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU + Intrinsic::ID ActIID = ActII->getIntrinsicID(); + if (ActIID == Intrinsic::hpvm_tensor_tanh) { + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - - // pool_size_v, pool_size_h, pool pad_v, - // pool_pad_h, pool_stride_v, pool_stride_h - for (int i = 1; i < 7; i++) { - Mch->addArgument(II->getOperand(i)); - } - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::hpvm_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::hpvm_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - Mch->setCurrent(new ConvolutionLayer_4()); - errs() << "TO CONVOLUTION LAYER 4\n"; - } - break; - case Intrinsic::hpvm_tensor_pool_mean: - { - // pool max + Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // pool_size_v, pool_size_h, pool pad_v, - // pool_pad_h, pool_stride_v, pool_stride_h - for (int i = 1; i < 7; i++) { - Mch->addArgument(II->getOperand(i)); - } - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::hpvm_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::hpvm_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - Mch->setCurrent(new ConvolutionLayer_4()); - errs() << "TO CONVOLUTION LAYER 4\n"; - } - break; - default: // No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - errs() << "TO NO PATTERN\n"; - break; + Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + } + + Mch->setCurrent(new ConvolutionLayer_4()); + errs() << "TO CONVOLUTION LAYER 4\n"; + } break; + default: // No pooling, but HPVM intrinsic + Mch->setCurrent(new NoPattern()); + errs() << "TO NO PATTERN\n"; + break; } } else { // End of instruction stream // No pooling @@ -759,8 +710,8 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, // pool_id, pool_size_v, pool_size_h, pool pad_v, // pool_pad_h, pool_stride_v, pool_stride_h for (int i = 0; i < 7; i++) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); } // Revisit last intrinsic, to add argument for activation operation @@ -769,17 +720,17 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, Intrinsic::ID ActIID = ActII->getIntrinsicID(); if (ActIID == Intrinsic::hpvm_tensor_tanh) { Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::hpvm_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } + Type::getInt32Ty(Mch->getModule()->getContext()), 0)); + } else if (ActIID == Intrinsic::hpvm_tensor_relu) { + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 1)); + } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu + Mch->addArgument(ConstantInt::get( + Type::getInt32Ty(Mch->getModule()->getContext()), 2)); + } - Mch->setCurrent(new ConvolutionLayer()); - errs() << "TO CONVOLUTION LAYER\n"; + Mch->setCurrent(new ConvolutionLayer()); + errs() << "TO CONVOLUTION LAYER\n"; } delete this; } @@ -797,8 +748,7 @@ void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch, delete this; } -void ConvolutionLayer::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { +void ConvolutionLayer::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { if (II) { // Not end of instruction stream errs() << "CONVOLUTION LAYER\n"; Mch->setCurrent(new NoPattern()); @@ -810,8 +760,8 @@ void ConvolutionLayer::transition(CodeGenStateMachine *Mch, void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {} -CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM) : - M(_M), RtM(_RtM) { +CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM) + : M(_M), RtM(_RtM) { current = new InitialState(); } @@ -819,14 +769,17 @@ void CodeGenStateMachine::transition(IntrinsicInst *II) { current->transition(this, II); } -void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRef, - InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) { +void CodeGenStateMachine::codeGen( + DFNode *N, Function *F, const StringRef &strRef, + InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) { - errs() << "TRANSITIONTED TO: " << std::to_string(current->getStateID()) << "\n"; - assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || - (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER) || - (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) && - "Unsupported instruction sequence for the Wrapper API.\n" ); + errs() << "TRANSITIONTED TO: " << std::to_string(current->getStateID()) + << "\n"; + assert( + ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || + (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER) || + (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION)) && + "Unsupported instruction sequence for the Wrapper API.\n"); if ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)) { @@ -836,90 +789,90 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe // We have a valid instruction sequence. // Make sure that the instruction sequence can be traslated: // each instruction's result must be used only by the next one in sequence. - - for (unsigned p = 0; p < IIs.size()-1; p++) { + + for (unsigned p = 0; p < IIs.size() - 1; p++) { IntrinsicInst *II = IIs[p]; assert((II->hasOneUse()) && - "Instruction sequence does not fit pattern: not single use\n"); - + "Instruction sequence does not fit pattern: not single use\n"); + Value::user_iterator ui = II->user_begin(); // The only use - assert((*ui == IIs[p+1]) && - "Instruction sequence does not fit pattern: not used by next instruction\n"); + assert((*ui == IIs[p + 1]) && "Instruction sequence does not fit " + "pattern: not used by next instruction\n"); } // Create corresponding wrapper API call CallInst *CI; switch (current->getStateID()) { - case AbstractState::ID::CONVOLUTION_LAYER: - { - FunctionCallee wrapper_ConvLayer2 = - M->getOrInsertFunction(StringRef("wrapper_ConvLayer2"), - RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType()); - - - // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0 - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - std::vector<Value*> UpdatedArgs; - UpdatedArgs.push_back(GEPConst); - for (unsigned i = 0; i < Args.size(); i++) { - UpdatedArgs.push_back(Args[i]); - } - // Create wrapper API function call - CI = CallInst::Create(wrapper_ConvLayer2, UpdatedArgs, ""); - } - break; - case AbstractState::ID::FULLY_CONNECTED_LAYER: - { - FunctionCallee wrapper_FCLayer = - M->getOrInsertFunction(StringRef("wrapper_FCLayer"), - RtM->getFunction(StringRef("wrapper_FCLayer"))->getFunctionType()); - - // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0 - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - std::vector<Value*> UpdatedArgs; - UpdatedArgs.push_back(GEPConst); - for (unsigned i = 0; i < Args.size(); i++) { - UpdatedArgs.push_back(Args[i]); - } - - // Create wrapper API function call - CI = CallInst::Create(wrapper_FCLayer, UpdatedArgs, ""); - } - break; - default: - llvm_unreachable("Unexpected CodeGenStateMachine State\n"); - break; + case AbstractState::ID::CONVOLUTION_LAYER: { + FunctionCallee wrapper_ConvLayer2 = M->getOrInsertFunction( + StringRef("wrapper_ConvLayer2"), + RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType()); + + // FIXME: get last (float) arguments from clipped relu intrinsic. For now, + // 0 + Args.push_back( + ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0)); + Args.push_back( + ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0)); + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + std::vector<Value *> UpdatedArgs; + UpdatedArgs.push_back(GEPConst); + for (unsigned i = 0; i < Args.size(); i++) { + UpdatedArgs.push_back(Args[i]); + } + // Create wrapper API function call + CI = CallInst::Create(wrapper_ConvLayer2, UpdatedArgs, ""); + } break; + case AbstractState::ID::FULLY_CONNECTED_LAYER: { + FunctionCallee wrapper_FCLayer = M->getOrInsertFunction( + StringRef("wrapper_FCLayer"), + RtM->getFunction(StringRef("wrapper_FCLayer"))->getFunctionType()); + + // FIXME: get last (float) arguments from clipped relu intrinsic. For now, + // 0 + Args.push_back( + ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0)); + Args.push_back( + ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0)); + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + std::vector<Value *> UpdatedArgs; + UpdatedArgs.push_back(GEPConst); + for (unsigned i = 0; i < Args.size(); i++) { + UpdatedArgs.push_back(Args[i]); + } + + // Create wrapper API function call + CI = CallInst::Create(wrapper_FCLayer, UpdatedArgs, ""); + } break; + default: + llvm_unreachable("Unexpected CodeGenStateMachine State\n"); + break; } // Insert new call and replace all uses of pattern result with @@ -928,326 +881,328 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe CI->insertBefore(IIlast); IIlast->replaceAllUsesWith(CI); - } - else { // SINGLE_TENSOR_OPERATION + } else { // SINGLE_TENSOR_OPERATION assert((IIs.size() == 1) && - "Unexpected size of intrinsics vector in code gen state machine.\n"); - assert(Args.empty() && "Unexpected arguments found in coge gen state machine.\n"); + "Unexpected size of intrinsics vector in code gen state machine.\n"); + assert(Args.empty() && + "Unexpected arguments found in coge gen state machine.\n"); IntrinsicInst *TensorII = IIs[0]; errs() << "TensorII: " << *TensorII << "\n"; switch (TensorII->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_group_convolution: - { /* llvm.hpvm.tensor.group.conv */ - // Tensor group conv is not in place. - DEBUG(errs() << F->getName() << "\t: Handling tensor group convolution \n"); - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - - Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); - Args.push_back(conv_mode); - - Args.push_back(TensorII->getOperand(7)); - - // Create wrapper API runtime function call - FunctionCallee wrapper_tensorGroupConvolution = - M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"), - RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType()); - CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution, - Args, "", TensorII); - // We can replace the call to hpvm.tensor.mul with the runtime call - TensorII->replaceAllUsesWith(CI); + case Intrinsic::hpvm_tensor_group_convolution: { /* llvm.hpvm.tensor.group.conv + */ + // Tensor group conv is not in place. + DEBUG(errs() << F->getName() + << "\t: Handling tensor group convolution \n"); + + // Argument list for the runtime call + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + Args.push_back(GEPConst); + + Args.push_back(TensorII->getOperand(0)); + Args.push_back(TensorII->getOperand(1)); + Args.push_back(TensorII->getOperand(2)); + Args.push_back(TensorII->getOperand(3)); + Args.push_back(TensorII->getOperand(4)); + Args.push_back(TensorII->getOperand(5)); + + Constant *conv_mode = + ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); + Args.push_back(conv_mode); + + Args.push_back(TensorII->getOperand(7)); + + // Create wrapper API runtime function call + FunctionCallee wrapper_tensorGroupConvolution = M->getOrInsertFunction( + StringRef("wrapper_tensorGroupConvolution"), + RtM->getFunction(StringRef("wrapper_tensorGroupConvolution")) + ->getFunctionType()); + CallInst *CI = + CallInst::Create(wrapper_tensorGroupConvolution, Args, "", TensorII); + // We can replace the call to hpvm.tensor.mul with the runtime call + TensorII->replaceAllUsesWith(CI); + } break; + + case Intrinsic::hpvm_tensor_batchnorm: { /* llvm.hpvm.tensor.batchnorm */ + + // Tensor batchnorm is not in place. + // FIXME: Add Check for InPlace Analysis + DEBUG(errs() << F->getName() + << "\t: Handling tensor batch normalization \n"); + + // Argument list for the runtime call + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + Args.push_back(GEPConst); + + Args.push_back(TensorII->getOperand(0)); + Args.push_back(TensorII->getOperand(1)); + Args.push_back(TensorII->getOperand(2)); + Args.push_back(TensorII->getOperand(3)); + Args.push_back(TensorII->getOperand(4)); + Args.push_back(TensorII->getOperand(5)); + + // Create wrapper API runtime function call + FunctionCallee wrapper_tensorBatchNorm = M->getOrInsertFunction( + StringRef("wrapper_tensorBatchNorm"), + RtM->getFunction(StringRef("wrapper_tensorBatchNorm")) + ->getFunctionType()); + CallInst *CI = + CallInst::Create(wrapper_tensorBatchNorm, Args, "", TensorII); + // We can replace the call to hpvm.tensor.batchnorm with the wrapper API + // call + TensorII->replaceAllUsesWith(CI); + } break; + + case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */ + DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n"); + + // Tensor add(a,b) is in place for argument a. + // Value *Op = TensorII->getOperand(0); + // Test the intrinsic operand for in place operation. + // bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); + + // Code generation will not continue if this is false, because the target + // may provide an in place operation(safe choice) + // FIXME: remove this comment - must check for in-place + // assert(inplace && + // "Operand not valid for in place operation. Code gen + // aborted.\n"); + + // Argument list for the runtime call + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + Args.push_back(GEPConst); + + Args.push_back(TensorII->getOperand(0)); + Args.push_back(TensorII->getOperand(1)); + + // Create wrapper API runtime function call + FunctionCallee wrapper_tensorAdd = M->getOrInsertFunction( + StringRef("wrapper_tensorAdd"), + RtM->getFunction(StringRef("wrapper_tensorAdd"))->getFunctionType()); + CallInst::Create(wrapper_tensorAdd, Args, "", TensorII); + // We can replace the call to hpvm.tensor.add with the 1st argument + // that, due to in place operation, now contains the result + TensorII->replaceAllUsesWith(TensorII->getOperand(0)); + } break; + + case Intrinsic::hpvm_tensor_pool_max: + case Intrinsic::hpvm_tensor_pool_mean: + case Intrinsic::hpvm_tensor_pool_min: { + DEBUG(errs() << F->getName() + << "\t: Handling tensor pooling functions\n"); + + // Argument list for tensor pooling: + // input, poolFunction, window_height, window_width, + // vertical_pad, horizontal_pad, vertical_stride, horizontal_stride + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + Args.push_back(GEPConst); + + Args.push_back(TensorII->getOperand(0)); + + int pool_type = 0; + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) { + pool_type = 0; } - break; - - case Intrinsic::hpvm_tensor_batchnorm: - { /* llvm.hpvm.tensor.batchnorm */ - - // Tensor batchnorm is not in place. - // FIXME: Add Check for InPlace Analysis - DEBUG(errs() << F->getName() << "\t: Handling tensor batch normalization \n"); - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - - // Create wrapper API runtime function call - FunctionCallee wrapper_tensorBatchNorm = - M->getOrInsertFunction(StringRef("wrapper_tensorBatchNorm"), - RtM->getFunction(StringRef("wrapper_tensorBatchNorm"))->getFunctionType()); - CallInst* CI = CallInst::Create(wrapper_tensorBatchNorm, - Args, "", TensorII); - // We can replace the call to hpvm.tensor.batchnorm with the wrapper API call - TensorII->replaceAllUsesWith(CI); + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) { + pool_type = 1; } - break; - - case Intrinsic::hpvm_tensor_add: - { /* llvm.hpvm.tensor.add */ - DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n"); - - // Tensor add(a,b) is in place for argument a. - // Value *Op = TensorII->getOperand(0); - // Test the intrinsic operand for in place operation. - // bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); - - // Code generation will not continue if this is false, because the target - // may provide an in place operation(safe choice) - // FIXME: remove this comment - must check for in-place - // assert(inplace && - // "Operand not valid for in place operation. Code gen aborted.\n"); - - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - - // Create wrapper API runtime function call - FunctionCallee wrapper_tensorAdd = - M->getOrInsertFunction(StringRef("wrapper_tensorAdd"), - RtM->getFunction(StringRef("wrapper_tensorAdd"))->getFunctionType()); - CallInst::Create(wrapper_tensorAdd, Args, "", TensorII); - // We can replace the call to hpvm.tensor.add with the 1st argument - // that, due to in place operation, now contains the result - TensorII->replaceAllUsesWith(TensorII->getOperand(0)); + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) { + pool_type = 2; } - break; - case Intrinsic::hpvm_tensor_pool_max: - case Intrinsic::hpvm_tensor_pool_mean: - case Intrinsic::hpvm_tensor_pool_min: - { - DEBUG(errs() << F->getName() << "\t: Handling tensor pooling functions\n"); - - // Argument list for tensor pooling: - // input, poolFunction, window_height, window_width, - // vertical_pad, horizontal_pad, vertical_stride, horizontal_stride - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - - int pool_type = 0; - if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) { - pool_type = 0; - } - if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) { - pool_type = 1; - } - if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) { - pool_type = 2; - } - - Constant *constPoolType = + Constant *constPoolType = ConstantInt::get(Type::getInt32Ty(M->getContext()), pool_type); - Args.push_back(constPoolType); - - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - Args.push_back(TensorII->getOperand(6)); - + Args.push_back(constPoolType); + + Args.push_back(TensorII->getOperand(1)); + Args.push_back(TensorII->getOperand(2)); + Args.push_back(TensorII->getOperand(3)); + Args.push_back(TensorII->getOperand(4)); + Args.push_back(TensorII->getOperand(5)); + Args.push_back(TensorII->getOperand(6)); + + // Create wrapper API runtime function call + FunctionCallee wrapper_tensorPooling = M->getOrInsertFunction( + StringRef("wrapper_tensorPooling"), + RtM->getFunction(StringRef("wrapper_tensorPooling")) + ->getFunctionType()); + CallInst *CI = + CallInst::Create(wrapper_tensorPooling, Args, "", TensorII); + + // Replacing intrinsic result uses with the result of the tensor runtime + // operation + TensorII->replaceAllUsesWith(CI); + } break; + + case Intrinsic::hpvm_tensor_relu: + case Intrinsic::hpvm_tensor_clipped_relu: + case Intrinsic::hpvm_tensor_tanh: { + DEBUG(errs() << F->getName() + << "\t: Handling tensor activation functions\n"); + + // Tensor relu(a) (and others) is in place for argument a. + Value *Op = TensorII->getOperand(0); + + // Test the intrinsic operand for in place operation. + //-- bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); + // Code generation will not continue if this is false, because the target + // may provide an in place operation(safe choice) + //-- assert(inplace && + //-- "Operand not valid for in place operation. Code gen + //aborted.\n"); + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + Args.push_back(GEPConst); + + Args.push_back(TensorII->getOperand(0)); + + if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) { // Create wrapper API runtime function call - FunctionCallee wrapper_tensorPooling = - M->getOrInsertFunction(StringRef("wrapper_tensorPooling"), - RtM->getFunction(StringRef("wrapper_tensorPooling"))->getFunctionType()); - CallInst* CI = CallInst::Create(wrapper_tensorPooling, Args, "", TensorII); - - // Replacing intrinsic result uses with the result of the tensor runtime operation - TensorII->replaceAllUsesWith(CI); - } - break; - - case Intrinsic::hpvm_tensor_relu: - case Intrinsic::hpvm_tensor_clipped_relu: - case Intrinsic::hpvm_tensor_tanh: - { - DEBUG(errs() << F->getName() << "\t: Handling tensor activation functions\n"); - - // Tensor relu(a) (and others) is in place for argument a. - Value *Op = TensorII->getOperand(0); - - // Test the intrinsic operand for in place operation. - //-- bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); - // Code generation will not continue if this is false, because the target - // may provide an in place operation(safe choice) - //-- assert(inplace && - //-- "Operand not valid for in place operation. Code gen aborted.\n"); - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - - if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) { - // Create wrapper API runtime function call - FunctionCallee wrapper_tensorRelu = - M->getOrInsertFunction(StringRef("wrapper_tensorRelu"), - RtM->getFunction(StringRef("wrapper_tensorRelu"))->getFunctionType()); - CallInst::Create(wrapper_tensorRelu, Args, "", TensorII); - } - else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) { - // Create wrapper API runtime function call - FunctionCallee wrapper_tensorClippedRelu = - M->getOrInsertFunction(StringRef("wrapper_tensorClippedRelu"), - RtM->getFunction(StringRef("wrapper_tensorClippedRelu"))->getFunctionType()); - CallInst::Create(wrapper_tensorClippedRelu, Args, "", TensorII); - } - else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) { - // Create wrapper API runtime function call - FunctionCallee wrapper_tensorTanh = - M->getOrInsertFunction(StringRef("wrapper_tensorTanh"), - RtM->getFunction(StringRef("wrapper_tensorTanh"))->getFunctionType()); - CallInst::Create(wrapper_tensorTanh, Args, "", TensorII); - } - - // We can replace the call to hpvm.tensor.{relu,clipped relu, tanh} - // with the 1st argument that, due to in place operation, - // now contains the result - TensorII->replaceAllUsesWith(TensorII->getOperand(0)); - } - break; - - case Intrinsic::hpvm_tensor_softmax: - { /* llvm.hpvm.tensor.softmax */ - - DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n"); - // Tensor softmax(a) is in place for argument a. - Value *Op = TensorII->getOperand(0); - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - + FunctionCallee wrapper_tensorRelu = M->getOrInsertFunction( + StringRef("wrapper_tensorRelu"), + RtM->getFunction(StringRef("wrapper_tensorRelu")) + ->getFunctionType()); + CallInst::Create(wrapper_tensorRelu, Args, "", TensorII); + } else if (TensorII->getIntrinsicID() == + Intrinsic::hpvm_tensor_clipped_relu) { + // Create wrapper API runtime function call + FunctionCallee wrapper_tensorClippedRelu = M->getOrInsertFunction( + StringRef("wrapper_tensorClippedRelu"), + RtM->getFunction(StringRef("wrapper_tensorClippedRelu")) + ->getFunctionType()); + CallInst::Create(wrapper_tensorClippedRelu, Args, "", TensorII); + } else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) { // Create wrapper API runtime function call - FunctionCallee wrapper_tensorSoftmax = - M->getOrInsertFunction(StringRef("wrapper_tensorSoftmax"), - RtM->getFunction(StringRef("wrapper_tensorSoftmax"))->getFunctionType()); - CallInst::Create(wrapper_tensorSoftmax, Args, "", TensorII); - // We can replace the call to hpvm.tensor.softmax with the 1st argument - // that, due to in place operation, now contains the result - TensorII->replaceAllUsesWith(TensorII->getOperand(0)); + FunctionCallee wrapper_tensorTanh = M->getOrInsertFunction( + StringRef("wrapper_tensorTanh"), + RtM->getFunction(StringRef("wrapper_tensorTanh")) + ->getFunctionType()); + CallInst::Create(wrapper_tensorTanh, Args, "", TensorII); } - break; - - default: - llvm_unreachable("Unknown HPVM Intrinsic!"); - break; + // We can replace the call to hpvm.tensor.{relu,clipped relu, tanh} + // with the 1st argument that, due to in place operation, + // now contains the result + TensorII->replaceAllUsesWith(TensorII->getOperand(0)); + } break; + + case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */ + + DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n"); + // Tensor softmax(a) is in place for argument a. + Value *Op = TensorII->getOperand(0); + + // Create string for node name, as first argument for wrapper API call + Constant *ConstArray = + ConstantDataArray::getString(M->getContext(), strRef, true); + GlobalVariable *GV = + new GlobalVariable(*M, ConstArray->getType(), true, + GlobalValue::ExternalLinkage, ConstArray, ""); + // Create GEP expression to access it + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *GEPConst = ConstantExpr::getGetElementPtr( + GV->getType()->getPointerElementType(), GV, GEPIndices); + + Args.push_back(GEPConst); + + Args.push_back(TensorII->getOperand(0)); + + // Create wrapper API runtime function call + FunctionCallee wrapper_tensorSoftmax = M->getOrInsertFunction( + StringRef("wrapper_tensorSoftmax"), + RtM->getFunction(StringRef("wrapper_tensorSoftmax")) + ->getFunctionType()); + CallInst::Create(wrapper_tensorSoftmax, Args, "", TensorII); + // We can replace the call to hpvm.tensor.softmax with the 1st argument + // that, due to in place operation, now contains the result + TensorII->replaceAllUsesWith(TensorII->getOperand(0)); + } break; + + default: + llvm_unreachable("Unknown HPVM Intrinsic!"); + break; } } // No other case exists, since assertion passed - // Remove the instructions we translated to the simulator call. // Traverse the vector backwards, otherwise definitions are deleted while // their subsequent uses are still around. for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(), - re = IIs.rend(); ri != re; ++ri) { + re = IIs.rend(); + ri != re; ++ri) { DEBUG(errs() << "Erasing: " << **ri << "\n"); (*ri)->eraseFromParent(); } - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs_remove.rbegin(), - re = IIs_remove.rend(); ri != re; ++ri) { + re = IIs_remove.rend(); + ri != re; ++ri) { DEBUG(errs() << "Erasing: " << **ri << "\n"); (*ri)->eraseFromParent(); } - } // DFG2LLVM_WrapperAPI - The first implementation. @@ -1256,11 +1211,8 @@ struct DFG2LLVM_WrapperAPI : public DFG2LLVM { static char ID; // Pass identification, replacement for typeid DFG2LLVM_WrapperAPI() : DFG2LLVM(ID) {} - private: - public: - void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<BuildDFG>(); AU.addRequired<InPlaceDFGAnalysisWrapper>(); @@ -1275,7 +1227,7 @@ public: class CGT_WrapperAPI : public CodeGenTraversal { private: - //Member variables + // Member variables unsigned nodeID; // Used as a node identifier std::string QuantizationInputsFilenameStr; @@ -1296,26 +1248,23 @@ private: // Virtual Functions void init(); void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); + void codeGen(DFInternalNode *N); + void codeGen(DFLeafNode *N); public: - // Constructor CGT_WrapperAPI(Module &_M, BuildDFG &_DFG, - InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP, - std::string &_QuantizationInputsFilenameStr, - std::string &_ConfigurationInputsFilenameStr) - : CodeGenTraversal(_M, _DFG), IPP(&_IPP), - QuantizationInputsFilenameStr(_QuantizationInputsFilenameStr), - ConfigurationInputsFilenameStr(_ConfigurationInputsFilenameStr) { + InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP, + std::string &_QuantizationInputsFilenameStr, + std::string &_ConfigurationInputsFilenameStr) + : CodeGenTraversal(_M, _DFG), IPP(&_IPP), + QuantizationInputsFilenameStr(_QuantizationInputsFilenameStr), + ConfigurationInputsFilenameStr(_ConfigurationInputsFilenameStr) { nodeID = 0; initRuntimeAPI(); } - }; - void CGT_WrapperAPI::init() { // FIXME: what to do here? If anything? } @@ -1325,15 +1274,8 @@ void CGT_WrapperAPI::initRuntimeAPI() { // Load Runtime API Module SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n"); - - // FIXME: set correct path - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) + runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext()); + if (runtimeModule == nullptr) DEBUG(errs() << Err.getMessage()); else DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); @@ -1352,69 +1294,71 @@ void CGT_WrapperAPI::initRuntimeAPI() { // Find hpvm.init and visc.cleanup calls, and add placeholder methods // for initialization and cleanup of the hpvm tensor runtime - Function* VI = M.getFunction("llvm.hpvm.init"); + Function *VI = M.getFunction("llvm.hpvm.init"); assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n"); InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create(llvm_hpvm_initApproxhpvmRt, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); + CallInst::Create( + llvm_hpvm_initApproxhpvmRt, + ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), + "", InitCall); StringRef QRangesStrRef = StringRef(QuantizationInputsFilenameStr); // Create string for node name, as first argument for wrapper API call - Constant *ConstArray1 = ConstantDataArray::getString(M.getContext(), - QRangesStrRef, true); - GlobalVariable *GV1 = new GlobalVariable(M,ConstArray1->getType(), - true, GlobalValue::ExternalLinkage, ConstArray1, ""); + Constant *ConstArray1 = + ConstantDataArray::getString(M.getContext(), QRangesStrRef, true); + GlobalVariable *GV1 = + new GlobalVariable(M, ConstArray1->getType(), true, + GlobalValue::ExternalLinkage, ConstArray1, ""); // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* QRangesGEPConst = - ConstantExpr::getGetElementPtr(GV1->getType()->getPointerElementType(), - GV1, GEPIndices); + Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); + Constant *GEPIndices[] = {Int_0, Int_0}; + Constant *QRangesGEPConst = ConstantExpr::getGetElementPtr( + GV1->getType()->getPointerElementType(), GV1, GEPIndices); StringRef ConfsStrRef = StringRef(ConfigurationInputsFilenameStr); // Create string for node name, as first argument for wrapper API call - Constant *ConstArray2 = ConstantDataArray::getString(M.getContext(), - ConfsStrRef, true); - GlobalVariable *GV2 = new GlobalVariable(M,ConstArray2->getType(), - true, GlobalValue::ExternalLinkage, ConstArray2, ""); - Constant* ConfsGEPConst = - ConstantExpr::getGetElementPtr(GV2->getType()->getPointerElementType(), - GV2, GEPIndices); - ArrayRef<Value*> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst}; - CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "", InitCall); - - Function* VC = M.getFunction("llvm.hpvm.cleanup"); + Constant *ConstArray2 = + ConstantDataArray::getString(M.getContext(), ConfsStrRef, true); + GlobalVariable *GV2 = + new GlobalVariable(M, ConstArray2->getType(), true, + GlobalValue::ExternalLinkage, ConstArray2, ""); + Constant *ConfsGEPConst = ConstantExpr::getGetElementPtr( + GV2->getType()->getPointerElementType(), GV2, GEPIndices); + ArrayRef<Value *> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst}; + CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "", + InitCall); + + Function *VC = M.getFunction("llvm.hpvm.cleanup"); assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n"); CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupApproxhpvmRt, ArrayRef<Value*>(), "", CleanupCall); - CallInst::Create(llvm_hpvm_clearRuntimeController, ArrayRef<Value*>(), "", CleanupCall); - + CallInst::Create(llvm_hpvm_cleanupApproxhpvmRt, ArrayRef<Value *>(), "", + CleanupCall); + CallInst::Create(llvm_hpvm_clearRuntimeController, ArrayRef<Value *>(), "", + CleanupCall); } -void CGT_WrapperAPI::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs () << "Skipping internal node\n"; +void CGT_WrapperAPI::codeGen(DFInternalNode *N) { + errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n"; + errs() << "Skipping internal node\n"; } -void CGT_WrapperAPI::codeGen(DFLeafNode* N) { +void CGT_WrapperAPI::codeGen(DFLeafNode *N) { // Skip code generation if it is a dummy node - if(N->isDummyNode()) { + if (N->isDummyNode()) { DEBUG(errs() << "Skipping dummy node\n"); return; } // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { + if (N->isAllocationNode()) { assert(false && "Allocation Node not expected in ApproxHPVM"); return; } - // Increment the node ID, for current node. ++nodeID; - errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n"; + errs() << "Node ID string: " << StringRef(std::to_string(nodeID)) << "\n"; // Get the function associated with the dataflow node Function *F = N->getFuncPointer(); @@ -1429,50 +1373,51 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { // Clone the function ValueToValueMapTy VMap; - std::string FName(F->getName().data());//Twine FName = F->getName(); + std::string FName(F->getName().data()); // Twine FName = F->getName(); - F_wrapper_api = CloneFunction(F, VMap); - F_wrapper_api->setName(FName+"_wrapper_api"); + F_wrapper_api->setName(FName + "_wrapper_api"); F_wrapper_api->removeFromParent(); M.getFunctionList().push_back(F_wrapper_api); N->addGenFunc(F_wrapper_api, hpvm::PROMISE_TARGET, true); /* Removing HPVM in/out/inout function attributes */ - for(Function::arg_iterator ai = F_wrapper_api->arg_begin(), ae = F_wrapper_api->arg_end(); - ai != ae; ai++){ + for (Function::arg_iterator ai = F_wrapper_api->arg_begin(), + ae = F_wrapper_api->arg_end(); + ai != ae; ai++) { Argument *Arg = &*ai; - if(Arg->hasAttribute(Attribute::In)) + if (Arg->hasAttribute(Attribute::In)) Arg->removeAttr(Attribute::In); - if(Arg->hasAttribute(Attribute::Out)) + if (Arg->hasAttribute(Attribute::Out)) Arg->removeAttr(Attribute::Out); - if(Arg->hasAttribute(Attribute::InOut)) - Arg->removeAttr(Attribute::InOut); + if (Arg->hasAttribute(Attribute::InOut)) + Arg->removeAttr(Attribute::InOut); } // Adding nounwind to generated function : FIXME: needed? DEBUG(errs() << "Adding nounwind to generated function\n"); - F_wrapper_api->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind); + F_wrapper_api->addAttribute(AttributeList::FunctionIndex, + Attribute::NoUnwind); - // Add llvm_hpvm_requestTensor calls for every pointer argument of the function - // (they are all expected to be tensors), at the beginning of the function. - // This is the first instruction of the function, insert them before this - Instruction* FI = &*(F_wrapper_api->getEntryBlock().begin()); + // Add llvm_hpvm_requestTensor calls for every pointer argument of the + // function (they are all expected to be tensors), at the beginning of the + // function. This is the first instruction of the function, insert them before + // this + Instruction *FI = &*(F_wrapper_api->getEntryBlock().begin()); // FIXME: verify that we want 1 as a target device // In this backend, the target device is GPU, represented by i32 1. ConstantInt *TargetDeviceID = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); + ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); for (Function::arg_iterator ai = F_wrapper_api->arg_begin(), - ae = F_wrapper_api->arg_end(); ai != ae; ++ai) { - Argument* Arg = &*ai; + ae = F_wrapper_api->arg_end(); + ai != ae; ++ai) { + Argument *Arg = &*ai; if (Arg->getType()->isPointerTy()) { Value *Args[] = {Arg, TargetDeviceID}; - CallInst::Create(hpvm_request_tensor, - ArrayRef<Value*>(Args, 2), - "", FI); + CallInst::Create(hpvm_request_tensor, ArrayRef<Value *>(Args, 2), "", FI); } } @@ -1485,8 +1430,8 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) { CGM.transition(dyn_cast<IntrinsicInst>(I)); } errs() << "CLONED FUNCTION: " << *F_wrapper_api << "\n"; - // errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n"; - //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP); + // errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n"; + // CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP); CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP); return; @@ -1501,30 +1446,26 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) { // Get the In Place Analysis Results InPlaceDFGAnalysis::InPlaceDFGParameter IPP = - (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); + (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); + + std::vector<DFInternalNode *> Roots = DFG.getRoots(); - - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - // Visitor for Code Generation Graph Traversal - CGT_WrapperAPI *CGTVisitor = new CGT_WrapperAPI(M, DFG, IPP, - QuantizationInputsFilename, - ConfigurationInputsFilename); + CGT_WrapperAPI *CGTVisitor = new CGT_WrapperAPI( + M, DFG, IPP, QuantizationInputsFilename, ConfigurationInputsFilename); // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { + for (auto rootNode : Roots) { // Initiate code generation for root DFNode CGTVisitor->visit(rootNode); } - //TODO: Edit module epilogue to remove the VISC intrinsic declarations + // TODO: Edit module epilogue to remove the VISC intrinsic declarations delete CGTVisitor; - return true; } - /****************************************************************************** * Helper functions * ******************************************************************************/ @@ -1532,13 +1473,14 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) { /* Method needs to be called as part of an analysis pre-step, before code * * generation is run on a node function, so that the HPVM intrinsics are still * * in place. */ -bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N, - InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) { +bool isValidOperandForInPlaceOperation( + Value *Op, Function *Fgen, DFNode *N, + InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) { if (Argument *Arg = dyn_cast<Argument>(Op)) { DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n"); assert((Arg->getParent() == Fgen) && - "Extra Parameter in body of Function\n"); + "Extra Parameter in body of Function\n"); // Candidate parameter is a function argument // In this case, consult the result of in place analysis // Find position in arg list @@ -1552,11 +1494,10 @@ bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N, DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n"); return false; } - } - else { + } else { // If it is not an argument, then it needs to be the result of // another intrinsic. These are new objects that are allocated, - // and consumed by next intrinsic. + // and consumed by next intrinsic. DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n"); if (dyn_cast<IntrinsicInst>(Op)) { DEBUG(errs() << *Arg << "\t: local, suitable for in place\n"); @@ -1576,5 +1517,3 @@ static RegisterPass<DFG2LLVM_WrapperAPI> X("dfg2llvm-wrapperapi", false /* does not modify the CFG */, true /* transformation, * * not just analysis */); - - diff --git a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp b/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp index 6944d0d0e2..45ad0ece23 100644 --- a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp +++ b/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp @@ -46,10 +46,9 @@ namespace { struct DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls : public DFG2LLVM { static char ID; // Pass identification, replacement for typeid DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls() : DFG2LLVM(ID) {} -private: +private: public: - void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired<BuildDFG>(); AU.addRequired<InPlaceDFGAnalysisWrapper>(); @@ -64,7 +63,7 @@ public: class CGT_ReplaceApproxHPVMIntrinsicsWithFCalls : public CodeGenTraversal { private: - //Member variables + // Member variables InPlaceDFGAnalysis::InPlaceDFGParameter *IPP; // VISC Runtime API and Tensor runtime API @@ -74,7 +73,7 @@ private: relevant code also, but I leave in in for now until verified. */ FunctionCallee llvm_hpvm_initTensorRt; FunctionCallee llvm_hpvm_cleanupTensorRt; -// Constant* hpvm_request_tensor; DONE: request tensor will not be used + // Constant* hpvm_request_tensor; DONE: request tensor will not be used // Functions bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N); @@ -82,27 +81,25 @@ private: // Virtual Functions void init(); void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); + void codeGen(DFInternalNode *N); + void codeGen(DFLeafNode *N); public: - // Constructor - CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP) - : CodeGenTraversal(_M, _DFG), IPP(&_IPP) { + CGT_ReplaceApproxHPVMIntrinsicsWithFCalls( + Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP) + : CodeGenTraversal(_M, _DFG), IPP(&_IPP) { initRuntimeAPI(); } - }; -bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::isValidOperandForInPlaceOperation(Value *Op, - Function *Fgen, - DFNode *N) { +bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls:: + isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N) { // We only expect the if branch to be taken if (Argument *Arg = dyn_cast<Argument>(Op)) { DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n"); assert((Arg->getParent() == Fgen) && - "Extra Parameter in body of Function\n"); + "Extra Parameter in body of Function\n"); // Candidae parameter is a function argument // In this case, consult the result of in place analysis // Find position in arg list @@ -116,8 +113,7 @@ bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::isValidOperandForInPlaceOperatio DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n"); return false; } - } - else { + } else { // If it is not an argument, then it needs to be the result of // another intrinsic. These are new objects that are allocated, // and consumed by next intrinsic. Alternatively, the intrinsic @@ -133,32 +129,22 @@ bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::isValidOperandForInPlaceOperatio return true; else return false; - } - else { + } else { DEBUG(errs() << *Arg << "\t: local, not suitable for in place\n"); return false; } } } - -void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::init() { -} +void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::init() {} // Initialize the VISC runtime API. This makes it easier to insert these calls void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::initRuntimeAPI() { // Load Runtime API Module SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n"); - - // FIXME: set correct path - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_cpu_runtime.ll"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) + runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext()); + if (runtimeModule == nullptr) DEBUG(errs() << Err.getMessage()); else DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); @@ -169,125 +155,123 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::initRuntimeAPI() { // - request a tensor DECLARE(llvm_hpvm_initTensorRt); DECLARE(llvm_hpvm_cleanupTensorRt); -// DECLARE(hpvm_request_tensor); + // DECLARE(hpvm_request_tensor); // Find hpvm.init and visc.cleanup calls, and add placeholder methods // for initialization and cleanup of the hpvm tensor runtime - Function* VI = M.getFunction("llvm.hpvm.init"); + Function *VI = M.getFunction("llvm.hpvm.init"); assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n"); InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create(llvm_hpvm_initTensorRt, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); + CallInst::Create( + llvm_hpvm_initTensorRt, + ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), + "", InitCall); - Function* VC = M.getFunction("llvm.hpvm.cleanup"); + Function *VC = M.getFunction("llvm.hpvm.cleanup"); assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n"); CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall); - + CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value *>(), "", + CleanupCall); } -void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs () << "Skipping internal node\n"; +void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFInternalNode *N) { + errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n"; + errs() << "Skipping internal node\n"; } - -void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) { +void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode *N) { // Skip if it is a dummy node - if(N->isDummyNode()) { + if (N->isDummyNode()) { DEBUG(errs() << "Skipping dummy node\n"); return; } // Abort if it is an allocation node - if(N->isAllocationNode()) { + if (N->isAllocationNode()) { assert(false && "Allocation Node not expected in ApproxHPVM"); return; } // Search for intrinsic only if it has the right hint if (!checkPreferredTarget(N, hpvm::CPU_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; + errs() << "Skipping node: " << N->getFuncPointer()->getName() << "\n"; return; } // Get the function associated with the dataflow node Function *F = N->getFuncPointer(); - errs()<<"function name = "<< F->getName()<<"\n"; + errs() << "function name = " << F->getName() << "\n"; std::vector<IntrinsicInst *> IItoRemove; for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { Instruction *I = &(*i); if (BuildDFG::isHPVMIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") - && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + assert( + (II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") && + "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); /********************* Handle VISC Tensor intrinsics ********************/ - // We replace them with calls to functions with implementations at the LLVM level + // We replace them with calls to functions with implementations at the + // LLVM level switch (II->getIntrinsicID()) { - case Intrinsic::hpvm_tensor_convolution: - { /* llvm.hpvm.tensor.convolution */ + case Intrinsic::hpvm_tensor_convolution: { /* llvm.hpvm.tensor.convolution + */ DEBUG(errs() << F->getName() << "\t: Handling tensor convolution \n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); + Args.push_back(II->getOperand(2)); Args.push_back(II->getOperand(3)); Args.push_back(II->getOperand(4)); Args.push_back(II->getOperand(5)); - Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); - Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); + Constant *conv_mode = + ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); + Constant *conv_precision = + ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); Args.push_back(conv_mode); Args.push_back(conv_precision); - + // Create function call FunctionCallee tensorConvolutionCPU; DECLARE(tensorConvolutionCPU); - - CallInst* CI = CallInst::Create(tensorConvolutionCPU, - Args, "", II); + + CallInst *CI = CallInst::Create(tensorConvolutionCPU, Args, "", II); // We can replace the call to hpvm.tensor.mul with the LLVM call II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - case Intrinsic::hpvm_tensor_mul: - { /* llvm.hpvm.tensor.mul */ + case Intrinsic::hpvm_tensor_mul: { /* llvm.hpvm.tensor.mul */ DEBUG(errs() << F->getName() << "\t: Handling tensor mul\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); // Create function call FunctionCallee tensorGemmCPU; DECLARE(tensorGemmCPU); - - CallInst* CI = CallInst::Create(tensorGemmCPU, - Args, "", II); + + CallInst *CI = CallInst::Create(tensorGemmCPU, Args, "", II); // We can replace the call to hpvm.tensor.mul with the LLVM call II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - case Intrinsic::hpvm_tensor_add: - { /* llvm.hpvm.tensor.add */ + case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */ DEBUG(errs() << F->getName() << "\t: Handling tensor add\n"); // Tensor add(a,b) is in place for argument a. Value *Op = II->getOperand(0); @@ -297,12 +281,13 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) { // Code generation cannot continue if this is false, because the target // only provides an in place operation - // FIXME: remove this comment - must check for in-place - //assert(inplace && - // "Operand not valid for in place operation. Code gen aborted.\n"); + // FIXME: remove this comment - must check for in-place + // assert(inplace && + // "Operand not valid for in place operation. Code gen + // aborted.\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); Args.push_back(II->getOperand(1)); @@ -316,12 +301,10 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) { // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; case Intrinsic::hpvm_tensor_pool_max: - case Intrinsic::hpvm_tensor_pool_mean: - { /* llvm.hpvm.tensor.relu */ + case Intrinsic::hpvm_tensor_pool_mean: { /* llvm.hpvm.tensor.relu */ DEBUG(errs() << F->getName() << "\t: Handling tensor_pool_max\n"); // Tensor relu(a) is in place for argument a. Value *Op = II->getOperand(0); @@ -333,45 +316,48 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) { assert(inplace && "Operand not valid for in place operation. Code gen aborted.\n"); - // Argument list - tensorPooling(input, poolFunction, window_height, window_width, vertical_pad, horizontal_pad, - // vertical_stride, horizontal_stride); - std::vector<Value*> Args; + // Argument list - tensorPooling(input, poolFunction, window_height, + // window_width, vertical_pad, horizontal_pad, + // vertical_stride, horizontal_stride); + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); - int pool_type = 0; - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max){ + int pool_type = 0; + if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) { pool_type = 0; - } - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean){ + } + if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) { pool_type = 1; - } - - Constant* constPoolType = ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type); - Args.push_back(constPoolType); // ID for max pool. Min/Avg have different IDs (non-zero) - Args.push_back(II->getOperand(1)); + } + + Constant *constPoolType = + ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type); + Args.push_back(constPoolType); // ID for max pool. Min/Avg have + // different IDs (non-zero) + Args.push_back(II->getOperand(1)); Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); + Args.push_back(II->getOperand(3)); Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - Args.push_back(II->getOperand(6)); + Args.push_back(II->getOperand(5)); + Args.push_back(II->getOperand(6)); // Create function call FunctionCallee tensorPoolingCPU; DECLARE(tensorPoolingCPU); - CallInst* CI = CallInst::Create(tensorPoolingCPU, Args, "", II); + CallInst *CI = CallInst::Create(tensorPoolingCPU, Args, "", II); - // Replacing intrinsic result uses with the result of the LLVM call + // Replacing intrinsic result uses with the result of the LLVM call II->replaceAllUsesWith(CI); // Mark to remove at the end IItoRemove.push_back(II); - }break; + } break; case Intrinsic::hpvm_tensor_relu: case Intrinsic::hpvm_tensor_clipped_relu: - case Intrinsic::hpvm_tensor_tanh: - { /* llvm.hpvm.tensor.relu */ - DEBUG(errs() << F->getName() << "\t: Handling tensor activation functions \n"); + case Intrinsic::hpvm_tensor_tanh: { /* llvm.hpvm.tensor.relu */ + DEBUG(errs() << F->getName() + << "\t: Handling tensor activation functions \n"); // Tensor relu(a) is in place for argument a. Value *Op = II->getOperand(0); @@ -383,42 +369,39 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) { "Operand not valid for in place operation. Code gen aborted.\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); - if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu){ + if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) { // Create function call FunctionCallee tensorReluCPU; DECLARE(tensorReluCPU); CallInst::Create(tensorReluCPU, Args, "", II); - } - else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu){ + } else if (II->getIntrinsicID() == + Intrinsic::hpvm_tensor_clipped_relu) { // Create function call //-- FunctionCallee tensorClippedRelu; - FunctionCallee tensorRelu2CPU; + FunctionCallee tensorRelu2CPU; DECLARE(tensorRelu2CPU); CallInst::Create(tensorRelu2CPU, Args, "", II); - } - else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh){ + } else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) { // Create function call FunctionCallee tensorTanhCPU; - errs()<<"tensorTanh Call = \n\n"; + errs() << "tensorTanh Call = \n\n"; DECLARE(tensorTanhCPU); - //errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l"; + // errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l"; CallInst::Create(tensorTanhCPU, Args, "", II); - } - + } + // We can replace the call to hpvm.tensor.relu with the 1st argument // that, due to in place operation, now contains the result II->replaceAllUsesWith(II->getOperand(0)); // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; - case Intrinsic::hpvm_tensor_softmax: - { /* llvm.hpvm.tensor.softmax */ + case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */ DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n"); // Tensor relu(a) is in place for argument a. Value *Op = II->getOperand(0); @@ -431,7 +414,7 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) { "Operand not valid for in place operation. Code gen aborted.\n"); // Argument list for the runtime call - std::vector<Value*> Args; + std::vector<Value *> Args; Args.push_back(II->getOperand(0)); // Create function call @@ -444,24 +427,21 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) { // Mark to remove at the end IItoRemove.push_back(II); - } - break; + } break; default: llvm_unreachable("Unknown VISC Intrinsic!"); break; - } - } - } // We need to do this explicitly: DCE pass may not remove them. // Traverse the vector backwards, otherwise definitions are deleted while // their subsequent uses are still around. for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) { + re = IItoRemove.rend(); + ri != re; ++ri) { DEBUG(errs() << "Erasing: " << **ri << "\n"); errs() << "Erasing: " << **ri << "\n"; (*ri)->eraseFromParent(); @@ -479,34 +459,32 @@ bool DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls::runOnModule(Module &M) { // Get the In Place Analysis Results InPlaceDFGAnalysis::InPlaceDFGParameter IPP = - (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); + (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); // Print results printInPlaceDFGParameter(IPP); - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - + std::vector<DFInternalNode *> Roots = DFG.getRoots(); + // Visitor for Code Generation Graph Traversal CGT_ReplaceApproxHPVMIntrinsicsWithFCalls *CGTVisitor = - new CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(M, DFG, IPP); + new CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(M, DFG, IPP); // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { + for (auto rootNode : Roots) { // Initiate code generation for root DFNode CGTVisitor->visit(rootNode); } - //TODO: Edit module epilogue to remove the VISC intrinsic declarations + // TODO: Edit module epilogue to remove the VISC intrinsic declarations delete CGTVisitor; return true; } - /****************************************************************************** * Helper functions * ******************************************************************************/ - } // End of namespace char DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls::ID = 0; @@ -515,5 +493,3 @@ static RegisterPass<DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls> X("replace-i false /* does not modify the CFG */, true /* transformation, * * not just analysis */); - - -- GitLab