From b9838d8c90b99c7ab7d8b10dea8bc174a6e14f35 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Fri, 6 Dec 2019 00:17:03 -0600
Subject: [PATCH] Removing ApproxHPVM passes from ./lib/Transforms

---
 .../ApproxScheduler/ApproxScheduler.cpp       |  275 ---
 .../Transforms/ApproxScheduler/CMakeLists.txt |   12 -
 .../Transforms/ApproxScheduler/LLVMBuild.txt  |   21 -
 .../Transforms/DFG2LLVM_CUDNN/CMakeLists.txt  |   12 -
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp         |  609 ------
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports     |    0
 .../Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt   |   21 -
 .../DFG2LLVM_PROMISE/CMakeLists.txt           |   12 -
 .../DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp     | 1283 --------------
 .../DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports |    0
 .../Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt |   21 -
 .../DFG2LLVM_WrapperAPI/CMakeLists.txt        |   12 -
 .../DFG2LLVM_WrapperAPI.cpp                   | 1530 -----------------
 .../DFG2LLVM_WrapperAPI.exports               |    0
 .../DFG2LLVM_WrapperAPI/LLVMBuild.txt         |   21 -
 .../FuseHPVMTensorNodes/CMakeLists.txt        |   12 -
 .../FuseHPVMTensorNodes.cpp                   |  971 -----------
 .../FuseHPVMTensorNodes.exports               |    0
 .../FuseHPVMTensorNodes/LLVMBuild.txt         |   21 -
 .../InsertApproxInfo/CMakeLists.txt           |   12 -
 .../InsertApproxInfo/InsertApproxInfo.cpp     |  498 ------
 .../Transforms/InsertApproxInfo/LLVMBuild.txt |   21 -
 22 files changed, 5364 deletions(-)
 delete mode 100644 llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp
 delete mode 100644 llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp
 delete mode 100644 llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt

diff --git a/llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp b/llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp
deleted file mode 100644
index 7537b517bc..0000000000
--- a/llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp
+++ /dev/null
@@ -1,275 +0,0 @@
-//===------------------------- ApproxScheduler.cpp ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ApproxScheduler"
-
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/IR/InstrTypes.h"
-#include <unordered_map>
-#include <dirent.h>
-#include <stdio.h>
-#include <sstream>
-#include <fstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-using namespace inplacedfg;
-
-namespace {
-
-static cl::opt<std::string> category_input("category", cl::desc(" Hardware-agnostic ranking category {log, linear, quad} "));
-static cl::opt<int> rank_input("rank", cl::desc(" Hardware-agnostic rank given by autotuner "));
-
-struct ApproxMetrics {
-  std::string op_name;
-  std::string category;
-  unsigned int rank; // rank given by autotuner
-  double approx_level;
-  // Relative L-norm metrics
-  double relative_l1;
-  double relative_l2;
-  double relative_linf;
-  // Mean L-norm metrics
-  double mean_l1;
-  double mean_l2;
-  double mean_linf;
-};
-
-struct ApproxSchedulerWrapperPass : public ModulePass {
-  static char ID; // Pass identification, replacement for typeid
-  ApproxSchedulerWrapperPass() : ModulePass(ID) {}
-
-public:
-  // Functions
-  bool runOnModule(Module &M);
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class ApproxScheduler : public CodeGenTraversal {
-
-private:
-  int rank;             // Rank to use for scheduling - ranks added in operand bundles
-  std::string category; // category = {log, linear, quad}
-
-  // Virtual Functions
-  void init() {}
-  void initRuntimeAPI() {}
-  void codeGen(DFInternalNode *N);
-  void codeGen(DFLeafNode *N);
-  bool rankMatches(OperandBundleUse opBundle, std::string category, int rank);
-  ApproxMetrics *getApproxInfo(Instruction *I);
-  ApproxMetrics *loadApproxMetrics(OperandBundleUse opBundle);
-
-  // Tracks the id of the tensor op processed
-  unsigned int currentID;
-
-public:
-  // Constructor
-  ApproxScheduler(Module &_M, BuildDFG &_DFG, std::string category, int rank);
-  void run();
-};
-
-void ApproxSchedulerWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<BuildDFG>();
-  AU.addPreserved<BuildDFG>();
-}
-
-bool ApproxSchedulerWrapperPass::runOnModule(Module &M) {
-
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  std::string category = category_input.getValue();
-  int rank = rank_input.getValue();
-
-  ApproxScheduler scheduler(M, DFG, category, rank);
-  scheduler.run();
-
-  return true;
-}
-
-ApproxScheduler::ApproxScheduler(Module &_M, BuildDFG &_DFG, std::string category, int rank)
-    : CodeGenTraversal(_M, _DFG) {
-  this->category = category;
-  this->rank = rank;
-}
-
-void ApproxScheduler::run() {
-
-  errs() << "\n NOTE: Approximation-based scheduling transform \n";
-  std::vector<DFInternalNode *> Roots = DFG.getRoots();
-
-  // Iterate over all the DFGs
-  for (auto rootNode : Roots) {
-    this->visit(rootNode);
-  }
-
-  return;
-}
-
-/*** Analysis of internal node ***/
-void ApproxScheduler::codeGen(DFInternalNode *N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-}
-
-ApproxMetrics *ApproxScheduler::loadApproxMetrics(OperandBundleUse opBundle) {
-
-  ApproxMetrics *approx_metrics = new ApproxMetrics;
-  for (unsigned int j = 0; j < opBundle.Inputs.size(); j = j + 2) {
-
-    GlobalVariable *gv =
        dyn_cast<GlobalVariable>(opBundle.Inputs[j].get());
-    ConstantDataArray *constString = dyn_cast<ConstantDataArray>(gv->getInitializer());
-    std::string metric = std::string(constString->getAsCString().data());
-
-    if (metric == "rel_l1") {
-      double norm_value = dyn_cast<ConstantFP>(opBundle.Inputs[j + 1].get())->getValueAPF().convertToDouble();
-      approx_metrics->relative_l1 = norm_value;
-      errs() << "***relative_l1 = " << approx_metrics->relative_l1 << "\n";
-    }
-
-    if (metric == "rel_l2") {
-      double norm_value = dyn_cast<ConstantFP>(opBundle.Inputs[j + 1].get())->getValueAPF().convertToDouble();
-      approx_metrics->relative_l2 = norm_value;
-    }
-  }
-
-  return approx_metrics;
-}
-
-bool ApproxScheduler::rankMatches(OperandBundleUse opBundle, std::string category_in, int rank_in) {
-
-  // Extracting value of the 'category' attribute
-  GlobalVariable *gv = dyn_cast<GlobalVariable>(opBundle.Inputs[1].get());
-  ConstantDataArray *constString = dyn_cast<ConstantDataArray>(gv->getInitializer());
-  std::string category = std::string(constString->getAsCString().data());
-  errs() << "*category = " << category << "\n";
-
-  int rank = dyn_cast<ConstantInt>(opBundle.Inputs[3].get())->getZExtValue();
-  errs() << "-rank = " << rank << "\n";
-
-  return (category == category_in && rank == rank_in);
-}
-
-ApproxMetrics *ApproxScheduler::getApproxInfo(Instruction *I) {
-
-  CallSite CS(I);
-  if (CS.hasOperandBundles()) {
-    errs() << "CallSite has OperandBundles \n";
-
-    for (unsigned int i = 0; i < CS.getNumOperandBundles(); i++) {
-      OperandBundleUse bundleUse = CS.getOperandBundleAt(i);
-      errs() << "bundleUse -> getTagName() = " << bundleUse.getTagName() << "\n";
-
-      if (rankMatches(bundleUse, category, rank)) {
-        return loadApproxMetrics(bundleUse);
-      }
-
-      /*for(unsigned int j = 0; j < bundleUse.Inputs.size(); j++){
-        Value* bundleVal = bundleUse.Inputs[j].get();
-        errs()<<"Val = "<<*bundleVal<<"\n";
-      }
-      */
-    }
-  } else {
-    errs() << "DOES NOT have OperandBundles \n";
-  }
-
-  assert(false && "No Bundle Matched the provided rank and Category!\n");
-  return nullptr;
-}
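// A minimal sketch (not part of this file) of how a tensor-op call site can
// be annotated so that rankMatches()/getApproxInfo() above find the metadata.
// The bundle tag "approx_md" and the exact input layout are assumptions
// inferred from the indices read above (Inputs[1] = category string,
// Inputs[3] = rank, then metric name/value pairs such as "rel_l1").
// Requires llvm/IR/IRBuilder.h in addition to the includes above.
static void attachApproxBundle(CallInst *CI, StringRef Category, int Rank,
                               double RelL1) {
  LLVMContext &Ctx = CI->getModule()->getContext();
  IRBuilder<> Builder(CI);
  auto Str = [&](StringRef S) -> Value * { return Builder.CreateGlobalString(S); };

  std::vector<Value *> Inputs = {
      Str("category"), Str(Category),                                 // Inputs[0..1]
      Str("rank"), ConstantInt::get(Type::getInt32Ty(Ctx), Rank),     // Inputs[2..3]
      Str("rel_l1"), ConstantFP::get(Type::getFloatTy(Ctx), RelL1)};  // metric pair

  // Operand bundles are immutable, so clone the call with the bundle added
  // and swap it in for the original.
  CallInst *NewCI = CallInst::Create(CI, {OperandBundleDef("approx_md", Inputs)}, CI);
  NewCI->takeName(CI);
  CI->replaceAllUsesWith(NewCI);
  CI->eraseFromParent();
}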
\n"); - -} - - -/*** Analysis of leaf node ***/ -void ApproxScheduler::codeGen(DFLeafNode* N) { - DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n"); - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { - assert(false && "Allocation Node not expected in ApproxHPVM"); - return; - } - - Function *F = N->getFuncPointer(); - Module* M = F->getParent(); - - std::vector<ApproxMetrics*> metrics_list; - /**** Reading all tensor operations in the DFG Leaf Node ****/ - for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { - Instruction *I = &(*i); - errs()<<*I<<"\n"; - - if (BuildDFG::isViscIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - // FIXME: The assumption of only tensor instrinsics is restrictive - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") - && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); - - // NOTE: Get tensorOp name - the scheduling decisions are made per-operation type - std::string intrinsic_id = std::string(II->getCalledFunction()->getName().data()); - ApproxMetrics* approx_metrics = getApproxInfo(I); - metrics_list.push_back(approx_metrics); - } - - } - -} - -char ApproxSchedulerWrapperPass::ID = 0; -static RegisterPass<ApproxSchedulerWrapperPass> X("approx-scheduler", - "Select target compute unit based on aprroximation metrics", - false /* does not modify the CFG */, - false /* not transformation, just analysis */); - - - -} // End of namespace - diff --git a/llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt b/llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt deleted file mode 100644 index 267ad1d859..0000000000 --- a/llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( ApproxScheduler - ApproxScheduler.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt b/llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt deleted file mode 100644 index ccd8479c2e..0000000000 --- a/llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/LocalMem/LLVMBuild.txt ------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = ApproxScheduler -parent = Transforms diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt deleted file mode 100644 index dc98faafec..0000000000 --- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( LLVMDFG2LLVM_CUDNN - DFG2LLVM_CUDNN.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp deleted file mode 100644 index abc4e9ef89..0000000000 --- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp +++ /dev/null @@ -1,609 +0,0 @@ -//=== DFG2LLVM_CUDNN.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -#define ENABLE_ASSERTS - -#define DEBUG_TYPE "DFG2LLVM_CUDNN" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/IR/Attributes.h" -#include "llvm-c/Core.h" -#include "llvm/SupportVISC/VISCTimer.h" -#include "llvm/SupportVISC/DFG2LLVM.h" -#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" -#include <sstream> - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; - -using namespace inplacedfg; - -namespace { -// Helper class declarations - -// DFG2LLVM_CUDNN - The first implementation. 
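// The DECLARE(X) macro used throughout initRuntimeAPI() and codeGen() below
// comes from llvm/SupportVISC/DFG2LLVM.h and is not shown in this patch. A
// plausible expansion, inferred from how it is used here (declaring runtime
// entry points such as tensorAdd or llvm_hpvm_initTensorRt by name), is
// roughly:
//
//   #define DECLARE(X)                                                  \
//     X = M.getOrInsertFunction(                                        \
//         #X, runtimeModule->getFunction(#X)->getFunctionType())
//
// i.e. it re-declares the like-named function from the loaded
// tensor_runtime.ll module inside the module being transformed, so calls to
// it can be created locally and resolved when linking against the runtime.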
-
-struct DFG2LLVM_CUDNN : public DFG2LLVM {
-  static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_CUDNN() : DFG2LLVM(ID) {}
-private:
-
-public:
-
-  void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<BuildDFG>();
-    AU.addRequired<InPlaceDFGAnalysisWrapper>();
-    AU.addPreserved<BuildDFG>();
-    AU.addPreserved<InPlaceDFGAnalysisWrapper>();
-  }
-
-  bool runOnModule(Module &M);
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class CGT_CUDNN : public CodeGenTraversal {
-
-private:
-  // Member variables
-  InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;
-
-  // VISC Runtime API and Tensor runtime API
-  Constant *llvm_hpvm_initTensorRt;
-  Constant *llvm_hpvm_cleanupTensorRt;
-  Constant *hpvm_request_tensor;
-
-  // Functions
-  bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N);
-
-  // Virtual Functions
-  void init();
-  void initRuntimeAPI();
-  void codeGen(DFInternalNode *N);
-  void codeGen(DFLeafNode *N);
-
-public:
-  // Constructor
-  CGT_CUDNN(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
-      : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
-    initRuntimeAPI();
-  }
-};
-
-bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
-                                                  Function *Fgen,
-                                                  DFNode *N) {
-
-  if (Argument *Arg = dyn_cast<Argument>(Op)) {
-    DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
-    assert((Arg->getParent() == Fgen) &&
-           "Extra Parameter in body of Function\n");
-    // Candidate parameter is a function argument
-    // In this case, consult the result of in place analysis
-    // Find position in arg list
-    unsigned pos = Arg->getArgNo();
-    // If this parameter cannot be used for in place operation
-    // code gen cannot continue
-    if (IPP->at(N)[pos]) {
-      DEBUG(errs() << *Arg << "\t: argument, suitable for in place\n");
-      return true;
-    } else {
-      DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n");
-      return false;
-    }
-  }
-  else {
-    // If it is not an argument, then it needs to be the result of
-    // another intrinsic. These are new objects that are allocated,
-    // and consumed by next intrinsic.
-    DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n");
-    if (dyn_cast<IntrinsicInst>(Op)) {
-      DEBUG(errs() << *Op << "\t: local, suitable for in place\n");
-      return true;
-    } else {
-      DEBUG(errs() << *Op << "\t: local, not suitable for in place\n");
-      return false;
-    }
-  }
-}
-
-void CGT_CUDNN::init() {
-}
-
-// Initialize the VISC runtime API.
This makes it easier to insert these calls -void CGT_CUDNN::initRuntimeAPI() { - - // Load Runtime API Module - SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n"); - - // FIXME: set correct path - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) - DEBUG(errs() << Err.getMessage()); - else - DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); - - // Get or insert Global declarations for - // - initialization - // - cleanup - // - request a tensor - DECLARE(llvm_hpvm_initTensorRt); - DECLARE(llvm_hpvm_cleanupTensorRt); - DECLARE(hpvm_request_tensor); - - // Find visc.init and visc.cleanup calls, and add placeholder methods - // for initialization and cleanup of the hpvm tensor runtime - - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n"); - InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create(llvm_hpvm_initTensorRt, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); - - Function* VC = M.getFunction("llvm.visc.cleanup"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n"); - CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall); - -} - -void CGT_CUDNN::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs () << "Skipping internal node\n"; -} - - -void CGT_CUDNN::codeGen(DFLeafNode* N) { - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { - assert(false && "Allocation Node not expected in ApproxHPVM"); - return; - } - - // Generate code only if it has the right hint - if (!checkPreferredTarget(N, visc::CUDNN_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; - return; - } - - // Get the function associated with the dataflow node - Function *F = N->getFuncPointer(); - errs()<<"function name = "<< F->getName()<<"\n"; - - /* Removing HPVM in/out/inout function attributes */ - for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ai++){ - Argument *Arg = &*ai; - if(Arg->hasAttribute(Attribute::In)) - Arg->removeAttr(Attribute::In); - if(Arg->hasAttribute(Attribute::Out)) - Arg->removeAttr(Attribute::Out); - if(Arg->hasAttribute(Attribute::InOut)) - Arg->removeAttr(Attribute::InOut); - } - - // Look up if we have visited this function before. If we have, then just - // get the cloned function pointer from DFNode. Otherwise, create the cloned - // function and add it to the DFNode GenFunc. 
- Function *F_cudnn = N->getGenFuncForTarget(visc::CUDNN_TARGET); - - assert((F_cudnn == NULL) && - "Error: Visiting a node for which code already generated"); - - // Clone the function - ValueToValueMapTy VMap; - std::string FName(F->getName().data()); - F_cudnn = CloneFunction(F, VMap); - F_cudnn->setName(FName + "_cudnn"); - errs()<<"Cloned function name2 = "<<F_cudnn->getName()<<"\n"; - F_cudnn->removeFromParent(); - M.getFunctionList().push_back(F_cudnn); - - N->addGenFunc(F_cudnn, visc::CUDNN_TARGET, true); - - // Adding nounwind to generated function : FIXME: needed? - DEBUG(errs() << "Adding nounwind to generated function\n"); - F_cudnn->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - - // Add llvm_visc_requestTensor calls for every pointer argument of the function - // (they are all expected to be tensors), at the beginning of the function. - // This is the first instruction of the function, insert them before this - Instruction* FI = &*(F_cudnn->getEntryBlock().begin()); - - // In this backend, the target device is GPU, represented by i32 1. - ConstantInt *TargetDeviceID = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); - - for (Function::arg_iterator ai = F_cudnn->arg_begin(), - ae = F_cudnn->arg_end(); ai != ae; ++ai) { - Argument* Arg = &*ai; - if (Arg->getType()->isPointerTy()) { - Value *Args[] = {Arg, TargetDeviceID}; - CallInst::Create(hpvm_request_tensor, - ArrayRef<Value*>(Args, 2), - "", FI); - } - } - - std::vector<IntrinsicInst *> IItoRemove; - - for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) { - Instruction *I = &(*i); - - if (BuildDFG::isViscIntrinsic(I)) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") - && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); - - /********************* Handle VISC Tensor intrinsics ********************/ - switch (II->getIntrinsicID()) { - - case Intrinsic::visc_tensor_convolution: - { /* llvm.hpvm.tensor.mul */ - // Tensor mul is not in place. - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n"); - - // Argument list for the runtime call - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); - Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - - Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); - Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - - Args.push_back(conv_mode); - Args.push_back(conv_precision); - - // Create cudnn runtime function call - Constant* tensorConvolution; - DECLARE(tensorConvolution); - - CallInst* CI = CallInst::Create(tensorConvolution, - Args, "", II); - // We can replace the call to hpvm.tensor.mul with the runtime call - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - - case Intrinsic::visc_tensor_group_convolution: - { /* llvm.hpvm.tensor.mul */ - // Tensor mul is not in place. 
- DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n"); - - // Argument list for the runtime call - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); - Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - - Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); - - Args.push_back(conv_mode); - Args.push_back(II->getOperand(7)); - - // Create cudnn runtime function call - Constant* tensorConvolution; - DECLARE(tensorConvolution); - - CallInst* CI = CallInst::Create(tensorConvolution, - Args, "", II); - // We can replace the call to hpvm.tensor.mul with the runtime call - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - - case Intrinsic::visc_tensor_batchnorm: - { /* llvm.hpvm.tensor.batchnorm */ - // Tensor batchnorm is in place. - // FIXME: Add Check for InPlace Analysis - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor batch normalization \n"); - - // Argument list for the runtime call - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); - Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - - // Create cudnn runtime function call - Constant* tensorBatchNorm; - DECLARE(tensorBatchNorm); - - CallInst* CI = CallInst::Create(tensorBatchNorm, - Args, "", II); - // We can replace the call to hpvm.tensor.batchnorm with the TensorRT call - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - - - case Intrinsic::visc_tensor_mul: - { /* llvm.hpvm.tensor.mul */ - // Tensor mul is not in place. - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n"); - - // Argument list for the runtime call - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - - // Create cudnn runtime function call - Constant* tensorGemmGPU; - DECLARE(tensorGemmGPU); - - CallInst* CI = CallInst::Create(tensorGemmGPU, - Args, "", II); - // We can replace the call to hpvm.tensor.mul with the runtime call - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_tensor_add: - { /* llvm.hpvm.tensor.add */ - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n"); - // Tensor add(a,b) is in place for argument a. - Value *Op = II->getOperand(0); - - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N); - // Code generation cannot continue if this is false, because the target - // only provides an in place operation - - // FIXME: remove this comment - must check for in-place - //assert(inplace && - // "Operand not valid for in place operation. 
Code gen aborted.\n"); - - // Argument list for the runtime call - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - Args.push_back(II->getOperand(1)); - - // Create cudnn runtime function call - Constant* tensorAdd; - DECLARE(tensorAdd); - CallInst::Create(tensorAdd, Args, "", II); - // We can replace the call to hpvm.tensor.add with the 1st argument - // that, due to in place operation, now contains the result - II->replaceAllUsesWith(II->getOperand(0)); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_tensor_pool_max: - case Intrinsic::visc_tensor_pool_mean: - { /* llvm.visc.tensor.relu */ - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n"); - - // Argument list - tensorPooling(input, poolFunction, window_height, window_width, vertical_pad, horizontal_pad, - // vertical_stride, horizontal_stride); - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - - int pool_type = 0; - if (II->getIntrinsicID() == Intrinsic::visc_tensor_pool_max){ - pool_type = 0; - } - if (II->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean){ - pool_type = 1; - } - - Constant* constPoolType = ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type); - Args.push_back(constPoolType); // ID for max pool. Min/Avg have different IDs (non-zero) - Args.push_back(II->getOperand(1)); - Args.push_back(II->getOperand(2)); - Args.push_back(II->getOperand(3)); - Args.push_back(II->getOperand(4)); - Args.push_back(II->getOperand(5)); - Args.push_back(II->getOperand(6)); - - // Create cudnn runtime function call - Constant* tensorPooling; - DECLARE(tensorPooling); - CallInst* CI = CallInst::Create(tensorPooling, Args, "", II); - - // Replacing intrinsic result uses with the result of the tensor runtime operation - II->replaceAllUsesWith(CI); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - - case Intrinsic::visc_tensor_relu: - case Intrinsic::visc_tensor_clipped_relu: - case Intrinsic::visc_tensor_tanh: - { /* llvm.visc.tensor.relu */ - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation functions \n"); - // Tensor relu(a) is in place for argument a. - Value *Op = II->getOperand(0); - - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N); - // Code generation cannot continue if this is false, because the target - // only provides an in place operation - assert(inplace && - "Operand not valid for in place operation. 
Code gen aborted.\n"); - - // Argument list for the runtime call - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - - if (II->getIntrinsicID() == Intrinsic::visc_tensor_relu){ - // Create cudnn runtime function call - Constant* tensorRelu; - DECLARE(tensorRelu); - CallInst::Create(tensorRelu, Args, "", II); - } - else if (II->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu){ - // Create cudnn runtime function call - //-- Constant* tensorClippedRelu; - Constant* tensorRelu2; - DECLARE(tensorRelu2); - CallInst::Create(tensorRelu2, Args, "", II); - } - else if (II->getIntrinsicID() == Intrinsic::visc_tensor_tanh){ - // Create cudnn runtime function call - Constant* tensorTanh; - errs()<<"tensorTanh Call = \n\n"; - DECLARE(tensorTanh); - //errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l"; - CallInst::Create(tensorTanh, Args, "", II); - } - - // We can replace the call to hpvm.tensor.relu with the 1st argument - // that, due to in place operation, now contains the result - II->replaceAllUsesWith(II->getOperand(0)); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - case Intrinsic::visc_tensor_softmax: - { /* llvm.visc.tensor.softmax */ - DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor softmax\n"); - // Tensor relu(a) is in place for argument a. - Value *Op = II->getOperand(0); - - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N); - // Code generation cannot continue if this is false, because the target - // only provides an in place operation - assert(inplace && - "Operand not valid for in place operation. Code gen aborted.\n"); - - // Argument list for the runtime call - std::vector<Value*> Args; - Args.push_back(II->getOperand(0)); - - // Create cudnn runtime function call - Constant* tensorSoftmax; - DECLARE(tensorSoftmax); - CallInst::Create(tensorSoftmax, Args, "", II); - // We can replace the call to hpvm.tensor.softmax with the 1st argument - // that, due to in place operation, now contains the result - II->replaceAllUsesWith(II->getOperand(0)); - - // Mark to remove at the end - IItoRemove.push_back(II); - } - break; - default: - llvm_unreachable("Unknown VISC Intrinsic!"); - break; - } - } - } - - //--- errs()<<"IIToRemove.size() = "<<IItoRemove.size()<<"\n\n"; - - // We need to do this explicitly: DCE pass may not remove them. - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around. 
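// For instance, with the chain
//   %a = call i8* @llvm.visc.tensor.mul(...)
//   %b = call i8* @llvm.visc.tensor.add(i8* %a, ...)
// both %a and %b end up in IItoRemove in program order. Erasing %a first
// would leave %b with a dangling operand; walking the vector backwards
// erases %b (the user) before %a (the definition).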
- for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(), - re = IItoRemove.rend(); ri != re; ++ri) { - DEBUG(errs() << "Erasing: " << **ri << "\n"); - errs() << "Erasing: " << **ri << "\n"; - (*ri)->eraseFromParent(); - } - - return; -} - -bool DFG2LLVM_CUDNN::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_CUDNN PASS\n"; - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - // Get the In Place Analysis Results - InPlaceDFGAnalysis::InPlaceDFGParameter IPP = - (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); - // Print results - printInPlaceDFGParameter(IPP); - - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - - // Visitor for Code Generation Graph Traversal - CGT_CUDNN *CGTVisitor = new CGT_CUDNN(M, DFG, IPP); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } - - //TODO: Edit module epilogue to remove the VISC intrinsic declarations - delete CGTVisitor; - - return true; -} - - -/****************************************************************************** - * Helper functions * - ******************************************************************************/ - - -} // End of namespace - -char DFG2LLVM_CUDNN::ID = 0; -static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn", - "Dataflow Graph to LLVM for CUDNN Pass", - false /* does not modify the CFG */, - true /* transformation, * - * not just analysis */); - diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt deleted file mode 100644 index 1579b2fc47..0000000000 --- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. 
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = DFG2LLVM_CUDNN
-parent = Transforms
diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt
deleted file mode 100644
index 5b5d2677d0..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMDFG2LLVM_PROMISE
-  DFG2LLVM_PROMISE.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp b/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
deleted file mode 100644
index 184f92910a..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
+++ /dev/null
@@ -1,1283 +0,0 @@
-//=== DFG2LLVM_PROMISE.cpp ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#define ENABLE_ASSERTS
-
-#define DEBUG_TYPE "DFG2LLVM_PROMISE"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm-c/Core.h"
-#include "llvm/SupportVISC/VISCTimer.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include <sstream>
-#include <fstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-
-namespace {
-
-cl::opt<std::string> QuantizationInputsFilename(
-    "quantization-levels-filename",
-    cl::desc("<PROMISE quantization levels input file (path)>"),
-    cl::value_desc("filename"),
-    cl::Required);
-
-// Helper class declarations
-
-// State machine definition for pattern identification
-
-/* An assumption is made for the PROMISE simulator:                        *
- * a leaf node will contain consecutive operations that will map to a     *
- * single PROMISE simulator call.                                         *
- *                                                                        *
- * To alleviate that, the states that correspond to valid patterns        *
- * - (FullyConnectedLayer_(2,3,x), ConvolutionLayer_(2,3,4,x)) -          *
- * can invoke codeGen when detecting the beginning of a new pattern, then *
- * clear the collected IIs and Args, then go to initial and invoke its    *
- * transition.
*/ - -class AbstractState; - -class CodeGenStateMachine { -private: - Module *M; - Module *RtM; - - std::ifstream &qin; // Quantization levels input stream reference - std::vector<Value*> Args; - std::vector<IntrinsicInst*> IIs; - AbstractState *current; - -public: - CodeGenStateMachine(Module *, Module *, std::ifstream &); - - void setCurrent(AbstractState *s) { - current = s; - } - - void transition(IntrinsicInst *II); - - Module *getModule() { - return M; - } - - void getNextQuantizationLevel(float &ql) { - qin >> ql; - } - - void addArgument(Value *Arg) { - Args.push_back(Arg); - } - - void addIntrinsicInst(IntrinsicInst *II) { - IIs.push_back(II); - } - - IntrinsicInst *getIntrinsicInstAt(unsigned idx) { - return IIs[idx]; - } - - void codeGen(); - -}; - -class AbstractState { -public: - enum ID - { - INITIAL_STATE, - FULLY_CONNECTED_LAYER_1, - FULLY_CONNECTED_LAYER_2, - FULLY_CONNECTED_LAYER_3, - FULLY_CONNECTED_LAYER, - CONVOLUTION_LAYER_1, - CONVOLUTION_LAYER_2, - CONVOLUTION_LAYER_3, - CONVOLUTION_LAYER_4, - CONVOLUTION_LAYER, - NO_PATTERN, - }; - -protected: - enum ID StateID; - -public: - enum ID getStateID() { - return StateID; - } - - virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0; - virtual ~AbstractState() {} -}; - -class InitialState : public AbstractState { -public: - InitialState() { - StateID = ID::INITIAL_STATE; - DEBUG(errs() << "new InitialState\n"); - } - ~InitialState() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_1 : public AbstractState { -public: - FullyConnectedLayer_1() { - StateID = ID::FULLY_CONNECTED_LAYER_1; - DEBUG(errs() << "new FullyConnectedLayer_1\n"); - } - ~FullyConnectedLayer_1() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_2 : public AbstractState { -public: - FullyConnectedLayer_2() { - StateID = ID::FULLY_CONNECTED_LAYER_2; - DEBUG(errs() << "new FullyConnectedLayer_2\n"); - } - ~FullyConnectedLayer_2() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_3 : public AbstractState { -public: - FullyConnectedLayer_3() { - StateID = ID::FULLY_CONNECTED_LAYER_3; - DEBUG(errs() << "new FullyConnectedLayer_3\n"); - } - ~FullyConnectedLayer_3() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer : public AbstractState { -public: - FullyConnectedLayer() { - StateID = ID::FULLY_CONNECTED_LAYER; - DEBUG(errs() << "new FullyConnectedLayer\n"); - } - ~FullyConnectedLayer() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_1 : public AbstractState { -public: - ConvolutionLayer_1() { - StateID = ID::CONVOLUTION_LAYER_1; - DEBUG(errs() << "new ConvolutionLayer_1\n"); - } - ~ConvolutionLayer_1() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_2 : public AbstractState { -public: - ConvolutionLayer_2() { - StateID = ID::CONVOLUTION_LAYER_2; - DEBUG(errs() << "new ConvolutionLayer_2\n"); - } - ~ConvolutionLayer_2() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_3 : public AbstractState { -public: - ConvolutionLayer_3() { - StateID = ID::CONVOLUTION_LAYER_3; - DEBUG(errs() << "new ConvolutionLayer_3\n"); - } - ~ConvolutionLayer_3() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class 
ConvolutionLayer_4 : public AbstractState { -public: - ConvolutionLayer_4() { - StateID = ID::CONVOLUTION_LAYER_4; - DEBUG(errs() << "new ConvolutionLayer_4\n"); - } - ~ConvolutionLayer_4() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer : public AbstractState { -public: - ConvolutionLayer() { - StateID = ID::CONVOLUTION_LAYER; - DEBUG(errs() << "new ConvolutionLayer\n"); - } - ~ConvolutionLayer() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class NoPattern : public AbstractState { -public: - NoPattern() { - StateID = ID::NO_PATTERN; - DEBUG(errs() << "new NoPattern\n"); - } - ~NoPattern() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_convolution: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // conv input - - // Read quantization levels for input - float i_min, i_max; - Mch->getNextQuantizationLevel(i_min); - Mch->getNextQuantizationLevel(i_max); - errs() << "i_min: " << i_min << "\n"; - errs() << "i_max: " << i_max << "\n"; - - // Create associated arguments for the quantization levels - Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_min); -// errs() << "IminC : " -// << dyn_cast<ConstantFP>(IminC)->getValueAPF().convertToFloat() -// << "\n"; - Mch->addArgument(IminC); - Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_max); - Mch->addArgument(ImaxC); - - Mch->addArgument(II->getOperand(1)); // conv kernel - - // Read quantization levels for filter - float w_min, w_max; - Mch->getNextQuantizationLevel(w_min); - Mch->getNextQuantizationLevel(w_max); - errs() << "w_min: " << w_min << "\n"; - errs() << "w_max: " << w_max << "\n"; - Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_min); - Mch->addArgument(WminC); - Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_max); - Mch->addArgument(WmaxC); - - Mch->setCurrent(new ConvolutionLayer_1()); - } - break; - case Intrinsic::visc_tensor_mul: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // 1st gemm input - - // Read quantization levels for input - float i_min, i_max; - Mch->getNextQuantizationLevel(i_min); - Mch->getNextQuantizationLevel(i_max); - errs() << "i_min: " << i_min << "\n"; - errs() << "i_max: " << i_max << "\n"; - - Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_min); - Mch->addArgument(IminC); - Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) i_max); - Mch->addArgument(ImaxC); - - Mch->addArgument(II->getOperand(1)); // 2nd gemm input - - // Read quantization levels for weight - float w_min, w_max; - Mch->getNextQuantizationLevel(w_min); - Mch->getNextQuantizationLevel(w_max); - errs() << "w_min: " << w_min << "\n"; - errs() << "w_max: " << w_max << "\n"; - - Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_min); - Mch->addArgument(WminC); - Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) w_max); - Mch->addArgument(WmaxC); - - Mch->setCurrent(new FullyConnectedLayer_1()); - } - 
break; - default: // Other HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - delete this; - } // else {} // No HPVM intrinsic received. Remain at initial -} - -void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: - { - IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0); - assert((MulII == II->getOperand(0)) && - "Output of mul must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - - // Read quantization levels for input - float b_min, b_max; - Mch->getNextQuantizationLevel(b_min); - Mch->getNextQuantizationLevel(b_max); - errs() << "b_min: " << b_min << "\n"; - errs() << "b_max: " << b_max << "\n"; - - Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_min); - Mch->addArgument(BminC); - Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_max); - Mch->addArgument(BmaxC); - - Mch->setCurrent(new FullyConnectedLayer_2()); - } - break; - default: - Mch->setCurrent(new NoPattern()); - break; - } - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: - { - // Type of activation : TanH - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - case Intrinsic::visc_tensor_relu: - { - // Type of activation : ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - case Intrinsic::visc_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - 
Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - default: // No activation, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new FullyConnectedLayer()); - } - delete this; -} - -void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (!II) { // End of instruction stream - Mch->setCurrent(new FullyConnectedLayer()); - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void FullyConnectedLayer::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - Mch->setCurrent(new NoPattern()); - delete this; - } -} - -void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: - { - IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); - assert((ConvII == II->getOperand(0)) && - "Output of conv must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - // Read quantization levels for bias - float b_min, b_max; - Mch->getNextQuantizationLevel(b_min); - Mch->getNextQuantizationLevel(b_max); - errs() << "b_min: " << b_min << "\n"; - errs() << "b_max: " << b_max << "\n"; - - Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_min); - Mch->addArgument(BminC); - Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_max); - Mch->addArgument(BmaxC); - - Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv - Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv - Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv - Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv - - Mch->setCurrent(new ConvolutionLayer_2()); - } - break; - default: - Mch->setCurrent(new NoPattern()); - break; - } - } else { - // No addition - Mch->addArgument(ConstantPointerNull::get( - Type::getInt8PtrTy(Mch->getModule()->getContext()))); - // Still need to add the quantization constants - and remove them from file - float b_min, b_max; - Mch->getNextQuantizationLevel(b_min); - Mch->getNextQuantizationLevel(b_max); - errs() << "b_min: " << b_min << "\n"; - errs() << "b_max: " << b_max << "\n"; - Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_min); - Mch->addArgument(BminC); - Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) b_max); - Mch->addArgument(BmaxC); - - // Zero for all convolution 
numeric arguments FIXME??? - IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); - Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv - Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv - Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv - Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: - { - // Type of activation : TanH -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_relu: - { - // Type of activation : ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_pool_max: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new 
ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_min: - { - // pool min FIXME: 2: supported? - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_mean: - { - // pool mean - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - default: // No activation, No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_pool_max: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, 
we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_min: - { - // pool min FIXME: 2: supported? - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_mean: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - 
Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - default: // No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - // Read quantization levels for output - float out_min, out_max; - Mch->getNextQuantizationLevel(out_min); - Mch->getNextQuantizationLevel(out_max); - errs() << "out_min: " << out_min << "\n"; - errs() << "out_max: " << out_max << "\n"; - - Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_min); - Mch->addArgument(OutminC); - Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()), - (double) out_max); - Mch->addArgument(OutmaxC); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (!II) { // End of instruction stream - Mch->setCurrent(new ConvolutionLayer()); - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void ConvolutionLayer::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - Mch->setCurrent(new NoPattern()); - delete this; - } -} - -void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {} - -CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM, std::ifstream &_qin) : - M(_M), RtM(_RtM), qin(_qin) { - current = new InitialState(); -} - -void CodeGenStateMachine::transition(IntrinsicInst *II) { - current->transition(this, II); -} - -void CodeGenStateMachine::codeGen() { - - if ((current->getStateID() != AbstractState::ID::FULLY_CONNECTED_LAYER) && - (current->getStateID() != AbstractState::ID::CONVOLUTION_LAYER)) { - // Not a valid instruction sequence. - assert(false && "Unsupported instruction sequence by PROMISE simulator\n"); - } - - // We have a valid instruction sequence. - // Make sure that the instruction sequence can be traslated: - // each instruction's result must be used only by the next one in sequence. 
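The loop that follows implements exactly this check. Factored into a standalone predicate (helper name is ours, not the pass's), and assuming the file's existing includes, it is roughly:

    // Sketch: true iff each matched intrinsic is consumed only by its
    // successor in the candidate sequence, so fusing the sequence into a
    // single runtime call cannot drop a value that other code still reads.
    static bool isStraightLineChain(const std::vector<IntrinsicInst *> &Seq) {
      for (unsigned p = 0; p + 1 < Seq.size(); ++p) {
        if (!Seq[p]->hasOneUse())
          return false;                         // result escapes the pattern
        if (*Seq[p]->user_begin() != Seq[p + 1])
          return false;                         // used, but not by the successor
      }
      return true;
    }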
- for (unsigned p = 0; p < IIs.size()-1; p++) { - IntrinsicInst *II = IIs[p]; - assert((II->hasOneUse()) && - "Instruction sequence does not fit expected pattern: not single use\n"); - - Value::user_iterator ui = II->user_begin(); // The only use - assert((*ui == IIs[p+1]) && - "Instruction sequence does not fit expected pattern: not used by next instruction\n"); - } - - // Create corresponding PROMISE simulator call - CallInst *CI; - switch (current->getStateID()) { - case AbstractState::ID::CONVOLUTION_LAYER: - { - Constant* ConvLayer_PROMISE = - M->getOrInsertFunction(StringRef("ConvLayer_PROMISE"), - RtM->getFunction(StringRef("ConvLayer_PROMISE"))->getFunctionType()); - DEBUG(errs() << *ConvLayer_PROMISE); - - // FIXME: get last argument from some intrinsic. For now, 7 - Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7)); - // Create PROMISE simulator function call - CI = CallInst::Create(ConvLayer_PROMISE, Args, ""); - } - break; - case AbstractState::ID::FULLY_CONNECTED_LAYER: - { - Constant* FCLayer_PROMISE = - M->getOrInsertFunction(StringRef("FCLayer_PROMISE"), - RtM->getFunction(StringRef("FCLayer_PROMISE"))->getFunctionType()); - DEBUG(errs() << *FCLayer_PROMISE); - - // FIXME: get last argument from some intrinsic. For now, 7 - Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7)); - // Create PROMISE simulator function call - CI = CallInst::Create(FCLayer_PROMISE, Args, ""); - } - break; - default: - llvm_unreachable("Unexpected CodeGenStateMachine State\n"); - break; - } - - // Insert new call and replace all uses of pattern result with - // the PROMISE simulator call - IntrinsicInst *IIlast = *(IIs.rbegin()); - CI->insertBefore(IIlast); - IIlast->replaceAllUsesWith(CI); - - // Remove the instructions we translated to the simulator call. - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around. - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(), - re = IIs.rend(); ri != re; ++ri) { - DEBUG(errs() << "Erasing: " << **ri << "\n"); - (*ri)->eraseFromParent(); - } -errs() << "****** GenF:\n" << *(CI->getParent()->getParent()); - -} - -// DFG2LLVM_PROMISE - The first implementation. - -struct DFG2LLVM_PROMISE : public DFG2LLVM { - static char ID; // Pass identification, replacement for typeid - DFG2LLVM_PROMISE() : DFG2LLVM(ID) {} -private: - -public: - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addPreserved<BuildDFG>(); - } - - bool runOnModule(Module &M); -}; - -// Visitor for Code generation traversal (tree traversal for now) -class CGT_PROMISE : public CodeGenTraversal { - -private: - //Member variables - std::ifstream qin; - - // VISC Runtime API and Tensor runtime API - Constant* llvm_hpvm_initTensorRt; - Constant* llvm_hpvm_cleanupTensorRt; - Constant* hpvm_request_tensor; - - // Functions - - // Virtual Functions - void init(); - void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - -public: - - // Constructor - CGT_PROMISE(Module &_M, BuildDFG &_DFG, std::string &_str) : CodeGenTraversal(_M, _DFG) { - qin.open(_str.c_str()); - assert(qin && "Failed to open quantization levels input file\n"); - initRuntimeAPI(); - } - - ~CGT_PROMISE() { - qin.close(); - } - -}; - -void CGT_PROMISE::init() { - // FIXME: what to do here? If anything? -} - -// Initialize the VISC runtime API. 
This makes it easier to insert these calls -void CGT_PROMISE::initRuntimeAPI() { - - // Load Runtime API Module - SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n"); - - // FIXME: set correct path - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) - DEBUG(errs() << Err.getMessage()); - else - DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); - - // Get or insert Global declarations for - // - initialization - // - cleanup - // - request a tensor - DECLARE(llvm_hpvm_initTensorRt); - DECLARE(llvm_hpvm_cleanupTensorRt); - DECLARE(hpvm_request_tensor); - - // Find visc.init and visc.cleanup calls, and add placeholder methods - // for initialization and cleanup of the hpvm tensor runtime - - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n"); - InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create(llvm_hpvm_initTensorRt, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); - - Function* VC = M.getFunction("llvm.visc.cleanup"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n"); - CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall); - -} - -void CGT_PROMISE::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs () << "Skipping internal node\n"; -} - -void CGT_PROMISE::codeGen(DFLeafNode* N) { - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { - assert(false && "Allocation Node not expected in ApproxHPVM"); - return; - } - - // Generate code only if it has the right hint - if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) { - errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; - return; - } - - // Get the function associated with the dataflow node - Function *F = N->getFuncPointer(); -errs() << "Node Function: " << *F << "\n"; - // Look up if we have visited this function before. If we have, then just - // get the cloned function pointer from DFNode. Otherwise, create the cloned - // function and add it to the DFNode GenFunc. 
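A note on the quantization input consumed by the state machine above: the definition of getNextQuantizationLevel is not visible in this hunk. An implementation consistent with its call sites, which pull whitespace-separated floats from the qin stream in (out_min, out_max) pairs, would presumably be:

    // Hedged sketch; the real definition is not shown in this diff.
    void CodeGenStateMachine::getNextQuantizationLevel(float &Val) {
      assert(qin.good() && "Ran out of quantization levels");
      qin >> Val;   // sequential floats: min, max, min, max, ...
    }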
- Function *F_promise = N->getGenFuncForTarget(visc::PROMISE_TARGET); - - assert((F_promise == NULL) && - "Error: Visiting a node for which code already generated"); - - // Clone the function - ValueToValueMapTy VMap; - std::string FName(F->getName().data());//Twine FName = F->getName(); - F_promise = CloneFunction(F, VMap); - F_promise->setName(FName+"_promise"); - F_promise->removeFromParent(); - M.getFunctionList().push_back(F_promise); - - N->addGenFunc(F_promise, visc::PROMISE_TARGET, true); - - /* Removing HPVM in/out/inout function attributes */ - for(Function::arg_iterator ai = F_promise->arg_begin(), ae = F_promise->arg_end(); - ai != ae; ai++){ - Argument *Arg = &*ai; - if(Arg->hasAttribute(Attribute::In)) - Arg->removeAttr(Attribute::In); - if(Arg->hasAttribute(Attribute::Out)) - Arg->removeAttr(Attribute::Out); - if(Arg->hasAttribute(Attribute::InOut)) - Arg->removeAttr(Attribute::InOut); - } - - // Adding nounwind to generated function : FIXME: needed? - DEBUG(errs() << "Adding nounwind to generated function\n"); - F_promise->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - - // Add llvm_visc_requestTensor calls for every pointer argument of the function - // (they are all expected to be tensors), at the beginning of the function. - // This is the first instruction of the function, insert them before this - Instruction* FI = &*(F_promise->getEntryBlock().begin()); - - // FIXME: verify that we want 0 as a target device - // In this backend, the target device is CPU, represented by i32 0. - ConstantInt *TargetDeviceID = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - - for (Function::arg_iterator ai = F_promise->arg_begin(), - ae = F_promise->arg_end(); ai != ae; ++ai) { - Argument* Arg = &*ai; - if (Arg->getType()->isPointerTy()) { - Value *Args[] = {Arg, TargetDeviceID}; - CallInst::Create(hpvm_request_tensor, - ArrayRef<Value*>(Args, 2), - "", FI); - } - } - - CodeGenStateMachine CGM(&M, runtimeModule.get(), qin); - - /* An assumption is made for the PROMISE simulator: * - * a leaf node will contain consequtive operations that will map to a * - * single PROMISE simulator call */ - - for (inst_iterator i = inst_begin(F_promise), e = inst_end(F_promise); - i != e; ++i) { - Instruction *I = &(*i); - CGM.transition(dyn_cast<IntrinsicInst>(I)); - } - - CGM.codeGen(); - -//errs() << "-----------------------------------\n"; -//errs() << *F_promise << "\n"; - - return; -} - -bool DFG2LLVM_PROMISE::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_PROMISE PASS\n"; - - errs() << QuantizationInputsFilename << "\n"; - -// std::ifstream qin(quantizationInputsFilename_cstr); -// std::ifstream qin; -// qin.open(QuantizationInputsFilename.c_str()); -// qin.open(QuantizationInputsFilename.c_str(), std::ifstream::in); - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - - // Visitor for Code Generation Graph Traversal - CGT_PROMISE *CGTVisitor = new CGT_PROMISE(M, DFG, QuantizationInputsFilename); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } - - //TODO: Edit module epilogue to remove the VISC intrinsic declarations - delete CGTVisitor; - - return true; -} - - -/****************************************************************************** - * Helper functions * - 
******************************************************************************/ - -} // End of namespace - -char DFG2LLVM_PROMISE::ID = 0; -static RegisterPass<DFG2LLVM_PROMISE> X("dfg2llvm-promise", - "Dataflow Graph to LLVM for PROMISE Pass", - false /* does not modify the CFG */, - true /* transformation, * - * not just analysis */); - diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports b/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt deleted file mode 100644 index 714ad14f18..0000000000 --- a/llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt ------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = DFG2LLVM_PROMISE -parent = Transforms diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt deleted file mode 100644 index 22c219d0a1..0000000000 --- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( LLVMDFG2LLVM_WrapperAPI - DFG2LLVM_WrapperAPI.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp deleted file mode 100644 index c54dd9ef3b..0000000000 --- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp +++ /dev/null @@ -1,1530 +0,0 @@ -//=== DFG2LLVM_WrapperAPI.cpp ===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
-// -//===----------------------------------------------------------------------===// -#define ENABLE_ASSERTS - -#define DEBUG_TYPE "DFG2LLVM_WrapperAPI" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/IRBuilder.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/IR/InstIterator.h" -#include "llvm/Transforms/Utils/ValueMapper.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Cloning.h" -#include "llvm/IRReader/IRReader.h" -#include "llvm/Linker/Linker.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/IR/Attributes.h" -#include "llvm-c/Core.h" -#include "llvm/SupportVISC/VISCTimer.h" -#include "llvm/SupportVISC/DFG2LLVM.h" -#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" -#include <sstream> -#include <fstream> - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; - -using namespace inplacedfg; - -namespace { - -cl::opt<std::string> QuantizationInputsFilename( - "quantization-levels-filename", - cl::desc("<PROMISE quantization levels input file (path)>"), - cl::value_desc("filename"), - cl::Required); - -cl::opt<std::string> ConfigurationInputsFilename( - "configuration-inputs-filename", - cl::desc("<Autotuner configurations input file (path)>"), - cl::value_desc("filename"), - cl::Required); - -// Helper function declarations -bool isValidOperandForInPlaceOperation(Value *, Function *, DFNode *, - InPlaceDFGAnalysis::InPlaceDFGParameter &); - -// Helper class declarations - -// State machine definition for pattern identification - -/* An assumption is made for the Wrapper API input: * - * a leaf node will contain consequtive operations that will map to a * - * single convolution or fully connected layer, or a single tensor operation. 
* - - * FullyConnectedLayer: Multiply, Add, [Activation] * - * ConvolutionLayer: Convolution, [Add], [Activation], [Pooling] */ - -class AbstractState; - -class CodeGenStateMachine { -private: - Module *M; - Module *RtM; - - std::vector<Value*> Args; - std::vector<IntrinsicInst*> IIs; - AbstractState *current; - -public: - CodeGenStateMachine(Module *, Module *); - - void setCurrent(AbstractState *s) { - current = s; - } - - void transition(IntrinsicInst *II); - - Module *getModule() { - return M; - } - - void addArgument(Value *Arg) { - Args.push_back(Arg); - } - - void addIntrinsicInst(IntrinsicInst *II) { - IIs.push_back(II); - } - - IntrinsicInst *getIntrinsicInstAt(unsigned idx) { - return IIs[idx]; - } - - void codeGen(DFNode *, Function * , const StringRef &, - InPlaceDFGAnalysis::InPlaceDFGParameter &); - -}; - -class AbstractState { -public: - enum ID - { - INITIAL_STATE, - FULLY_CONNECTED_LAYER_1, - FULLY_CONNECTED_LAYER_2, - FULLY_CONNECTED_LAYER_3, - FULLY_CONNECTED_LAYER, - CONVOLUTION_LAYER_1, - CONVOLUTION_LAYER_2, - CONVOLUTION_LAYER_3, - CONVOLUTION_LAYER_4, - CONVOLUTION_LAYER, - SINGLE_TENSOR_OPERATION, - NO_PATTERN, - }; - -protected: - enum ID StateID; - -public: - enum ID getStateID() { - return StateID; - } - - virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0; - virtual ~AbstractState() {} -}; - -class InitialState : public AbstractState { -public: - InitialState() { - StateID = ID::INITIAL_STATE; - DEBUG(errs() << "new InitialState\n"); - } - ~InitialState() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_1 : public AbstractState { -public: - FullyConnectedLayer_1() { - StateID = ID::FULLY_CONNECTED_LAYER_1; - DEBUG(errs() << "new FullyConnectedLayer_1\n"); - } - ~FullyConnectedLayer_1() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_2 : public AbstractState { -public: - FullyConnectedLayer_2() { - StateID = ID::FULLY_CONNECTED_LAYER_2; - DEBUG(errs() << "new FullyConnectedLayer_2\n"); - } - ~FullyConnectedLayer_2() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer_3 : public AbstractState { -public: - FullyConnectedLayer_3() { - StateID = ID::FULLY_CONNECTED_LAYER_3; - DEBUG(errs() << "new FullyConnectedLayer_3\n"); - } - ~FullyConnectedLayer_3() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class FullyConnectedLayer : public AbstractState { -public: - FullyConnectedLayer() { - StateID = ID::FULLY_CONNECTED_LAYER; - DEBUG(errs() << "new FullyConnectedLayer\n"); - } - ~FullyConnectedLayer() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_1 : public AbstractState { -public: - ConvolutionLayer_1() { - StateID = ID::CONVOLUTION_LAYER_1; - DEBUG(errs() << "new ConvolutionLayer_1\n"); - } - ~ConvolutionLayer_1() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_2 : public AbstractState { -public: - ConvolutionLayer_2() { - StateID = ID::CONVOLUTION_LAYER_2; - DEBUG(errs() << "new ConvolutionLayer_2\n"); - } - ~ConvolutionLayer_2() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_3 : public AbstractState { -public: - ConvolutionLayer_3() { - StateID = ID::CONVOLUTION_LAYER_3; - DEBUG(errs() << "new ConvolutionLayer_3\n"); - } - ~ConvolutionLayer_3() {} 
- - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer_4 : public AbstractState { -public: - ConvolutionLayer_4() { - StateID = ID::CONVOLUTION_LAYER_4; - DEBUG(errs() << "new ConvolutionLayer_4\n"); - } - ~ConvolutionLayer_4() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class ConvolutionLayer : public AbstractState { -public: - ConvolutionLayer() { - StateID = ID::CONVOLUTION_LAYER; - DEBUG(errs() << "new ConvolutionLayer\n"); - } - ~ConvolutionLayer() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class SingleTensorOperation : public AbstractState { -public: - SingleTensorOperation() { - StateID = ID::SINGLE_TENSOR_OPERATION; - DEBUG(errs() << "new SingleTensorOperation\n"); - } - ~SingleTensorOperation() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -class NoPattern : public AbstractState { -public: - NoPattern() { - StateID = ID::NO_PATTERN; - DEBUG(errs() << "new NoPattern\n"); - } - ~NoPattern() {} - - void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override; -}; - -void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_convolution: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // conv input - Mch->addArgument(II->getOperand(1)); // conv kernel - - Mch->setCurrent(new ConvolutionLayer_1()); - } - break; - case Intrinsic::visc_tensor_mul: - { - Mch->addIntrinsicInst(II); - Mch->addArgument(II->getOperand(0)); // 1st gemm input - Mch->addArgument(II->getOperand(1)); // 2nd gemm input - - Mch->setCurrent(new FullyConnectedLayer_1()); - } - break; - default: // Other HPVM intrinsic - { - Mch->addIntrinsicInst(II); - Mch->setCurrent(new SingleTensorOperation()); - } - break; - } - delete this; - } // else {} // No HPVM intrinsic received. 
Remain at initial -} - -void SingleTensorOperation::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - Mch->setCurrent(new NoPattern()); - delete this; - } -} - -void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: - { - IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0); - assert((MulII == II->getOperand(0)) && - "Output of mul must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - - Mch->setCurrent(new FullyConnectedLayer_2()); - } - break; - default: - Mch->setCurrent(new NoPattern()); - break; - } - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: - { - // Type of activation : TanH - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - case Intrinsic::visc_tensor_relu: - { - // Type of activation : ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - case Intrinsic::visc_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new FullyConnectedLayer_3()); - } - break; - default: // No activation, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - Mch->setCurrent(new FullyConnectedLayer()); - } - delete this; -} - -void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (!II) { // End of instruction stream - Mch->setCurrent(new FullyConnectedLayer()); - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void FullyConnectedLayer::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - Mch->setCurrent(new NoPattern()); - delete this; - } -} - -void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_add: - { - IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); - assert((ConvII == II->getOperand(0)) && - "Output of conv must be used as 1st operand of add"); - Mch->addIntrinsicInst(II); - - Mch->addArgument(II->getOperand(1)); // bias - - Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv - Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv - Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv - Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv - - Mch->setCurrent(new ConvolutionLayer_2()); - } - break; - default: - Mch->setCurrent(new NoPattern()); - break; - } - } else { - // No addition - Mch->addArgument(ConstantPointerNull::get( - Type::getInt8PtrTy(Mch->getModule()->getContext()))); - - // Zero for all 
convolution numeric arguments FIXME??? - IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0); - Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv - Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv - Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv - Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_tanh: - { - // Type of activation : TanH -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_relu: - { - // Type of activation : ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_clipped_relu: - { - // Type of activation : Clipped ReLU -// Mch->addArgument(ConstantInt::get( -// Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_3()); - } - break; - case Intrinsic::visc_tensor_pool_max: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_min: - { - // pool min FIXME: 2: supported? 
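The pooling-type encoding is uniform across this file and matches the pool_type selection in the single-tensor-operation path further down, so the recurring "FIXME: 2: supported?" asks whether the runtime supports min-pooling, not which value encodes it:

    // Encoding of the pooling argument, read off this file's own cases
    // (enum name is illustrative, not part of the pass):
    enum PoolType { POOL_MAX = 0, POOL_MEAN = 1, POOL_MIN = 2 };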
- Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_mean: - { - // pool mean - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // poolSize - Mch->addArgument(II->getOperand(1)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - Mch->addIntrinsicInst(II); - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - default: // No activation, No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // No activation - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), -1)); - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - switch (II->getIntrinsicID()) { - case Intrinsic::visc_tensor_pool_max: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_min: - { - // pool min FIXME: 2: supported? 
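The pool_max case above, and the pool_min and pool_mean cases that follow, each re-derive the activation argument from the recorded activation intrinsic through the same if/else chain. Collected into a single mapping (helper name is ours), the encoding is:

    // Activation encoding used by the wrapper API, as read off the
    // repeated if/else chains in this file.
    static int encodeActivation(Intrinsic::ID ActIID) {
      switch (ActIID) {
      case Intrinsic::visc_tensor_tanh:         return 0;
      case Intrinsic::visc_tensor_relu:         return 1;
      case Intrinsic::visc_tensor_clipped_relu: return 2;
      default:                                  return -1;   // no activation
      }
    }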
- Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - case Intrinsic::visc_tensor_pool_mean: - { - // pool max - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - // poolSize - Mch->addArgument(II->getOperand(1)); - Mch->addIntrinsicInst(II); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - Mch->setCurrent(new ConvolutionLayer_4()); - } - break; - default: // No pooling, but HPVM intrinsic - Mch->setCurrent(new NoPattern()); - break; - } - } else { // End of instruction stream - // No pooling - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - // 0 for unused pool argument - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - - // Revisit last intrinsic, to add argument for activation operation - IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2); - // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU - Intrinsic::ID ActIID = ActII->getIntrinsicID(); - if (ActIID == Intrinsic::visc_tensor_tanh) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 0)); - } else if (ActIID == Intrinsic::visc_tensor_relu) { - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 1)); - } else { //ActIID == Intrinsic::visc_tensor_clipped_relu - Mch->addArgument(ConstantInt::get( - Type::getInt32Ty(Mch->getModule()->getContext()), 2)); - } - - Mch->setCurrent(new ConvolutionLayer()); - } - delete this; -} - -void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (!II) { // End of instruction stream - Mch->setCurrent(new ConvolutionLayer()); - } else { - Mch->setCurrent(new NoPattern()); - } - delete this; -} - -void ConvolutionLayer::transition(CodeGenStateMachine *Mch, - IntrinsicInst *II) { - if (II) { // Not end of instruction stream - Mch->setCurrent(new NoPattern()); - delete this; - } -} - -void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst 
*II) {} - -CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM) : - M(_M), RtM(_RtM) { - current = new InitialState(); -} - -void CodeGenStateMachine::transition(IntrinsicInst *II) { - current->transition(this, II); -} - -void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRef, - InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) { - - assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || - (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER) || - (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) && - "Unsupported instruction sequence for the Wrapper API.\n" ); - - if ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) || - (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)) { - // Layer Operation. - DEBUG(errs() << "Layer Instruction Sequence. Validating ...\n"); - // We have a valid instruction sequence. - // Make sure that the instruction sequence can be traslated: - // each instruction's result must be used only by the next one in sequence. - - for (unsigned p = 0; p < IIs.size()-1; p++) { - IntrinsicInst *II = IIs[p]; - assert((II->hasOneUse()) && - "Instruction sequence does not fit pattern: not single use\n"); - - Value::user_iterator ui = II->user_begin(); // The only use - assert((*ui == IIs[p+1]) && - "Instruction sequence does not fit pattern: not used by next instruction\n"); - } - - // Create corresponding wrapper API call - CallInst *CI; - switch (current->getStateID()) { - case AbstractState::ID::CONVOLUTION_LAYER: - { - Constant* wrapper_ConvLayer = - M->getOrInsertFunction(StringRef("wrapper_ConvLayer"), - RtM->getFunction(StringRef("wrapper_ConvLayer"))->getFunctionType()); - DEBUG(errs() << *wrapper_ConvLayer); - - // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0 - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - std::vector<Value*> UpdatedArgs; - UpdatedArgs.push_back(GEPConst); - for (unsigned i = 0; i < Args.size(); i++) { - UpdatedArgs.push_back(Args[i]); - } - // Create wrapper API function call - CI = CallInst::Create(wrapper_ConvLayer, UpdatedArgs, ""); - } - break; - case AbstractState::ID::FULLY_CONNECTED_LAYER: - { - Constant* wrapper_FCLayer = - M->getOrInsertFunction(StringRef("wrapper_FCLayer"), - RtM->getFunction(StringRef("wrapper_FCLayer"))->getFunctionType()); - DEBUG(errs() << *wrapper_FCLayer); - - // FIXME: get last (float) arguments from clipped relu intrinsic. 
For now, 0 - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0)); - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - std::vector<Value*> UpdatedArgs; - UpdatedArgs.push_back(GEPConst); - for (unsigned i = 0; i < Args.size(); i++) { - UpdatedArgs.push_back(Args[i]); - } - - // Create wrapper API function call - CI = CallInst::Create(wrapper_FCLayer, UpdatedArgs, ""); - } - break; - default: - llvm_unreachable("Unexpected CodeGenStateMachine State\n"); - break; - } - - // Insert new call and replace all uses of pattern result with - // the wrapper API call - IntrinsicInst *IIlast = *(IIs.rbegin()); - CI->insertBefore(IIlast); - IIlast->replaceAllUsesWith(CI); - - } else { // SINGLE_TENSOR_OPERATION - assert((IIs.size() == 1) && - "Unexpected size of intrinsics vector in code gen state machine.\n"); - assert(Args.empty() && "Unexpected arguments found in coge gen state machine.\n"); - IntrinsicInst *TensorII = IIs[0]; -errs() << "TensorII: " << *TensorII << "\n"; - - switch (TensorII->getIntrinsicID()) { - case Intrinsic::visc_tensor_group_convolution: - { /* llvm.hpvm.tensor.group.conv */ - // Tensor group conv is not in place. - DEBUG(errs() << F->getName() << "\t: Handling tensor group convolution \n"); - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - - Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); - Args.push_back(conv_mode); - - Args.push_back(TensorII->getOperand(7)); - - // Create wrapper API runtime function call - Constant* wrapper_tensorGroupConvolution = - M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"), - RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType()); - CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution, - Args, "", TensorII); - // We can replace the call to hpvm.tensor.mul with the runtime call - TensorII->replaceAllUsesWith(CI); - } - break; - - case Intrinsic::visc_tensor_batchnorm: - { /* llvm.hpvm.tensor.batchnorm */ - // Tensor batchnorm is not in place. 
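As in the group-convolution case above, this case and every remaining one re-creates the node-name constant inline. The repeated boilerplate is equivalent to the following factoring (helper name is ours):

    // Build a constant i8 array holding the NUL-terminated node name and
    // return an i8* to its first character, exactly as the inline code does.
    static Constant *getNodeNameConstant(Module &M, StringRef Name) {
      Constant *Str = ConstantDataArray::getString(M.getContext(), Name, true);
      auto *GV = new GlobalVariable(M, Str->getType(), /*isConstant=*/true,
                                    GlobalValue::ExternalLinkage, Str, "");
      Constant *Zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
      Constant *Idx[] = {Zero, Zero};
      return ConstantExpr::getGetElementPtr(
          GV->getType()->getPointerElementType(), GV, Idx);
    }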
- // FIXME: Add Check for InPlace Analysis - DEBUG(errs() << F->getName() << "\t: Handling tensor batch normalization \n"); - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - - // Create wrapper API runtime function call - Constant* wrapper_tensorBatchNorm = - M->getOrInsertFunction(StringRef("wrapper_tensorBatchNorm"), - RtM->getFunction(StringRef("wrapper_tensorBatchNorm"))->getFunctionType()); - CallInst* CI = CallInst::Create(wrapper_tensorBatchNorm, - Args, "", TensorII); - // We can replace the call to hpvm.tensor.batchnorm with the wrapper API call - TensorII->replaceAllUsesWith(CI); - } - break; - - case Intrinsic::visc_tensor_add: - { /* llvm.hpvm.tensor.add */ - DEBUG(errs() << F->getName() << "\t: Handling tensor add\n"); - // Tensor add(a,b) is in place for argument a. -// Value *Op = TensorII->getOperand(0); - - // Test the intrinsic operand for in place operation. -// bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); - - // Code generation will not continue if this is false, because the target - // may provide an in place operation(safe choice) - // FIXME: remove this comment - must check for in-place -// assert(inplace && -// "Operand not valid for in place operation. 
Code gen aborted.\n"); - - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - - // Create wrapper API runtime function call - Constant* wrapper_tensorAdd = - M->getOrInsertFunction(StringRef("wrapper_tensorAdd"), - RtM->getFunction(StringRef("wrapper_tensorAdd"))->getFunctionType()); - CallInst::Create(wrapper_tensorAdd, Args, "", TensorII); - // We can replace the call to hpvm.tensor.add with the 1st argument - // that, due to in place operation, now contains the result - TensorII->replaceAllUsesWith(TensorII->getOperand(0)); - } - break; - - case Intrinsic::visc_tensor_pool_max: - case Intrinsic::visc_tensor_pool_mean: - case Intrinsic::visc_tensor_pool_min: - { - DEBUG(errs() << F->getName() << "\t: Handling tensor pooling functions\n"); - - // Argument list for tensor pooling: - // input, poolFunction, window_height, window_width, - // vertical_pad, horizontal_pad, vertical_stride, horizontal_stride - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - - int pool_type = 0; - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_max) { - pool_type = 0; - } - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean) { - pool_type = 1; - } - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_min) { - pool_type = 2; - } - - Constant *constPoolType = - ConstantInt::get(Type::getInt32Ty(M->getContext()), pool_type); - Args.push_back(constPoolType); - - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - Args.push_back(TensorII->getOperand(6)); - - // Create wrapper API runtime function call - Constant* wrapper_tensorPooling = - M->getOrInsertFunction(StringRef("wrapper_tensorPooling"), - RtM->getFunction(StringRef("wrapper_tensorPooling"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorPooling); - CallInst* CI = CallInst::Create(wrapper_tensorPooling, Args, "", TensorII); - - // Replacing intrinsic result uses with the result of the tensor runtime operation - TensorII->replaceAllUsesWith(CI); - } - break; - - case Intrinsic::visc_tensor_relu: - case Intrinsic::visc_tensor_clipped_relu: - case Intrinsic::visc_tensor_tanh: - { - DEBUG(errs() << F->getName() << "\t: 
Handling tensor activation functions\n"); - - // Tensor relu(a) (and others) is in place for argument a. - Value *Op = TensorII->getOperand(0); - - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); - // Code generation will not continue if this is false, because the target - // may provide an in place operation(safe choice) - assert(inplace && - "Operand not valid for in place operation. Code gen aborted.\n"); - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - - if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_relu) { - // Create wrapper API runtime function call - Constant* wrapper_tensorRelu = - M->getOrInsertFunction(StringRef("wrapper_tensorRelu"), - RtM->getFunction(StringRef("wrapper_tensorRelu"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorRelu); - CallInst::Create(wrapper_tensorRelu, Args, "", TensorII); - } - else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) { - // Create wrapper API runtime function call - Constant* wrapper_tensorClippedRelu = - M->getOrInsertFunction(StringRef("wrapper_tensorClippedRelu"), - RtM->getFunction(StringRef("wrapper_tensorClippedRelu"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorClippedRelu); - CallInst::Create(wrapper_tensorClippedRelu, Args, "", TensorII); - } - else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_tanh) { - // Create wrapper API runtime function call - Constant* wrapper_tensorTanh = - M->getOrInsertFunction(StringRef("wrapper_tensorTanh"), - RtM->getFunction(StringRef("wrapper_tensorTanh"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorTanh); - CallInst::Create(wrapper_tensorTanh, Args, "", TensorII); - } - - // We can replace the call to hpvm.tensor.{relu,clipped relu, tanh} - // with the 1st argument that, due to in place operation, - // now contains the result - TensorII->replaceAllUsesWith(TensorII->getOperand(0)); - } - break; - - case Intrinsic::visc_tensor_softmax: - { /* llvm.visc.tensor.softmax */ - - DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n"); - // Tensor softmax(a) is in place for argument a. - Value *Op = TensorII->getOperand(0); - - // Test the intrinsic operand for in place operation. - bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP); - // Code generation will not continue if this is false, because the target - // may provide an in place operation(safe choice) - assert(inplace && - "Operand not valid for in place operation. 
Code gen aborted.\n"); - - // Argument list for the runtime call - - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - Args.push_back(TensorII->getOperand(0)); - - // Create wrapper API runtime function call - Constant* wrapper_tensorSoftmax = - M->getOrInsertFunction(StringRef("wrapper_tensorSoftmax"), - RtM->getFunction(StringRef("wrapper_tensorSoftmax"))->getFunctionType()); - DEBUG(errs() << *wrapper_tensorSoftmax); - CallInst::Create(wrapper_tensorSoftmax, Args, "", TensorII); - // We can replace the call to hpvm.tensor.softmax with the 1st argument - // that, due to in place operation, now contains the result - TensorII->replaceAllUsesWith(TensorII->getOperand(0)); - } - break; -/* - case Intrinsic::visc_image_fft_transform: - { // llvm.hpvm.image.fft.transform - Or another image intrinsic - // All will be treated as not in place - DEBUG(errs() << F->getName() << "\t: Handling fft transform \n"); - - // Create argument list for the runtime call - stored in Args - - // All interfaces will have a string as first argument, which will be - // used to identify the dataflow node at runtime - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray = ConstantDataArray::getString(M->getContext(), - strRef, true); - GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(), - true, GlobalValue::ExternalLinkage, ConstArray, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* GEPConst = - ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(), - GV, GEPIndices); - - Args.push_back(GEPConst); - - // Here, use you will access the appropriate arruments of the intrinsic - // and push_back, in order to create the argument list of runtime call - Args.push_back(TensorII->getOperand(0)); - Args.push_back(TensorII->getOperand(1)); - Args.push_back(TensorII->getOperand(2)); - Args.push_back(TensorII->getOperand(3)); - Args.push_back(TensorII->getOperand(4)); - Args.push_back(TensorII->getOperand(5)); - - Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1); - Args.push_back(conv_mode); - - Args.push_back(TensorII->getOperand(7)); - - // Done with argument list. - - // Create wrapper API runtime function call - // Appropriately set the name of the function of the runtime that you - // want to call - // Note: the Constant * is what we need to pass to the callInst. - // This name does not have to match, but does so for similarity. 
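One caveat in this commented-out template as written: wrapper_tensorGroupConvolution is declared below, but the result of getOrInsertFunction is discarded, so the CallInst::Create that follows would consume an uninitialized pointer if the block were ever uncommented. The intended form, matching the live cases above, is presumably:

    // Bind the runtime declaration before creating the call
    // (cf. the wrapper_tensorBatchNorm case earlier in this function).
    Constant *wrapper_tensorGroupConvolution = M->getOrInsertFunction(
        StringRef("wrapper_tensorGroupConvolution"),
        RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))
            ->getFunctionType());
    CallInst *CI = CallInst::Create(wrapper_tensorGroupConvolution, Args,
                                    "", TensorII);
    TensorII->replaceAllUsesWith(CI);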
- Constant* wrapper_tensorGroupConvolution; - M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"), - RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType()); - CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution, - Args, "", TensorII); - // We can replace the call to hpvm.tensor.xxx with the runtime call - TensorII->replaceAllUsesWith(CI); - } - break; - -*/ - default: - llvm_unreachable("Unknown VISC Intrinsic!"); - break; - } - - } // No other case exists, since assertion passed - - - // Remove the instructions we translated to the simulator call. - // Traverse the vector backwards, otherwise definitions are deleted while - // their subsequent uses are still around. - for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(), - re = IIs.rend(); ri != re; ++ri) { - DEBUG(errs() << "Erasing: " << **ri << "\n"); - (*ri)->eraseFromParent(); - } - -} - -// DFG2LLVM_WrapperAPI - The first implementation. - -struct DFG2LLVM_WrapperAPI : public DFG2LLVM { - static char ID; // Pass identification, replacement for typeid - DFG2LLVM_WrapperAPI() : DFG2LLVM(ID) {} -private: - -public: - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addRequired<InPlaceDFGAnalysisWrapper>(); - AU.addPreserved<BuildDFG>(); - AU.addPreserved<InPlaceDFGAnalysisWrapper>(); - } - - bool runOnModule(Module &M); -}; - -// Visitor for Code generation traversal (tree traversal for now) -class CGT_WrapperAPI : public CodeGenTraversal { - -private: - //Member variables - unsigned nodeID; // Used as a node identifier - - std::string QuantizationInputsFilenameStr; - std::string ConfigurationInputsFilenameStr; - - InPlaceDFGAnalysis::InPlaceDFGParameter *IPP; - - // VISC Runtime API and Tensor runtime API - Constant* llvm_hpvm_initApproxhpvmRt; - Constant* llvm_hpvm_cleanupApproxhpvmRt; - Constant* hpvm_request_tensor; - - Constant* llvm_hpvm_initializeRuntimeController; - Constant* llvm_hpvm_clearRuntimeController; - - // Functions - - // Virtual Functions - void init(); - void initRuntimeAPI(); - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - -public: - - // Constructor - CGT_WrapperAPI(Module &_M, BuildDFG &_DFG, - InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP, - std::string &_QuantizationInputsFilenameStr, - std::string &_ConfigurationInputsFilenameStr) - : CodeGenTraversal(_M, _DFG), IPP(&_IPP), - QuantizationInputsFilenameStr(_QuantizationInputsFilenameStr), - ConfigurationInputsFilenameStr(_ConfigurationInputsFilenameStr) { - nodeID = 0; - initRuntimeAPI(); - } - -}; - - -void CGT_WrapperAPI::init() { - // FIXME: what to do here? If anything? -} - -// Initialize the VISC runtime API. 
This makes it easier to insert these calls -void CGT_WrapperAPI::initRuntimeAPI() { - - // Load Runtime API Module - SMDiagnostic Err; - - char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT"); - assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n"); - - // FIXME: set correct path - Twine llvmSrcRoot = LLVM_SRC_ROOT; - Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll"; - runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext()); - if(runtimeModule == nullptr) - DEBUG(errs() << Err.getMessage()); - else - DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n"); - - // Get or insert Global declarations for - // - initialization - // - cleanup - // - request a tensor - DECLARE(llvm_hpvm_initApproxhpvmRt); - DECLARE(llvm_hpvm_cleanupApproxhpvmRt); - DECLARE(hpvm_request_tensor); - - DECLARE(llvm_hpvm_initializeRuntimeController); - DECLARE(llvm_hpvm_clearRuntimeController); - - // Find visc.init and visc.cleanup calls, and add placeholder methods - // for initialization and cleanup of the hpvm tensor runtime - - Function* VI = M.getFunction("llvm.visc.init"); - assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n"); - InitCall = cast<Instruction>(*VI->user_begin()); - CallInst::Create(llvm_hpvm_initApproxhpvmRt, - ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)), - "", InitCall); - - StringRef QRangesStrRef = StringRef(QuantizationInputsFilenameStr); - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray1 = ConstantDataArray::getString(M.getContext(), - QRangesStrRef, true); - GlobalVariable *GV1 = new GlobalVariable(M,ConstArray1->getType(), - true, GlobalValue::ExternalLinkage, ConstArray1, ""); - // Create GEP expression to access it - Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0); - Constant* GEPIndices[] = { Int_0, Int_0 }; - Constant* QRangesGEPConst = - ConstantExpr::getGetElementPtr(GV1->getType()->getPointerElementType(), - GV1, GEPIndices); - - StringRef ConfsStrRef = StringRef(ConfigurationInputsFilenameStr); - // Create string for node name, as first argument for wrapper API call - Constant *ConstArray2 = ConstantDataArray::getString(M.getContext(), - ConfsStrRef, true); - GlobalVariable *GV2 = new GlobalVariable(M,ConstArray2->getType(), - true, GlobalValue::ExternalLinkage, ConstArray2, ""); - Constant* ConfsGEPConst = - ConstantExpr::getGetElementPtr(GV2->getType()->getPointerElementType(), - GV2, GEPIndices); - ArrayRef<Value*> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst}; - CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "", InitCall); - - Function* VC = M.getFunction("llvm.visc.cleanup"); - assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n"); - CleanupCall = cast<Instruction>(*VC->user_begin()); - CallInst::Create(llvm_hpvm_cleanupApproxhpvmRt, ArrayRef<Value*>(), "", CleanupCall); - CallInst::Create(llvm_hpvm_clearRuntimeController, ArrayRef<Value*>(), "", CleanupCall); - -} - -void CGT_WrapperAPI::codeGen(DFInternalNode* N) { - errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; - errs () << "Skipping internal node\n"; -} - -void CGT_WrapperAPI::codeGen(DFLeafNode* N) { - - // Skip code generation if it is a dummy node - if(N->isDummyNode()) { - DEBUG(errs() << "Skipping dummy node\n"); - return; - } - - // Abort code generation if it is an allocation node - if(N->isAllocationNode()) { - assert(false && "Allocation Node not expected in 
ApproxHPVM"); - return; - } - -// For wrapper API, we generate code for every leaf node. -// No need to check for hints from frontend -// // Generate code only if it has the right hint -// if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) { -// errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; -// return; -// } - - // Increment the node ID, for current node. - ++nodeID; - - // Get the function associated with the dataflow node - Function *F = N->getFuncPointer(); -errs() << "Node Function: " << *F << "\n"; - // Look up if we have visited this function before. If we have, then just - // get the cloned function pointer from DFNode. Otherwise, create the cloned - // function and add it to the DFNode GenFunc. - Function *F_wrapper_api = N->getGenFuncForTarget(visc::PROMISE_TARGET); - - assert((F_wrapper_api == NULL) && - "Error: Visiting a node for which code already generated"); - - // Clone the function - ValueToValueMapTy VMap; - std::string FName(F->getName().data());//Twine FName = F->getName(); - F_wrapper_api = CloneFunction(F, VMap); - F_wrapper_api->setName(FName+"_wrapper_api"); - F_wrapper_api->removeFromParent(); - M.getFunctionList().push_back(F_wrapper_api); - - N->addGenFunc(F_wrapper_api, visc::PROMISE_TARGET, true); - - /* Removing HPVM in/out/inout function attributes */ - for(Function::arg_iterator ai = F_wrapper_api->arg_begin(), ae = F_wrapper_api->arg_end(); - ai != ae; ai++){ - Argument *Arg = &*ai; - if(Arg->hasAttribute(Attribute::In)) - Arg->removeAttr(Attribute::In); - if(Arg->hasAttribute(Attribute::Out)) - Arg->removeAttr(Attribute::Out); - if(Arg->hasAttribute(Attribute::InOut)) - Arg->removeAttr(Attribute::InOut); - } - - // Adding nounwind to generated function : FIXME: needed? - DEBUG(errs() << "Adding nounwind to generated function\n"); - F_wrapper_api->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind); - - // Add llvm_visc_requestTensor calls for every pointer argument of the function - // (they are all expected to be tensors), at the beginning of the function. - // This is the first instruction of the function, insert them before this - Instruction* FI = &*(F_wrapper_api->getEntryBlock().begin()); - - // FIXME: verify that we want 1 as a target device - // In this backend, the target device is GPU, represented by i32 1. 
- ConstantInt *TargetDeviceID = - ConstantInt::get(Type::getInt32Ty(M.getContext()), 1); - - for (Function::arg_iterator ai = F_wrapper_api->arg_begin(), - ae = F_wrapper_api->arg_end(); ai != ae; ++ai) { - Argument* Arg = &*ai; - if (Arg->getType()->isPointerTy()) { - Value *Args[] = {Arg, TargetDeviceID}; - CallInst::Create(hpvm_request_tensor, - ArrayRef<Value*>(Args, 2), - "", FI); - } - } - - CodeGenStateMachine CGM(&M, runtimeModule.get()); - - for (inst_iterator i = inst_begin(F_wrapper_api), e = inst_end(F_wrapper_api); - i != e; ++i) { - Instruction *I = &(*i); - CGM.transition(dyn_cast<IntrinsicInst>(I)); - } - - errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n"; - //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP); - CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP); - -//errs() << "-----------------------------------\n"; -//errs() << *F_wrapper_api << "\n"; - - return; -} - -bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) { - errs() << "\nDFG2LLVM_WrapperAPI PASS\n"; - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - // Get the In Place Analysis Results - InPlaceDFGAnalysis::InPlaceDFGParameter IPP = - (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP(); - // Print results -// printInPlaceDFGParameter(IPP); - - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - - // Visitor for Code Generation Graph Traversal - CGT_WrapperAPI *CGTVisitor = new CGT_WrapperAPI(M, DFG, IPP, - QuantizationInputsFilename, - ConfigurationInputsFilename); - - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - CGTVisitor->visit(rootNode); - } - - //TODO: Edit module epilogue to remove the VISC intrinsic declarations - delete CGTVisitor; - - return true; -} - - -/****************************************************************************** - * Helper functions * - ******************************************************************************/ - -/* Method needs to be called as part of an analysis pre-step, before code * - * generation is run on a node function, so that the HPVM intrinsics are still * - * in place. */ -bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N, - InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) { - - if (Argument *Arg = dyn_cast<Argument>(Op)) { - DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n"); - assert((Arg->getParent() == Fgen) && - "Extra Parameter in body of Function\n"); - // Candidate parameter is a function argument - // In this case, consult the result of in place analysis - // Find position in arg list - unsigned pos = Arg->getArgNo(); - // If this parameter cannot be used for in place operation - // code gen cannot continue - if (IPP.at(N)[pos]) { - DEBUG(errs() << *Arg << "\t: argument, suitable for in place\n"); - return true; - } else { - DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n"); - return false; - } - } - else { - // If it is not an argument, then it needs to be the result of - // another intrinsic. These are new objects that are allocated, - // and consumed by next intrinsic. 
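- // For instance (illustrative IR), in
- //   %t = call i8* @llvm.visc.tensor.add(i8* %a, i8* %b)
- //   %r = call i8* @llvm.visc.tensor.relu(i8* %t)
- // %t is produced and consumed entirely inside the node, so relu may
- // overwrite it in place.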
- DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n");
- if (dyn_cast<IntrinsicInst>(Op)) {
- DEBUG(errs() << *Op << "\t: local, suitable for in place\n");
- return true;
- } else {
- DEBUG(errs() << *Op << "\t: local, not suitable for in place\n");
- return false;
- }
- }
-}
-
-} // End of namespace
-
-char DFG2LLVM_WrapperAPI::ID = 0;
-static RegisterPass<DFG2LLVM_WrapperAPI> X("dfg2llvm-wrapperapi",
- "Dataflow Graph to LLVM for WrapperAPI Pass",
- false /* does not modify the CFG */,
- true /* transformation, *
- * not just analysis */);
-
diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt
deleted file mode 100644
index b4ebb8019d..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt -------*- Conf -*--===;
-;
-; The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-; http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = DFG2LLVM_WrapperAPI
-parent = Transforms
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt b/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt
deleted file mode 100644
index 374f3b26f1..0000000000
--- a/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
- set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMFuseHPVMTensorNodes
- FuseHPVMTensorNodes.cpp
-
- DEPENDS
- intrinsics_gen
- PLUGIN_TOOL
- opt
- )
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
deleted file mode 100644
index d9a3c588b5..0000000000
--- a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
+++ /dev/null
@@ -1,971 +0,0 @@
-//=== FuseHPVMTensorNodes.cpp ===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "FuseTensorNodes"
-
-#include "llvm/IR/ValueMap.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-#include "llvm/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/SupportVISC/VISCUtils.h"
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-using namespace viscUtils;
-
-namespace tensorfuse {
-/*** Classes ***/
-
-/*** Helper Functions ***/
-
-/* Return the constant integer represented by value V */
-static unsigned getNumericValue(Value* V) {
- assert(isa<ConstantInt>(V)
- && "Value indicating the number of arguments should be a constant integer");
- return cast<ConstantInt>(V)->getZExtValue();
-}
-
-/* Query the kind of edge described by a createEdge intrinsic IIe *
- * with respect to node handle IIn */
-static bool isIncomingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) {
- Value* Src = IIe->getArgOperand(1);
- IntrinsicInst* ArgII = dyn_cast<IntrinsicInst>(Src);
- assert(ArgII && "Destination argument of createEdge is not an intrinsic");
- return (ArgII == IIn);
-}
-static bool isOutgoingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) {
- Value* Src = IIe->getArgOperand(0);
- IntrinsicInst* ArgII = dyn_cast<IntrinsicInst>(Src);
- assert(ArgII && "Source argument of createEdge is not an intrinsic");
- return (ArgII == IIn);
-}
-
-/* Populates vector with all incoming edge intrinsics to node II */
-static void getIncomingEdgeIntrinsicList(IntrinsicInst *II,
- std::vector<IntrinsicInst*> &EdgeList) {
- for(Value::user_iterator ui = II->user_begin(),
- ue = II->user_end(); ui!=ue; ++ui) {
- IntrinsicInst* useI = dyn_cast<IntrinsicInst>(*ui);
- assert(useI &&
- "HPVM graph intrinsic used in non HPVM intrinsic instruction\n");
- if (useI->getIntrinsicID() != Intrinsic::visc_createEdge)
- continue; // Skip all non edge intrinsics
-
- // For edge intrinsics, test the destination operand
- if (useI->getOperand(1) == II) { // Argument is the destination
- EdgeList.push_back(useI);
- }
- }
- return;
-}
-
-/* Returns true if argument at position argno is coming from a dataflow edge *
- * in the vector EdgeList */
-static bool isIncomingEdgeArgument(unsigned argno,
- std::vector<IntrinsicInst*> &EdgeList) {
- for (IntrinsicInst *ii : EdgeList) {
- if (getNumericValue(ii->getOperand(4)) == argno)
- return true;
- }
- return false;
-}
-
-// Check that this is a valid HPVM Tensor Node (starts with an HPVM intrinsic)
-// Return the node intrinsic function
-static IntrinsicInst *isValidHPVMTensorNode(DFNode *N) {
- Function *F = N->getFuncPointer();
- IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*(inst_begin(F)));
- assert(II &&
- "HPVM tensor intrinsic expected as first instruction of HPVM tensor node\n");
- assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") &&
- "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
- return II;
-}
-
-// Returns the next node in a node sequence, or NULL if it does not exist.
-// We consider two nodes a sequence if SrcN has a single successor, DstN,
-// and DstN a single predecessor, SrcN (other than the Root node)
-static DFNode *findNextNodeInSequence(DFNode *SrcN) {
-
- DFNode *DstN = NULL;
-
- for (DFNode::successor_iterator si = SrcN->successors_begin(),
- se = SrcN->successors_end(); si != se; ++si) {
- DFNode *N = *si;
- if (N->isDummyNode()) {
- continue;
- }
- if (!DstN)
- DstN = N;
- if (DstN != N) {
- errs() << "Found different destination nodes: no node sequence.\n";
- return NULL;
- }
- }
-
- // If we reach this point, DstN is the unique successor of SrcN
-
- // Now, test that the DstN has a single predecessor except Root (dummy)
- for (DFNode::indfedge_iterator eb = DstN->indfedge_begin(),
- ee = DstN->indfedge_end(); eb != ee; ++eb) {
- DFNode *SN = (*eb)->getSourceDF();
- if ((SN != SrcN) && (!(SN->isDummyNode()))) {
- // Does not satisfy requirement
- return NULL;
- }
- }
-
- return DstN;
-}
-
-/*** Methods ***/
-
-/* Create an identical bind (in or out, depending on the argument intrinsic) *
- * with different src (true) or dst (false) port */
-IntrinsicInst* FuseHPVMTensorNodes::createIdenticalBindWithDifferentPort(
- IntrinsicInst* II, unsigned port, bool srcport) {
- // Argument of the function to be called
- ConstantInt* PortConstant =
- ConstantInt::get(Type::getInt32Ty(II->getContext()), port);
- Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(1);
- Value* DstPort = (srcport) ? II->getArgOperand(2): PortConstant;
-
- Value* BindArgs[] = {II->getArgOperand(0),
- SrcPort,
- DstPort,
- II->getArgOperand(3)
- };
- Function* BindF = II->getCalledFunction();
- CallInst* BindInst = CallInst::Create(BindF,
- ArrayRef<Value*>(BindArgs, 4),
- "");
- IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst);
-
- return newII;
-}
-
-/* Given two createNode intrinsics describing connected nodes, this function *
- * returns the argument list type of the fused function */
-void FuseHPVMTensorNodes::createArgTypes(IntrinsicInst* II1,
- IntrinsicInst* II2,
- std::vector<Type*> &ArgTypes) {
- Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
- Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
- // Arguments of the first node are simply added
- for(auto& arg: F1->getArgumentList()) {
- DEBUG(errs() << arg << "\n");
- ArgTypes.push_back(arg.getType());
- }
-
- // Arguments of the second node are added only if they are not the output of
- // the previous node
-
- // Find all incoming edges.
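- // (Operand layout of createEdge, as used throughout this file:
- // operand 0 = source node, operand 1 = destination node,
- // operand 3 = source port, operand 4 = destination port.)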
- std::vector<IntrinsicInst *> IncomingEdgeList; - getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); - - // Their source must be the first fusion node, otherwise they would not have - // been fusion candidates - for (IntrinsicInst *ii : IncomingEdgeList) { - assert((ii->getOperand(0) == II1) && "Unexpected source operand\n"); - } - - // Add argument type to the new function only if it is not incoming from - // an edge - for(auto& arg: F2->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - unsigned inport = arg.getArgNo(); - if (isIncomingEdgeArgument(inport, IncomingEdgeList)) - continue; - ArgTypes.push_back(arg.getType()); - } -} - -/* Get the return type of the function for fused node II1-II2 */ -StructType* FuseHPVMTensorNodes::createReturnType(IntrinsicInst* II1, - IntrinsicInst* II2) { - Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); - Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); - - // Based on the HPVM tensor node assumptions and the patterns we want to - // support, when two nodes are fused the result will always be the result - // of the second node. - StructType* F1RetTy = dyn_cast<StructType>(F1->getReturnType()); - assert(F1RetTy && "Return Type must always be a struct"); - StructType* F2RetTy = dyn_cast<StructType>(F2->getReturnType()); - assert(F2RetTy && "Return Type must always be a struct"); - - return F2RetTy; -} - -/* Copy argument names, from functions of II1 and II2 to F */ -void FuseHPVMTensorNodes::copyArgumentNames(IntrinsicInst* II1, - IntrinsicInst* II2, - Function* F) { - Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); - Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); - - Function::arg_iterator dest_it = F->arg_begin(); - - // Argument names of the first node are simply copied - for(auto& arg: F1->getArgumentList()) { - dest_it->setName("s_" + arg.getName()); - dest_it++; - } - - // For the second node, we ignore those arguments that are incoming edges - // (from II1) - // Find all incoming edges. - std::vector<IntrinsicInst *> IncomingEdgeList; - getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); - - // Their source must be the first fusion node, otherwise they would not have - // been fusion candidates - for (IntrinsicInst *ii : IncomingEdgeList) { - assert((ii->getOperand(0) == II1) && "Unexpected source operand\n"); - } - - // Copy argument name to the new function only if it is not incoming from - // an edge - for(auto& arg: F2->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - unsigned inport = arg.getArgNo(); - if (isIncomingEdgeArgument(inport, IncomingEdgeList)) - continue; - - dest_it->setName("d_" + arg.getName()); - dest_it++; - } - assert((dest_it == F->arg_end()) && - "Argument list of fused function not fully traversed\n"); - return; -} - -/* Copy attributes, from functions of II1 and II2 to F */ -void FuseHPVMTensorNodes::copyAttrList(IntrinsicInst* II1, - IntrinsicInst* II2, - Function* F) { - Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); - Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); - - Function::arg_iterator f1_ai = F1->arg_begin(), f1_ae = F1->arg_end(); - Function::arg_iterator f2_ai = F2->arg_begin(), f2_ae = F2->arg_end(); - Function::arg_iterator f_ai = F->arg_begin(), f_ae = F->arg_end(); - - // For the second node, we have to ignore the arguments that are incoming - // edges (from II1) - // Find all incoming edges. 
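- // (Below, attribute indices are getArgNo()+1 because, in this LLVM
- // version's AttributeSet, index 0 refers to the return value and
- // parameter attributes are 1-based.)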
- std::vector<IntrinsicInst *> IncomingEdgeList;
- getIncomingEdgeIntrinsicList(II2, IncomingEdgeList);
-
- // Their source must be the first fusion node, otherwise they would not have
- // been fusion candidates
- for (IntrinsicInst *ii : IncomingEdgeList) {
- assert((ii->getOperand(0) == II1) && "Unexpected source operand\n");
- }
-
- // Copy attributes of F1
- for(; f1_ai != f1_ae && f_ai != f_ae; ++f1_ai, ++f_ai) {
- AttributeSet AS = F1->getAttributes();
- DEBUG(errs() << "Copying attributes from "
- << F1->getName() << " at " << f1_ai->getArgNo() << "\n");
- AttrBuilder AB(AS, f1_ai->getArgNo()+1);
- AttributeSet argAS = AttributeSet::get(F1->getContext(),
- f_ai->getArgNo()+1, AB);
- F->addAttributes(f_ai->getArgNo()+1, argAS);
- }
-
- // Copy needed attributes of F2
- for(; f2_ai != f2_ae && f_ai != f_ae; ++f2_ai) {
- unsigned inport = f2_ai->getArgNo();
- if (isIncomingEdgeArgument(inport, IncomingEdgeList))
- continue;
-
- AttributeSet AS = F2->getAttributes();
- DEBUG(errs() << "Copying attributes from "
- << F2->getName() << " at " << f2_ai->getArgNo() << "\n");
- AttrBuilder AB(AS, f2_ai->getArgNo()+1);
- AttributeSet argAS = AttributeSet::get(F2->getContext(),
- f_ai->getArgNo()+1, AB);
- F->addAttributes(f_ai->getArgNo()+1, argAS);
- ++f_ai;
- }
- return;
-}
-
-/* Creates and inserts an empty function of the right type for the fused node */
-Function* FuseHPVMTensorNodes::createEmptyDFNodeFunction(IntrinsicInst* II1,
- IntrinsicInst* II2,
- Module &M) {
- Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
- Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
- DEBUG(errs() << "Constructing argument list\n");
- // Construct argument list
- std::vector<Type*> ArgTypes;
- createArgTypes(II1, II2, ArgTypes);
-
- DEBUG(errs() << "Constructing return type\n");
- // Construct return type
- StructType* FRetTy = createReturnType(II1, II2);
-
- FunctionType* FTy = FunctionType::get(FRetTy, ArgTypes, false);
- // Create a function with the new type
- Function* F = Function::Create(FTy, F1->getLinkage(),
- F1->getName()+"_"+F2->getName(), &M);
-
- DEBUG(errs() << "Copying argument names\n");
- // Copy argument names from original functions
- copyArgumentNames(II1, II2, F);
- // Copy argument attributes from original functions
- copyAttrList(II1, II2, F);
-
- return F;
-}
-
-/* Inline first node function, updating required mappings *
- * - F1: first node function *
- * - M: module containing the node function *
- * - Ffused: fused node function *
- * - VMap: maps values used in the body of F1 to those that must be used in *
- * the body of the fused function instead *
- * - OutVs: This maps the output struct field index to the stored value */
-void FuseHPVMTensorNodes::inlineFirstNodeFunction(Module &M, Function *F1,
- Function *Ffused,
- ValueMap<Value*, Value*> &VMap,
- std::vector<Value*> &OutVs) {
-
- ReturnInst *RI = cast<ReturnInst>(Ffused->getEntryBlock().getTerminator());
-
- inst_iterator f1_i = inst_begin(F1);
- // First, we copy the HPVM intrinsics of F1 into Ffused, applying the mapping
- for (inst_iterator f1_e = inst_end(F1); f1_i != f1_e; ++f1_i) {
- Instruction *I = &(*f1_i);
- if (!(BuildDFG::isViscIntrinsic(I))) {
- // We are done with the node computation
- break;
- }
-
- IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
- assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
- && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
- std::vector<Value*> Args;
- for(unsigned i = 0; i < 
II->getNumArgOperands(); i++) { - Value *V = II->getArgOperand(i); - if (isa<Constant>(V)) { // Constants can be reused - Args.push_back(V); - } else { - assert((VMap.find(V) != VMap.end()) && - "Attempted to use value without existing mapping in VMap"); - Args.push_back(VMap[V]); - } - } - Function *F = Intrinsic::getDeclaration(&M, II->getIntrinsicID()); - CallInst* CI = - CallInst::Create(F, Args, - F->getReturnType()->isVoidTy()? "" : "s_"+II->getName(), RI); - // Update the map with the newly created value - VMap[II] = CI; - } - - // We continue with gathering information about the return values - for (inst_iterator f1_e = inst_end(F1); f1_i != f1_e; ++f1_i) { - Instruction *I = &(*f1_i); - InsertValueInst* IV = dyn_cast<InsertValueInst>(I); - if (!IV) { - // End of insertvalue instructions. This should be a return statement - assert((dyn_cast<ReturnInst>(I)) && "Unexpected Instruction\n"); - break; // Done processing this function - } - OutVs.push_back(IV->getOperand(1)); - } - return; -} - -/* Inline second node function, updating required mappings * - * - F2: second node function * - * - M: module containing the node function * - * - Ffused: fused node function * - * - VMap: maps values used in the body of F2 to those that mst be used in * - the body of the fused function instead */ -void FuseHPVMTensorNodes::inlineSecondNodeFunction(Module &M, Function *F2, - Function *Ffused, ValueMap<Value*, Value*> &VMap) { - - ReturnInst *RI = cast<ReturnInst>(Ffused->getEntryBlock().getTerminator()); - - // Copy the body of F2 into Ffused, applying the mapping - inst_iterator f2_i = inst_begin(F2); - for (inst_iterator f2_e = inst_end(F2); f2_i != f2_e; ++f2_i) { - Instruction *I = &(*f2_i); - if ((BuildDFG::isViscIntrinsic(I))) { - IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); - assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") - && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); - - std::vector<Value*> Args; - for(unsigned i = 0; i < II->getNumArgOperands(); i++) { - Value *V = II->getArgOperand(i); - if (isa<Constant>(V)) { // Constants can be reused - Args.push_back(V); - } else { - assert((VMap.find(V) != VMap.end()) && - "Attempted to use value without existing mapping in VMap"); - Args.push_back(VMap[V]); - } - } - Function *F = Intrinsic::getDeclaration(&M, II->getIntrinsicID()); - CallInst* CI = - CallInst::Create(F, Args, - F->getReturnType()->isVoidTy()? "" : II->getName(), - RI); - // Update the map with the newly created value - VMap[II] = CI; - } else if (InsertValueInst* IV = dyn_cast<InsertValueInst>(I)) { - Value *AggOp = IV->getAggregateOperand(); - Value *InsOp = IV->getInsertedValueOperand(); - assert(((VMap.find(AggOp) != VMap.end()) || - (isa<Constant>(AggOp)) ) && - "Attempted to use value without existing mapping in VMap"); - assert(((VMap.find(InsOp) != VMap.end()) || - (isa<Constant>(InsOp))) && - "Attempted to use value without existing mapping in VMap"); - InsertValueInst* IVI = InsertValueInst::Create( - (isa<Constant>(AggOp)) ? AggOp : VMap[AggOp], - (isa<Constant>(InsOp)) ? 
InsOp : VMap[InsOp],
- IV->getIndices(),
- IV->getName(),
- RI);
- // Update the map with the newly created value
- VMap[IV] = IVI;
- } else {
- ReturnInst* RetI = dyn_cast<ReturnInst>(I);
- assert(RetI && "Unexpected Instruction\n");
- Value *RetVal = RetI->getOperand(0);
- ReturnInst *newRI = ReturnInst::Create(Ffused->getContext(),
- VMap[RetVal]);
- ReplaceInstWithInst(RI, newRI);
- }
- }
- return;
-}
-
-/* Create function of leaf node after fusion *
- * - create type *
- * - create empty function of the type *
- * - inline body of first function (applying and updating appropriate *
- * mappings) *
- * - inline body of second function (applying and updating appropriate *
- * mappings) */
-Function* FuseHPVMTensorNodes::createLeafDFNodeFunction(IntrinsicInst* II1,
- IntrinsicInst* II2,
- Module &M) {
- DEBUG(errs() << "Creating function signature\n");
-
- /* Create empty node function of the correct type */
- Function* Ffused = createEmptyDFNodeFunction(II1, II2, M);
-
- // Get return type, needed for building the assignments to the return struct
- StructType* FfusedRetTy = cast<StructType>(Ffused->getReturnType());
-
- /* Mapping information required for using the correct values in the body of *
- * the fused node function */
-
- // This map maps the values used in the original function bodies with
- // the ones that need to be used in the fused function body.
- ValueMap<Value*, Value*> FusedValueMap;
-
- // Intermediate information saved for return values of first node function
- // This maps the output port to the value returned through the outgoing edge
- std::vector<Value*> OutValues;
-
- DEBUG(errs() << "Creating function body\n");
-
- // Add a basic block to the new, empty function
- BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", Ffused);
- ReturnInst::Create(M.getContext(), UndefValue::get(FfusedRetTy), BB);
-
- // Get the node functions
- Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
- Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
- // Initially, update FusedValueMap: it is populated with the arguments of F1
- Function::arg_iterator fused_arg_it = Ffused->arg_begin();
- // Argument names of the first node are simply copied
- for(auto& arg: F1->getArgumentList()) {
- FusedValueMap[&arg] = &*fused_arg_it;
- ++fused_arg_it;
- }
-
-// for(const auto& v: FusedValueMap) {
-// errs() << "key = " << *(v.first) << "\t";
-// errs() << "value = " << *(v.second) << "\n";
-// }
-
- // Invoke function that inlines F1 into Ffused, using and updating mappings
- inlineFirstNodeFunction(M, F1, Ffused, FusedValueMap, OutValues);
-
- // Compute mapping between inputs of F2 and outputs of F1
- std::vector<IntrinsicInst *> IncomingEdgeList;
- getIncomingEdgeIntrinsicList(II2, IncomingEdgeList);
- std::vector<unsigned> PortMap(IncomingEdgeList.size(), 0);
- for (IntrinsicInst * ii : IncomingEdgeList) {
- unsigned srcPort = getNumericValue(ii->getOperand(3));
- unsigned dstPort = getNumericValue(ii->getOperand(4));
- PortMap[dstPort] = srcPort;
- }
-
- // FusedValueMap is now populated with the arguments of F2 as well
- for(auto& arg: F2->getArgumentList()) {
- DEBUG(errs() << arg << "\n");
- unsigned inport = arg.getArgNo();
- if (isIncomingEdgeArgument(inport, IncomingEdgeList)) {
- // Get the mappings of the return values of F1 if incoming edge argument
- Value *V = OutValues[PortMap[inport]];
- FusedValueMap[&arg] = (isa<Constant>(V)) ? 
V: FusedValueMap[V]; - } - else { - // Get new argument otherwise - FusedValueMap[&arg] = &*fused_arg_it; - ++fused_arg_it; - } - } - - // Invoke function that inlines F2 into Ffused, using and updating mappings - inlineSecondNodeFunction(M, F2, Ffused, FusedValueMap); - - // Done with fused node function - return Ffused; -} - -/* Updates parent of fused nodes to use the new node intrinsic */ -void FuseHPVMTensorNodes::updateParentNodeFunction(IntrinsicInst* II1, - IntrinsicInst* II2, - IntrinsicInst* IInew) { - - // Compute the required shifting of positions for edges/binds to the second - // fusion node. No shifting is required for the first fusion node. - Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); - Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); - std::vector<unsigned> ShiftMap(F2->getFunctionType()->getNumParams(), 0); - unsigned shiftCount = F1->getFunctionType()->getNumParams(); - - // Find all incoming edges. - std::vector<IntrinsicInst *> IncomingEdgeList; - getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); - // Their source must be the first fusion node, otherwise they would not have - // been fusion candidates - for (IntrinsicInst *ii : IncomingEdgeList) { - assert((ii->getOperand(0) == II1) && "Unexpected source operand\n"); - } - - // Compute shift map for n2: maps position in F2 arg list to Ffused arg list - for(auto& arg: F2->getArgumentList()) { - DEBUG(errs() << arg << "\n"); - unsigned inport = arg.getArgNo(); - if (isIncomingEdgeArgument(inport, IncomingEdgeList)) - continue; - - ShiftMap[inport] = shiftCount; - shiftCount++; - } - - std::vector<IntrinsicInst*> IItoRemove; - - // First, iterate over uses of the first node's createNode intrinsic - for (Value::user_iterator i = II1->user_begin(), ie = II1->user_end(); - i != ie; ++i) { - Instruction *VI = dyn_cast<Instruction>(*i); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); - assert(II && "Use of a node handle outside of a visc intrinsic\n"); - - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createEdge: - { - if (isOutgoingEdgeIntrinsic(II,II1)) { - assert(isIncomingEdgeIntrinsic(II,II2) && - "Outgoing edge of node 1 should only go to node 2\n"); - IItoRemove.push_back(II); - } - } - break; - case Intrinsic::visc_bind_input: - { - } - break; - case Intrinsic::visc_bind_output: - { - assert(false && - "Source node of node fusion not expected in bind.out\n"); - } - break; - default: - llvm_unreachable("Unknown use of HPVM createNode handle\n"); - break; - } - } - - // Delete gathered instructions - they are the edges between n1-n2 - for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(), - ie = IItoRemove.end(); ib != ie; ++ib) { - DEBUG(errs() << "Erasing: " << **ib << "\n"); - (*ib)->eraseFromParent(); - } - II1->replaceAllUsesWith(IInew); - II1->eraseFromParent(); - - IItoRemove.clear(); - - // Then, iterate over uses of the second node's createNode intrinsic - for (Value::user_iterator i = II2->user_begin(), ie = II2->user_end(); - i != ie; ++i) { - Instruction *VI = dyn_cast<Instruction>(*i); - IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); - assert(II && "Use of a node handle outside of a visc intrinsic\n"); - - switch(II->getIntrinsicID()) { - case Intrinsic::visc_createEdge: - { - assert(isOutgoingEdgeIntrinsic(II,II2) && - "Node 2 is expected to have only outgoing edges at this point\n"); - } - break; - case Intrinsic::visc_bind_input: - { - /* The index must be updated to the matching argument position of * - * the fused 
function using ShiftMap */
- unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
- IntrinsicInst *newII =
- createIdenticalBindWithDifferentPort(II,
- ShiftMap[dstPos],
- false);
- newII->insertBefore(II);
- IItoRemove.push_back(II);
- }
- break;
- case Intrinsic::visc_bind_output:
- {
- assert(false &&
- "Source node of node fusion not expected in bind.out\n");
- }
- break;
- default:
- llvm_unreachable("Unknown use of HPVM createNode handle\n");
- break;
- }
- }
-
- // Delete gathered instructions - they are the old bindings of n2
- for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(),
- ie = IItoRemove.end(); ib != ie; ++ib) {
- DEBUG(errs() << "Erasing: " << **ib << "\n");
- (*ib)->eraseFromParent();
- }
- II2->replaceAllUsesWith(IInew);
- II2->eraseFromParent();
-
- return;
-}
-
-/* Performs all operations required at the IR level for fusion of HPVM tensor *
- * nodes with intrinsic instructions II1 and II2 *
- * - Creates fused node function *
- * - Creates createNode intrinsic for it and returns it *
- * - Updates parent function: *
- * - - adds new intrinsic *
- * - - edges and binds consistently use the new intrinsic *
- * - Removes old functions */
-IntrinsicInst* FuseHPVMTensorNodes::FuseHPVMTensorNodesStep(IntrinsicInst* II1,
- IntrinsicInst* II2,
- Module &M) {
- // Get the node functions
- Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
- Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
- // Create fused node function
- Function *Ffused = createLeafDFNodeFunction(II1, II2, M);
- addHint(Ffused, getPreferredTarget(F1));
-
- // FIX PARENT DFNode'S FUNCTION
-
- // Generate createNode Intrinsic for fused node and insert it
- Function* CreateNodeF = Intrinsic::getDeclaration(&M,
- Intrinsic::visc_createNode);
- Constant* Fp = ConstantExpr::getPointerCast(Ffused,
- Type::getInt8PtrTy(M.getContext()));
- CallInst *CI = CallInst::Create(CreateNodeF,
- ArrayRef<Value*>(Fp),
- Ffused->getName()+".node");
- IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI);
- CreateNodeII->insertBefore(II1);
-
- // By the assumptions about the fusion pattern structure, all edges that have
- // II1 as source will have II2 as destination and vice versa.
- // We can simply delete them.
-
- // All createEdge intrinsics with destination argument = II1 need to use
- // CreateNodeII instead.
- // Similarly with bind.in
-
- // All createEdge intrinsics with source argument = II1 need to use
- // CreateNodeII instead
- // Similarly with bind.out
-
- // By the assumptions about the fusion pattern structure, the first node
- // cannot be the argument of a bind.out
- // The second node can be the argument of a bind.in.
- // For the bind.in, we need to adjust the destination port.
- updateParentNodeFunction(II1, II2, CreateNodeII);
-
- // Remove old node functions
- removeHint(F1, getPreferredTarget(F1));
- removeHint(F2, getPreferredTarget(F2));
- F1->replaceAllUsesWith(UndefValue::get(F1->getType()));
- F1->eraseFromParent();
- F2->replaceAllUsesWith(UndefValue::get(F2->getType()));
- F2->eraseFromParent();
-
- return CreateNodeII;
-}
-
-/* Fuse node sequence described by createNode intrinsics in IIs. *
- * Contents of IIs are cleared. 
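- * Fusion is pairwise, matching the loop below: for {n1, n2, n3} it *
- * computes fuse(n1, n2) = n12, then fuse(n12, n3) = n123. 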
*/
-void FuseHPVMTensorNodes::FuseHPVMTensorNodeSequence(
- std::vector<IntrinsicInst*> &IIs, Module &M) {
- for (IntrinsicInst *II : IIs) {
- assert((II->getIntrinsicID() == Intrinsic::visc_createNode) &&
- "Expected createNode intrinsic in fuse intrinsic sequence\n");
- }
-
- if (IIs.size() < 2) {
- errs() << "Warning: Attempted to fuse fewer than 2 nodes\n";
- return;
- }
-
- for (unsigned i = 0; i + 1 < IIs.size(); i++) {
- IntrinsicInst *II1 = IIs[i];
- IntrinsicInst *II2 = IIs[i+1];
- IIs[i+1] = FuseHPVMTensorNodesStep(II1, II2, M);
- }
- IIs.clear();
- return;
-}
-
-/* Run method for FuseHPVMTensorNodes class, simply invokes fusion of all the *
- * sequences in member variable FTs. */
-void FuseHPVMTensorNodes::run(Module &M, FusionTargets &FTs) {
- for (unsigned i = 0; i < FTs.size(); i++) {
- FuseHPVMTensorNodeSequence(FTs[i], M);
- }
- return;
-}
-
-// Print Fusion Targets. The argument vector contains createNode intrinsics
-// of nodes to be fused.
-void FuseHPVMTensorNodes::printFusionTargets(FusionTargets &FTs) {
- errs() << "Print Fusion Targets\n";
- errs() << "Found " << FTs.size() << " targets\n";
- for (FuseHPVMTensorNodes::FusionTargets::iterator ii = FTs.begin(),
- ie = FTs.end(); ii != ie; ++ii) {
- errs() << "Target:\n";
- std::vector<IntrinsicInst*> IIv = *ii;
- for (std::vector<IntrinsicInst*>::iterator pi = IIv.begin(),
- pe = IIv.end(); pi != pe; ++pi) {
- errs() << "\t" << *((*pi)->getOperand(0)) << "\n";
- }
- }
- return;
-}
-
-void FindFusionTargetsTraversal::codeGen(DFInternalNode *N) {
- DEBUG(errs() << "Skipping Internal Node: "
- << N->getFuncPointer()->getName() << "\n");
- return;
-}
-
-void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
- DEBUG(errs() << "Inside leaf node: "
- << N->getFuncPointer()->getName() << "\n");
-
- // Skip fusion check if it is a dummy node
- if(N->isDummyNode()) {
- DEBUG(errs() << "Skipping dummy node\n");
- return;
- }
-
-// if(N->getTargetHint() != visc::PROMISE_TARGET) {
- if(!preferredTargetIncludes(N, visc::PROMISE_TARGET)) {
- // Only fuse if we plan to target PROMISE
- // The CUDNN backend would be able to generate calls for the fused node,
- // but not the other way around
- DEBUG(errs() << "No PROMISE hint. Skipping node: "
- << N->getFuncPointer()->getName() << "\n");
- return;
- }
-
- visc::Target StartNodePreferredTarget = getPreferredTarget(N);
- // Make sure that this is a valid HPVM Tensor Node
- // Find first instruction, and check that it is an HPVM tensor intrinsic
- IntrinsicInst *II = isValidHPVMTensorNode(N);
-
- std::vector<IntrinsicInst*> CurrentNodeSequence;
-
- switch(II->getIntrinsicID()) {
- case Intrinsic::visc_tensor_convolution:
- { // Found beginning of pattern conv-bias-activation-pooling.
- // Look for the rest
- CurrentNodeSequence.push_back(N->getInstruction());
-
- // Look for bias
- DFNode *SN = findNextNodeInSequence(N);
- if (!SN) {
- return; // Did not find a node sequence starting at N. Simply return.
- }
- if (getPreferredTarget(SN) != StartNodePreferredTarget) {
- return; // Node in sequence has different hint. Simply return.
- }
- IntrinsicInst *SII = isValidHPVMTensorNode(SN);
- if (SII->getIntrinsicID() != Intrinsic::visc_tensor_add) {
- // Successor is not the bias operation, thus does not fit the pattern.
- return;
- }
- // Otherwise, push this node to the current sequence
- CurrentNodeSequence.push_back(SN->getInstruction());
-
- // This is a valid sequence. 
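- // Schematically, the full pattern being matched is
- //   conv -> add [-> relu | clipped_relu | tanh] [-> pool_max/min/mean]
- // where the bracketed stages are optional and are checked next.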
- // We still need to fuse activation and/or pooling if we find them
- // Continue with next node, looking for activation (relu, clipped relu, tanh)
- SN = findNextNodeInSequence(SN);
- if (!SN) {
- // Did not find a node sequence starting at N. Use current sequence.
- break;
- }
- if (getPreferredTarget(SN) != StartNodePreferredTarget) {
- break; // Node in sequence has different hint. Use current sequence.
- }
- SII = isValidHPVMTensorNode(SN);
-
- if ((SII->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) ||
- (SII->getIntrinsicID() == Intrinsic::visc_tensor_relu) ||
- (SII->getIntrinsicID() == Intrinsic::visc_tensor_tanh)) {
- // Successor is activation. Push this node to the current sequence.
- CurrentNodeSequence.push_back(SN->getInstruction());
-
- // Will continue, looking for pooling in the next node
- SN = findNextNodeInSequence(SN);
- if (!SN) {
- break; // No node in sequence. Use currently found sequence.
- }
- if (getPreferredTarget(SN) != StartNodePreferredTarget) {
- break; // Node in sequence has different hint. Use current sequence.
- }
- SII = isValidHPVMTensorNode(SN);
- } //else {} // Look for pooling in this node
-
- if ((SII->getIntrinsicID() == Intrinsic::visc_tensor_pool_max) ||
- (SII->getIntrinsicID() == Intrinsic::visc_tensor_pool_min) ||
- (SII->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean)) {
- // Successor is a pool operation. Use currently found sequence.
- CurrentNodeSequence.push_back(SN->getInstruction());
- }
- }
- break;
- case Intrinsic::visc_tensor_mul:
- { // Found beginning of pattern gemm-bias-activation. Look for the rest
- CurrentNodeSequence.push_back(N->getInstruction());
- // Look for bias
- DFNode *SN = findNextNodeInSequence(N);
- if (!SN) {
- return; // Did not find a node sequence starting at N. Simply return.
- }
- if (getPreferredTarget(SN) != StartNodePreferredTarget) {
- return; // Node in sequence has different hint. Simply return.
- }
- IntrinsicInst *SII = isValidHPVMTensorNode(SN);
- if (SII->getIntrinsicID() != Intrinsic::visc_tensor_add) {
- // Successor is not the bias operation, thus does not fit the pattern.
- return;
- }
- // Otherwise, push this node to the current sequence
- CurrentNodeSequence.push_back(SN->getInstruction());
- // This is a possible fuse target, gemm-add.
- // We need to reach the end of the function, where the found sequence
- // is added.
-
- // If the next operation is activation, we fuse that as well.
- // Continue with next node, looking for activation (relu, clipped relu, tanh)
- SN = findNextNodeInSequence(SN);
- if (SN) {
- if (getPreferredTarget(SN) == StartNodePreferredTarget) {
- SII = isValidHPVMTensorNode(SN);
- if ((SII->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) ||
- (SII->getIntrinsicID() == Intrinsic::visc_tensor_relu) ||
- (SII->getIntrinsicID() == Intrinsic::visc_tensor_tanh)) {
- // We found activation in sequence. Push in vector as well.
- CurrentNodeSequence.push_back(SN->getInstruction());
- }
- }
- }
- }
- break;
- default:
- DEBUG(errs() << "No pattern begins at this node\n");
- break;
- }
-
- if (CurrentNodeSequence.size() != 0) {
- // A sequence was found. Store the node sequence in FTs. 
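- // (Each entry of FTs is one fusable chain of createNode intrinsics;
- // FuseHPVMTensorNodes::run() later fuses every stored chain.)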
- FTs.push_back(CurrentNodeSequence); - } - - return; -} - -bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) { - errs() << "\nFUSE HPVM TENSOR NODES PASS\n"; - -// Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - std::vector<DFInternalNode*> Roots = DFG.getRoots(); - // Visitor for Fuse Target Detection Graph Traversal - FindFusionTargetsTraversal *FTTVisitor = - new FindFusionTargetsTraversal(M, DFG); - - errs() << "Find targets\n"; - // Iterate over all the DFGs and produce code for each one of them - for (auto rootNode: Roots) { - // Initiate code generation for root DFNode - FTTVisitor->visit(rootNode); - } - - FuseHPVMTensorNodes::FusionTargets &FTs = FTTVisitor->getFusionTargets(); - - FuseHPVMTensorNodes Fuse; -// Fuse.printFusionTargets(FTs); - - Fuse.run(M, FTs); - - delete FTTVisitor; - - return true; -} - -char FuseHPVMTensorNodesWrapper::ID = 0; -static RegisterPass<FuseHPVMTensorNodesWrapper> X("hpvm-fuse", - "Fuse HPVM Tensor Nodes Pass", - false /* does not modify the CFG */, - true /* transformation, not just analysis */); - -} // End of namespace - diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt b/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt deleted file mode 100644 index 55a6ee5150..0000000000 --- a/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/LocalMem/LLVMBuild.txt ------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = FuseHPVMTensorNodes -parent = Transforms diff --git a/llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt b/llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt deleted file mode 100644 index 2b6d41bd70..0000000000 --- a/llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt +++ /dev/null @@ -1,12 +0,0 @@ -if(WIN32 OR CYGWIN) - set(LLVM_LINK_COMPONENTS Core Support) -endif() - -add_llvm_loadable_module( InsertApproxInfo - InsertApproxInfo.cpp - - DEPENDS - intrinsics_gen - PLUGIN_TOOL - opt - ) diff --git a/llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp b/llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp deleted file mode 100644 index bde4ef8907..0000000000 --- a/llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp +++ /dev/null @@ -1,498 +0,0 @@ -//===------------------------ InPlaceDFGAnalysis.cpp ----------------------===// -// -// -// -// The LLVM Compiler Infrastructure -// -// -// -// This file is distributed under the University of Illinois Open Source -// -// License. See LICENSE.TXT for details. 
-// -// -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "InsertApproxInfo" - -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" -#include "llvm/SupportVISC/DFG2LLVM.h" -#include "llvm/IR/InstrTypes.h" -#include <unordered_map> -#include <dirent.h> -#include <stdio.h> -#include <sstream> -#include <fstream> - - -using namespace llvm; -using namespace builddfg; -using namespace dfg2llvm; -using namespace inplacedfg; - - -namespace { - -static cl::opt<std::string> dir_name("results-dir", cl::desc(" Name of directory with Autotuner results ")); - - -struct ApproxMetrics{ - std::string op_name; - std::string category; - unsigned int rank; // rank given by autotuner - double approx_level; - // Relative L-norm metrics - double relative_l1; - double relative_l2; - double relative_linf; - // Mean L-norm metrics - double mean_l1; - double mean_l2; - double mean_linf; -}; - - - -struct InsertApproxInfoWrapperPass : public ModulePass { - static char ID; // Pass identification, replacement for typeid - InsertApproxInfoWrapperPass() : ModulePass(ID) {} - -public: - // Functions - bool runOnModule(Module &M); - void getAnalysisUsage(AnalysisUsage &AU) const; -}; - - -// Visitor for Code generation traversal (tree traversal for now) -class InsertApproxInfo : public CodeGenTraversal { - -private: - // Virtual Functions - void init() {} - void initRuntimeAPI() {} - void codeGen(DFInternalNode* N); - void codeGen(DFLeafNode* N); - void loadTrainedApproxMetrics(std::string dir_path); - void loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category); - void loadMetricsFromDir(std::string dir_path, std::string category); - void readApproxValues(const std::string line, ApproxMetrics* approx_metrics); - void initIntrinsicNames(); - void initGlobalStrings(); - - // private data - std::unordered_map<std::string, std::string> intrinsics_map; - std::unordered_map<std::string, std::vector<ApproxMetrics*>> operation_metrics; - GlobalVariable* rank_str; - GlobalVariable* category_str; - GlobalVariable* mean_l1_str; - GlobalVariable* mean_l2_str; - GlobalVariable* mean_linf_str; - GlobalVariable* rel_l1_str; - GlobalVariable* rel_l2_str; - GlobalVariable* rel_linf_str; - - - // Tracks the id of the tensor op processed - unsigned int currentID; - -public: - // Constructor - InsertApproxInfo(Module &_M, BuildDFG &_DFG); - - //void run(Module &M, BuildDFG &DFG); - void run(std::string dir_path); - -}; - - - -void InsertApproxInfo::initIntrinsicNames(){ - - intrinsics_map["llvm.visc.tensor.convolution"] = "tensorConv"; - intrinsics_map["llvm.visc.tensor.mul"] = "tensorGemm"; - intrinsics_map["llvm.visc.tensor.add"] = "tensorAdd"; - intrinsics_map["llvm.visc.tensor.pool.max"] = "tensorPooling"; - intrinsics_map["llvm.visc.tensor.tanh"] = "tensorTanh"; -} - - -void InsertApproxInfo::initGlobalStrings(){ - - /**** Creating global constant strings for each approximation metric type *******/ - - std::string rank_string = "rank"; - Constant* stringConst = ConstantDataArray::getString(M.getContext(), StringRef(rank_string.c_str()), true); - rank_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - std::string category_string = "category"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(category_string.c_str()), true); - category_str = new GlobalVariable(M, 
stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - // Mean l-norm metrics - std::string metric_string = "mean_l1"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - mean_l1_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "mean_l2"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - mean_l2_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "mean_linf"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - mean_linf_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - // Relative l-norm metrics - metric_string = "rel_l1"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - rel_l1_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "rel_l2"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - rel_l2_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - - metric_string = "rel_linf"; - stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true); - rel_linf_str = new GlobalVariable(M, stringConst->getType(), true, - GlobalValue::ExternalLinkage, stringConst, ""); - -} - - -InsertApproxInfo::InsertApproxInfo(Module &_M, BuildDFG &_DFG) : - CodeGenTraversal(_M, _DFG){ - - currentID = 1; - - initIntrinsicNames(); - initGlobalStrings(); -} - - -void InsertApproxInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired<BuildDFG>(); - AU.addPreserved<BuildDFG>(); -} - - -bool InsertApproxInfoWrapperPass::runOnModule(Module &M) { - - std::string dir_path = dir_name.getValue(); - // Get the BuildDFG Analysis Results: - // - Dataflow graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - - InsertApproxInfo IApprox(M, DFG); - IApprox.run(dir_path); - - return false; -} - - -void InsertApproxInfo::readApproxValues(const std::string line, ApproxMetrics* approx_metrics){ - - std::istringstream in(line); - std::string op_name; - - float approx_level; - - float mean_l1; - float mean_l2; - float mean_linf; - - float relative_l1; - float relative_l2; - float relative_linf; - - in >> op_name; - in >> approx_level; - - in >> mean_l1; - in >> mean_l2; - in >> mean_linf; - - in >> relative_l1; - in >> relative_l2; - in >> relative_linf; - - printf("\n *** op_name = %s \n", op_name.c_str()); - printf("approx_level = %f \n", approx_level); - printf("relative_l1 = %f \n", relative_l1); - printf("relative_l2 = %f \n", relative_l2); - printf("relative_linf = %f \n", relative_linf); - printf("mean_l1 = %f \n", mean_l1); - printf("mean_l2 = %f \n", mean_l2); - printf("mean_linf = %f \n", mean_linf); - - approx_metrics->op_name = op_name; - approx_metrics->approx_level = approx_level; - approx_metrics->mean_l1 = mean_l1; - approx_metrics->mean_l2 = mean_l2; - approx_metrics->mean_linf = mean_linf; - approx_metrics->relative_l1 = relative_l1; - approx_metrics->relative_l2 = relative_l2; - approx_metrics->relative_linf = relative_linf; - -} - - -unsigned int getFileRank(std::string file_path){ - - char file_name[100]; // Assuming no file 
-  strcpy(file_name, file_path.c_str());
-
-  char* pch = strtok(file_name, "_");
-  char* last_pch = NULL;
-  while(pch != NULL){
-    last_pch = pch;
-    pch = strtok(NULL, "_");
-  }
-
-  printf("NOTE: ****** last_pch = %s \n", last_pch);
-
-  size_t sz;
-  int rank = std::stoi(last_pch, &sz);
-
-  return rank + 1; // NOTE: Adding 1 to start ranks with '1'
-}
-
-
-
-void InsertApproxInfo::loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category){
-
-  std::string full_path = dir_path + "/" + file_path;
-  printf("full_path = %s \n", full_path.c_str());
-  std::ifstream infile(full_path.c_str());
-  std::string line;
-
-  unsigned int it_count = 0;
-  while(std::getline(infile, line)){
-
-    // Skip the first line, which carries confidence information
-    if(it_count > 0){
-      ApproxMetrics* approx_metrics = new ApproxMetrics;
-      readApproxValues(line, approx_metrics);
-
-      approx_metrics->category = category;
-      unsigned int rank = getFileRank(file_path);
-      approx_metrics->rank = rank;
-
-      std::string unique_op_name = approx_metrics->op_name + std::to_string(it_count);
-      operation_metrics[unique_op_name].push_back(approx_metrics);
-      printf("\n ** unique_op_name = %s \n", unique_op_name.c_str());
-    }
-
-    it_count++;
-  }
-
-}
-
-
-
-void InsertApproxInfo::loadMetricsFromDir(std::string dir_path, std::string category){
-
-  struct dirent* entry;
-  dir_path = dir_path + category;
-
-  DIR* dir = opendir(dir_path.c_str());
-  if(dir == NULL){
-    printf("Directory %s not found. Aborting ...\n\n", dir_path.c_str());
-    abort();
-  }
-
-  while((entry = readdir(dir)) != NULL){
-    std::string f_name = entry->d_name;
-    // Skip the "." and ".." entries returned by readdir
-    if(f_name == "." || f_name == "..")
-      continue;
-    printf("f_name = %s \n", f_name.c_str());
-    loadMetricsFromFile(dir_path, f_name, category);
-  }
-}
-
-
-
-void InsertApproxInfo::loadTrainedApproxMetrics(std::string dir_path){
-
-  std::string root_path = dir_path + "/high_confidence/";
-  loadMetricsFromDir(root_path, "linear");
-  loadMetricsFromDir(root_path, "log");
-  loadMetricsFromDir(root_path, "quad");
-}
-
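-// The loaders above assume an autotuner results tree of this shape (a
-// sketch; the file names are illustrative, only the trailing "_<rank>"
-// suffix parsed by getFileRank() is required):
-//
-//   <results-dir>/high_confidence/linear/<op_config>_<rank>
-//   <results-dir>/high_confidence/log/<op_config>_<rank>
-//   <results-dir>/high_confidence/quad/<op_config>_<rank>
-//
-// In each file the first line (confidence information) is skipped, and
-// every following line is parsed by readApproxValues() as the
-// whitespace-separated fields:
-//
-//   op_name approx_level mean_l1 mean_l2 mean_linf rel_l1 rel_l2 rel_linf
-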
-
-/*** Methods of InsertApproxInfo ***/
-void InsertApproxInfo::run(std::string dir_path) {
-
-  loadTrainedApproxMetrics(dir_path);
-
-  errs() << "\n NOTE: ApproxInfo INSERTION TRANSFORM \n";
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
-
-  // Iterate over all the DFGs and attach approximation metrics
-  // to the tensor operations in each of them
-  for (auto rootNode: Roots) {
-    this->visit(rootNode);
-  }
-
-  return;
-}
-
-/*** Analysis of internal node ***/
-void InsertApproxInfo::codeGen(DFInternalNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-}
-
-/*** Analysis of leaf node ***/
-void InsertApproxInfo::codeGen(DFLeafNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-
-  Function *F = N->getFuncPointer();
-  Module* M = F->getParent();
-  std::vector<IntrinsicInst *> IItoRemove;
-
-
-  /**** Adding operand bundles for each tensor operation in the HPVM DFG leaf node ****/
-  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-    Instruction *I = &(*i);
-    errs() << *I << "\n";
-
-    if (BuildDFG::isViscIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
-             && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
-      std::string intrinsic_id = std::string(II->getCalledFunction()->getName().data());
-      std::string runtime_func_name = intrinsics_map[intrinsic_id];
-      std::string unique_name = runtime_func_name + std::to_string(currentID);
-      printf("\n ---- unique_name = %s \n", unique_name.c_str());
-      std::vector<ApproxMetrics*> approx_metrics;
-      if(operation_metrics.find(unique_name) != operation_metrics.end()){
-        approx_metrics = operation_metrics[unique_name];
-      }
-      else{
-        errs() << "Intrinsic name not found in the metrics map - unexpected error. Aborting ...\n\n";
-        abort();
-      }
-
-      unsigned int num_configs = approx_metrics.size();
-      std::vector<OperandBundleDef> conf_bundles;
-      for(unsigned int i = 0; i < num_configs; i++){
-        std::vector<Value*> norm_vals;
-
-        norm_vals.push_back(category_str);
-        Constant* categoryConst = ConstantDataArray::getString(M->getContext(), StringRef(approx_metrics[i]->category.c_str()), true);
-        GlobalVariable* category_value = new GlobalVariable(*M, categoryConst->getType(), true,
-                                                            GlobalValue::ExternalLinkage, categoryConst, "");
-        norm_vals.push_back(category_value);
-
-        norm_vals.push_back(rank_str);
-        Constant* constIntVal = ConstantInt::get(Type::getInt32Ty(M->getContext()), approx_metrics[i]->rank);
-        norm_vals.push_back(constIntVal);
-
-        // Mean l-norm metrics
-        norm_vals.push_back(mean_l1_str);
-        Constant* constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l1);
-        norm_vals.push_back(constFPVal);
-
-        norm_vals.push_back(mean_l2_str);
-        constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l2);
-        norm_vals.push_back(constFPVal);
-
-        norm_vals.push_back(mean_linf_str);
-        constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_linf);
-        norm_vals.push_back(constFPVal);
-
-        // Relative l-norm metrics
-        norm_vals.push_back(rel_l1_str);
-        constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l1);
-        norm_vals.push_back(constFPVal);
-
-        norm_vals.push_back(rel_l2_str);
-        constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l2);
-        norm_vals.push_back(constFPVal);
-
-        norm_vals.push_back(rel_linf_str);
-        constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_linf);
-        norm_vals.push_back(constFPVal);
-
-        std::string config_name = "config_" + std::to_string(i+1);
-        OperandBundleDef norm_bundle(config_name, norm_vals);
-
-        conf_bundles.push_back(norm_bundle);
-      }
-
-      ArrayRef<OperandBundleDef> bundle_arr(conf_bundles);
-
-      /*** Creating a new intrinsic call with the operand bundles attached ***/
-      Function* calledFunction = II->getCalledFunction();
-      unsigned num_args = II->getNumArgOperands();
-      std::vector<Value*> args;
-      for(unsigned i = 0; i < num_args; i++){
-        Value* argValue = II->getArgOperand(i);
-        args.push_back(argValue);
-      }
-
-      CallInst* CI = CallInst::Create(calledFunction,
-                                      args, bundle_arr, "", II);
-
-      errs() << "NOTE: New CallInst = " << *CI << "\n";
-
-      II->replaceAllUsesWith(CI);
-      // Mark for removal at the end
-      IItoRemove.push_back(II);
-
-      // Increment the counter of tensor ops processed
-      currentID++;
-    }
-  }
-
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri) {
-    DEBUG(errs() << "Erasing: " << **ri << "\n");
-    (*ri)->eraseFromParent();
-  }
-
-}
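-
-// Illustrative sketch (commented out; not part of this pass): a downstream
-// consumer can recover the metrics attached above by walking the operand
-// bundles of a rewritten tensor call CI. Each "config_<i>" bundle alternates
-// a key-string global (created in initGlobalStrings()) with its constant
-// value:
-//
-//   for (unsigned b = 0; b < CI->getNumOperandBundles(); b++) {
-//     OperandBundleUse OBU = CI->getOperandBundleAt(b);
-//     if (!OBU.getTagName().startswith("config_"))
-//       continue;
-//     for (unsigned j = 0; j + 1 < OBU.Inputs.size(); j += 2) {
-//       Value* key = OBU.Inputs[j];     // e.g. the "mean_l1" string global
-//       Value* val = OBU.Inputs[j + 1]; // ConstantInt (rank) or ConstantFP
-//       if (ConstantFP* FP = dyn_cast<ConstantFP>(val))
-//         errs() << FP->getValueAPF().convertToDouble() << "\n";
-//     }
-//   }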
"\n"); - errs() << "Erasing: " << **ri << "\n"; - (*ri)->eraseFromParent(); - } - - -} - -char InsertApproxInfoWrapperPass::ID = 0; -static RegisterPass<InsertApproxInfoWrapperPass> X("insert-approxinfo", - "Pass to add approximation information (l-norm metrics) in the ApproxHPVM DFG", - false /* does not modify the CFG */, - false /* not transformation, just analysis */); - - - - - -} // End of namespace - diff --git a/llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt b/llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt deleted file mode 100644 index e9cf5afd4a..0000000000 --- a/llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt +++ /dev/null @@ -1,21 +0,0 @@ -;===- ./lib/Transforms/LocalMem/LLVMBuild.txt ------------------*- Conf -*--===; -; -; The LLVM Compiler Infrastructure -; -; This file is distributed under the University of Illinois Open Source -; License. See LICENSE.TXT for details. -; -;===------------------------------------------------------------------------===; -; -; This is an LLVMBuild description file for the components in this subdirectory. -; -; For more information on the LLVMBuild system, please see: -; -; http://llvm.org/docs/LLVMBuild.html -; -;===------------------------------------------------------------------------===; - -[component_0] -type = Library -name = InsertApproxInfo -parent = Transforms -- GitLab