From d353e1e771421f5718fff3fb136955a6b2b392c3 Mon Sep 17 00:00:00 2001 From: Akash Kothari <akashk4@miranda.cs.illinois.edu> Date: Sun, 23 May 2021 17:03:05 -0500 Subject: [PATCH] Add NVDLA backend pass --- hpvm/lib/Transforms/CMakeLists.txt | 1 + hpvm/lib/Transforms/HPVM2NVDLA/CMakeLists.txt | 34 + .../Transforms/HPVM2NVDLA/HPVM2NVDLA.exports | 0 .../Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp | 1652 +++++++++++++++++ hpvm/lib/Transforms/HPVM2NVDLA/LLVMBuild.txt | 21 + 5 files changed, 1708 insertions(+) create mode 100644 hpvm/lib/Transforms/HPVM2NVDLA/CMakeLists.txt create mode 100644 hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLA.exports create mode 100644 hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp create mode 100644 hpvm/lib/Transforms/HPVM2NVDLA/LLVMBuild.txt diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt index b18cd4551b..296e3f87cd 100644 --- a/hpvm/lib/Transforms/CMakeLists.txt +++ b/hpvm/lib/Transforms/CMakeLists.txt @@ -6,5 +6,6 @@ add_subdirectory(GenHPVM) add_subdirectory(LocalMem) add_subdirectory(DFG2LLVM_WrapperAPI) add_subdirectory(DFG2LLVM_CUDNN) +add_subdirectory(HPVM2NVDLA) add_subdirectory(FuseHPVMTensorNodes) add_subdirectory(InPlaceDFG) diff --git a/hpvm/lib/Transforms/HPVM2NVDLA/CMakeLists.txt b/hpvm/lib/Transforms/HPVM2NVDLA/CMakeLists.txt new file mode 100644 index 0000000000..2e82ec555a --- /dev/null +++ b/hpvm/lib/Transforms/HPVM2NVDLA/CMakeLists.txt @@ -0,0 +1,34 @@ +if(WIN32 OR CYGWIN) + set(LLVM_LINK_COMPONENTS Core Support) +endif() + +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLLVM_BUILD_DIR=${CMAKE_BINARY_DIR}") + +add_definitions(-DNVDLA_UTILS_ERROR_TAG="DLA") + +include_directories(../../../sw/umd/external/include) +include_directories(../../../sw/umd/core/include) +include_directories(../../../sw/umd/core/src/common/include) +include_directories(../../../sw/umd/core/src/compiler/include) + +add_llvm_library( LLVMHPVM2NVDLAPass + MODULE + HPVM2NVDLAPass.cpp + + DEPENDS + intrinsics_gen + PLUGIN_TOOL + opt + ) + +find_library(NVDLA_COMPILER + NAMES nvdla_compiler + HINTS ../../../sw/lib +) + +find_library(PROTOBUF + NAMES protobuf + HINTS ../../../sw/lib +) + +target_link_libraries(LLVMHPVM2NVDLAPass ${NVDLA_COMPILER} ${PROTOBUF}) diff --git a/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLA.exports b/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLA.exports new file mode 100644 index 0000000000..e69de29bb2 diff --git a/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp b/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp new file mode 100644 index 0000000000..e3b8f5dfc9 --- /dev/null +++ b/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp @@ -0,0 +1,1652 @@ +#define ENABLE_ASSERTS + +#define DEBUG_TYPE "DFG2NVDLA" + +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/IR/Attributes.h" +#include "llvm/ADT/STLExtras.h" + +#include "SupportHPVM/DFG2LLVM.h" + +#include <sstream> +#include <fstream> +#include <vector> +#include <map> + +#include "dlaerror.h" +#include "dlatypes.h" + +#include "nvdla/IRuntime.h" +#include "DlaImageUtils.h" + +#include "ErrorMacros.h" +#include "nvdla_inf.h" +#include "nvdla_os_inf.h" +#include "nvdla/IType.h" +#include "nvdla/ITensor.h" +#include "nvdla/INetwork.h" +#include "nvdla/ILayer.h" +#include "nvdla/IProfiler.h" +#include "nvdla/IProfile.h" 
+#include "nvdla/ICompiler.h" +#include "nvdla/ILoadable.h" +#include "nvdla/IWisdom.h" + +#include "rapidjson/document.h" +#include "rapidjson/filereadstream.h" +#include "rapidjson/error/en.h" +#include "half.h" + +using namespace llvm; +using namespace builddfg; +using namespace dfg2llvm; + +using namespace nvdla; + +typedef half_float::half float16; + +static cl::opt<std::string> ComputePrecision("cprecision", + cl::desc("Compute precision (int8 or fp16)."), cl::init("float16")); + +static cl::opt<std::string> CalibTablePath("calib-table", + cl::desc("Path to tensor scales file"), + cl::value_desc("filename"), cl::Required); + + +#define DEFAULT_BATCH_SIZE 0 +#define DEFAULT_DATA_FMT nvdla::DataFormat::NCHW +#define DEFAULT_QUANT_MODE nvdla::QuantizationMode::NONE +#define TARGET_CONFIG_NAME "nv_full" +#define TEST_PARAM_FILE_MAX_SIZE 65536 + +struct HPVM2NVDLA : public ModulePass { + static char ID; // Pass identification, replacement for typeid + HPVM2NVDLA() : ModulePass(ID) {} + +public: + // Functions + virtual bool runOnModule(Module &M); + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<BuildDFG>(); + AU.addPreserved<BuildDFG>(); + } + +private: + //bool transformHPVM2NVDLA(Module &M); + + //void codeGenHPVM2NVDLA(CGT_NVDLA *, DFNode *); +}; + +struct TestAppArgs +{ + std::string project; + std::string inputPath; + std::string inputName; + std::string outputPath; + std::string testname; + std::string testArgs; + std::string prototxt; // This should be folded into testArgs + std::string caffemodel; // This should be folded into testArgs + std::string cachemodel; // This should be folded into testArgs + + std::string profileName; // ok here? + std::string profileFile; + std::string configtarget; + std::string calibTable; + nvdla::QuantizationMode quantizationMode; + + Module *M; + std::vector<DFInternalNode *> *Roots; + + NvU16 numBatches; + nvdla::DataFormat inDataFormat; + nvdla::DataType computePrecision; + + std::map<std::string, NvF32> tensorScales; +}; + +struct TestInfo +{ + // common + nvdla::IWisdom* wisdom; + std::string wisdomPath; + + // parse + std::string modelsPath; + std::string profilesPath; + std::string calibTablesPath; + + // runtime + // nvdla::IRuntime* runtime; + nvdla::ILoadable* compiledLoadable; + NvU8 *pData; + //std::string inputImagesPath; + //std::string inputLoadablePath; + // std::map<std::string, NvDlaImage*> inputImages; + // std::map<std::string, void *> inputBuffers; + // std::map<std::string, NvDlaImage*> outputImages; + // std::map<std::string, void *> outputBuffers; + // std::vector<SubmitContext*> submits; + NvU32 timeout; + NvU16 numBatches; // runtime's point-of-view + NvU32 numSubmits; +}; + +static TestAppArgs defaultTestAppArgs = +{ + /* .project = */ "OpenDLA", + /* .inputPath = */ "./", + /* .inputName = */ "", + /* .outputPath = */ "./", + /* .testname = */ "", + /* .testArgs = */ "", + /* .prototxt = */ "", + /* .caffemodel = */ "", + /* .cachemodel = */ "", + /* .profileName = */ "fast-math", + /* .profileFile = */ "", + /* .configtarget = */ TARGET_CONFIG_NAME, + /* .calibtable = */ "", + /* .quantizationMode = */ DEFAULT_QUANT_MODE, + nullptr, nullptr, + /* .numBatches = */ DEFAULT_BATCH_SIZE, + /* .inDataFormat = */ DEFAULT_DATA_FMT, + /* .computePrecision = */ nvdla::DataType::INT8 +}; + +char HPVM2NVDLA::ID = 0; +static RegisterPass<HPVM2NVDLA> X("hpvm-nvdla", + "Dataflow Graph to NVDLA IR Pass", + false, false); + + +// Visitor for Code generation traversal of HPVM IR +class CGT_NVDLA : public 
CodeGenTraversal { +private: + // Data information + //DataFormat InDataFormat; + //DataType ComputePrecision; + //QuantizationMode Quantization; + //NvU16 NumBatches; + + // Wisdom and network information + IWisdom *Wisdom; + INetwork *Network; + + std::map<std::string, int> LayerNameMap; + + // Maps dataflow edges in HPVM IR to Tensors in NVDLA IR + DenseMap<const DFEdge *, ITensor *> EdgeToTensorMap; + + // Virtual Functions + void init(); + void initRuntimeAPI(); + void codeGen(DFInternalNode* N); + void codeGen(DFLeafNode* N); + + // Codegen functions for all supported layers + void generateConvolutionLayer(DFLeafNode *, const IntrinsicInst *); + void generatePoolingLayer(DFLeafNode *, const IntrinsicInst *); + void generateBatchNormLayer(DFLeafNode *, const IntrinsicInst *); + void generateReluLayer(DFLeafNode *, const IntrinsicInst *); + void generateGemmLayer(DFLeafNode *, const IntrinsicInst *); + void generateSoftMaxLayer(DFLeafNode *, const IntrinsicInst *); + void generateTanhLayer(DFLeafNode *, const IntrinsicInst *); + + // Map edges to output tensors + void mapOutputTensor(DFNode *N, ITensor *Tensor); + + // Get input tensors to nodes + ITensor *getIntermediateInputTensor(DFNode *N); + + // Get binding tensors to nodes + User *getBindingTensor(DFLeafNode* N, unsigned index); + + // Get the input NVDLA tensors to nodes + ITensor *getNVDLAInputTensor(DFLeafNode* N, const User *InputTensor); + + // Get index for an input tensor + unsigned getInputIndex(DFLeafNode* N, const IntrinsicInst *II); + + // Gets nodes with add ops meant to be combined with convolution and gemm + void getaddOpSucceedsNode(DFNode *N, SmallVector<DFLeafNode *, 4> &AddNodes, + SmallVector<IntrinsicInst *, 4> &AddInsts); + + // Getting weights + Weights readTrainedWeights(User *TensorPtr, + int dim1_size, int dim2_size, + int dim3_size, int dim4_size); + + // Identify outputs + unsigned identifyOutputs(); + + // Generate profile based on data parameters + //void generateProfile(std::string &, std::string &); + +std::string getLayerName(std::string Name); + +public: + + CGT_NVDLA(Module &_M, BuildDFG &_DFG) + : CodeGenTraversal(_M, _DFG) {// : Network(nullptr) { + //initRuntimeAPI(); + init(); + } + + //void destroySetUp(); + + //void setUpWisdom(); + + //void compileProfile(); + + //void transformHPVM2NVDLA(DFNode *); + + NvDlaError generateTensorScales(const TestAppArgs*, TestInfo*, nvdla::INetwork*); + + NvDlaError updateProfileWithCmdLineArgs(const TestAppArgs*, TestInfo*, const char*, nvdla::DataFormat); + + NvDlaError beginWithNamedProfile(const TestAppArgs*, TestInfo*); + + NvDlaError generateProfile(const TestAppArgs*, std::string*, TestInfo*); + + NvDlaError compileProfile(const TestAppArgs*, TestInfo*); + + NvDlaError launchTest(const TestAppArgs*); + + NvDlaError testSetup(const TestAppArgs*, TestInfo*); + + NvDlaError parseAndCompile(const TestAppArgs*, TestInfo*); + + NvDlaError transformHPVM2NVDLA(const TestAppArgs*, TestInfo*); + + NvDlaError parseSetup(const TestAppArgs*, TestInfo*); + + NvDlaError readTensorScales(const TestAppArgs* appArgs, TestInfo *i, nvdla::INetwork* network); +}; + +void CGT_NVDLA::init() { + // Default paramters + //InDataFormat = DataFormat::NCHW; + //ComputePrecision = DataType::FLOAT; + //Quantization = QuantizationMode::NONE; + //NumBatches = 0; +} + +void CGT_NVDLA::initRuntimeAPI() { + // Nothing to do here! 
+}
+
+Weights CGT_NVDLA::readTrainedWeights(User *TensorPtr,
+                                      int dim1_size, int dim2_size,
+                                      int dim3_size, int dim4_size) {
+  DEBUG(errs() << "READ TRAINED WEIGHTS\n");
+  // Get weights file name
+  User *MemcpyPtr = dyn_cast<User>(TensorPtr->getOperand(0));
+  DEBUG(MemcpyPtr->print(errs()));
+  DEBUG(errs() << "\n");
+  while(!dyn_cast<AllocaInst>(MemcpyPtr)) {
+    MemcpyPtr = dyn_cast<User>(MemcpyPtr->getOperand(0));
+  }
+  User *MemcpyArg = nullptr;
+  for(User *U: MemcpyPtr->users()) {
+    DEBUG(U->print(errs()));
+    DEBUG(errs() << "\n");
+    if(auto *BCO = dyn_cast<BitCastOperator>(U)) {
+      for(User *CU: BCO->users()) {
+        if(auto *CI = dyn_cast<CallInst>(CU)) {
+          if(CI->getCalledFunction()->getName().contains(StringRef("memcpy"))) {
+            MemcpyArg = dyn_cast<User>(CI->getOperand(1));
+            break;
+          }
+        }
+      }
+      if(MemcpyArg)
+        break;
+    }
+  }
+  assert(MemcpyArg && "File name not found.");
+  auto *WeightFileName = dyn_cast<GlobalVariable>(MemcpyArg->getOperand(0));
+  assert(WeightFileName && "Weight file name must be a global variable.");
+  auto* CDA = dyn_cast<ConstantDataArray>(WeightFileName->getInitializer());
+  assert(CDA && "Weight file name must be a constant array.");
+  const auto &file_name = std::string(CDA->getAsString());
+
+  // Read the weights file
+  int num_elem = dim1_size * dim2_size * dim3_size * dim4_size;
+  int size_in_bytes = sizeof(float16) * num_elem;
+  //DEBUG(errs() << "float16 size: " << sizeof(float16) << "\n");
+  DEBUG(errs() << "size in bytes: " << size_in_bytes << "\n");
+  void *tensor_data = (void *) malloc(size_in_bytes);
+  int file_header_size = 0;
+  DEBUG(errs() << "FILE NAME: " << file_name << "\n");
+  FILE *file = fopen(file_name.c_str(), "rb");
+  if(!file) {
+    DEBUG(errs() << "Data file is not found. Aborting.\n");
+    abort();
+  }
+  fseek(file, file_header_size, SEEK_CUR);
+  size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
+  DEBUG(errs() << "BYTES READ: " << bytes_read << "\n");
+  fclose(file);
+
+  // Create weight tensors
+  auto Weight = Weights(DataType::HALF, tensor_data, NvS64(num_elem));
+  //FILE *try_file = fopen("temp.bin", "wb");
+  //fwrite(Weight.values, sizeof(float), num_elem, try_file);
+  //fclose(try_file);
+  //exit(-1);
+  return Weight;
+}
+
+// For a tensor to be an input weight tensor, it has to come from the root node
+User *CGT_NVDLA::getBindingTensor(DFLeafNode* N, unsigned index) {
+  // HPVM internal API needs fixing. Remove this lambda function when bug is fixed.
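+  // NodeIsRoot: a node is treated as the DFG root if its function is used,
+  // directly or through a bitcast, by an llvm.hpvm.launch intrinsic.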
+ auto NodeIsRoot = [](DFNode &InternalNode) { + auto *RootFunction = InternalNode.getFuncPointer(); + for(User *U: RootFunction->users()) { + DEBUG(errs() << "USER FOR INTERNAL NODE IN LAMBDA FUNCTION: "); + DEBUG(U->print(errs())); + DEBUG(errs() << "\n"); + auto *II = dyn_cast<IntrinsicInst>(U); + if(!II) { + auto *BCI = dyn_cast<BitCastOperator>(U); + assert(BCI && "Not a bitcast instruction."); + for(User *BCU : BCI->users()) { + DEBUG(errs() << "USER FOR INTERNAL NODE IN LAMBDA FUNCTION: "); + DEBUG(BCU->print(errs())); + DEBUG(errs() << "\n"); + II = dyn_cast<IntrinsicInst>(BCU); + if(II) + break; + } + } + if(II && (II->getIntrinsicID() == Intrinsic::hpvm_launch)) { + DEBUG(errs() << "LAUNCH FUNCTION: "); + DEBUG(II->print(errs())); + DEBUG(errs() << "LAMBDA FUNCTION RETURN TRUE\n"); + return true; + } + } + DEBUG(errs() << "LAMBDA FUNCTION RETURN FALSE\n"); + return false; + }; + + auto NodeIsLeaf = [](DFNode &Node) { + auto *NodeFunction = Node.getFuncPointer(); + for(User *U: NodeFunction->users()) { + DEBUG(errs() << "USER FOR INTERNAL NODE IN LAMBDA FUNCTION: "); + DEBUG(U->print(errs())); + DEBUG(errs() << "\n"); + auto *II = dyn_cast<IntrinsicInst>(U); + if(!II) { + auto *BCI = dyn_cast<BitCastOperator>(U); + assert(BCI && "Not a bitcast instruction."); + for(User *BCU : BCI->users()) { + DEBUG(errs() << "USER FOR INTERNAL NODE IN LAMBDA FUNCTION: "); + DEBUG(BCU->print(errs())); + DEBUG(errs() << "\n"); + II = dyn_cast<IntrinsicInst>(BCU); + if(II) + break; + } + } + if(II + && (II->getIntrinsicID() == Intrinsic::hpvm_createNode + || II->getIntrinsicID() == Intrinsic::hpvm_createNode1D + || II->getIntrinsicID() == Intrinsic::hpvm_createNode2D + || II->getIntrinsicID() == Intrinsic::hpvm_createNode3D)) { + DEBUG(errs() << "CREATE NODE FUNCTION: "); + DEBUG(II->print(errs())); + DEBUG(errs() << "LAMBDA FUNCTION RETURN TRUE\n"); + + // Ensure that the node function does not have these create node intrinsics + for(inst_iterator i = inst_begin(NodeFunction), + e = inst_end(NodeFunction); i != e; ++i) { + Instruction *I = &(*i); + if(auto *II = dyn_cast<IntrinsicInst>(I)) { + if(II->getIntrinsicID() == Intrinsic::hpvm_createNode + || II->getIntrinsicID() == Intrinsic::hpvm_createNode1D + || II->getIntrinsicID() == Intrinsic::hpvm_createNode2D + || II->getIntrinsicID() == Intrinsic::hpvm_createNode3D) { + DEBUG(errs() << "--LAMBDA FUNCTION RETURN FALSE\n"); + return false; + } + } + + } + return true; + } + } + DEBUG(errs() << "LAMBDA FUNCTION RETURN FALSE\n"); + return false; + }; + + DEBUG(errs() << "GET BINDING TENSOR\n"); + DEBUG(errs() << "GIVEN INDEX: " << index << "\n"); + DFEdge *DE = N->getInDFEdgeAt(index); + assert(DE && "Data edge does not exist at given index"); + DEBUG(errs() << "LEAF NODE FUNCTION: " << N->getFuncPointer()->getName() << "\n"); + // Get the argument position in the root node. 
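+  // Walk the bind edges through internal nodes up to the root, updating the
+  // source argument position at each hop. If an edge originates from a leaf
+  // node, the value is an intermediate tensor rather than a bind, so return nullptr.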
+ DEBUG(errs() << "GET TO THE ROOT FIRST\n"); + auto *InternalNode = DE->getSourceDF(); + DEBUG(errs() << "INTERNAL NODE FUNCTION: " << InternalNode->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "INTERNAL NDOE POINTER: " << InternalNode << "\n"); + if(NodeIsLeaf(*InternalNode)) { + DEBUG(errs() << "BIND NONE: EDGE FROM LEAF NODE\n"); + return nullptr; + } + unsigned argPos = DE->getSourcePosition(); + DEBUG(errs() << "ARG POSITION BEFORE LOOP: " << argPos << "\n"); + while(!NodeIsRoot(*InternalNode)) { + DEBUG(errs() << "IN LOOP\n"); + if(NodeIsLeaf(*InternalNode)) { + DEBUG(errs() << "IN LOOP BIND NONE: EDGE FROM LEAF NODE\n"); + return nullptr; + } + argPos = DE->getSourcePosition(); + DE = InternalNode->getInDFEdgeAt(argPos); + if(!DE) { + DEBUG(errs() << "NO BINDING EDGE IN LOOP\n"); + // No binding edge. + return nullptr; + } + InternalNode = DE->getSourceDF(); + DEBUG(errs() << "INTERNAL NODE FUNCTION IN LOOP: " << InternalNode->getFuncPointer()->getName() << "\n"); + DEBUG(errs() << "IN LOOP DATA EDGE: " << DE << "\n"); + DEBUG(errs() << "IN LOOP ARG POSITION: " << argPos << "\n"); + } + DEBUG(errs() << "ARG POSITION: " << argPos << "\n"); + + DEBUG(errs() << "GET THE LAUNCH FUNCTION\n"); + // Now we have the root node. We need to get the launch functions for it. + auto *RootFunction = InternalNode->getFuncPointer(); + for(User *U: RootFunction->users()) { + DEBUG(errs() << "User for root: "); + DEBUG(U->print(errs())); + IntrinsicInst *II = dyn_cast<IntrinsicInst>(U); + if(!II) { + auto *BCI = dyn_cast<BitCastOperator>(U); + assert(BCI && "Not a bitcast instruction."); + for(User *BCU : BCI->users()) { + II = dyn_cast<IntrinsicInst>(BCU); + if(II) + break; + } + } + assert(II && (II->getIntrinsicID() == Intrinsic::hpvm_launch) + && "Use of a root node must be in launch function call instrinsic."); + DEBUG(errs() << "LAUNCH FUNCTION: "); + DEBUG(II->print(errs())); + + // Now, get the the arguments to the root and get element pointer to argument structure. + auto *ArgObj = dyn_cast<Instruction>(II->getOperand(1)); + if(auto *BCO = dyn_cast<BitCastOperator>(ArgObj)) { + ArgObj = dyn_cast<Instruction>(BCO->getOperand(0)); + } else if (auto *CI = dyn_cast<CallInst>(ArgObj)) { + for(User *CIU : CI->users()) { + auto *BCO = dyn_cast<BitCastOperator>(CIU); + if(BCO) { + ArgObj = dyn_cast<Instruction>(BCO->getOperand(0)); + break; + } + } + } else if (auto *AI = dyn_cast<AllocaInst>(ArgObj)) { + for(User *AIU : AI->users()) { + auto *BCO = dyn_cast<BitCastOperator>(AIU); + if(BCO) { + ArgObj = dyn_cast<Instruction>(BCO->getOperand(0)); + break; + } + } + } + auto *ArgObjPtrType = dyn_cast<PointerType>(ArgObj->getType()); + auto *ArgObjType = dyn_cast<StructType>(ArgObjPtrType->getElementType()); + assert(ArgObjType && "Arguments to launch is a structure."); + DEBUG(errs() << "ARG OBJ: "); + DEBUG(ArgObj->print(errs())); + DEBUG(errs() << "\n"); + + // Use the offset into the structure to get the source tensor. 
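+    // Locate the getelementptr into the launch-argument struct whose index matches
+    // argPos, then follow the store(s) to that field (possibly through bitcasts)
+    // to recover the tensor pointer bound to this input.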
+ const auto &DL = ArgObj->getParent()->getParent()->getParent()->getDataLayout(); + const auto *SL = DL.getStructLayout(ArgObjType); + uint64_t ElementOffset = SL->getElementOffset(argPos); + DEBUG(errs() << "ELEMENT OFFSET: " << ElementOffset << "\n"); + Instruction *StructElemPtr = nullptr; + for(User *U: ArgObj->users()) { + if(auto *GI = dyn_cast<GetElementPtrInst>(U)) { + auto *Offset = dyn_cast<ConstantInt>(GI->getOperand(2)); + assert(Offset && "Offset is not constant."); + if(Offset->getZExtValue() == argPos) {//ElementOffset) { + StructElemPtr = GI; + break; + } + } + } + assert(StructElemPtr && "No getelementptr found with given offset."); + DEBUG(StructElemPtr->print(errs())); + DEBUG(errs() << "\n"); + DEBUG(errs() << "USE THE STORES TO GET THE BIND TENSOR\n"); + // Get store to the element of argument structure to get the pointer to tensor. + for(User *GIU: StructElemPtr->users()) { + DEBUG(GIU->print(errs())); + DEBUG(errs() << "\n"); + if(auto *BCO = dyn_cast<BitCastOperator>(GIU)) { + DEBUG(BCO->print(errs())); + DEBUG(errs() << "\n"); + for(User *BCU : BCO->users()) { + if(auto *SI = dyn_cast<StoreInst>(BCU)) { + // Get the tensor pointer + DEBUG(SI->print(errs())); + DEBUG(errs() << "\n"); + auto *Val = SI->getValueOperand(); + if(auto *BCO = dyn_cast<BitCastOperator>(Val)) { + return dyn_cast<User>(BCO->getOperand(0)); + } + return dyn_cast<User>(Val); + } + } + } + if(auto *SI = dyn_cast<StoreInst>(GIU)) { + // Get the tensor pointer + DEBUG(SI->print(errs())); + auto *Val = SI->getValueOperand(); + if(auto *BCO = dyn_cast<BitCastOperator>(Val)) { + return dyn_cast<User>(BCO->getOperand(0)); + } + return dyn_cast<User>(Val); + } + } + } + return nullptr; +} + + +void CGT_NVDLA::mapOutputTensor(DFNode *N, ITensor *Tensor) { + for(int i = 0; i < N->outdfedge_size(); i++) + EdgeToTensorMap[N->getOutDFEdgeAt(i)] = Tensor; +} + +ITensor *CGT_NVDLA::getIntermediateInputTensor(DFNode *N) { + return EdgeToTensorMap[N->getInDFEdgeAt(0)]; +} + +void CGT_NVDLA::getaddOpSucceedsNode(DFNode *N, SmallVector<DFLeafNode *, 4> &AddNodes, + SmallVector<IntrinsicInst *, 4> &AddInsts) { + bool AddOpNodes = false; + for(int i = 0; i < N->outdfedge_size(); i++) { + auto *DestNode = N->getOutDFEdgeAt(i)->getDestDF(); + auto *F = DestNode->getFuncPointer(); + + // If the node is already cached in the list, no need to visit it + auto *Node = dyn_cast<DFLeafNode>(DestNode); + if(find(AddNodes, Node) != AddNodes.end()) + continue; + + // Add node to list if it contains add operation + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { + Instruction *I = &(*i); + auto *II = dyn_cast<IntrinsicInst>(I); + if (II && II->getIntrinsicID() == Intrinsic::hpvm_tensor_add) { + AddNodes.push_back(Node); + AddInsts.push_back(II); + AddOpNodes = true; + break; + } + } + assert(((AddNodes.size() > 0) == AddOpNodes) + && "All destination nodes are adds or all of them are not."); + } +} + +ITensor *CGT_NVDLA::getNVDLAInputTensor(DFLeafNode* N, const User *InputBindingTensor) { + if(InputBindingTensor) { + auto *BatchesConst = dyn_cast<ConstantInt>(InputBindingTensor->getOperand(2)); + auto *ChannelsConst = dyn_cast<ConstantInt>(InputBindingTensor->getOperand(3)); + auto *HeightConst = dyn_cast<ConstantInt>(InputBindingTensor->getOperand(4)); + auto *WidthConst = dyn_cast<ConstantInt>(InputBindingTensor->getOperand(5)); + assert(HeightConst && WidthConst && ChannelsConst && BatchesConst + && "Number of input dimensions must be constants."); + + // Input dimensions + int InputW = 
WidthConst->getZExtValue(); + int InputH = HeightConst->getZExtValue(); + int InputC = ChannelsConst->getZExtValue(); + int InputN = BatchesConst->getZExtValue(); + + // Create a new input tensor + Dims4 dims(InputN, InputC, InputH, InputW); + return Network->addInput("", dims); + } + return getIntermediateInputTensor(N); +} + +unsigned CGT_NVDLA::getInputIndex(DFLeafNode* N, const IntrinsicInst *II) { + DEBUG(errs() << "GET INPUT INDEX\n"); + auto *F = N->getFuncPointer(); + DEBUG(errs()<<"function name = "<< F->getName()<<"\n"); + unsigned inputIndex = 0; + for(auto &Arg : F->args()) { + DEBUG(errs() << "ARGUMENT: "); + DEBUG((&Arg)->print(errs())); + DEBUG(errs() << "\n"); + if(II->getOperand(0) == &Arg) { + DEBUG(errs() << "INPUT: "); + DEBUG(II->getOperand(0)->print(errs())); + DEBUG(errs() << "\n"); + DEBUG(errs() << "INPUT INDEX: " << inputIndex << "\n"); + return inputIndex; + } + inputIndex++; + } + assert(false && "Illegal intrinsic or Node."); + return -1; // Keep compiler happy +} + +std::string CGT_NVDLA::getLayerName(std::string Name) { + DEBUG(errs() << "GET LAYER NAME\n"); + if(LayerNameMap.find(Name) == LayerNameMap.end()) { + LayerNameMap[Name] = 1; + } else { + LayerNameMap[Name]++; + } + return std::to_string(LayerNameMap[Name]); +} + +void CGT_NVDLA::generateConvolutionLayer(DFLeafNode* N, const IntrinsicInst *II) { + DEBUG(errs() << "*****CONVOLUTION LAYER*****\n"); + // FIXME: What is number of "groups". Setting it to 1 for now. + int numGroups = 1; + + // If the input tensor is not a binding tensor, it must be coming + // from an edge from a visted node, so use that to get number of outputs. + unsigned inputIndex = getInputIndex(N, II); + DEBUG(errs() << "INPUT INDEX: " << inputIndex << "\n"); + DEBUG(errs() << "GET INPUT TENSOR\n"); + auto *InputTensor = getBindingTensor(N, inputIndex); + DEBUG(errs() << "INPUT TENSOR: "); + ITensor *InputNVDLATensor = getNVDLAInputTensor(N, InputTensor); + + // Get the index for kernel tensor + auto *F = N->getFuncPointer(); + DEBUG(errs()<<"function name = "<< F->getName()<<"\n"); + unsigned kernelIndex = 0; + bool ArgFound = false; + for(auto &Arg : F->args()) { + if(II->getOperand(1) == &Arg) { + ArgFound = true; + break; + } + kernelIndex++; + } + assert(ArgFound && "Illegal intrinsic or Node."); + DEBUG(errs() << "KERNEL INDEX: " << kernelIndex << "\n"); + // Get the kernel tensor + DEBUG(errs() << "GET KERNEL TENSOR\n"); + auto *KernelTensor = getBindingTensor(N, kernelIndex); + assert(KernelTensor && "Kernel tensors are always binds."); + + // Get kernel constants + auto *KernelWConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(5)); + auto *KernelHConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(4)); + auto *KernelCHConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(3)); + auto *KernelNConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(2)); + assert(KernelWConst && KernelHConst && KernelCHConst && KernelNConst + && "Kernel dimensions must be constants."); + int kernelW = KernelWConst->getZExtValue(); + int kernelH = KernelHConst->getZExtValue(); + int kernelC = KernelCHConst->getZExtValue(); + int kernelN = KernelNConst->getZExtValue(); + DEBUG(errs() << "\nKERNEL H: " << kernelH << "\n"); + DEBUG(errs() << "KERNEL W: " << kernelW << "\n"); + DEBUG(errs() << "KERNEL C: " << kernelC << "\n"); + DEBUG(errs() << "KERNEL N: " << kernelN << "\n"); + + int numOutputs; + if(!InputTensor) { + DEBUG(errs() << "INPUT FROM EDGE\n"); + numOutputs = (InputNVDLATensor->getDimensions()).n * kernelN; + // 
(InputNVDLATensor->getDimensions()).c; + } else { + DEBUG(errs() << "INPUT FROM WEIGHT TENSOR\n"); + auto *BatchesConst = dyn_cast<ConstantInt>(InputTensor->getOperand(2)); + auto *ChannelsConst = dyn_cast<ConstantInt>(InputTensor->getOperand(3)); + numOutputs = BatchesConst->getZExtValue() * kernelN; + // ChannelsConst->getZExtValue(); + DEBUG(errs() << "NUM OUTPUTS: " << numOutputs << "\n"); + } + + // Get Strides + ConstantInt *StrideWConst = dyn_cast<ConstantInt>(II->getOperand(5)); + ConstantInt *StrideHConst = dyn_cast<ConstantInt>(II->getOperand(4)); + assert((StrideWConst && StrideHConst) && "Strides must be constants."); + int strideW = StrideWConst->getZExtValue(); + int strideH = StrideHConst->getZExtValue(); + DEBUG(errs() << "STRIDE H: " << strideH << "\n"); + DEBUG(errs() << "STRIDE W: " << strideW << "\n"); + + // Get pads + ConstantInt *PadWConst = dyn_cast<ConstantInt>(II->getOperand(3)); + ConstantInt *PadHConst = dyn_cast<ConstantInt>(II->getOperand(2)); + assert((PadWConst && PadHConst) && "Pads must be constants."); + int padW = PadWConst->getZExtValue(); + int padH = PadHConst->getZExtValue(); + DEBUG(errs() << "PAD H: " << padH << "\n"); + DEBUG(errs() << "PAD W: " << padW << "\n"); + + // FIXME: Support dilations. Set dilations to 1 since we do not have dilation support yet. + int dilationW = 1; + int dilationH = 1; + + // Get the nodes with Add operations + SmallVector<DFLeafNode *, 4> AddOpNodes; + SmallVector<IntrinsicInst *, 4> AddInsts; + getaddOpSucceedsNode(N, AddOpNodes, AddInsts); + assert((!(AddOpNodes.size() > 1)) + && "Number of nodes with Add ops must not be more than 1"); + + // Get bias parameters + int BiasW, BiasH, BiasC, BiasN; + User *BiasTensor = nullptr; + BiasMode biasMode = BiasMode::bNONE; + if(AddOpNodes.size()) { + // Get the index for bias tensor + auto *AddNode = AddOpNodes[0]; + auto *AddInst = AddInsts[0]; + DEBUG(AddInst->print(errs())); + auto *F = AddNode->getFuncPointer(); + unsigned BiasIndex = 0; + ArgFound = false; + for(auto &Arg : F->args()) { + if(AddInst->getOperand(1) == &Arg) { + ArgFound = true; + break; + } + BiasIndex++; + } + assert(ArgFound && "Illegal intrinsic or Node."); + + // Get the bias tensor + DEBUG(errs() << "BIAS INDEX: " << BiasIndex << "\n"); + DEBUG(errs() << "BIAS TENSOR\n"); + BiasTensor = getBindingTensor(AddNode, BiasIndex); + assert(BiasTensor && "Bias tensors are always binds."); + + // Get Bias constants + auto *BiasWConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(5)); + auto *BiasHConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(4)); + auto *BiasCHConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(3)); + auto *BiasNConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(2)); + assert(BiasWConst && BiasHConst && BiasCHConst && BiasNConst + && "Bias dimensions must be constants."); + BiasW = BiasWConst->getZExtValue(); + BiasH = BiasHConst->getZExtValue(); + BiasC = BiasCHConst->getZExtValue(); + BiasN = BiasNConst->getZExtValue(); + DEBUG(errs() << "BIAS H: " << BiasH << "\n"); + DEBUG(errs() << "BIAS W: " << BiasW << "\n"); + DEBUG(errs() << "BIAS C: " << BiasC << "\n"); + DEBUG(errs() << "BIAS N: " << BiasN << "\n"); + + // Get bias mode + //if(kernelN == numOutputs) + biasMode = BiasMode::bCHANNEL; + //else + // biasMode = BiasMode::bUNIFORM; + } + + // Get weights + Weights kernelWeights = readTrainedWeights(KernelTensor, kernelN, kernelC, kernelH, kernelW); + Weights biasWeights = AddOpNodes.size() == 1 ? 
+ readTrainedWeights(BiasTensor, BiasN, BiasC, BiasH, BiasW) + : Weights(DataType::HALF, nullptr, 0); + + Dims2 tlPadding = Dims2(padH, padW); + Dims2 brPadding = Dims2(padH, padW); + Dims2 stride = Dims2(strideH, strideW); + Dims2 dilation = Dims2(dilationH, dilationW); + Dims2 kernelSize = Dims2(kernelH, kernelW); + + auto *Layer = Network->addConvolution(InputNVDLATensor, numOutputs, 0, + kernelSize, tlPadding, brPadding, stride, dilation, + kernelWeights, biasWeights, biasMode, numGroups); + if(AddOpNodes.size()) { + auto *Node = AddOpNodes[0]; + mapOutputTensor(Node, Layer->getOutput(0)); + } else { + mapOutputTensor(N, Layer->getOutput(0)); + } + Layer->setName((std::string("conv") + getLayerName(std::string("conv"))).c_str()); + DEBUG(errs() << Layer->getName() << "\n"); +} + +void CGT_NVDLA::generatePoolingLayer(DFLeafNode* N, const IntrinsicInst *II) { + DEBUG(errs() << "*****POOLING LAYER*****\n"); + // Get input tensor + unsigned inputIndex = getInputIndex(N, II); + auto *InputTensor = getBindingTensor(N, inputIndex); + ITensor *InputNVDLATensor = getNVDLAInputTensor(N, InputTensor); + + // Get window dimensions + ConstantInt *KernelWConst = dyn_cast<ConstantInt>(II->getOperand(2)); + ConstantInt *KernelHConst = dyn_cast<ConstantInt>(II->getOperand(1)); + assert((KernelWConst && KernelHConst) && "Kernel dimensions must be constants."); + int kernelH = KernelHConst->getZExtValue(); + int kernelW = KernelWConst->getZExtValue(); + DEBUG(errs() << "KERNEL H: " << kernelH << "\n"); + DEBUG(errs() << "KERNEL W: " << kernelW << "\n"); + + // Get Strides + ConstantInt *StrideWConst = dyn_cast<ConstantInt>(II->getOperand(6)); + ConstantInt *StrideHConst = dyn_cast<ConstantInt>(II->getOperand(5)); + assert((StrideWConst && StrideHConst) && "Strides must be constants."); + int strideH = StrideHConst->getZExtValue(); + int strideW = StrideWConst->getZExtValue(); + DEBUG(errs() << "STRIDE H: " << strideH << "\n"); + DEBUG(errs() << "STRIDE W: " << strideW << "\n"); + + // Get pads + ConstantInt *PadWConst = dyn_cast<ConstantInt>(II->getOperand(4)); + ConstantInt *PadHConst = dyn_cast<ConstantInt>(II->getOperand(3)); + assert((PadWConst && PadHConst) && "Pads must be constants."); + int padH = PadHConst->getZExtValue(); + int padW = PadWConst->getZExtValue(); + DEBUG(errs() << "PAD H: " << padH << "\n"); + DEBUG(errs() << "PAD W: " << padW << "\n"); + + Dims2 windowSize = Dims2(kernelH, kernelW); + Dims2 stride = Dims2(strideH, strideW); + Dims2 tlPadding = Dims2(padH, padW); + Dims2 brPadding = Dims2(padH, padW); + + PoolingType type = (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) ? 
+ PoolingType::kAVERAGE : PoolingType::kMAX; + + auto *Layer = Network->addPooling(InputNVDLATensor, type, + windowSize, stride, tlPadding, brPadding); + mapOutputTensor(N, Layer->getOutput(0)); + Layer->setName((std::string("pool") + getLayerName(std::string("pool"))).c_str()); + DEBUG(errs() << Layer->getName() << "\n"); +} + +void CGT_NVDLA::generateGemmLayer(DFLeafNode* N, const IntrinsicInst *II) { + DEBUG(errs() << "****GEMM LAYER****\n"); + // Get input tensor and compute number of outputs + unsigned inputIndex = getInputIndex(N, II); + auto *InputTensor = getBindingTensor(N, inputIndex); + ITensor *InputNVDLATensor = getNVDLAInputTensor(N, InputTensor); + + // Get the index for kernel tensor + auto *F = N->getFuncPointer(); + DEBUG(errs()<<"function name = "<< F->getName()<<"\n"); + unsigned kernelIndex = 0; + bool ArgFound = false; + for(auto &Arg : F->args()) { + if(II->getOperand(1) == &Arg) { + ArgFound = true; + break; + } + kernelIndex++; + } + assert(ArgFound && "Illegal intrinsic or Node."); + + // Get the kernel tensor + auto *KernelTensor = getBindingTensor(N, kernelIndex); + assert(KernelTensor && "Kernel tensors are always binds."); + + // Get kernel constants + auto *KernelWConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(5)); + auto *KernelHConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(4)); + auto *KernelCHConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(3)); + auto *KernelNConst = dyn_cast<ConstantInt>(KernelTensor->getOperand(2)); + assert(KernelWConst && KernelHConst && KernelCHConst && KernelNConst + && "Kernel dimensions must be constants."); + int kernelW = KernelWConst->getZExtValue(); + int kernelH = KernelHConst->getZExtValue(); + int kernelC = KernelCHConst->getZExtValue(); + int kernelN = KernelNConst->getZExtValue(); + DEBUG(errs() << "KERNEL H: " << kernelH << "\n"); + DEBUG(errs() << "KERNEL W: " << kernelW << "\n"); + DEBUG(errs() << "KERNEL C: " << kernelC << "\n"); + DEBUG(errs() << "KERNEL N: " << kernelN << "\n"); + + int numOutputs = kernelW; + DEBUG(errs() << "NUM OUTPUTS: " << numOutputs << "\n"); + + // Get the nodes with Add operations + SmallVector<DFLeafNode *, 4> AddOpNodes; + SmallVector<IntrinsicInst *, 4> AddInsts; + getaddOpSucceedsNode(N, AddOpNodes, AddInsts); + assert((!(AddOpNodes.size() > 1)) + && "Number of nodes with Add ops must not be more than 1"); + + // Get bias parameters + int BiasW, BiasH, BiasC, BiasN; + User *BiasTensor = nullptr; + BiasMode biasMode = BiasMode::bNONE; + if(AddOpNodes.size()) { + // Get the index for bias tensor + auto *AddNode = AddOpNodes[0]; + auto *AddInst = AddInsts[0]; + auto *F = AddNode->getFuncPointer(); + unsigned BiasIndex = 0; + ArgFound = false; + for(auto &Arg : F->args()) { + if(AddInst->getOperand(1) == &Arg) { + ArgFound = true; + break; + } + BiasIndex++; + } + assert(ArgFound && "Illegal intrinsic or Node."); + + // Get the bias tensor + BiasTensor = getBindingTensor(AddNode, BiasIndex); + assert(BiasTensor && "Bias tensors are always binds."); + + // Get Bias constants + auto *BiasWConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(5)); + auto *BiasHConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(4)); + auto *BiasCHConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(3)); + auto *BiasNConst = dyn_cast<ConstantInt>(BiasTensor->getOperand(2)); + assert(BiasWConst && BiasHConst && BiasCHConst && BiasNConst + && "Bias dimensions must be constants."); + BiasW = BiasWConst->getZExtValue(); + BiasH = BiasHConst->getZExtValue(); + BiasC = 
BiasCHConst->getZExtValue(); + BiasN = BiasNConst->getZExtValue(); + DEBUG(errs() << "BIAS H: " << BiasH << "\n"); + DEBUG(errs() << "BIAS W: " << BiasW << "\n"); + DEBUG(errs() << "BIAS C: " << BiasC << "\n"); + DEBUG(errs() << "BIAS N: " << BiasN << "\n"); + + // Get bias mode + //if(KernelCHConst->getZExtValue() == numOutputs) + biasMode = BiasMode::bCHANNEL; + //else + // biasMode = BiasMode::bUNIFORM; + } + + // Get weights + Weights kernelWeights = readTrainedWeights(KernelTensor, kernelN, kernelC, kernelH, kernelW); + Weights biasWeights = (AddOpNodes.size() == 1) ? + readTrainedWeights(BiasTensor, BiasN, BiasC, BiasH, BiasW) + : Weights(DataType::HALF, nullptr, 0); + + auto *Layer = Network->addFullyConnected(InputNVDLATensor, numOutputs, + kernelWeights, biasWeights, biasMode); + if(AddOpNodes.size()) { + auto *Node = AddOpNodes[0]; + mapOutputTensor(Node, Layer->getOutput(0)); + } else { + mapOutputTensor(N, Layer->getOutput(0)); + } + Layer->setName((std::string("gemm") + getLayerName(std::string("gemm"))).c_str()); + DEBUG(errs() << Layer->getName() << "\n"); +} + +void CGT_NVDLA::generateReluLayer(DFLeafNode* N, const IntrinsicInst *II) { + DEBUG(errs() << "******RELU LAYER******\n"); + // Get input tensor + unsigned inputIndex = getInputIndex(N, II); + auto *InputTensor = getBindingTensor(N, inputIndex); + ITensor *InputNVDLATensor = getNVDLAInputTensor(N, InputTensor); + + auto *Layer = Network->addActivation(InputNVDLATensor, kRELU); + mapOutputTensor(N, Layer->getOutput(0)); + Layer->setName((std::string("relu") + getLayerName(std::string("relu"))).c_str()); + DEBUG(errs() << Layer->getName() << "\n"); +} + +void CGT_NVDLA::generateSoftMaxLayer(DFLeafNode* N, const IntrinsicInst *II) { + DEBUG(errs() << "******SOFTMAX LAYER*******\n"); + // Get input tensor + unsigned inputIndex = getInputIndex(N, II); + auto *InputTensor = getBindingTensor(N, inputIndex); + ITensor *InputNVDLATensor = getNVDLAInputTensor(N, InputTensor); + + auto *Layer = Network->addSoftMax(InputNVDLATensor); + mapOutputTensor(N, Layer->getOutput(0)); + Layer->setName((std::string("softmax") + getLayerName(std::string("softmax"))).c_str()); + DEBUG(errs() << Layer->getName() << "\n"); +} + +void CGT_NVDLA::generateTanhLayer(DFLeafNode* N, const IntrinsicInst *II) { + DEBUG(errs() << "*******TANH LAYER*******\n"); + // Get input tensor + unsigned inputIndex = getInputIndex(N, II); + auto *InputTensor = getBindingTensor(N, inputIndex); + ITensor *InputNVDLATensor = getNVDLAInputTensor(N, InputTensor); + + auto *Layer = Network->addActivation(InputNVDLATensor, kTANH); + mapOutputTensor(N, Layer->getOutput(0)); + Layer->setName((std::string("tanh") + getLayerName(std::string("tanh"))).c_str()); + DEBUG(errs() << Layer->getName() << "\n"); +} + +/* +void CGT_NVDLA::generateBatchNormLayer(DFLeafNode* N, const IntrinsicInst *II) { + const dc::BatchNormParameter& p = msg.batch_norm_param(); + Weights mean = weightFactory(msg.name(), kMEAN); + Weights variance = weightFactory(msg.name(), kVARIANCE); + Weights movingAverage = weightFactory(msg.name(), kMOVING_AVERAGE); + float eps = p.eps(); + float scaleFactor = 1.0f; + float average = 0.0f; + int i; + + average = *(static_cast<const float*>(movingAverage.values)); + if ( average == 0.0f ) + { + gLogError << "Batch Normalization moving average is zero " << std::endl; + return 0; + } + scaleFactor /= average; + + if (mean.count != variance.count) + { + gLogError << "Mean and variance have differing number of elements " + << mean.count << " & " << variance.count << 
std::endl; + return 0; + } + + float *meanBlob = (float *)mean.values; + float *varianceBlob = (float *)variance.values; + + Dims4 inputDims = getIntermediateInputTensor(N)->getDimensions(); + BatchNormMode mode; + + if (mean.count == 1) + { + mode = BatchNormMode::bnUNIFORM; + meanBlob[0] = meanBlob[0] * scaleFactor; + varianceBlob[0] = varianceBlob[0] * scaleFactor; + } + else if (mean.count == inputDims.c) + { + mode = BatchNormMode::bnm_CHANNEL; + for (i = 0; i < mean.count; i++) + { + meanBlob[i] = meanBlob[i] * scaleFactor; + varianceBlob[i] = varianceBlob[i] * scaleFactor; + } + } + else + { + gLogError << "Unknown batch norm mode" << std::endl; + return 0; + } + + // Get input tensor + unsigned inputIndex = getInputIndex(N, II); + Value *InputTensor = getBindingTensor(inputIndex); + ITensor *InputNVDLATensor = getNVDLAInputTensor(InputTensor); + + auto *Layer = Network->addBatchNorm(InputNVDLATensor, mode, mean, variance, eps); + mapOutputTensor(N, Layer->getOutput(0)); +} +*/ + +unsigned CGT_NVDLA::identifyOutputs() { + std::set< ITensor* > outputTensors; + std::set< ITensor* > InputTensors; + + for (int l = 0; l < Network->getNumLayers(); ++l) { + ILayer* layer = Network->getLayer(l); + assert(layer && "Illegal NVDLA compiler IR!"); + for (int ii = 0; ii < layer->getNumInputs(); ++ii) { + InputTensors.insert(layer->getInput(ii)); + } + for (int oo = 0; oo < layer->getNumOutputs(); ++oo) { + outputTensors.insert(layer->getOutput(oo)); + } + } + + for (std::set<ITensor*>::iterator oi = outputTensors.begin(); oi != outputTensors.end(); ++oi) { + // An output tensor which is not an input to any other layers is a Network output tensor + if (InputTensors.find(*oi) == InputTensors.end()) + Network->markOutput(*oi); + } + return Network->getNumOutputs(); +} + +void CGT_NVDLA::codeGen(DFLeafNode *N) { + // No allocation nodes allowed. + assert(!N->isAllocationNode() && "Allocation Node not expected in ApproxHPVM"); + + // Skip code generation if it is a dummy node + if(N->isDummyNode()) { + DEBUG(errs() << "Skipping dummy node\n"); + return; + } + + // Generate code only if it has the right hint + //if (!checkPreferredTarget(N, hpvm::NVDLA_TARGET)) { + // DEBUG(errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"); + // return; + // } + + // Get the function associated with the dataflow node + auto *F = N->getFuncPointer(); + DEBUG(errs()<<"function name = "<< F->getName()<<"\n"); + + // Generate code for every instruction in this node + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { + Instruction *I = &(*i); + + if (BuildDFG::isViscIntrinsic(I)) { + auto *II = dyn_cast<IntrinsicInst>(I); + assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") + && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + + switch (II->getIntrinsicID()) { + case Intrinsic::hpvm_tensor_convolution: + case Intrinsic::hpvm_tensor_group_convolution: + generateConvolutionLayer(N, II); + break; + + case Intrinsic::hpvm_tensor_batchnorm: + generateBatchNormLayer(N, II); + break; + + case Intrinsic::hpvm_tensor_mul: + generateGemmLayer(N, II); + break; + + case Intrinsic::hpvm_tensor_add: + // Add not explicitly supported by NVDLA compiler! + break; + + case Intrinsic::hpvm_tensor_pool_max: + case Intrinsic::hpvm_tensor_pool_mean: + generatePoolingLayer(N, II); + break; + + case Intrinsic::hpvm_tensor_relu: + generateReluLayer(N, II); + break; + + case Intrinsic::hpvm_tensor_clipped_relu: + // No need to generate NVDLA IR for this? 
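+        // Clipped ReLU is currently dropped: no NVDLA layer is emitted, since only
+        // plain ReLU and tanh activations are generated via addActivation().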
+ break; + + case Intrinsic::hpvm_tensor_tanh: + generateTanhLayer(N, II); + break; + + case Intrinsic::hpvm_tensor_softmax: + generateSoftMaxLayer(N, II); + break; + + default: + llvm_unreachable("Unknown HPVM Intrinsic!"); + break; + } + } + } +} + +void CGT_NVDLA::codeGen(DFInternalNode* N) { + DEBUG(errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"); + DEBUG(errs () << "Skipping internal node\n"); +} + +NvDlaError CGT_NVDLA::parseSetup(const TestAppArgs* appArgs, TestInfo* i) { + return NvDlaSuccess; +} + +NvDlaError CGT_NVDLA::transformHPVM2NVDLA(const TestAppArgs* appArgs, TestInfo* i) { + NVDLA_UNUSED(appArgs); + NvDlaError e = NvDlaSuccess; + + Network = nullptr; + Network = nvdla::createNetwork(); + if (!Network) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "createNetwork() failed"); + + // Iterate over all the DFGs and produce code for each one of them + for(auto &RootNode: *(appArgs->Roots)) + visit(RootNode); + + // if the application has so far not marked the network's outputs, allow the parser to do so now + if (Network->getNumOutputs() <= 0) { + int outs = identifyOutputs(); + DEBUG(NvDlaDebugPrintf("Marking total %d outputs\n", outs)); + if (outs <= 0) + ORIGINATE_ERROR_FAIL(NvDlaError_BadValue, "Unable to identify outputs for the network: %d", outs); + } + + if (appArgs->computePrecision == nvdla::DataType::INT8) { + if (appArgs->calibTable != "") { + DEBUG(NvDlaDebugPrintf("parsing calibration table...\n")); + PROPAGATE_ERROR_FAIL(readTensorScales(appArgs, i, Network)); + } else { + DEBUG(NvDlaDebugPrintf("initialize all tensors with const scaling factors of 127...\n")); + PROPAGATE_ERROR_FAIL(generateTensorScales(appArgs, i, Network)); + } + } + + DEBUG(NvDlaDebugPrintf("attaching parsed network to the wisdom...\n")); + if (!i->wisdom->setNetworkTransient(Network)) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "wisdom->setNetworkTransient() failed"); + + return NvDlaSuccess; + +fail: + return e; +} + +NvDlaError CGT_NVDLA::parseAndCompile(const TestAppArgs* appArgs, TestInfo* i) { + NvDlaError e = NvDlaSuccess; + bool isCaffe = appArgs->caffemodel != ""; + + PROPAGATE_ERROR_FAIL(parseSetup(appArgs, i)); + + DEBUG(NvDlaDebugPrintf("creating new wisdom context...\n")); + i->wisdom = nvdla::createWisdom(); + if (!i->wisdom) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "createWisdom() failed"); + + DEBUG(NvDlaDebugPrintf("opening wisdom context...\n")); + if (!i->wisdom->open(i->wisdomPath)) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "wisdom->open() failed to open: \"%s\"", i->wisdomPath.c_str()); + + // Parse + PROPAGATE_ERROR_FAIL(transformHPVM2NVDLA(appArgs, i)); + + // Compile + PROPAGATE_ERROR_FAIL(compileProfile(appArgs, i)); + + /* Destroy network before closing wisdom context */ + nvdla::destroyNetwork(i->wisdom->getNetwork()); + + DEBUG(NvDlaDebugPrintf("closing wisdom context...\n")); + i->wisdom->close(); + +fail: + if (i->wisdom != NULL) { + nvdla::destroyWisdom(i->wisdom); + i->wisdom = NULL; + } + return e; +} + +NvDlaError CGT_NVDLA::testSetup(const TestAppArgs* appArgs, TestInfo* i) { + NvDlaError e = NvDlaSuccess; + + std::string wisdomPath = appArgs->outputPath + "wisdom.dir/"; + std::string removeCmd = ""; + std::string imagePath = ""; + NvDlaStatType stat; + int ii = 0; + + // Do input paths exist? + e = NvDlaStat(appArgs->inputPath.c_str(), &stat); + if (e != NvDlaSuccess) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "Input path does not exist: \"%s\"", appArgs->inputPath.c_str()); + + // Do output paths exist? 
+ e = NvDlaStat(appArgs->outputPath.c_str(), &stat); + if (e != NvDlaSuccess) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "Output path does not exist: \"%s\"", appArgs->outputPath.c_str()); + + // Clear wisdomPath if any exist + removeCmd += "rm -rf " + wisdomPath; + ii = std::system(removeCmd.c_str()); // This is pretty awful + if (ii != 0) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "system command failed: \"%s\"", removeCmd.c_str()); + + PROPAGATE_ERROR_FAIL(NvDlaMkdir(const_cast<char *>(wisdomPath.c_str()))); + + // Initialize TestInfo + i->wisdom = NULL; + i->wisdomPath = wisdomPath; + i->pData = NULL; + + return NvDlaSuccess; + +fail: + return e; +} + +NvDlaError CGT_NVDLA::launchTest(const TestAppArgs* appArgs) { + NvDlaError e = NvDlaSuccess; + TestInfo testInfo; + + PROPAGATE_ERROR_FAIL(testSetup(appArgs, &testInfo)); + + PROPAGATE_ERROR_FAIL(parseAndCompile(appArgs, &testInfo)); + + return NvDlaSuccess; + +fail: + return e; +} + +bool HPVM2NVDLA::runOnModule(Module &M) { + DEBUG(errs() << "**************HPVM2NVDLA PASS****************\n"); + + NvDlaError e = NvDlaError_TestApplicationFailed; + TestAppArgs testAppArgs = defaultTestAppArgs; + + // Get the HPVM IR graph + BuildDFG &DFG = getAnalysis<BuildDFG>(); + std::vector<DFInternalNode *> Roots = DFG.getRoots(); + + // Visitor for Code Generation Graph Traversal + CGT_NVDLA *CGTVisitor = new CGT_NVDLA(M, DFG); + + if(ComputePrecision == "INT8" || ComputePrecision == "int8") { + testAppArgs.computePrecision = nvdla::DataType::INT8; + testAppArgs.quantizationMode = nvdla::QuantizationMode::PER_KERNEL; + testAppArgs.configtarget = std::string("nv_small"); + } else { + testAppArgs.computePrecision = nvdla::DataType::HALF; + testAppArgs.quantizationMode = nvdla::QuantizationMode::NONE; + testAppArgs.configtarget = std::string("nv_full"); + } + testAppArgs.profileName = std::string("hpvm-mod"); + testAppArgs.calibTable = CalibTablePath;//std::string("output_scales.txt"); + testAppArgs.outputPath = std::string("."); + testAppArgs.inDataFormat = nvdla::DataFormat::NCHW; + + testAppArgs.Roots = &Roots; + + e = CGTVisitor->launchTest(&testAppArgs); + if (e != NvDlaSuccess) + DEBUG(errs() << "ERROR\n"); + else + DEBUG(errs() << "SUCESS\n"); + + delete CGTVisitor; + + return false; +} + +NvDlaError CGT_NVDLA::compileProfile(const TestAppArgs* appArgs, TestInfo* i) { + NvDlaError e = NvDlaSuccess; + std::string profileName = ""; + std::string targetConfigName = ""; + + NvDlaFileHandle file = 0; + std::string fileName = ""; + NvU8 *buffer = 0; + NvU64 size = 0; + + nvdla::ICompiler* compiler = i->wisdom->getCompiler(); + if (!compiler) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "wisdom->getCompiler() failed"); + + if (!(appArgs->configtarget != "")) + ORIGINATE_ERROR_FAIL(NvDlaError_NotInitialized, "No target config found to load"); + + targetConfigName = appArgs->configtarget; + + // Determine profile + PROPAGATE_ERROR_FAIL(generateProfile(appArgs, &profileName, i)); + + // Compile + DEBUG(NvDlaDebugPrintf("compiling profile \"%s\"... 
config \"%s\"...\n", profileName.c_str(), targetConfigName.c_str())); + PROPAGATE_ERROR_FAIL(compiler->compile(profileName.c_str(), targetConfigName.c_str(), &i->compiledLoadable)); + + // Get loadable buffer and dump it into a file + PROPAGATE_ERROR_FAIL(compiler->getLoadableImageSize(profileName.c_str(), + &size)); + if (size == 0) { + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, + "Invalid size for a loadable"); + } + + buffer = (NvU8 *) NvDlaAlloc(size); + if (buffer == NULL) { + ORIGINATE_ERROR_FAIL(NvDlaError_InsufficientMemory, + "Failed to allocate buffer for loadable"); + } + PROPAGATE_ERROR_FAIL(compiler->getLoadableImage(profileName.c_str(), + buffer)); + fileName = profileName + ".nvdla"; + errs() << "Writing NVDLA module '" << fileName << "' ..."; + PROPAGATE_ERROR_FAIL(NvDlaFopen(fileName.c_str(), NVDLA_OPEN_WRITE, &file)); + PROPAGATE_ERROR_FAIL(NvDlaFwrite(file, buffer, size)); + errs() << " done.\n"; + +fail: + NvDlaFclose(file); + if (buffer != NULL) + NvDlaFree(buffer); + return e; +} + +NvDlaError CGT_NVDLA::generateProfile(const TestAppArgs* appArgs, std::string* profileName, TestInfo* i) { + NvDlaError e = NvDlaSuccess; + nvdla::DataFormat inDataFormat = nvdla::DataFormat::UNKNOWN; + + if (appArgs->profileName != "") { + // init named profile (basic/default/performance) with default params in its constructor and exit + DEBUG(errs() << "PROFILE NAME PROVIDED\n"); + PROPAGATE_ERROR_FAIL(beginWithNamedProfile(appArgs, i)); + *profileName = appArgs->profileName; + } else { + ORIGINATE_ERROR_FAIL(NvDlaError_NotInitialized, "No profile supplied to load"); + } + + // capture profile params from command line (override the existing ones as necessary) + inDataFormat = inDataFormat == nvdla::DataFormat::UNKNOWN ? appArgs->inDataFormat : inDataFormat; + PROPAGATE_ERROR_FAIL(updateProfileWithCmdLineArgs(appArgs, i, profileName->c_str(), inDataFormat)); + +fail: + return e; +} + +NvDlaError CGT_NVDLA::beginWithNamedProfile(const TestAppArgs* appArgs, TestInfo* i) { + NvDlaError e = NvDlaSuccess; + nvdla::IProfiler* profiler; + nvdla::IProfile* profile; + + profiler = i->wisdom->getProfiler(); + if ( !profiler ) { + ORIGINATE_ERROR_FAIL(NvDlaError_NotInitialized, "Profiler not initialized"); + } + + profile = profiler->getProfile(appArgs->profileName.c_str()); + if ( !profile ) { + ORIGINATE_ERROR_FAIL(NvDlaError_NotInitialized, "Profile %s not initialized", appArgs->profileName.c_str()); + } + +fail: + return e; +} + +NvDlaError CGT_NVDLA::updateProfileWithCmdLineArgs +( + const TestAppArgs* appArgs, TestInfo* i, const char* profileName, nvdla::DataFormat inDataFormat +) { + NvDlaError e = NvDlaSuccess; + nvdla::IProfiler* profiler; + nvdla::IProfile* profile; + + profiler = i->wisdom->getProfiler(); + if (!profiler) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "wisdom->getProfiler() failed"); + profile = profiler->getProfile(profileName); + if (!profile) + ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "profiler->getProfile() failed"); + + PROPAGATE_ERROR_FAIL(profile->setComputePrecision(appArgs->computePrecision)); + PROPAGATE_ERROR_FAIL(profile->setNetworkInputDataFormat(inDataFormat)); + + // determine input surface format + switch(inDataFormat) { + case nvdla::DataFormat::NHWC: + + if (appArgs->computePrecision == nvdla::DataType::HALF) { + PROPAGATE_ERROR_FAIL(profile->setNetworkInputSurfaceFormat(nvdla::PixelFormat::A16B16G16R16_F)); + } else if (appArgs->computePrecision == nvdla::DataType::INT8) { + 
PROPAGATE_ERROR_FAIL(profile->setNetworkInputSurfaceFormat(nvdla::PixelFormat::A8B8G8R8)); + } else { + ORIGINATE_ERROR_FAIL(NvDlaError_NotSupported, "NHWC and compute precision %u is not yet supported", + appArgs->computePrecision.v()); + } + break; + case nvdla::DataFormat::NCxHWx: + case nvdla::DataFormat::NCHW: + case nvdla::DataFormat::UNKNOWN: // atleast start the test with feature data format + default: + if (std::strcmp(appArgs->configtarget.c_str(), "opendla-small") == 0) + PROPAGATE_ERROR_FAIL(profile->setNetworkInputSurfaceFormat(nvdla::PixelFormat::FEATURE_X8)); + else + PROPAGATE_ERROR_FAIL(profile->setNetworkInputSurfaceFormat(nvdla::PixelFormat::FEATURE)); + } + + // determine int8 cfgs + if (appArgs->computePrecision == nvdla::DataType::INT8) { + PROPAGATE_ERROR_FAIL(profile->setTensorScalingMode(nvdla::TensorScalingMode::PER_TENSOR)); + switch(appArgs->quantizationMode) { + case nvdla::QuantizationMode::PER_FILTER: + PROPAGATE_ERROR_FAIL(profile->setQuantizationMode(nvdla::QuantizationMode::PER_FILTER)); + break; + case nvdla::QuantizationMode::PER_KERNEL: + case nvdla::QuantizationMode::NONE: // default to per-kernel; find a way to run int8 tests w/ NONE qtzMode cleanly + default: + PROPAGATE_ERROR_FAIL(profile->setQuantizationMode(nvdla::QuantizationMode::PER_KERNEL)); + } + } else { + PROPAGATE_ERROR_FAIL(profile->setTensorScalingMode(nvdla::TensorScalingMode::NONE)); + PROPAGATE_ERROR_FAIL(profile->setQuantizationMode(nvdla::QuantizationMode::NONE)); + } + + PROPAGATE_ERROR_FAIL(profile->setNetworkOutputDataFormat(nvdla::DataFormat::NCxHWx)); + + if (std::strcmp(appArgs->configtarget.c_str(), "opendla-small") == 0) + PROPAGATE_ERROR_FAIL(profile->setNetworkOutputSurfaceFormat(nvdla::PixelFormat::FEATURE_X8)); + else + PROPAGATE_ERROR_FAIL(profile->setNetworkOutputSurfaceFormat(nvdla::PixelFormat::FEATURE)); + + if (appArgs->numBatches > 0) + PROPAGATE_ERROR_FAIL(profile->setMultiBatchSize(appArgs->numBatches)); + +fail: + return e; +} + +NvDlaError CGT_NVDLA::generateTensorScales(const TestAppArgs* appArgs, TestInfo* i, nvdla::INetwork* network) { + NvDlaError e = NvDlaSuccess; + + std::vector<nvdla::ILayer*> networkLayers = network->getLayers(); + std::vector<nvdla::ITensor*> networkInputs = network->getInputs(); + + std::vector<nvdla::ILayer*>::iterator li = networkLayers.begin(); + std::vector<nvdla::ITensor*>::iterator nii = networkInputs.begin(); + + // set scaling factor for the network input tensors + for (; nii != networkInputs.end(); ++nii) { + NvF32 scale = 1; + NvF32 min = scale * -127.0f; + NvF32 max = scale * 127.0f; + std::string tName = (*nii)->getName(); + DEBUG(errs() << "INPUT NAME: " << tName << "\n"); + // set same dynamic range for all channels of the tensor (cIndex = -1) + PROPAGATE_ERROR_FAIL( (*nii)->setChannelDynamicRange(-1, min, max) ); + const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>(tName, scale)); + if (0) + NvDlaDebugPrintf("setting dynamic range of: %s to %f\n", tName.c_str(), scale); + } + + for (; li != networkLayers.end(); ++li) { + NvF32 scale = 127; + NvF32 min = scale * -127.0f; + NvF32 max = scale * 127.0f; + std::string lName = (*li)->getName(); + nvdla::ITensor* outTensor = (*li)->getOutput(0); + DEBUG(errs() << "LAYER NAME: " << lName << "\n"); + // set same dynamic range for all channels of the tensor (cIndex = -1) + PROPAGATE_ERROR_FAIL( outTensor->setChannelDynamicRange(-1, min, max) ); + const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>(lName, scale)); 
+ if (0) + NvDlaDebugPrintf("setting dynamic range of: %s to %f\n", lName.c_str(), scale); + } + +fail: + return e; +} + +NvDlaError CGT_NVDLA::readTensorScales(const TestAppArgs* appArgs, TestInfo *i, nvdla::INetwork* network) { + NvDlaError e = NvDlaSuccess; + NvDlaStatType stat; + std::string calibTableFile = /*i->calibTablesPath + "/" + */appArgs->calibTable; + + //PROPAGATE_ERROR_FAIL(NvDlaStat(calibTableFile.c_str(), &stat)); + DEBUG(errs() << "***********READING TENSOR SCALESi*************\n"); + std::ifstream infile(calibTableFile.c_str()); + std::string line; + std::map<std::string, float> LayerNameToScaleMap; + while (std::getline(infile, line)) { + DEBUG(errs() << "READ LINE: " << line << "\n"); + line.erase(remove(line.begin(), line.end(), ' '), line.end()); + DEBUG(errs() << "READ LINE WITHOUT WHITE SPACES: " << line << "\n"); + std::string delimiter = ":"; + std::string layer_name = line.substr(0, line.find(delimiter)); + std::string Scale = line.substr(line.find(delimiter) + 1); + DEBUG(errs() << "LAYER NAME: " << layer_name << "\n"); + DEBUG(errs() << "SCALE: " << Scale << "\n"); + size_t size; + LayerNameToScaleMap[layer_name] = std::stof(Scale, &size); + } + infile.close(); + DEBUG(errs() << "GOT TENSOR SCALES FROM CALIB TABLE\n"); + + std::vector<nvdla::ILayer*> networkLayers = network->getLayers(); + std::vector<nvdla::ITensor*> networkInputs = network->getInputs(); + for (auto *Input : networkInputs) { + NvF32 scale = 0.0f; + NvF32 min = 0.0f; + NvF32 max = 0.0f; + DEBUG(errs() << "SET SCALE FOR INPUT\n"); + scale = LayerNameToScaleMap["input"]; + DEBUG(errs() << "INPUT SCALE: " << scale << "\n"); + min = scale * -127.0f; + max = scale * 127.0; + PROPAGATE_ERROR_FAIL(Input->setChannelDynamicRange(-1, min, max) ); + const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>("data", scale)); + } + DEBUG(errs() << "PER LAYER CALIB\n"); + for (auto *Layer : networkLayers) { + NvF32 scale = 0.0f; + NvF32 min = 0.0f; + NvF32 max = 0.0f; + std::string tName = Layer->getName(); + DEBUG(errs() << "SETTING SCALE FOR LAYER NAME: " << tName << "\n"); + nvdla::ITensor* outTensor = Layer->getOutput(0); + auto it = LayerNameToScaleMap.find(tName); + if (it != LayerNameToScaleMap.end()) { + DEBUG(errs() << "SET SCALE FOR NAME: " << tName << "\n"); + DEBUG(errs() << "SCALE: " << it->second << "\n"); + scale = it->second; + min = scale * -127.0f; + max = scale * 127.0f; + } else { + DEBUG(errs() << "SET DEFAULT SCALE FOR NAME: " << tName << "\n"); + DEBUG(errs() << "SCALE: 1\n"); + scale = 1; + min = scale * -127.0f; + max = scale * 127.0f; + } + //else { + // ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "Atleast 1 of scale or min-max should be specified for %s\n", tName.c_str()); + //} + PROPAGATE_ERROR_FAIL( outTensor->setChannelDynamicRange(-1, min, max) ); + const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>(tName, scale)); + } + + DEBUG(errs() << "DONE PARSING CALIBRATION TABLE\n"); + fail: + return e; +} + diff --git a/hpvm/lib/Transforms/HPVM2NVDLA/LLVMBuild.txt b/hpvm/lib/Transforms/HPVM2NVDLA/LLVMBuild.txt new file mode 100644 index 0000000000..44e63f3c71 --- /dev/null +++ b/hpvm/lib/Transforms/HPVM2NVDLA/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. 
+; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = HPVM2NVDLA +parent = Transforms -- GitLab