diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..dc98faafeca139586e3891b4defc4e20c8bfae00 --- /dev/null +++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt @@ -0,0 +1,12 @@ +if(WIN32 OR CYGWIN) + set(LLVM_LINK_COMPONENTS Core Support) +endif() + +add_llvm_loadable_module( LLVMDFG2LLVM_CUDNN + DFG2LLVM_CUDNN.cpp + + DEPENDS + intrinsics_gen + PLUGIN_TOOL + opt + ) diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp new file mode 100644 index 0000000000000000000000000000000000000000..3699a43a49c9beb11b011bd684cb8f038b03e00c --- /dev/null +++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp @@ -0,0 +1,338 @@ +//=== DFG2LLVM_CUDNN.cpp ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +#define ENABLE_ASSERTS + +#define DEBUG_TYPE "DFG2LLVM_CUDNN" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/Transforms/Utils/ValueMapper.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/IRReader/IRReader.h" +#include "llvm/Linker/Linker.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/IR/Attributes.h" +#include "llvm-c/Core.h" +#include "llvm/SupportVISC/VISCTimer.h" +#include "llvm/SupportVISC/DFG2LLVM.h" +#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h" +#include <sstream> + +using namespace llvm; +using namespace builddfg; +using namespace dfg2llvm; + +using namespace inplacedfg; + +namespace { +// Helper class declarations + +// DFG2LLVM_CUDNN - The first implementation. 
+
+// Module pass that walks every dataflow graph in the module and generates
+// code for the leaf nodes that are mapped to the CUDNN target.
+struct DFG2LLVM_CUDNN : public DFG2LLVM {
+  static char ID; // Pass identification, replacement for typeid
+  DFG2LLVM_CUDNN() : DFG2LLVM(ID) {}
+
+  // Declare every analysis that runOnModule() fetches through getAnalysis<>();
+  // the pass manager only schedules analyses that are announced here.
+  // NOTE(review): the DFG2LLVM base is not visible in this file; its own
+  // requirements (if any) are kept by chaining to it first.
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    DFG2LLVM::getAnalysisUsage(AU);
+    AU.addRequired<BuildDFG>();
+    AU.addRequired<InPlaceDFGAnalysisWrapper>();
+  }
+
+  // Entry point of the pass; returns true because the module is modified.
+  bool runOnModule(Module &M) override;
+};
+
+// Visitor for Code generation traversal (tree traversal for now)
+class CGT_CUDNN : public CodeGenTraversal {
+
+private:
+  // Member variables
+
+  // Result of the in place analysis, consulted when an operation can only be
+  // performed in place. Not owned: it must outlive this visitor.
+  InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;
+
+  // VISC Runtime API and Tensor runtime API.
+  // Filled in by initRuntimeAPI(); null until then.
+  Constant *llvm_visc_initTensorRt = nullptr;    // FIXME: sync names
+  Constant *llvm_visc_cleanupTensorRt = nullptr; // FIXME: add function
+  Constant *llvm_visc_requestTensor = nullptr;   // FIXME: add function
+
+  // Functions
+
+  // Virtual Functions
+  void init() override;
+  void initRuntimeAPI() override;
+  void codeGen(DFInternalNode *N) override;
+  void codeGen(DFLeafNode *N) override;
+
+public:
+  // Constructor: binds the module, the dataflow graph and the in place
+  // analysis result, then loads the runtime API declarations right away.
+  CGT_CUDNN(Module &Mod, BuildDFG &Graph,
+            InPlaceDFGAnalysis::InPlaceDFGParameter &InPlace)
+      : CodeGenTraversal(Mod, Graph), IPP(&InPlace) {
+    initRuntimeAPI();
+  }
+};
+
+// Nothing to set up for this backend yet.
+void CGT_CUDNN::init() {
+  // FIXME: what to do here? If anything?
+}
+
+// Initialize the VISC runtime API. 
This makes it easier to insert these calls
+void CGT_CUDNN::initRuntimeAPI() {
+
+  // Load Runtime API Module
+  SMDiagnostic Err;
+
+  // Report a missing environment variable even when asserts are compiled out.
+  const char *LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
+  if (LLVM_SRC_ROOT == nullptr)
+    report_fatal_error("Define LLVM_SRC_ROOT environment variable!");
+
+  // FIXME: set correct path
+  // The path is built as an owning std::string. A Twine only references its
+  // operands, so keeping one in a local variable leaves it pointing at
+  // temporaries that are destroyed at the end of the statement.
+  std::string runtimeAPI = LLVM_SRC_ROOT;
+  runtimeAPI += "/../build/projects/hpvm-tensor-rt/hpvm-tensor-rt.ll";
+
+  runtimeModule = parseIRFile(runtimeAPI, Err, M.getContext());
+  // NOTE(review): the DECLARE macro is defined outside this file; it is
+  // presumed to look the functions up in runtimeModule, so there is no point
+  // in continuing without the module.
+  if (runtimeModule == nullptr)
+    report_fatal_error(Twine("Unable to load hpvm-tensor-rt API module '") +
+                       runtimeAPI + "': " + Err.getMessage());
+  DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
+
+  // Get or insert Global declarations for
+  // - initialization
+  // - cleanup
+  // - request a tensor
+  DECLARE(llvm_visc_initTensorRt);
+  DECLARE(llvm_visc_cleanupTensorRt);
+  DECLARE(llvm_visc_requestTensor);
+
+  // Find visc.init and visc.cleanup calls, and add placeholder methods
+  // for initialization and cleanup of the hpvm tensor runtime
+
+  Function *VI = M.getFunction("llvm.visc.init");
+  assert(VI && "llvm.visc.init is not declared in the module\n");
+  assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n");
+  InitCall = cast<Instruction>(*VI->user_begin());
+  // The tensor runtime is initialized right before the visc.init call
+  CallInst::Create(llvm_visc_initTensorRt, ArrayRef<Value*>(), "", InitCall);
+
+  Function *VC = M.getFunction("llvm.visc.cleanup");
+  assert(VC && "llvm.visc.cleanup is not declared in the module\n");
+  assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once\n");
+  CleanupCall = cast<Instruction>(*VC->user_begin());
+  // The tensor runtime is cleaned up right before the visc.cleanup call
+  CallInst::Create(llvm_visc_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
+
+}
+
+
+// Internal nodes carry no tensor operations: report the node and move on.
+void CGT_CUDNN::codeGen(DFInternalNode* N) {
+  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
+  errs () << "Skipping internal node\n";
+}
+
+// Generate the CUDNN version of a leaf node: clone its function, request every
+// pointer argument as a tensor, and replace each tensor intrinsic in the clone
+// with a call into the tensor runtime.
+void CGT_CUDNN::codeGen(DFLeafNode* N) {
+
+  // Skip code generation if it is a dummy node
+  if(N->isDummyNode()) {
+    DEBUG(errs() << "Skipping dummy node\n");
+    return;
+  }
+
+  // Abort code generation if it is an allocation node
+  if(N->isAllocationNode()) {
+    assert(false && "Allocation Node not expected in ApproxHPVM");
+    return;
+  }
+
+  // Generate code only if it has the right hint
+  if (!checkPreferredTarget(N, visc::CUDNN_TARGET)) {
+    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
+    return;
+  }
+
+  // Get the function associated with the dataflow node
+  Function *F = N->getFuncPointer();
+
+  // A node must be visited only once: no CUDNN function may be registered
+  // for it at this point.
+  Function *F_cudnn = N->getGenFuncForTarget(visc::CUDNN_TARGET);
+
+  assert((F_cudnn == NULL) &&
+         "Error: Visiting a node for which code already generated");
+
+  // Clone the function
+  ValueToValueMapTy VMap;
+  F_cudnn = CloneFunction(F, VMap);
+  // The Twine is built and consumed within this single statement, so the
+  // StringRef and the literal it references are still alive when it is read.
+  F_cudnn->setName(F->getName() + "_cudnn");
+
+  N->addGenFunc(F_cudnn, visc::CUDNN_TARGET, true);
+
+  // Adding nounwind to generated function : FIXME: needed?
+  DEBUG(errs() << "Adding nounwind to generated function\n");
+  F_cudnn->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
+
+  // Add llvm_visc_requestTensor calls for every pointer argument of the function
+  // (they are all expected to be tensors), at the beginning of the function.
+  // This is the first instruction of the function, insert them before this
+  Instruction* FI = &*(F_cudnn->getEntryBlock().begin());
+
+  // In this backend, the target device is GPU.
+  // Create an LLVM Value that represents the visc::GPU_TARGET FIXME
+
+  for (Argument &Arg : F_cudnn->args()) {
+    if (Arg.getType()->isPointerTy()) {
+      CallInst::Create(llvm_visc_requestTensor,
+                       ArrayRef<Value*>(&Arg), // FIXME: add second argument
+                       "", FI);
+    }
+  }
+
+  // Intrinsics that have been replaced and must be erased at the end
+  std::vector<IntrinsicInst *> IItoRemove;
+
+  // Results of the runtime calls that already replaced a tensor intrinsic.
+  // Instructions are visited in order and uses are rewritten immediately, so
+  // by the time a consumer is visited its operand is no longer the producing
+  // intrinsic but the runtime call recorded here.
+  std::vector<Value *> RuntimeResults;
+
+  for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) {
+    Instruction *I = &(*i);
+
+    if (!BuildDFG::isViscIntrinsic(I))
+      continue;
+
+    // cast<> asserts on a type mismatch instead of yielding a null pointer
+    // that would be dereferenced right below.
+    IntrinsicInst* II = cast<IntrinsicInst>(I);
+    assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
+      && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
+
+    /********************* Handle VISC Tensor intrinsics ********************/
+    switch (II->getIntrinsicID()) {
+
+    case Intrinsic::visc_tensor_mul:
+    { /* llvm.visc.tensor.mul */
+      // Tensor mul is not in place.
+      DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n");
+
+      // Argument list for the runtime call
+      std::vector<Value*> Args;
+      Args.push_back(II->getOperand(0));
+      Args.push_back(II->getOperand(1));
+
+      // Create cudnn runtime function call
+      Constant* tensorGemmGPU;
+      DECLARE(tensorGemmGPU);
+      CallInst* CI = CallInst::Create(tensorGemmGPU,
+                                      Args, "", II);
+      // We can replace the call to visc.tensor.mul with the runtime call
+      II->replaceAllUsesWith(CI);
+      RuntimeResults.push_back(CI);
+
+      // Mark to remove at the end
+      IItoRemove.push_back(II);
+    }
+    break;
+    case Intrinsic::visc_tensor_add:
+    { /* llvm.visc.tensor.add */
+      DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n");
+      // Tensor add(a,b) is in place for argument a.
+      Value *Op = II->getOperand(0);
+
+      // First, test if it is a parameter of the function
+      if (Argument *Arg = dyn_cast<Argument>(Op)) {
+        DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
+        assert((Arg->getParent() == F_cudnn) &&
+              "Extra Parameter in body of Function\n");
+        // In this case, consult the result of in place analysis at the
+        // position of the parameter in the argument list.
+        // If this parameter cannot be used for in place operation
+        // code gen cannot continue
+        assert((IPP->at(N)[Arg->getArgNo()]) &&
+               "Only in place operation provided by CUDNN runtime but "
+               "parameter is not eligible for in place operation\n");
+      } else {
+        // If it is not an argument, then it needs to be the result of
+        // another tensor operation: either an intrinsic that is still in
+        // place, or the runtime call that has already replaced one. These
+        // are new objects that are allocated, and consumed by next intrinsic.
+        DEBUG(errs() << *Op << "\t: Test for intrinsic operation\n");
+        bool IsTensorResult = isa<IntrinsicInst>(Op);
+        for (Value *Produced : RuntimeResults)
+          IsTensorResult |= (Produced == Op);
+        assert(IsTensorResult &&
+               "Operand not valid for in place operation. Code gen aborted.\n");
+        (void)IsTensorResult; // only read by the assert
+      }
+
+      // Argument list for the runtime call
+      std::vector<Value*> Args;
+      Args.push_back(II->getOperand(0));
+      Args.push_back(II->getOperand(1));
+
+      // Create cudnn runtime function call. Its own result is not used:
+      // the sum is left in the first operand.
+      Constant* tensorAdd;
+      DECLARE(tensorAdd);
+      CallInst::Create(tensorAdd, Args, "", II);
+      // We can replace the call to visc.tensor.add with the 1st argument
+      // that, due to in place operation, now contains the result
+      II->replaceAllUsesWith(II->getOperand(0));
+
+      // Mark to remove at the end
+      IItoRemove.push_back(II);
+    }
+    break;
+    default:
+      llvm_unreachable("Unknown VISC Intrinsic!");
+    }
+  }
+
+  // We need to do this explicitly: DCE pass may not remove them.
+  // Traverse the vector backwards, otherwise definitions are deleted while
+  // their subsequent uses are still around.
+  for (auto ri = IItoRemove.rbegin(), re = IItoRemove.rend(); ri != re; ++ri) {
+    DEBUG(errs() << "Erasing: " << **ri << "\n");
+    (*ri)->eraseFromParent();
+  }
+}
+
+// Run CUDNN code generation over every dataflow graph found in the module.
+// Always returns true: the module is reported as modified.
+bool DFG2LLVM_CUDNN::runOnModule(Module &M) {
+  errs() << "\nDFG2LLVM_CUDNN PASS\n";
+
+  // Get the BuildDFG Analysis Results:
+  // - Dataflow graph
+  BuildDFG &DFG = getAnalysis<BuildDFG>();
+
+  // Get the In Place Analysis Results
+  InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
+    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
+  // Print results
+  printInPlaceDFGParameter(IPP);
+
+  std::vector<DFInternalNode*> Roots = DFG.getRoots();
+
+  // Visitor for Code Generation Graph Traversal. It lives on the stack, so it
+  // is released on every exit path without an explicit delete.
+  CGT_CUDNN CGTVisitor(M, DFG, IPP);
+
+  // Iterate over all the DFGs and produce code for each one of them
+  for (DFInternalNode *rootNode : Roots) {
+    // Initiate code generation for root DFNode
+    CGTVisitor.visit(rootNode);
+  }
+
+  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
+
+  return true;
+}
+
+
+/******************************************************************************
+ *                              Helper functions                              *
+ ******************************************************************************/
+
+
+} // End of namespace
+
+char DFG2LLVM_CUDNN::ID = 0;
+static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn",
+                                      "Dataflow Graph to LLVM for CUDNN Pass",
+                                      false /* CFGOnly: the pass does more than
+                                             * look at the CFG */,
+                                      false /* is_analysis: this is a
+                                             * transformation, not an analysis */);
+
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1579b2fc47f527c7b10cc8e5f5b0e45fad742ad8
--- /dev/null
+++ 
b/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt ------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = DFG2LLVM_CUDNN +parent = Transforms