diff --git a/llvm/include/llvm/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h b/llvm/include/llvm/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h new file mode 100644 index 0000000000000000000000000000000000000000..72812071a3ee3965d05b9c006fb304550d4eb3ff --- /dev/null +++ b/llvm/include/llvm/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h @@ -0,0 +1,178 @@ +#ifndef __FUSE_HPVM_TENSOR_NODES_H__ +#define __FUSE_HPVM_TENSOR_NODES_H__ + +//=== FuseHPVMTensorNodes.h ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/IR/DFGraph.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" + +#include "llvm/BuildDFG/BuildDFG.h" +#include "llvm/SupportVISC/DFG2LLVM.h" + +using namespace llvm; + +namespace tensorfuse { + +class FuseHPVMTensorNodes { +public: + typedef std::vector< std::vector< IntrinsicInst* > > FusionTargets; +private: + // Member variables + + // Functions + +/* Create an identical bind (in or out, depending on the argument intrinsic) * + * with different src (true) or dst (false) port */ + IntrinsicInst* createIdenticalBindWithDifferentPort(IntrinsicInst* II, + unsigned port, + bool srcport); +/* Given two createNode intrinsics describing connected nodes, this function * + * returns the argument list type of the fused function */ + void createArgTypes(IntrinsicInst* II1, + IntrinsicInst* II2, + std::vector<Type*> &ArgTypes); +/* Get the return type of the function for fused node II1-II2 */ + StructType* createReturnType(IntrinsicInst* II1, IntrinsicInst* II2); +/* Copy argument names, from functions of II1 and II2 to F */ + void copyArgumentNames(IntrinsicInst* II1, + IntrinsicInst* II2, + Function* F); +/* Copy attributes, from functions of II1 and II2 to F */ + void copyAttrList(IntrinsicInst* II1, + IntrinsicInst* II2, + Function* F); +/* Creates and inserts an empty function of the rght type for the fused node */ + Function* createEmptyDFNodeFunction(IntrinsicInst* II1, + IntrinsicInst* II2, + Module &M); +/* Inline first node function, updating required mappings * + * - F1: first node function * + * - M: module containing the node function * + * - Ffused: fused node function * + * - VMap: maps values used in the body of F1 to those that mst be used in * + the body of the fused function instead * + * OutVs: This maps the output struct field index to the stored value */ + void inlineFirstNodeFunction(Module &M, + Function *F1, + Function *Ffused, + ValueMap<Value*, Value*> &VMap, + std::vector<Value*> &OutVs); +/* Inline second node function, updating required mappings * + * - F2: second node function * + * - M: module containing the node function * + * - Ffused: fused node function * + * - VMap: maps values used in the body of F2 to those that mst be used in * + the body of the fused function instead */ + void inlineSecondNodeFunction(Module &M, + Function *F2, + Function *Ffused, + ValueMap<Value*, Value*> &VMap); +/* Create function of leaf node after fusion * + * - create type * + * - create empty function of the type * + * - inline body of first function (applying and updating appropriate * + * mappings) * + * - inline body of second function (applying and updating appropriate * + * mappings) */ + Function* createLeafDFNodeFunction(IntrinsicInst* II1, + IntrinsicInst* II2, + Module &M); +/* Updates parent of fused nodes to use the new node intrinsic */ + void updateParentNodeFunction(IntrinsicInst* II1, + IntrinsicInst* II2, + IntrinsicInst* IInew); +/* Performs all operations required at the IR level for fusion of HPVM tensor * + * nodes with intrinsic instructions II1 and II2 * + * - Creates fused node function * + * - Creates createNode intrinsic for it and returns it * + * - Updates parent function: * + * - - adds new intrinsic * + * - - edges and binds consistently use the new intrinsic * + * - Removes old functions */ + IntrinsicInst* FuseHPVMTensorNodesStep(IntrinsicInst* II1, + IntrinsicInst* II2, + Module &M); +/* Fuse node sequence described by creaetNode intrinsics in IIs. * + * Contents of IIs are cleared. */ + void FuseHPVMTensorNodeSequence(std::vector<IntrinsicInst*> &IIs, Module &M); +public: + void run(Module &M, FusionTargets &FTs); + + void printFusionTargets(FusionTargets &FTs); +}; + +// Visitor for finding nodes to fuse +class FindFusionTargetsTraversal : public dfg2llvm::CodeGenTraversal { + +private: + typedef std::map< visc::Target, std::vector< std::vector<Intrinsic::ID> > > + FusePatterns; + //Member variables + + /* Map, from HPVM target to sequences of intrinsic IDs that if found, + need to be fused */ + /* TODO: use this in the future. Current (for PLDI 2018) implementation + * - assumes only two patterns, for PROMISE + * - assumes that nodes belonging to a single pattern only, if any. */ +// FusePatterns FPs; + FuseHPVMTensorNodes::FusionTargets FTs; + //Functions + + // Virtual Functions + void init() {} + void initRuntimeAPI() {} + void codeGen(DFInternalNode* N); + void codeGen(DFLeafNode* N); + +public: + // Constructor + + FindFusionTargetsTraversal(Module &_M, builddfg::BuildDFG &_DFG) : + CodeGenTraversal(_M, _DFG) { +/* FPs[visc::PROMISE_TARGET] = { {Intrinsic::visc_tensor_conv, + Intrinsic::visc_tensor_add, + Intrinsic::visc_tensor_relu, + Intrinsic::visc_tensor_pooling + }, + {Intrinsic::visc_tensor_mul, + Intrinsic::visc_tensor_add, + Intrinsic::visc_tensor_relu + } + } +*/ + } + + FuseHPVMTensorNodes::FusionTargets &getFusionTargets() { + return FTs; + } + +}; + +struct FuseHPVMTensorNodesWrapper : public ModulePass { + static char ID; // Pass identification, replacement for typeid + FuseHPVMTensorNodesWrapper() : ModulePass(ID) {} + +private: + // Member variables + +public: + // Functions + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired<builddfg::BuildDFG>(); + } + + bool runOnModule(Module &M); + +}; + +} // End of namespace + +#endif diff --git a/llvm/lib/Transforms/CMakeLists.txt b/llvm/lib/Transforms/CMakeLists.txt index 18edb9d2a2866b9a39774eabb1874dc93d84a2e8..181013742165a8d2672e531bc07db2db0d71595b 100644 --- a/llvm/lib/Transforms/CMakeLists.txt +++ b/llvm/lib/Transforms/CMakeLists.txt @@ -17,3 +17,4 @@ add_subdirectory(LocalMem) add_subdirectory(InPlaceDFG) add_subdirectory(GenVISC) add_subdirectory(MergeDFN) +add_subdirectory(FuseHPVMTensorNodes) diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt b/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..374f3b26f1bd8da35fb1adfa69c6327f8be64b15 --- /dev/null +++ b/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt @@ -0,0 +1,12 @@ +if(WIN32 OR CYGWIN) + set(LLVM_LINK_COMPONENTS Core Support) +endif() + +add_llvm_loadable_module( LLVMFuseHPVMTensorNodes + FuseHPVMTensorNodes.cpp + + DEPENDS + intrinsics_gen + PLUGIN_TOOL + opt + ) diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp new file mode 100644 index 0000000000000000000000000000000000000000..352a5ca5b571dd1fffaf55906129a0c5bf10f959 --- /dev/null +++ b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp @@ -0,0 +1,938 @@ +//=== FuseHPVMTensorNodes.cpp ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "FuseTensorNodes" + +#include "llvm/IR/ValueMap.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" + +#include "llvm/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h" +#include "llvm/SupportVISC/DFG2LLVM.h" +#include "llvm/SupportVISC/VISCUtils.h" + +using namespace llvm; +using namespace builddfg; +using namespace dfg2llvm; +using namespace viscUtils; + +namespace tensorfuse { +/*** Classes ***/ + +/*** Helper Functions ***/ + +/* Return the constant integer represented by value V */ +static unsigned getNumericValue(Value* V) { + assert(isa<ConstantInt>(V) + && "Value indicating the number of arguments should be a constant integer"); + return cast<ConstantInt>(V)->getZExtValue(); +} + +/* Query the kind of edge described by a createEdge intrinsic IIe * + * with respect to node handle IIn */ +static bool isIncomingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) { + Value* Src = IIe->getArgOperand(1); + IntrinsicInst* ArgII = cast<IntrinsicInst>(Src); + assert(ArgII && "First argument of createEdge is not an intrinsic"); + return (ArgII == IIn); +} +static bool isOutgoingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) { + Value* Src = IIe->getArgOperand(0); + IntrinsicInst* ArgII = cast<IntrinsicInst>(Src); + assert(ArgII && "First argument of createEdge is not an intrinsic"); + return (ArgII == IIn); +} + +/* Populates vector with all incoming edge intrinsics to node II */ +static void getIncomingEdgeIntrinsicList(IntrinsicInst *II, + std::vector<IntrinsicInst*> &EdgeList) { + for(Value::user_iterator ui = II->user_begin(), + ue = II->user_end(); ui!=ue; ++ui) { + IntrinsicInst* useI = dyn_cast<IntrinsicInst>(*ui); + assert(useI && + "HPVM graph intrinsic used in non HPVM intrinsic instruction\n"); + if (useI->getIntrinsicID() != Intrinsic::visc_createEdge) + continue; // Skip all non edge intrinsics + + // For edge intrinsics, test the descination operand + if (useI->getOperand(1) == II) { // Argument is the destination + EdgeList.push_back(useI); + } + } + return; +} + +/* Returns true if argument at position argno is coming from a dataflow edge * + * in the vector EdgeList */ +static bool isIncomingEdgeArgument(unsigned argno, + std::vector<IntrinsicInst*> &EdgeList) { + for (IntrinsicInst *ii : EdgeList) { + if (getNumericValue(ii->getOperand(4)) == argno) + return true; + } + return false; +} + +// Check that this is a valid HPVM Tensor Node (starts with an HPVM intrinsic) +// Return the node intrinsic function +static IntrinsicInst *isValidHPVMTensorNode(DFNode *N) { + Function *F = N->getFuncPointer(); + IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*(inst_begin(F))); + assert(II && + "HPVM tensor intrinsic expected as first instruction of HPVM tensor node\n"); + assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") && + "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + return II; +} + +// Returns the next node in a node sequence, or NULL if it does not exist. +// We consider two nodes a sequence if SrcN has a single successor, DstN, +// and DstN a single predeccessor, SrcN (other than the Root node) +static DFNode *findNextNodeInSequence(DFNode *SrcN) { + + DFNode *DstN = NULL; + + for (DFNode::successor_iterator si = SrcN->successors_begin(), + se = SrcN->successors_end(); si != se; ++si) { + DFNode *N = *si; + if (N->isDummyNode()) { + continue; + } + if (!DstN) + DstN = N; + if (DstN != N) { + errs() << "Found different destination nodes: no node sequence.\n"; + return NULL; + } + } + + // If we reach this point, DstN is the unique successor of SrcN + + // Now, test that the DstN has a single predeccessor except Root (dummy) + for (DFNode::indfedge_iterator eb = DstN->indfedge_begin(), + ee = DstN->indfedge_end(); eb != ee; ++eb) { + DFNode *SN = (*eb)->getSourceDF(); + if ((SN != SrcN) && (!(SN->isDummyNode()))) { + // Does not satisfy requirement + return NULL; + } + } + + return DstN; +} + +/*** Methods ***/ + +/* Create an identical bind (in or out, depending on the argument intrinsic) * + * with different src (true) or dst (false) port */ +IntrinsicInst* FuseHPVMTensorNodes::createIdenticalBindWithDifferentPort( + IntrinsicInst* II, unsigned port, bool srcport) { + // Argument of the function to be called + ConstantInt* PortConstant = + ConstantInt::get(Type::getInt32Ty(II->getContext()), port); + Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(1); + Value* DstPort = (srcport) ? II->getArgOperand(2): PortConstant; + + Value* BindArgs[] = {II->getArgOperand(0), + SrcPort, + DstPort, + II->getArgOperand(3) + }; + Function* BindF = II->getCalledFunction(); + CallInst* BindInst = CallInst::Create(BindF, + ArrayRef<Value*>(BindArgs, 4), + ""); + IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst); + + return newII; +} + +/* Given two createNode intrinsics describing connected nodes, this function * + * returns the argument list type of the fused function */ +void FuseHPVMTensorNodes::createArgTypes(IntrinsicInst* II1, + IntrinsicInst* II2, + std::vector<Type*> &ArgTypes) { + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + + // Arguments of the first node are simply added + for(auto& arg: F1->getArgumentList()) { + DEBUG(errs() << arg << "\n"); + ArgTypes.push_back(arg.getType()); + } + + // Arguments of the second node are added only if they are not the output of + // the previous node + + // Find all incoming edges. + std::vector<IntrinsicInst *> IncomingEdgeList; + getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); + + // Their source must be the first fusion node, otherwise they would not have + // been fusion candidates + for (IntrinsicInst *ii : IncomingEdgeList) { + assert((ii->getOperand(0) == II1) && "Unexpected source operand\n"); + } + + // Add argument type to the new function only if it is not incoming from + // an edge + for(auto& arg: F2->getArgumentList()) { + DEBUG(errs() << arg << "\n"); + unsigned inport = arg.getArgNo(); + if (isIncomingEdgeArgument(inport, IncomingEdgeList)) + continue; + ArgTypes.push_back(arg.getType()); + } +} + +/* Get the return type of the function for fused node II1-II2 */ +StructType* FuseHPVMTensorNodes::createReturnType(IntrinsicInst* II1, + IntrinsicInst* II2) { + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + + // Based on the HPVM tensor node assumptions and the patterns we want to + // support, when two nodes are fused the result will always be the result + // of the second node. + StructType* F1RetTy = dyn_cast<StructType>(F1->getReturnType()); + assert(F1RetTy && "Return Type must always be a struct"); + StructType* F2RetTy = dyn_cast<StructType>(F2->getReturnType()); + assert(F2RetTy && "Return Type must always be a struct"); + + return F2RetTy; +} + +/* Copy argument names, from functions of II1 and II2 to F */ +void FuseHPVMTensorNodes::copyArgumentNames(IntrinsicInst* II1, + IntrinsicInst* II2, + Function* F) { + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + + Function::arg_iterator dest_it = F->arg_begin(); + + // Argument names of the first node are simply copied + for(auto& arg: F1->getArgumentList()) { + dest_it->setName("s_" + arg.getName()); + dest_it++; + } + + // For the second node, we ignore those arguments that are incoming edges + // (from II1) + // Find all incoming edges. + std::vector<IntrinsicInst *> IncomingEdgeList; + getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); + + // Their source must be the first fusion node, otherwise they would not have + // been fusion candidates + for (IntrinsicInst *ii : IncomingEdgeList) { + assert((ii->getOperand(0) == II1) && "Unexpected source operand\n"); + } + + // Copy argument name to the new function only if it is not incoming from + // an edge + for(auto& arg: F2->getArgumentList()) { + DEBUG(errs() << arg << "\n"); + unsigned inport = arg.getArgNo(); + if (isIncomingEdgeArgument(inport, IncomingEdgeList)) + continue; + + dest_it->setName("d_" + arg.getName()); + dest_it++; + } + assert((dest_it == F->arg_end()) && + "Argument list of fused function not fully traversed\n"); + return; +} + +/* Copy attributes, from functions of II1 and II2 to F */ +void FuseHPVMTensorNodes::copyAttrList(IntrinsicInst* II1, + IntrinsicInst* II2, + Function* F) { + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + + Function::arg_iterator f1_ai = F1->arg_begin(), f1_ae = F1->arg_end(); + Function::arg_iterator f2_ai = F2->arg_begin(), f2_ae = F2->arg_end(); + Function::arg_iterator f_ai = F->arg_begin(), f_ae = F->arg_end(); + + // For the second node, we have to ignore the arguments that are incoming + // edges (from II1) + // Find all incoming edges. + std::vector<IntrinsicInst *> IncomingEdgeList; + getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); + + // Their source must be the first fusion node, otherwise they would not have + // been fusion candidates + for (IntrinsicInst *ii : IncomingEdgeList) { + assert((ii->getOperand(0) == II1) && "Unexpected source operand\n"); + } + + // Copy attributes of F1 + for(; f1_ai != f1_ae && f_ai != f_ae; ++f1_ai, ++f_ai) { + AttributeSet AS = F1->getAttributes(); + DEBUG(errs() << "Copying attributes from " + << F1->getName() << " at " << f1_ai->getArgNo() << "\n"); + AttrBuilder AB(AS, f1_ai->getArgNo()+1); + AttributeSet argAS = AttributeSet::get(F1->getContext(), + f_ai->getArgNo()+1, AB); + F->addAttributes(f_ai->getArgNo()+1, argAS); + } + + // Copy needed attributes of F2 + for(; f2_ai != f2_ae && f_ai != f_ae; ++f2_ai) { + unsigned inport = f2_ai->getArgNo(); + if (isIncomingEdgeArgument(inport, IncomingEdgeList)) + continue; + + AttributeSet AS = F2->getAttributes(); + DEBUG(errs() << "Copying attributes from " + << F2->getName() << " at " << f2_ai->getArgNo() << "\n"); + AttrBuilder AB(AS, f2_ai->getArgNo()+1); + AttributeSet argAS = AttributeSet::get(F2->getContext(), + f_ai->getArgNo()+1, AB); + F->addAttributes(f_ai->getArgNo()+1, argAS); + ++f_ai;; + } + return; +} + +/* Creates and inserts an empty function of the rght type for the fused node */ +Function* FuseHPVMTensorNodes::createEmptyDFNodeFunction(IntrinsicInst* II1, + IntrinsicInst* II2, + Module &M) { + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + + DEBUG(errs () << "Constructing argument list\n"); + // Construct argument list + std::vector<Type*> ArgTypes; + createArgTypes(II1, II2, ArgTypes); + + DEBUG(errs () << "Constructing return type\n"); + // Construct return type + StructType* FRetTy = createReturnType(II1, II2); + + FunctionType* FTy = FunctionType::get(FRetTy, ArgTypes, false); + // Create a function with the new type + Function* F = Function::Create(FTy, F1->getLinkage(), + F1->getName()+"_"+F2->getName(), &M); + + DEBUG(errs () << "Copying argument names\n"); + // Copy argument names from original functions + copyArgumentNames(II1, II2, F); + // Copy argument attributes from original functions + copyAttrList(II1, II2, F); + + return F; +} + +/* Inline first node function, updating required mappings * + * - F1: first node function * + * - M: module containing the node function * + * - Ffused: fused node function * + * - VMap: maps values used in the body of F1 to those that mst be used in * + the body of the fused function instead * + * OutVs: This maps the output struct field index to the stored value */ +void FuseHPVMTensorNodes::inlineFirstNodeFunction(Module &M, Function *F1, + Function *Ffused, + ValueMap<Value*, Value*> &VMap, + std::vector<Value*> &OutVs) { + + ReturnInst *RI = cast<ReturnInst>(Ffused->getEntryBlock().getTerminator()); + + inst_iterator f1_i = inst_begin(F1); + // First, we copy the HPVM intrinsics of F1 into Ffused, applying the mapping + for (inst_iterator f1_e = inst_end(F1); f1_i != f1_e; ++f1_i) { + Instruction *I = &(*f1_i); + if (!(BuildDFG::isViscIntrinsic(I))) { + // We are done with the node computation + break; + } + + IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); + assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") + && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + + std::vector<Value*> Args; + for(unsigned i = 0; i < II->getNumArgOperands(); i++) { + Value *V = II->getArgOperand(i); + assert((VMap.find(V) != VMap.end()) && + "Attempted to use value without existing mapping in VMap"); + Args.push_back(VMap[V]); + } + Function *F = Intrinsic::getDeclaration(&M, II->getIntrinsicID()); + CallInst* CI = + CallInst::Create(F, Args, + F->getReturnType()->isVoidTy()? "" : "s_"+II->getName(), RI); + // Update the map with the newly created value + VMap[II] = CI; + } + + // We continue with gathering information about the return values + for (inst_iterator f1_e = inst_end(F1); f1_i != f1_e; ++f1_i) { + Instruction *I = &(*f1_i); + InsertValueInst* IV = dyn_cast<InsertValueInst>(I); + if (!IV) { + // End of insertvalue instructions. This should be a return statement + assert((dyn_cast<ReturnInst>(I)) && "Unexpected Instruction\n"); + break; // Done processing this function + } + OutVs.push_back(IV->getOperand(1)); + } + return; +} + +/* Inline second node function, updating required mappings * + * - F2: second node function * + * - M: module containing the node function * + * - Ffused: fused node function * + * - VMap: maps values used in the body of F2 to those that mst be used in * + the body of the fused function instead */ +void FuseHPVMTensorNodes::inlineSecondNodeFunction(Module &M, Function *F2, + Function *Ffused, ValueMap<Value*, Value*> &VMap) { + + ReturnInst *RI = cast<ReturnInst>(Ffused->getEntryBlock().getTerminator()); + + // Copy the body of F2 into Ffused, applying the mapping + inst_iterator f2_i = inst_begin(F2); + for (inst_iterator f2_e = inst_end(F2); f2_i != f2_e; ++f2_i) { + Instruction *I = &(*f2_i); + if ((BuildDFG::isViscIntrinsic(I))) { + IntrinsicInst* II = dyn_cast<IntrinsicInst>(I); + assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") + && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n"); + + std::vector<Value*> Args; + for(unsigned i = 0; i < II->getNumArgOperands(); i++) { + Value *V = II->getArgOperand(i); + assert((VMap.find(V) != VMap.end()) && + "Attempted to use value without existing mapping in VMap"); + Args.push_back(VMap[V]); + } + Function *F = Intrinsic::getDeclaration(&M, II->getIntrinsicID()); + CallInst* CI = + CallInst::Create(F, Args, + F->getReturnType()->isVoidTy()? "" : II->getName(), + RI); + // Update the map with the newly created value + VMap[II] = CI; + } else if (InsertValueInst* IV = dyn_cast<InsertValueInst>(I)) { + Value *AggOp = IV->getAggregateOperand(); + Value *InsOp = IV->getInsertedValueOperand(); + assert(((VMap.find(AggOp) != VMap.end()) || + (isa<Constant>(AggOp)) ) && + "Attempted to use value without existing mapping in VMap"); + assert(((VMap.find(InsOp) != VMap.end()) || + (isa<Constant>(InsOp))) && + "Attempted to use value without existing mapping in VMap"); + InsertValueInst* IVI = InsertValueInst::Create( + (isa<Constant>(AggOp)) ? AggOp : VMap[AggOp], + (isa<Constant>(InsOp)) ? InsOp : VMap[InsOp], + IV->getIndices(), + IV->getName(), + RI); + // Update the map with the newly created value + VMap[IV] = IVI; + } else { + ReturnInst* RetI = dyn_cast<ReturnInst>(I); + assert(RetI && "Unexpected Instruction\n"); + Value *RetVal = RetI->getOperand(0); + ReturnInst *newRI = ReturnInst::Create(Ffused->getContext(), + VMap[RetVal]); + ReplaceInstWithInst(RI, newRI); + } + } + return; +} + +/* Create function of leaf node after fusion * + * - create type * + * - create empty function of the type * + * - inline body of first function (applying and updating appropriate * + * mappings) * + * - inline body of second function (applying and updating appropriate * + * mappings) */ +Function* FuseHPVMTensorNodes::createLeafDFNodeFunction(IntrinsicInst* II1, + IntrinsicInst* II2, + Module &M) { + DEBUG(errs () << "Creating function signature\n"); + + /* Create empty node function of the correct type */ + Function* Ffused = createEmptyDFNodeFunction(II1, II2, M); + + // Get return type, needed for building the assignmens to the return struct + StructType* FfusedRetTy = cast<StructType>(Ffused->getReturnType()); + + /* Mapping information required for using the correct values in the body of * + * the fused node function */ + + // This map maps the values used in the original function bodies with + // the ones that need to be used in the fused function body. + ValueMap<Value*, Value*> FusedValueMap; + + // Intemediate information saved for return values of first node function + // This maps the output port to the value returned through the outgoing edge + std::vector<Value*> OutValues; + + DEBUG(errs () << "Creating function body\n"); + + // Add a basic block to the new, empty function + BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", Ffused); + ReturnInst::Create(M.getContext(), UndefValue::get(FfusedRetTy), BB); + + // Get the node functions + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + + // Initially, update FusedValueMap: it is populated with the arguments of F1 + Function::arg_iterator fused_arg_it = Ffused->arg_begin(); + // Argument names of the first node are simply copied + for(auto& arg: F1->getArgumentList()) { + FusedValueMap[&arg] = &*fused_arg_it; + ++fused_arg_it; + } + + // Invoke function that inlines F1 into Ffused, using and updating mappings + inlineFirstNodeFunction(M, F1, Ffused, FusedValueMap, OutValues); + + // Compute mapping between inputs of F2 and outputs of F1 + std::vector<IntrinsicInst *> IncomingEdgeList; + getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); + std::vector<unsigned> PortMap(IncomingEdgeList.size(), 0); + for (IntrinsicInst * ii : IncomingEdgeList) { + unsigned srcPort = getNumericValue(ii->getOperand(3)); + unsigned dstPort = getNumericValue(ii->getOperand(4)); + PortMap[dstPort] = srcPort; + } + + // FusedValueMap is now populated with the arguments of F2 as well + for(auto& arg: F2->getArgumentList()) { + DEBUG(errs() << arg << "\n"); + unsigned inport = arg.getArgNo(); + if (isIncomingEdgeArgument(inport, IncomingEdgeList)) { + // Get the mappings of the return values of F1 if incoming edge argument + Value *V = OutValues[PortMap[inport]]; + FusedValueMap[&arg] = (isa<Constant>(V)) ? V: FusedValueMap[V]; + } + else { + // Get new argument otherwise + FusedValueMap[&arg] = &*fused_arg_it; + ++fused_arg_it; + } + } + + // Invoke function that inlines F2 into Ffused, using and updating mappings + inlineSecondNodeFunction(M, F2, Ffused, FusedValueMap); + + // Done with fused node function + return Ffused; +} + +/* Updates parent of fused nodes to use the new node intrinsic */ +void FuseHPVMTensorNodes::updateParentNodeFunction(IntrinsicInst* II1, + IntrinsicInst* II2, + IntrinsicInst* IInew) { + + // Compute the required shifting of positions for edges/binds to the second + // fusion node. No shifting is required for the first fusion node. + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + std::vector<unsigned> ShiftMap(F2->getFunctionType()->getNumParams(), 0); + unsigned shiftCount = F1->getFunctionType()->getNumParams(); + + // Find all incoming edges. + std::vector<IntrinsicInst *> IncomingEdgeList; + getIncomingEdgeIntrinsicList(II2, IncomingEdgeList); + // Their source must be the first fusion node, otherwise they would not have + // been fusion candidates + for (IntrinsicInst *ii : IncomingEdgeList) { + assert((ii->getOperand(0) == II1) && "Unexpected source operand\n"); + } + + // Compute shift map for n2: maps position in F2 arg list to Ffused arg list + for(auto& arg: F2->getArgumentList()) { + DEBUG(errs() << arg << "\n"); + unsigned inport = arg.getArgNo(); + if (isIncomingEdgeArgument(inport, IncomingEdgeList)) + continue; + + ShiftMap[inport] = shiftCount; + shiftCount++; + } + + std::vector<IntrinsicInst*> IItoRemove; + + // First, iterate over uses of the first node's createNode intrinsic + for (Value::user_iterator i = II1->user_begin(), ie = II1->user_end(); + i != ie; ++i) { + Instruction *VI = dyn_cast<Instruction>(*i); + IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); + assert(II && "Use of a node handle outside of a visc intrinsic\n"); + + switch(II->getIntrinsicID()) { + case Intrinsic::visc_createEdge: + { + if (isOutgoingEdgeIntrinsic(II,II1)) { + assert(isIncomingEdgeIntrinsic(II,II2) && + "Outgoing edge of node 1 should only go to node 2\n"); + IItoRemove.push_back(II); + } + } + break; + case Intrinsic::visc_bind_input: + { + } + break; + case Intrinsic::visc_bind_output: + { + assert(false && + "Source node of node fusion not expected in bind.out\n"); + } + break; + default: + llvm_unreachable("Unknown use of HPVM createNode handle\n"); + break; + } + } + + // Delete gathered instructions - they are the edges between n1-n2 + for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(), + ie = IItoRemove.end(); ib != ie; ++ib) { + DEBUG(errs() << "Erasing: " << **ib << "\n"); + (*ib)->eraseFromParent(); + } + II1->replaceAllUsesWith(IInew); + II1->eraseFromParent(); + + IItoRemove.clear(); + + // Then, iterate over uses of the second node's createNode intrinsic + for (Value::user_iterator i = II2->user_begin(), ie = II2->user_end(); + i != ie; ++i) { + Instruction *VI = dyn_cast<Instruction>(*i); + IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI); + assert(II && "Use of a node handle outside of a visc intrinsic\n"); + + switch(II->getIntrinsicID()) { + case Intrinsic::visc_createEdge: + { + assert(isOutgoingEdgeIntrinsic(II,II2) && + "Node 2 is expected to have only outgoing edges at this point\n"); + } + break; + case Intrinsic::visc_bind_input: + { + /* The index must be updated to the matching argument position of * + * the fused functionm using ShiftMap */ + unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue(); + IntrinsicInst *newII = + createIdenticalBindWithDifferentPort(II, + ShiftMap[dstPos], + false); + newII->insertBefore(II); + IItoRemove.push_back(II); + } + break; + case Intrinsic::visc_bind_output: + { + assert(false && + "Source node of node fusion not expected in bind.out\n"); + } + break; + default: + llvm_unreachable("Unknown use of HPVM createNode handle\n"); + break; + } + } + + // Delete gathered instructions - they are the old bindings of n2 + for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(), + ie = IItoRemove.end(); ib != ie; ++ib) { + DEBUG(errs() << "Erasing: " << **ib << "\n"); + (*ib)->eraseFromParent(); + } + II2->replaceAllUsesWith(IInew); + II2->eraseFromParent(); + + return; +} + +/* Performs all operations required at the IR level for fusion of HPVM tensor * + * nodes with intrinsic instructions II1 and II2 * + * - Creates fused node function * + * - Creates createNode intrinsic for it and returns it * + * - Updates parent function: * + * - - adds new intrinsic * + * - - edges and binds consistently use the new intrinsic * + * - Removes old functions */ +IntrinsicInst* FuseHPVMTensorNodes::FuseHPVMTensorNodesStep(IntrinsicInst* II1, + IntrinsicInst* II2, + Module &M) { + // Get the node functions + Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts()); + Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts()); + + // Create fused node function + Function *Ffused = createLeafDFNodeFunction(II1, II2, M); + addHint(Ffused, getPreferredTarget(F1)); + + // FIX PARENT DFNode'S FUNCTION + + // Generate createNode Intrinsic for fused node and insert it + Function* CreateNodeF = Intrinsic::getDeclaration(&M, + Intrinsic::visc_createNode); + Constant* Fp = ConstantExpr::getPointerCast(Ffused, + Type::getInt8PtrTy(M.getContext())); + CallInst *CI = CallInst::Create(CreateNodeF, + ArrayRef<Value*>(Fp), + Ffused->getName()+".node"); + IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI); + CreateNodeII->insertBefore(II1); + + // By the assumptions about the fusion pattern structure, all edges that have + // II1 as source will have II2 as destination and vice versa. + // We can simply delete them. + + // All createEdge intrinsics with destination argument = II1 need to use + // CreateNodeII instead. + // Similarly with bind.in + + // All createEdge intrinsics with source argument = II1 need to use + // CreateNodeII instead + // Similarly with bind.out + + // By the assumptions about the fusion pattern structure, the first node + // cannot be the argument of a bind.out + // The second node can be the argument of a bind.in. + // For the bind.in, we need to adjust the destination port. + updateParentNodeFunction(II1, II2, CreateNodeII); + + // Remove old node functions + removeHint(F1, getPreferredTarget(F1)); + removeHint(F2, getPreferredTarget(F2)); + F1->replaceAllUsesWith(UndefValue::get(F1->getType())); + F1->eraseFromParent(); + F2->replaceAllUsesWith(UndefValue::get(F2->getType())); + F2->eraseFromParent(); + + return CreateNodeII; +} + +/* Fuse node sequence described by creaetNode intrinsics in IIs. * + * Contents of IIs are cleared. */ +void FuseHPVMTensorNodes::FuseHPVMTensorNodeSequence( + std::vector<IntrinsicInst*> &IIs, Module &M) { + for (IntrinsicInst *II : IIs) { + assert((II->getIntrinsicID() == Intrinsic::visc_createNode) && + "Expected createNode intrinsic in fuse intrinsic sequence\n"); + } + + if (IIs.size() < 2) { + errs() << "Warning: Attempted to fuse fewer than 2 nodes\n"; + return; + } + + for (unsigned i = 0; i + 1 < IIs.size(); i++) { + IntrinsicInst *II1 = IIs[i]; + IntrinsicInst *II2 = IIs[i+1]; + IIs[i+1] = FuseHPVMTensorNodesStep(II1, II2, M); + } + IIs.clear(); + return; +} + +/* Run method for FuseHPVMTensorNodes class, simply invokes fusion of all the * + * sequenses in member variable FTs. */ +void FuseHPVMTensorNodes::run(Module &M, FusionTargets &FTs) { + for (unsigned i = 0; i < FTs.size(); i++) { + FuseHPVMTensorNodeSequence(FTs[i], M); + } + return; +} + +// Print Fusion Targets. The argument vector contains createNode intrinsics +// of nodes to be fused). +void FuseHPVMTensorNodes::printFusionTargets(FusionTargets &FTs) { + errs() << "Print Fusion Targets\n"; + errs() << "Found " << FTs.size() << " targets\n"; + for (FuseHPVMTensorNodes::FusionTargets::iterator ii = FTs.begin(), + ie = FTs.end(); ii != ie ; ++ii) { + errs() << "Target:\n"; + std::vector<IntrinsicInst*> IIv = *ii; + for (std::vector< IntrinsicInst*>::iterator pi = IIv.begin(), + pe = IIv.end(); pi != pe; ++pi) { + errs() << "\t" << *((*pi)->getOperand(0)) << "\n"; + } + } + return; +} + +void FindFusionTargetsTraversal::codeGen(DFInternalNode *N) { + DEBUG(errs() << "Skipping Internal Node: " + << N->getFuncPointer()->getName() << "\n"); + return; +} + +void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) { + DEBUG(errs() << "Inside leaf node: " + << N->getFuncPointer()->getName() << "\n"); + + // Skip fusion check if it is a dummy node + if(N->isDummyNode()) { + DEBUG(errs() << "Skipping dummy node\n"); + return; + } + +// if(N->getTargetHint() != visc::PROMISE_TARGET) { + if(!preferredTargetIncludes(N, visc::PROMISE_TARGET)) { + // Only fuse if we plan to target PROMISE + // The CUDNN backend would be able to generate calls for the fused node, + // but not the other way around + DEBUG(errs() << "No PROMISE hint. Skipping node: " + << N->getFuncPointer()->getName() << "\n"); + return; + } + + // Make sure that this is a valid HPVM Tensor Node + // Find first instruction, and check that it is an HPVM tensor intrinsic + IntrinsicInst *II = isValidHPVMTensorNode(N); + + std::vector<IntrinsicInst*> CurrentNodeSequence; + + switch(II->getIntrinsicID()) { + case Intrinsic::visc_tensor_convolution: + { // Found beginning of pattern conv-bias-relu-pooling. Look for the rest + CurrentNodeSequence.push_back(N->getInstruction()); + + // Look for bias + DFNode *SN = findNextNodeInSequence(N); + if (!SN) { + return; // Did not find a node sequence starting at N. Simpy return. + } + IntrinsicInst *SII = isValidHPVMTensorNode(SN); + if (SII->getIntrinsicID() != Intrinsic::visc_tensor_add) { + // Successor is not the bias operation, thus does not fit the pattern. + return; + } + // Otherwise, push this node to the current sequence + CurrentNodeSequence.push_back(SN->getInstruction()); + + // Continue with next node, looking for relu + SN = findNextNodeInSequence(SN); + if (!SN) { + return; // Did not find a node sequence starting at N. Simpy return. + } + SII = isValidHPVMTensorNode(SN); + if (SII->getIntrinsicID() != Intrinsic::visc_tensor_relu) { + // Successor is not the relu operation, thus does not fit the pattern. + return; + } + // Otherwise, push this node to the current sequence + CurrentNodeSequence.push_back(SN->getInstruction()); + + // Continue with next node, looking for pooling + SN = findNextNodeInSequence(SN); + if (!SN) { + return; // Did not find a node sequence starting at N. Simpy return. + } + SII = isValidHPVMTensorNode(SN); + if ((SII->getIntrinsicID() != Intrinsic::visc_tensor_pool_max) && + (SII->getIntrinsicID() != Intrinsic::visc_tensor_pool_min) && + (SII->getIntrinsicID() != Intrinsic::visc_tensor_pool_average)) { + // Successor is not a pool operation, thus does not fit the pattern. + return; + } + // Otherwise, push this node to the current sequence + CurrentNodeSequence.push_back(SN->getInstruction()); + } + break; + case Intrinsic::visc_tensor_mul: + { // Found beginning of pattern gemm-bias-relu. Look for the rest + CurrentNodeSequence.push_back(N->getInstruction()); + // Look for bias + DFNode *SN = findNextNodeInSequence(N); + if (!SN) { + return; // Did not find a node sequence starting at N. Simpy return. + } + IntrinsicInst *SII = isValidHPVMTensorNode(SN); + if (SII->getIntrinsicID() != Intrinsic::visc_tensor_add) { + // Successor is not the bias operation, thus does not fit the pattern. + return; + } + // Otherwise, push this node to the current sequence + CurrentNodeSequence.push_back(SN->getInstruction()); + // This is a possible fuse target, gemm-add. + + // If the next operation is a relu, we fuse that as well. + // Continue with next node, looking for relu + SN = findNextNodeInSequence(SN); + if (SN) { + SII = isValidHPVMTensorNode(SN); + if (SII->getIntrinsicID() == Intrinsic::visc_tensor_relu) { + // We found a relu operation in sequence. Push in vector as well. + CurrentNodeSequence.push_back(SN->getInstruction()); + } + } + } + break; + default: + DEBUG(errs() << "No pattern begins at this node\n"); + break; + } + + if (CurrentNodeSequence.size() != 0) { + // A sequence was found. Store the node sequence in FTs. + FTs.push_back(CurrentNodeSequence); + } + + return; +} + +bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) { + errs() << "\nFUSE HPVM TENSOR NODES PASS\n"; + +// Get the BuildDFG Analysis Results: + // - Dataflow graph + BuildDFG &DFG = getAnalysis<BuildDFG>(); + + std::vector<DFInternalNode*> Roots = DFG.getRoots(); + // Visitor for Fuse Target Detection Graph Traversal + FindFusionTargetsTraversal *FTTVisitor = + new FindFusionTargetsTraversal(M, DFG); + + errs() << "Find targets\n"; + // Iterate over all the DFGs and produce code for each one of them + for (auto rootNode: Roots) { + // Initiate code generation for root DFNode + FTTVisitor->visit(rootNode); + } + + FuseHPVMTensorNodes::FusionTargets &FTs = FTTVisitor->getFusionTargets(); + + FuseHPVMTensorNodes Fuse; +// Fuse.printFusionTargets(FTs); + + Fuse.run(M, FTs); + +errs() << "------------------------------------------------------------\n" + << M << "\n\n"; + + delete FTTVisitor; + + return true; +} + +char FuseHPVMTensorNodesWrapper::ID = 0; +static RegisterPass<FuseHPVMTensorNodesWrapper> X("hpvm-fuse", + "Fuse HPVM Tensor Nodes Pass", + false /* does not modify the CFG */, + true /* transformation, not just analysis */); + +} // End of namespace + diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt b/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt new file mode 100644 index 0000000000000000000000000000000000000000..55a6ee5150daad0143131980c7e107cb3b907091 --- /dev/null +++ b/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt @@ -0,0 +1,21 @@ +;===- ./lib/Transforms/LocalMem/LLVMBuild.txt ------------------*- Conf -*--===; +; +; The LLVM Compiler Infrastructure +; +; This file is distributed under the University of Illinois Open Source +; License. See LICENSE.TXT for details. +; +;===------------------------------------------------------------------------===; +; +; This is an LLVMBuild description file for the components in this subdirectory. +; +; For more information on the LLVMBuild system, please see: +; +; http://llvm.org/docs/LLVMBuild.html +; +;===------------------------------------------------------------------------===; + +[component_0] +type = Library +name = FuseHPVMTensorNodes +parent = Transforms diff --git a/llvm/lib/Transforms/LLVMBuild.txt b/llvm/lib/Transforms/LLVMBuild.txt index 8b1de2bf81a6a6324668df36130b45712dca5f51..e6ae45fc16fdfe505e0045890a2226a0682c8b33 100644 --- a/llvm/lib/Transforms/LLVMBuild.txt +++ b/llvm/lib/Transforms/LLVMBuild.txt @@ -27,12 +27,15 @@ subdirectories = ObjCARC BuildDFG ClearDFG + DFG2LLVM_CUDNN DFG2LLVM_NVPTX DFG2LLVM_SPIR DFG2LLVM_X86 LocalMem + InPlaceDFG GenVISC MergeDFN + FuseHPVMTensorNodes [component_0] type = Group