diff --git a/hpvm/include/SupportHPVM/DFG2LLVM.h b/hpvm/include/SupportHPVM/DFG2LLVM.h index c1ade92e9a7201a5c3c80e9302b9bac57c750537..0703eda772088b87d7edc917babd1df84718d563 100644 --- a/hpvm/include/SupportHPVM/DFG2LLVM.h +++ b/hpvm/include/SupportHPVM/DFG2LLVM.h @@ -1,6 +1,3 @@ -#ifndef __DFG2LLVM_H__ -#define __DFG2LLVM_H__ - //===---- DFG2LLVM.h - Header file for "HPVM Dataflow Graph to Target" ----===// // // The LLVM Compiler Infrastructure @@ -15,6 +12,9 @@ // //===----------------------------------------------------------------------===// +#ifndef __DFG2LLVM_H__ +#define __DFG2LLVM_H__ + #include "BuildDFG/BuildDFG.h" #include "SupportHPVM/HPVMHint.h" #include "SupportHPVM/HPVMTimer.h" @@ -25,6 +25,8 @@ #include "llvm/IR/Module.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include <queue> using namespace llvm; using namespace builddfg; @@ -37,13 +39,19 @@ using namespace builddfg; } while (0) #define DECLARE(X) \ X = M.getOrInsertFunction( \ - #X, runtimeModule->getFunction(#X)->getFunctionType()); + getMangledName(#X), runtimeModule->getFunction(getMangledName(#X))->getFunctionType()); +#define DECLARE_EPOCHS(X, Y) \ + X = M.getOrInsertFunction( \ + getMangledName(#X), Y->getFunction(getMangledName(#X))->getFunctionType()); namespace dfg2llvm { // Helper Functions static inline ConstantInt *getTimerID(Module &, enum hpvm_TimerID); -bool hasAttribute(Function *, unsigned, Attribute::AttrKind); +inline bool hasAttribute(Function *, unsigned, Attribute::AttrKind); + +// void addArgument(Function*, Type*, const Twine& Name = ""); +inline Function *addArgument(Function *, Type *, const Twine &Name = ""); // DFG2LLVM abstract class implementation class DFG2LLVM : public ModulePass { @@ -65,6 +73,40 @@ public: } }; +// DFG2LLVM_CPU - The first implementation. 
+struct DFG2LLVM_CPU : public DFG2LLVM { + static char ID; // Pass identification, replacement for typeid + DFG2LLVM_CPU() : DFG2LLVM(ID) {} + +private: + // Member variables + + // Functions + +public: + bool runOnModule(Module &M); +}; + +// Utility function that invokes the HPVM CPU backend. +bool runDFG2LLVM_CPU(Module &M, BuildDFG &DFG); + +// DFG2LLVM_EPOCHS - The first implementation. +struct DFG2LLVM_EPOCHS : public DFG2LLVM { + static char ID; // Pass identification, replacement for typeid + DFG2LLVM_EPOCHS() : DFG2LLVM(ID) {} + +private: + // Member variables + + // Functions + +public: + bool runOnModule(Module &M); +}; + +// Utility function that invokes the HPVM EPOCHS backend. +bool runDFG2LLVM_EPOCHS(Module &M, BuildDFG &DFG); + // Abstract Visitor for Code generation traversal (tree traversal for now) class CodeGenTraversal : public DFNodeVisitor { @@ -87,22 +129,22 @@ protected: FunctionCallee llvm_hpvm_initializeTimerSet; FunctionCallee llvm_hpvm_switchToTimer; FunctionCallee llvm_hpvm_printTimerSet; + FunctionCallee llvm_hpvm_cpu_dstack_push; + FunctionCallee llvm_hpvm_cpu_dstack_pop; GlobalVariable *TimerSet; GlobalVariable *GraphIDAddr; Instruction *InitCall; Instruction *CleanupCall; // Functions - Value *getStringPointer(const Twine &S, Instruction *InsertBefore, - const Twine &Name = ""); - // void addArgument(Function*, Type*, const Twine& Name = ""); - Function *addArgument(Function *, Type *, const Twine &Name = ""); + inline Value *getStringPointer(const Twine &S, Instruction *InsertBefore, + const Twine &Name = ""); // void addIdxDimArgs(Function* F); - Function *addIdxDimArgs(Function *F); - std::vector<Value *> extractElements(Value *, std::vector<Type *>, - std::vector<std::string>, Instruction *); - Argument *getArgumentAt(Function *F, unsigned offset); - void initTimerAPI(); + inline Function *addIdxDimArgs(Function *F); + inline std::vector<Value *> extractElements(Value *, std::vector<Type *>, + std::vector<std::string>, 
Instruction *); + inline Argument *getArgumentAt(Function *F, unsigned offset); + inline void initTimerAPI(); // Pure Virtual Functions virtual void init() = 0; @@ -111,19 +153,39 @@ protected: virtual void codeGen(DFLeafNode *N) = 0; // Virtual Functions - virtual void initializeTimerSet(Instruction *); - virtual void switchToTimer(enum hpvm_TimerID, Instruction *); - virtual void printTimerSet(Instruction *); + virtual inline void initializeTimerSet(Instruction *); + virtual inline void switchToTimer(enum hpvm_TimerID, Instruction *); + virtual inline void printTimerSet(Instruction *); virtual ~CodeGenTraversal() {} + inline Value *addLoop(Instruction *I, Value *limit, + const Twine &indexName = ""); + inline void invokeChild(DFNode *C, Function *F, ValueToValueMapTy &VMap, + Instruction *InsertBefore, hpvm::Target Tag); + inline Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU, + Instruction *InsertBefore); + inline Argument *getArgumentFromEnd(Function *F, unsigned offset); + + inline void copyChildIndexCalc(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap, + Instruction *InsertBefore); + + std::string getMangledName(std::string Name) { + //if (!Name.compare("llvm_hpvm_ocl_launch")) + //return "_Z20llvm_hpvm_ocl_launchPKcS0_N4hpvm6TargetEb"; + //if (!Name.compare("llvm_hpvm_ocl_executeNode")) + //return "_Z25llvm_hpvm_ocl_executeNodePvjPKmS1_"; + //else + return Name; + } + public: // Constructor CodeGenTraversal(Module &_M, BuildDFG &_DFG) : M(_M), DFG(_DFG) {} - static bool checkPreferredTarget(DFNode *N, hpvm::Target T); - static bool preferredTargetIncludes(DFNode *N, hpvm::Target T); - hpvm::Target getPreferredTarget(DFNode *N); + inline static bool checkPreferredTarget(DFNode *N, hpvm::Target T); + inline static bool preferredTargetIncludes(DFNode *N, hpvm::Target T); + inline hpvm::Target getPreferredTarget(DFNode *N); virtual void visit(DFInternalNode *N) { // If code has already been generated for this internal node, skip the @@ 
-160,6 +222,313 @@ public:
 
 // -------------- CodeGenTraversal Implementation -----------------
 
+/* Return the argument located `offset` positions before the end of the
+ * argument list of function F; offset == 1 selects the last argument.
+ * Requires 1 <= offset <= number of parameters of F.
+ */
+Argument *CodeGenTraversal::getArgumentFromEnd(Function *F, unsigned offset) {
+  assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) &&
+         "Invalid offset to access arguments!");
+  // arg_end() is one past the last argument: step back by `offset` directly
+  // instead of walking an iterator that could end up before arg_begin().
+  return &*(F->arg_end() - offset);
+}
+/* Return the value that feeds input port i of Child: an argument of
+ * ParentF_CPU for an edge from the entry node, otherwise an extractvalue
+ * (inserted before InsertBefore) on the call recorded for the source node. */
+Value *CodeGenTraversal::getInValueAt(DFNode *Child, unsigned i,
+                                      Function *ParentF_CPU,
+                                      Instruction *InsertBefore) {
+  // TODO: Assumption is that each input port of a node has just one
+  // incoming edge. May change later on.
+
+  // Find the incoming edge at the requested input port
+  DEBUG(errs() << "Finding incoming edge " << i << " for "
+               << Child->getFuncPointer()->getName() << "\n");
+  DFEdge *E = Child->getInDFEdgeAt(i);
+  assert(E && "No incoming edge or binding for input element!");
+  // Find the Source DFNode associated with the incoming edge
+  DFNode *SrcDF = E->getSourceDF();
+
+  // If Source DFNode is a dummyNode, edge is from parent. Get the
+  // argument from argument list of this internal node
+  Value *inputVal;
+  if (SrcDF->isEntryNode()) {
+    inputVal = getArgumentAt(ParentF_CPU, E->getSourcePosition());
+    DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
+  } else {
+    // edge is from a sibling
+    // Check - code should already be generated for this source dfnode
+    assert(OutputMap.count(SrcDF) &&
+           "Source node call not found. Dependency violation!");
+
+    // Find CallInst associated with the Source DFNode using OutputMap
+    Value *CI = OutputMap[SrcDF];
+
+    // Extract element at source position from this call instruction
+    std::vector<unsigned> IndexList;
+    IndexList.push_back(E->getSourcePosition());
+    DEBUG(errs() << "Going to generate ExtarctVal inst from " << *CI << "\n");
+    ExtractValueInst *EI =
+        ExtractValueInst::Create(CI, IndexList, "", InsertBefore);
+    inputVal = EI;
+  }
+  return inputVal;
+}
+
+// Copy over any index limit calculations which may be used to specify
+// dynamic node instances: for every dimension of C, the instructions that
+// compute its limit are cloned before InsertBefore and recorded in VMap.
+// Arguments and constants are not cloned.
+void CodeGenTraversal::copyChildIndexCalc(DFNode *C, Function *F_CPU,
+                                          ValueToValueMapTy &VMap,
+                                          Instruction *InsertBefore) {
+  for (unsigned j = 0; j < C->getNumOfDim(); j++) {
+    // A value is queued once per use, so in toClone every operand shows up
+    // after its user; the reverse walk below therefore clones operands first.
+    std::vector<Instruction *> toClone;
+    std::queue<Value *> workList;
+    workList.push(C->getDimLimits()[j]);
+
+    while (workList.size()) {
+      Value *entry = workList.front();
+      workList.pop();
+
+      // Arguments are already available in the cloned function.
+      if (isa<Argument>(entry))
+        continue;
+      // Constant values can be used as operands directly.
+      if (isa<Constant>(entry))
+        continue;
+
+      // Only calls with scalar operands are currently allowed
+      if (CallInst *CI = dyn_cast<CallInst>(entry)) {
+        for (unsigned c = 0; c < CI->getNumArgOperands(); c++) {
+          Value *CArg = CI->getArgOperand(c);
+          assert(!CArg->getType()->isPointerTy() &&
+                 "Index calculation chain of instructions can not contain "
+                 "calls with pointer operands");
+        }
+      }
+
+      assert(!isa<LoadInst>(entry) && !isa<StoreInst>(entry) &&
+             "Only non-memory reading/writing operations legal in dimension "
+             "limit calculation");
+
+      Instruction *I = dyn_cast<Instruction>(entry);
+      if (!I) {
+        DEBUG(errs() << "Unknown value type: " << *entry << "\n");
+        assert(false && "Non-argument or instruction type used in limit calc.");
+        // With asserts compiled out, skip the value instead of queuing a
+        // null pointer that would be dereferenced below.
+        continue;
+      }
+
+      toClone.push_back(I);
+      for (unsigned k = 0; k < I->getNumOperands(); k++)
+        workList.push(I->getOperand(k));
+    }
+
+    // Clone instructions in reverse order due to use-def chains
+    for (int i = toClone.size() - 1; i >= 0; i--) {
+      Instruction *I = toClone[i];
+
+      // Already copied over value.
+      if (VMap.find(&*I) != VMap.end())
+        continue;
+
+      Instruction *NewInst = I->clone();
+
+      // Update the cloned instructions operands to use the VMapped Values.
+      for (unsigned o = 0; o < NewInst->getNumOperands(); o++) {
+        if (isa<Constant>(NewInst->getOperand(o)))
+          continue;
+
+        assert(VMap.find(I->getOperand(o)) != VMap.end() &&
+               "Copied Value use-def chain not correctly copied!");
+        NewInst->setOperand(o, VMap[I->getOperand(o)]);
+      }
+
+      NewInst->insertBefore(InsertBefore);
+
+      // Add instruction to VMap;
+      VMap[&*I] = NewInst;
+    }
+  }
+
+  DEBUG(errs() << "F_CPU after cloning in index values: " << *F_CPU << "\n");
+}
+
+/* Emit, before IB, a call to the function generated for child C and target
+ * Tag, add one loop per dimension whose limit is not the constant 1 and, for
+ * CPU_TARGET, surround the call with the depth-stack push/pop runtime calls. */
+void CodeGenTraversal::invokeChild(DFNode *C, Function *F_CPU,
+                                   ValueToValueMapTy &VMap, Instruction *IB,
+                                   hpvm::Target Tag) {
+  Function *CF = C->getFuncPointer();
+
+  Function *CF_CPU = C->getGenFuncForTarget(Tag);
+  assert(CF_CPU != NULL &&
+         "Found leaf node for which code generation has not happened yet!\n");
+  assert(C->hasCPUGenFuncForTarget(Tag) &&
+         "The generated function to be called from cpu backend is not an cpu "
+         "function\n");
+  DEBUG(errs() << "Invoking child node" << CF_CPU->getName() << "\n");
+
+  std::vector<Value *> Args;
+  // Create argument list to pass to call instruction
+  // First find the correct values using the edges
+  // The remaining six values are inserted as constants for now.
+  for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) {
+    Args.push_back(getInValueAt(C, i, F_CPU, IB));
+  }
+
+  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_CPU->getContext()), 0);
+  for (unsigned j = 0; j < 6; j++)
+    Args.push_back(I64Zero);
+
+  DEBUG(errs() << "Gen Function type: " << *CF_CPU->getType() << "\n");
+  DEBUG(errs() << "Node Function type: " << *CF->getType() << "\n");
+  DEBUG(errs() << "Arguments: ";
+        for (const Value *Arg : Args)
+          errs() << *(Arg->getType()) << ", ";
+        errs() << "\n");
+
+  // Copying over index calculation.
+  copyChildIndexCalc(C, F_CPU, VMap, IB);
+
+  // Call the F_CPU function associated with this node
+  CallInst *CI =
+      CallInst::Create(CF_CPU, Args, CF_CPU->getName() + "_output", IB);
+  DEBUG(errs() << *CI << "\n");
+  OutputMap[C] = CI;
+
+  // Find num of dimensions this node is replicated in.
+  // Based on number of dimensions, insert loop instructions
+  assert(C->getNumOfDim() <= 3 && "At most three dimensions are supported!");
+  std::string varNames[3] = {"x", "y", "z"};
+  unsigned numArgs = CI->getNumArgOperands();
+  for (unsigned j = 0; j < C->getNumOfDim(); j++) {
+    Value *indexLimit = NULL;
+    // Limit can either be a constant or an argument of the internal node.
+    // In case of constant we can use that constant value directly in the
+    // new F_CPU function. In case of an argument, we need to get the mapped
+    // value using VMap
+    if (ConstantInt *ConstDimLimit = dyn_cast<ConstantInt>(C->getDimLimits()[j])) {
+      indexLimit = C->getDimLimits()[j];
+      DEBUG(errs() << "In Constant case:\n"
+                   << " indexLimit type = " << *indexLimit->getType() << "\n");
+      if (ConstDimLimit->getZExtValue() == 1) {
+        DEBUG(errs() << "DimLimit is 1, no need for loop!\n");
+        // No loop, but the callee must still see limit 1, not placeholder 0.
+        CI->setArgOperand(numArgs - 3 + j, indexLimit);
+        continue;
+      }
+    } else if (isa<Constant>(C->getDimLimits()[j])) {
+      indexLimit = C->getDimLimits()[j];
+      DEBUG(errs() << "In Constant case:\n"
+                   << " indexLimit type = " << *indexLimit->getType() << "\n");
+    } else {
+      indexLimit = VMap[C->getDimLimits()[j]];
+      assert(indexLimit && "Dimension limit is not mapped in VMap!");
+      DEBUG(errs() << "In VMap case:"
+                   << " indexLimit type = " << *indexLimit->getType() << "\n");
+    }
+    assert(indexLimit && "Invalid dimension limit!");
+    // Insert loop
+    Value *indexVar = addLoop(CI, indexLimit, varNames[j]);
+    DEBUG(errs() << "indexVar type = " << *indexVar->getType() << "\n");
+    // Insert index variable and limit arguments
+    CI->setArgOperand(numArgs - 6 + j, indexVar);
+    CI->setArgOperand(numArgs - 3 + j, indexLimit);
+  }
+
+  if (Tag == hpvm::CPU_TARGET) {
+    // Insert call to runtime to push the dim limits and instanceID on the depth
+    // stack
+    Value *args[] = {
+        ConstantInt::get(Type::getInt32Ty(CI->getContext()),
+                         C->getNumOfDim()), // numDim
+        CI->getArgOperand(numArgs - 3 + 0), // limitX
+        CI->getArgOperand(numArgs - 6 + 0), // iX
+        CI->getArgOperand(numArgs - 3 + 1), // limitY
+        CI->getArgOperand(numArgs - 6 + 1), // iY
+        CI->getArgOperand(numArgs - 3 + 2), // limitZ
+        CI->getArgOperand(numArgs - 6 + 2)  // iZ
+    };
+
+    CallInst *Push = CallInst::Create(llvm_hpvm_cpu_dstack_push,
+                                      ArrayRef<Value *>(args, 7), "", CI);
+    DEBUG(errs() << "Push on stack: " << *Push << "\n");
+    // Insert call to runtime to pop the dim limits and instanceID from the
+    // depth stack
+    BasicBlock::iterator i(CI);
+    ++i;
+    Instruction *NextI = &*i;
+    // Next Instruction should also belong to the same basic block as the basic
+    // block will have a terminator instruction
+    assert(NextI->getParent() == CI->getParent() &&
+           "Next Instruction should also belong to the same basic block!");
+
+    CallInst *Pop = CallInst::Create(llvm_hpvm_cpu_dstack_pop, None, "", NextI);
+    DEBUG(errs() << "Pop from stack: " << *Pop << "\n");
+    DEBUG(errs() << *CI->getParent()->getParent());
+  }
+}
+/* Add Loop around the instruction I
+ * Algorithm:
+ * (1) Split the basic block of instruction I into three parts, where the
+ * middleblock/body would contain instruction I.
+ * (2) Add phi node before instruction I. Add incoming edge to phi node from
+ * predecessor
+ * (3) Add increment and compare instruction to index variable
+ * (4) Replace terminator/branch instruction of body with conditional branch
+ * which loops over body if true and goes to end if false
+ * (5) Update phi node of body
+ * Note: the limit is only tested after the body has run, so the body is
+ * executed once even when limit is 0. Returns the i64 index phi node.
+ */
+Value *CodeGenTraversal::addLoop(Instruction *I, Value *limit,
+                                 const Twine &indexName) {
+  BasicBlock *Entry = I->getParent();
+  BasicBlock *ForBody = Entry->splitBasicBlock(I, "for.body");
+
+  BasicBlock::iterator i(I);
+  ++i;
+  Instruction *NextI = &*i;
+  // Next Instruction should also belong to the same basic block as the basic
+  // block will have a terminator instruction
+  assert(NextI->getParent() == ForBody &&
+         "Next Instruction should also belong to the same basic block!");
+  BasicBlock *ForEnd = ForBody->splitBasicBlock(NextI, "for.end");
+
+  // Add Phi Node for index variable
+  PHINode *IndexPhi = PHINode::Create(Type::getInt64Ty(I->getContext()), 2,
+                                      "index." + indexName, I);
+
+  // Add incoming edge to phi
+  IndexPhi->addIncoming(ConstantInt::get(Type::getInt64Ty(I->getContext()), 0),
+                        Entry);
+  // Increment index variable
+  BinaryOperator *IndexInc = BinaryOperator::Create(
+      Instruction::Add, IndexPhi,
+      ConstantInt::get(Type::getInt64Ty(I->getContext()), 1),
+      "index." + indexName + ".inc", ForBody->getTerminator());
+
+  // Compare index variable with limit
+  CmpInst *Cond =
+      CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc, limit,
+                      "cond." + indexName, ForBody->getTerminator());
+
+  // Replace the terminator instruction of for.body with new conditional
+  // branch which loops over body if true and branches to for.end otherwise
+  BranchInst *BI = BranchInst::Create(ForBody, ForEnd, Cond);
+  ReplaceInstWithInst(ForBody->getTerminator(), BI);
+
+  // Add incoming edge to phi node in body
+  IndexPhi->addIncoming(IndexInc, ForBody);
+  return IndexPhi;
+}
 bool CodeGenTraversal::checkPreferredTarget(DFNode *N, hpvm::Target T) {
   Function *F = N->getFuncPointer();
   Module *M = F->getParent();
@@ -168,7 +537,7 @@ bool CodeGenTraversal::checkPreferredTarget(DFNode *N, hpvm::Target T) {
   case hpvm::GPU_TARGET:
     HintNode = M->getOrInsertNamedMetadata("hpvm_hint_gpu");
     break;
-  case hpvm::CUDNN_TARGET:
+  case hpvm::CUDNN_TARGET:
     HintNode = M->getOrInsertNamedMetadata("hpvm_hint_cudnn");
     break;
   case hpvm::CPU_TARGET:
@@ -195,7 +564,6 @@ hpvm::Target CodeGenTraversal::getPreferredTarget(DFNode *N) {
 }
 
 bool CodeGenTraversal::preferredTargetIncludes(DFNode *N, hpvm::Target T) {
-
   Function *F = N->getFuncPointer();
   Module *M = F->getParent();
   std::vector<NamedMDNode *> HintNode;
@@ -230,6 +598,7 @@ bool CodeGenTraversal::preferredTargetIncludes(DFNode *N, hpvm::Target T) {
       return true;
     }
   }
+
   return false;
 }
 
@@ -249,7 +618,7 @@ Value *CodeGenTraversal::getStringPointer(const Twine &S, Instruction *IB,
   return SPtr;
 }
 
-void renameNewArgument(Function *newF, const Twine &argName) {
+inline void renameNewArgument(Function *newF, const Twine &argName) {
   // Get Last argument in Function Arg List and rename it to given name
   Argument *lastArg = &*(newF->arg_end() - 1);
   lastArg->setName(argName);
@@ -257,8 +626,7 @@ void renameNewArgument(Function *newF, const Twine &argName) {
 
 // Creates a function with an additional argument of the specified type and
 // name. The previous function is not deleted.
-Function *CodeGenTraversal::addArgument(Function *F, Type *Ty, - const Twine &name) { +inline Function *addArgument(Function *F, Type *Ty, const Twine &name) { Argument *new_arg = new Argument(Ty, name); // Create the argument type list with added argument types @@ -276,7 +644,7 @@ Function *CodeGenTraversal::addArgument(Function *F, Type *Ty, FunctionType *FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg()); Function *newF = Function::Create(FTy, F->getLinkage(), - F->getName() + "_cloned", F->getParent()); + F->getName() + "_c", F->getParent()); renameNewArgument(newF, name); newF = hpvmUtils::cloneFunction(F, newF, false); @@ -291,7 +659,6 @@ Function *CodeGenTraversal::addArgument(Function *F, Type *Ty, // Return new function with additional index and limit arguments. // The original function is removed from the module and erased. Function *CodeGenTraversal::addIdxDimArgs(Function *F) { - DEBUG(errs() << "Adding dimension and limit arguments to Function: " << F->getName()); DEBUG(errs() << "Function Type: " << *F->getFunctionType() << "\n"); // Add Index and Dim arguments std::string names[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"}; diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp index 10667ddeecc7f072222032e930d27fd1f75e7b2d..70a40ba07226e613300e3cb4ae073cfde9e8a584 100644 --- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp +++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp @@ -12,10 +12,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Intrinsics.h" #define DEBUG_TYPE "DFG2LLVM_CPU" - #include "SupportHPVM/DFG2LLVM.h" - #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/InstIterator.h" @@ -28,6 +27,8 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/ValueMapper.h" +#include <queue> + #ifndef LLVM_BUILD_DIR #error LLVM_BUILD_DIR is not 
defined #endif @@ -44,21 +45,7 @@ using namespace dfg2llvm; static cl::opt<bool> HPVMTimer_CPU("hpvm-timers-cpu", cl::desc("Enable hpvm timers")); -namespace { - -// DFG2LLVM_CPU - The first implementation. -struct DFG2LLVM_CPU : public DFG2LLVM { - static char ID; // Pass identification, replacement for typeid - DFG2LLVM_CPU() : DFG2LLVM(ID) {} - -private: - // Member variables - - // Functions - -public: - bool runOnModule(Module &M); -}; +namespace dfg2llvm { // Visitor for Code generation traversal (tree traversal for now) class CGT_CPU : public CodeGenTraversal { @@ -83,21 +70,15 @@ private: FunctionCallee llvm_hpvm_createThread; FunctionCallee llvm_hpvm_bufferPush; FunctionCallee llvm_hpvm_bufferPop; - FunctionCallee llvm_hpvm_cpu_dstack_push; - FunctionCallee llvm_hpvm_cpu_dstack_pop; +// FunctionCallee llvm_hpvm_cpu_dstack_push; +// FunctionCallee llvm_hpvm_cpu_dstack_pop; FunctionCallee llvm_hpvm_cpu_getDimLimit; FunctionCallee llvm_hpvm_cpu_getDimInstance; // Functions std::vector<IntrinsicInst *> *getUseList(Value *LI); - Value *addLoop(Instruction *I, Value *limit, const Twine &indexName = ""); void addWhileLoop(Instruction *, Instruction *, Instruction *, Value *); Instruction *addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *); - Argument *getArgumentFromEnd(Function *F, unsigned offset); - Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU, - Instruction *InsertBefore); - void invokeChild_CPU(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap, - Instruction *InsertBefore); void invokeChild_PTX(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap, Instruction *InsertBefore); StructType *getArgumentListStructTy(DFNode *); @@ -129,14 +110,7 @@ public: void codeGenLaunchStreaming(DFInternalNode *Root); }; -bool DFG2LLVM_CPU::runOnModule(Module &M) { - DEBUG(errs() << "\nDFG2LLVM_CPU PASS\n"); - - // Get the BuildDFG Analysis Results: - // - Dataflow graph - // - Maps from i8* hansles to DFNode and DFEdge - BuildDFG &DFG = 
getAnalysis<BuildDFG>(); - +bool runDFG2LLVM_CPU (Module &M, BuildDFG&DFG) { // DFInternalNode *Root = DFG.getRoot(); std::vector<DFInternalNode *> Roots = DFG.getRoots(); // BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap(); @@ -161,11 +135,35 @@ bool DFG2LLVM_CPU::runOnModule(Module &M) { else CGTVisitor->codeGenLaunch(rootNode); } + + for (auto &F : M) { + for (Function::arg_iterator ai = F.arg_begin(), ae = F.arg_end(); ai != ae; ai++) { + Argument *Arg = &*ai; + if(Arg->hasAttribute(Attribute::In)) + Arg->removeAttr(Attribute::In); + if(Arg->hasAttribute(Attribute::Out)) + Arg->removeAttr(Attribute::Out); + if(Arg->hasAttribute(Attribute::InOut)) + Arg->removeAttr(Attribute::InOut); + + } + } delete CGTVisitor; return true; } +bool DFG2LLVM_CPU::runOnModule(Module &M) { + DEBUG(errs() << "\nDFG2LLVM_CPU PASS\n"); + + // Get the BuildDFG Analysis Results: + // - Dataflow graph + // - Maps from i8* hansles to DFNode and DFEdge + BuildDFG &DFG = getAnalysis<BuildDFG>(); + + return runDFG2LLVM_CPU(M, DFG); +} + // Initialize the HPVM runtime API. This makes it easier to insert these calls void CGT_CPU::initRuntimeAPI() { @@ -238,22 +236,6 @@ std::vector<IntrinsicInst *> *CGT_CPU::getUseList(Value *GraphID) { return UseList; } -/* Traverse the function argument list in reverse order to get argument at a - * distance offset fromt he end of argument list of function F - */ -Argument *CGT_CPU::getArgumentFromEnd(Function *F, unsigned offset) { - assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) && - "Invalid offset to access arguments!"); - Function::arg_iterator e = F->arg_end(); - // Last element of argument iterator is dummy. Skip it. 
- e--; - Argument *arg; - for (; offset != 0; e--) { - offset--; - arg = &*e; - } - return arg; -} /* Add Loop around the instruction I * Algorithm: @@ -283,58 +265,31 @@ void CGT_CPU::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart, ReplaceInstWithInst(WhileBody->getTerminator(), UnconditionalBranch); } -/* Add Loop around the instruction I - * Algorithm: - * (1) Split the basic block of instruction I into three parts, where the - * middleblock/body would contain instruction I. - * (2) Add phi node before instruction I. Add incoming edge to phi node from - * predecessor - * (3) Add increment and compare instruction to index variable - * (4) Replace terminator/branch instruction of body with conditional branch - * which loops over bidy if true and goes to end if false - * (5) Update phi node of body - */ -Value *CGT_CPU::addLoop(Instruction *I, Value *limit, const Twine &indexName) { - BasicBlock *Entry = I->getParent(); - BasicBlock *ForBody = Entry->splitBasicBlock(I, "for.body"); - - BasicBlock::iterator i(I); - ++i; - Instruction *NextI = &*i; - // Next Instruction should also belong to the same basic block as the basic - // block will have a terminator instruction - assert(NextI->getParent() == ForBody && - "Next Instruction should also belong to the same basic block!"); - BasicBlock *ForEnd = ForBody->splitBasicBlock(NextI, "for.end"); - - // Add Phi Node for index variable - PHINode *IndexPhi = PHINode::Create(Type::getInt64Ty(I->getContext()), 2, - "index." + indexName, I); - - // Add incoming edge to phi - IndexPhi->addIncoming(ConstantInt::get(Type::getInt64Ty(I->getContext()), 0), - Entry); - // Increment index variable - BinaryOperator *IndexInc = BinaryOperator::Create( - Instruction::Add, IndexPhi, - ConstantInt::get(Type::getInt64Ty(I->getContext()), 1), - "index." 
+ indexName + ".inc", ForBody->getTerminator()); - - // Compare index variable with limit - CmpInst *Cond = - CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_ULT, IndexInc, limit, - "cond." + indexName, ForBody->getTerminator()); - - // Replace the terminator instruction of for.body with new conditional - // branch which loops over body if true and branches to for.end otherwise - BranchInst *BI = BranchInst::Create(ForBody, ForEnd, Cond); - ReplaceInstWithInst(ForBody->getTerminator(), BI); - - // Add incoming edge to phi node in body - IndexPhi->addIncoming(IndexInc, ForBody); - return IndexPhi; +Instruction *CGT_CPU::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond, + BasicBlock *Body) { + Module *M = Entry->getParent()->getParent(); + Type *Int64Ty = Type::getInt64Ty(M->getContext()); + + // Insert a PHI instruction at the beginning of the condition block + Instruction *IB = Cond->getFirstNonPHI(); + PHINode *CounterPhi = PHINode::Create(Int64Ty, 2, "cnt", IB); + + ConstantInt *IConst = + ConstantInt::get(Type::getInt64Ty(M->getContext()), 1, true); + Instruction *CounterIncr = + BinaryOperator::CreateNSW(Instruction::BinaryOps::Add, CounterPhi, IConst, + "cnt_incr", Body->getTerminator()); + + // Set incoming values for Phi node + IConst = ConstantInt::get(Type::getInt64Ty(M->getContext()), 0, true); + CounterPhi->addIncoming(IConst, Entry); + CounterPhi->addIncoming(CounterIncr, Body); + + // Return the pointer to the created PHI node in the corresponding argument + return CounterPhi; } + // Returns a packed struct type. The structtype is created by packing the input // types, output types and isLastInput buffer type. 
All the streaming // inputs/outputs are converted to i8*, since this is the type of buffer @@ -831,135 +786,7 @@ void CGT_CPU::codeGenLaunch(DFInternalNode *Root) { } } -Value *CGT_CPU::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU, - Instruction *InsertBefore) { - // TODO: Assumption is that each input port of a node has just one - // incoming edge. May change later on. - - // Find the incoming edge at the requested input port - DFEdge *E = Child->getInDFEdgeAt(i); - assert(E && "No incoming edge or binding for input element!"); - // Find the Source DFNode associated with the incoming edge - DFNode *SrcDF = E->getSourceDF(); - - // If Source DFNode is a dummyNode, edge is from parent. Get the - // argument from argument list of this internal node - Value *inputVal; - if (SrcDF->isEntryNode()) { - inputVal = getArgumentAt(ParentF_CPU, E->getSourcePosition()); - DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n"); - } else { - // edge is from a sibling - // Check - code should already be generated for this source dfnode - assert(OutputMap.count(SrcDF) && - "Source node call not found. 
Dependency violation!"); - - // Find CallInst associated with the Source DFNode using OutputMap - Value *CI = OutputMap[SrcDF]; - - // Extract element at source position from this call instruction - std::vector<unsigned> IndexList; - IndexList.push_back(E->getSourcePosition()); - DEBUG(errs() << "Going to generate ExtarctVal inst from " << *CI << "\n"); - ExtractValueInst *EI = - ExtractValueInst::Create(CI, IndexList, "", InsertBefore); - inputVal = EI; - } - return inputVal; -} - -void CGT_CPU::invokeChild_CPU(DFNode *C, Function *F_CPU, - ValueToValueMapTy &VMap, Instruction *IB) { - Function *CF = C->getFuncPointer(); - - // Function* CF_CPU = C->getGenFunc(); - Function *CF_CPU = C->getGenFuncForTarget(hpvm::CPU_TARGET); - assert(CF_CPU != NULL && - "Found leaf node for which code generation has not happened yet!\n"); - assert(C->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) && - "The generated function to be called from cpu backend is not an cpu " - "function\n"); - DEBUG(errs() << "Invoking child node" << CF_CPU->getName() << "\n"); - - std::vector<Value *> Args; - // Create argument list to pass to call instruction - // First find the correct values using the edges - // The remaing six values are inserted as constants for now. - for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) { - Args.push_back(getInValueAt(C, i, F_CPU, IB)); - } - - Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_CPU->getContext()), 0); - for (unsigned j = 0; j < 6; j++) - Args.push_back(I64Zero); - - DEBUG(errs() << "Gen Function type: " << *CF_CPU->getType() << "\n"); - DEBUG(errs() << "Node Function type: " << *CF->getType() << "\n"); - DEBUG(errs() << "Arguments: " << Args.size() << "\n"); - // Call the F_CPU function associated with this node - CallInst *CI = - CallInst::Create(CF_CPU, Args, CF_CPU->getName() + "_output", IB); - DEBUG(errs() << *CI << "\n"); - OutputMap[C] = CI; - - // Find num of dimensions this node is replicated in. 
- // Based on number of dimensions, insert loop instructions - std::string varNames[3] = {"x", "y", "z"}; - unsigned numArgs = CI->getNumArgOperands(); - for (unsigned j = 0; j < C->getNumOfDim(); j++) { - Value *indexLimit = NULL; - // Limit can either be a constant or an arguement of the internal node. - // In case of constant we can use that constant value directly in the - // new F_CPU function. In case of an argument, we need to get the mapped - // value using VMap - if (isa<Constant>(C->getDimLimits()[j])) { - indexLimit = C->getDimLimits()[j]; - DEBUG(errs() << "In Constant case:\n" - << " indexLimit type = " << *indexLimit->getType() << "\n"); - } else { - indexLimit = VMap[C->getDimLimits()[j]]; - DEBUG(errs() << "In VMap case:" - << " indexLimit type = " << *indexLimit->getType() << "\n"); - } - assert(indexLimit && "Invalid dimension limit!"); - // Insert loop - Value *indexVar = addLoop(CI, indexLimit, varNames[j]); - DEBUG(errs() << "indexVar type = " << *indexVar->getType() << "\n"); - // Insert index variable and limit arguments - CI->setArgOperand(numArgs - 6 + j, indexVar); - CI->setArgOperand(numArgs - 3 + j, indexLimit); - } - // Insert call to runtime to push the dim limits and instanceID on the depth - // stack - Value *args[] = { - ConstantInt::get(Type::getInt32Ty(CI->getContext()), - C->getNumOfDim()), // numDim - CI->getArgOperand(numArgs - 3 + 0), // limitX - CI->getArgOperand(numArgs - 6 + 0), // iX - CI->getArgOperand(numArgs - 3 + 1), // limitY - CI->getArgOperand(numArgs - 6 + 1), // iY - CI->getArgOperand(numArgs - 3 + 2), // limitZ - CI->getArgOperand(numArgs - 6 + 2) // iZ - }; - - CallInst *Push = CallInst::Create(llvm_hpvm_cpu_dstack_push, - ArrayRef<Value *>(args, 7), "", CI); - DEBUG(errs() << "Push on stack: " << *Push << "\n"); - // Insert call to runtime to pop the dim limits and instanceID from the depth - // stack - BasicBlock::iterator i(CI); - ++i; - Instruction *NextI = &*i; - // Next Instruction should also belong to 
the same basic block as the basic - // block will have a terminator instruction - assert(NextI->getParent() == CI->getParent() && - "Next Instruction should also belong to the same basic block!"); - - CallInst *Pop = CallInst::Create(llvm_hpvm_cpu_dstack_pop, None, "", NextI); - DEBUG(errs() << "Pop from stack: " << *Pop << "\n"); - DEBUG(errs() << *CI->getParent()->getParent()); -} /* This function takes a DFNode, and creates a filter function for it. By filter * function we mean a function which keeps on getting input from input buffers, @@ -1140,6 +967,7 @@ Function *CGT_CPU::createFunctionFilter(DFNode *C) { return CF_Pipeline; } + void CGT_CPU::codeGen(DFInternalNode *N) { // Check if N is root node and its graph is streaming. We do not do codeGen // for Root in such a case @@ -1241,7 +1069,7 @@ void CGT_CPU::codeGen(DFInternalNode *N) { continue; // Create calls to CPU function of child node - invokeChild_CPU(C, F_CPU, VMap, RI); + invokeChild(C, F_CPU, VMap, RI, hpvm::CPU_TARGET); } DEBUG(errs() << "*** Generating epilogue code for the function****\n"); @@ -1404,13 +1232,12 @@ void CGT_CPU::codeGen(DFLeafNode *N) { << " : skipping it\n"); switch (N->getTag()) { - case hpvm::GPU_TARGET: { + case hpvm::GPU_TARGET: // A leaf node should not have an cpu function for GPU // by design of DFG2LLVM_OpenCL backend assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) && "Leaf node not expected to have GPU GenFunc"); break; - } case hpvm::CUDNN_TARGET: { DEBUG(errs() << "CUDNN hint found. 
Store CUDNN function as CPU funtion.\n"); // Make sure there is a generated CPU function for cudnn @@ -1448,16 +1275,18 @@ void CGT_CPU::codeGen(DFLeafNode *N) { N->setTag(hpvm::CPU_TARGET); break; } - default: - { - break; - } + default: + break; } return; } - assert(N->getGenFuncForTarget(hpvm::CPU_TARGET) == NULL && + /* if(N->getGenFuncForTarget(hpvm::CPU_TARGET) != NULL) { + DEBUG(errs() << "Already generated CPU code for this node!\n"); + return; + }*/ + assert(N->getGenFuncForTarget(hpvm::CPU_TARGET) == NULL && "Error: Visiting a node for which code already generated\n"); std::vector<IntrinsicInst *> IItoRemove; @@ -1466,6 +1295,7 @@ void CGT_CPU::codeGen(DFLeafNode *N) { // Get the function associated woth the dataflow node Function *F = N->getFuncPointer(); + DEBUG(errs() << "Generating CPU code for function " << F->getName() << "\n"); // Clone the function, if we are seeing this function for the first time. Function *F_CPU; @@ -1691,7 +1521,11 @@ void CGT_CPU::codeGen(DFLeafNode *N) { break; } - } else { + } else if (BuildDFG::isHPVMIntrinsic(I)) { + IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); + if (II->getIntrinsicID() == Intrinsic::hpvm_nz_loop) { + IItoRemove.push_back(II); + } } } diff --git a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp index eda655e3196450ee94ab44a70d500a1188007a66..44d6eec7075b93df987935078604da42ff6639fd 100644 --- a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp +++ b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp @@ -13,6 +13,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Intrinsics.h" #define DEBUG_TYPE "genhpvm" #include "GenHPVM/GenHPVM.h" @@ -97,23 +98,50 @@ static void ReplaceCallWithIntrinsic(Instruction *I, Intrinsic::ID IntrinsicID, FunctionType *FTy = F->getFunctionType(); DEBUG(errs() << *F << "\n"); - // Create argument list - assert(CI->getNumArgOperands() == FTy->getNumParams() && - "Number of arguments of call do not match with 
Intrinsic"); - for (unsigned i = 0; i < CI->getNumArgOperands(); i++) { - Value *V = CI->getArgOperand(i); - // Either the type should match or both should be of pointer type - assert((V->getType() == FTy->getParamType(i) || - (V->getType()->isPointerTy() && - FTy->getParamType(i)->isPointerTy())) && - "Dummy function call argument does not match with Intrinsic " - "argument!"); - // If the types do not match, then both must be pointer type and pointer - // cast needs to be performed - if (V->getType() != FTy->getParamType(i)) { - V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI); - } + // Adding support for loop tripcount info + // hpvm_nz_loop might have 1 or two operands + if (IntrinsicID == Intrinsic::hpvm_nz_loop) { + assert((CI->getNumArgOperands() > 0 && + (CI->getNumArgOperands() == FTy->getNumParams() || + CI->getNumArgOperands() + 1 == FTy->getNumParams())) && + "Number of arguments of call do not match with Intrinsic"); + // We always should have 1 argument that points to the PHI Node + Value *V = CI->getArgOperand(0); + assert(V->getType() == FTy->getParamType(0) && + "isNonZeroLoop argument 0 does not match!"); args.push_back(V); + // We can optionally have a second argument which includes the trip count. + // If we don't have it, set to Zero. 
+ // This trip count is only used by hpvm-hypermapper for DSE + if (CI->getNumArgOperands() == 2) { + Value *V2 = CI->getArgOperand(1); + assert(V2->getType() == FTy->getParamType(1) && + "isNonZeroLoop argument 1 does not match!"); + args.push_back(V2); + } else { + Value *Zero = ConstantInt::get(Type::getInt64Ty(M->getContext()), 0); + args.push_back(Zero); + } + } else { + + // Create argument list + assert(CI->getNumArgOperands() == FTy->getNumParams() && + "Number of arguments of call do not match with Intrinsic"); + for (unsigned i = 0; i < CI->getNumArgOperands(); i++) { + Value *V = CI->getArgOperand(i); + // Either the type should match or both should be of pointer type + assert((V->getType() == FTy->getParamType(i) || + (V->getType()->isPointerTy() && + FTy->getParamType(i)->isPointerTy())) && + "Dummy function call argument does not match with Intrinsic " + "argument!"); + // If the types do not match, then both must be pointer type and pointer + // cast needs to be performed + if (V->getType() != FTy->getParamType(i)) { + V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI); + } + args.push_back(V); + } } } // Insert call instruction @@ -169,6 +197,8 @@ IS_HPVM_CALL(requestMemory) IS_HPVM_CALL(attributes) IS_HPVM_CALL(hint) +IS_HPVM_CALL(task) + // Tensor Operators IS_HPVM_CALL(tensor_mul) IS_HPVM_CALL(tensor_convolution) @@ -186,6 +216,8 @@ IS_HPVM_CALL(tensor_softmax) IS_HPVM_CALL(node_id) +IS_HPVM_CALL(isNonZeroLoop) + // Return the constant integer represented by value V static unsigned getNumericValue(Value *V) { assert( @@ -274,6 +306,26 @@ static void handleHPVMAttributes(Function *F, CallInst *CI) { << *F << "\n"); } + +void insertChildren(Function* F, std::vector<Function*>& functions){ + for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) { + Instruction *I = &*i; // Grab pointer to Instruction + + CallInst* CI = dyn_cast<CallInst>(I); + + if(!CI) continue; + + if (isHPVMCall_createNodeND(I)) { + Function* ChildF = 
cast<Function>(CI->getArgOperand(1)->stripPointerCasts()); + insertChildren(ChildF, functions); + } + } + if(std::find(functions.begin(),functions.end(), F) != functions.end()) return; + functions.push_back(F); + +} + + // Public Functions of GenHPVM pass bool GenHPVM::runOnModule(Module &M) { DEBUG(errs() << "\nGENHPVM PASS\n"); @@ -313,6 +365,7 @@ bool GenHPVM::runOnModule(Module &M) { // Insert init context in main DEBUG(errs() << "Locate __hpvm__init()\n"); Function *VI = M.getFunction("__hpvm__init"); + assert(VI != NULL && "__hpvm__init not found!"); assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once"); Instruction *I = cast<Instruction>(*VI->user_begin()); @@ -332,11 +385,59 @@ bool GenHPVM::runOnModule(Module &M) { std::vector<Instruction *> toBeErased; std::vector<Function *> functions; - for (auto &F : M) - functions.push_back(&F); + /* + for (auto &F : M){ + if(!F.isDeclaration()){ + functions.push_back(&F); + } + }*/ + + Function* LaunchF = M.getFunction("__hpvm__launch"); + for(auto* User: LaunchF->users()){ + + CallInst* CI = dyn_cast<CallInst>(User); + + Function* Host = CI->getParent()->getParent(); + + + Function* RootFn = dyn_cast<Function>(CI->getArgOperand(1)->stripPointerCasts()); + insertChildren(RootFn, functions); + + if(std::find(functions.begin(),functions.end(), Host) == functions.end()) + functions.push_back(Host); + + + } + + Function* InitF = M.getFunction("__hpvm__init"); + for(auto* User: InitF->users()){ + + CallInst* CI = dyn_cast<CallInst>(User); + + Function* Host = CI->getParent()->getParent(); + + if(std::find(functions.begin(),functions.end(), Host) == functions.end()) + functions.push_back(Host); + + } + Function* ClearF = M.getFunction("__hpvm__cleanup"); + for(auto* User: ClearF->users()){ + + CallInst* CI = dyn_cast<CallInst>(User); + + Function* Host = CI->getParent()->getParent(); + + if(std::find(functions.begin(),functions.end(), Host) == functions.end()) + functions.push_back(Host); + + } + + + // 
Iterate over all functions in the module - for (Function *f : functions) { + for (unsigned i = 0; i < functions.size(); ++i) { + Function *f = functions[i]; DEBUG(errs() << "Function: " << f->getName() << "\n"); // List with the required additions in the function's return type @@ -378,13 +479,21 @@ bool GenHPVM::runOnModule(Module &M) { if (isHPVMCall_requestMemory(I)) { ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_requestMemory, &toBeErased); } + if (isHPVMCall_task(I)){ + assert(isa<ConstantInt>(CI->getArgOperand(0)) + && "Argument to hpvm_task must be a constant integer"); + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_task, &toBeErased); + } if (isHPVMCall_hint(I)) { - assert(isa<ConstantInt>(CI->getArgOperand(0)) && - "Argument to hint must be constant integer!"); - ConstantInt *hint = cast<ConstantInt>(CI->getArgOperand(0)); - DEBUG(errs() << "HINT INSTRUCTION: " << *I << "\n"); - hpvm::Target t = (hpvm::Target)hint->getZExtValue(); - addHint(CI->getParent()->getParent(), t); + // Iterate over variadic hint call + for(unsigned h = 0; h < CI->getNumArgOperands(); h ++){ + assert(isa<ConstantInt>(CI->getArgOperand(h)) && + "Argument to hint must be constant integer!"); + ConstantInt *hint = cast<ConstantInt>(CI->getArgOperand(h)); + + hpvm::Target t = (hpvm::Target)hint->getZExtValue(); + addHint(CI->getParent()->getParent(), t); + } DEBUG(errs() << "Found hpvm hint call: " << *CI << "\n"); toBeErased.push_back(CI); } @@ -394,6 +503,7 @@ bool GenHPVM::runOnModule(Module &M) { DEBUG(errs() << *LaunchF << "\n"); // Get i8* cast to function pointer Function *graphFunc = cast<Function>(CI->getArgOperand(1)); + auto OldFuncPosition = std::find(functions.begin(), functions.end(), graphFunc); graphFunc = transformReturnTypeToStruct(graphFunc); Constant *F = ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx)); @@ -401,6 +511,8 @@ bool GenHPVM::runOnModule(Module &M) { F && "Function invoked by HPVM launch has to be define and constant."); + 
std::replace(functions.begin(), functions.end(), *OldFuncPosition, graphFunc); + ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(0)); assert(Op && "HPVM launch's streaming argument is a constant value."); Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx) @@ -430,9 +542,17 @@ bool GenHPVM::runOnModule(Module &M) { assert(CI->getNumArgOperands() > 0 && "Too few arguments for __hpvm__createNodeND call"); unsigned numDims = getNumericValue(CI->getArgOperand(0)); - // We need as meny dimension argments are there are dimensions - assert(CI->getNumArgOperands() - 2 == numDims && - "Too few arguments for __hpvm_createNodeND call!\n"); + + // We need as many dimension argments are there are dimensions + // assert(CI->getNumArgOperands() - 2 == numDims && + // "Too few arguments for __hpvm_createNodeND call!\n"); + + unsigned numArgs = CI->getNumArgOperands(); + + + assert((numArgs - numDims == 2 || + numArgs - numDims == 3) && + "Invalid number of arguments passed to __hpvm__createNodeND call"); Function *CreateNodeF; switch (numDims) { @@ -463,22 +583,41 @@ bool GenHPVM::runOnModule(Module &M) { // Get i8* cast to function pointer Function *graphFunc = cast<Function>(CI->getArgOperand(1)); + auto OldFuncPosition = std::find(functions.begin(), functions.end(), graphFunc); graphFunc = transformReturnTypeToStruct(graphFunc); Constant *F = ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx)); + // Insert transformed functions into list of functions + // to process. 
+ std::replace(functions.begin(), functions.end(), *OldFuncPosition, graphFunc); + CallInst *CreateNodeInst; + + // Each Node has a default criticality of 0 + ConstantInt* NodeCriticality = ConstantInt::get(Type::getInt32Ty(Ctx), 0); + + // If optional criticality provided + if(numArgs - numDims == 3){ + ConstantInt* Crit = dyn_cast<ConstantInt>(CI->getArgOperand(numArgs - 1) + ); + assert(Crit && "Criticality Value must be a constant integer"); + NodeCriticality = Crit; + } + switch (numDims) { - case 0: - CreateNodeInst = CallInst::Create(CreateNodeF, ArrayRef<Value *>(F), + case 0: { + Value *CreateNodeArgs[] = {F, NodeCriticality}; + CreateNodeInst = CallInst::Create(CreateNodeF, ArrayRef<Value *>(CreateNodeArgs,2), graphFunc->getName() + ".node", CI); - break; + + } break; case 1: { assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) && "CreateNodeND dimension argument, 2, expected to be i64\n"); - Value *CreateNodeArgs[] = {F, CI->getArgOperand(2)}; + Value *CreateNodeArgs[] = {F, CI->getArgOperand(2), NodeCriticality}; CreateNodeInst = CallInst::Create( - CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 2), + CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 3), graphFunc->getName() + ".node", CI); } break; case 2: { @@ -487,9 +626,9 @@ bool GenHPVM::runOnModule(Module &M) { assert((CI->getArgOperand(3)->getType() == Type::getInt64Ty(Ctx)) && "CreateNodeND dimension argument, 3, expected to be i64\n"); Value *CreateNodeArgs[] = {F, CI->getArgOperand(2), - CI->getArgOperand(3)}; + CI->getArgOperand(3),NodeCriticality}; CreateNodeInst = CallInst::Create( - CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 3), + CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 4), graphFunc->getName() + ".node", CI); } break; case 3: { @@ -501,9 +640,10 @@ bool GenHPVM::runOnModule(Module &M) { "CreateNodeND dimension argument, 4, expected to be i64\n"); Value *CreateNodeArgs[] = {F, CI->getArgOperand(2), CI->getArgOperand(3), - CI->getArgOperand(4)}; + 
CI->getArgOperand(4), + NodeCriticality}; CreateNodeInst = CallInst::Create( - CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 4), + CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 5), graphFunc->getName() + ".node", CI); } break; default: @@ -729,6 +869,9 @@ bool GenHPVM::runOnModule(Module &M) { if (isHPVMCall_cos(I)) { ReplaceCallWithIntrinsic(I, Intrinsic::cos, &toBeErased); } + if (isHPVMCall_isNonZeroLoop(I)) { + ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_nz_loop, &toBeErased); + } if (isHPVMCall_tensor_convolution(I)) { ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_convolution, &toBeErased); @@ -868,16 +1011,17 @@ void GenHPVM::initializeTimerSet(Instruction *InsertBefore) { GlobalValue::CommonLinkage, Constant::getNullValue(Type::getInt8PtrTy(M->getContext())), "hpvmTimerSet_GenHPVM")); - DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet - << "\n"); + TIMER(DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet + << "\n")); // DEBUG(errs() << "Inserting call to: " << *llvm_hpvm_initializeTimerSet << // "\n"); TIMER(TimerSetAddr = CallInst::Create(llvm_hpvm_initializeTimerSet, None, "", InsertBefore)); - DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n"); + TIMER(DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n")); TIMER(SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore)); - DEBUG(errs() << "Store Timer Address in Global variable: " << *SI << "\n"); + TIMER(DEBUG(errs() << "Store Timer Address in Global variable: " << *SI + << "\n")); } void GenHPVM::switchToTimer(enum hpvm_TimerID timer,