diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
index ac5fa450c0956fc056caf732418cda9e52e3655c..ecec258dfe6ef45377bdf2890a5bcd1a31fedf6e 100644
--- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
@@ -73,6 +73,7 @@ private:
 
   std::vector<Value*> Args;
   std::vector<IntrinsicInst*> IIs;
+  std::vector<IntrinsicInst*> IIs_remove; // Intrinsics to remove
   AbstractState *current;
 
 public:
@@ -88,6 +89,10 @@ public:
     return M;
   }
 
+  Module *getRtModule() {
+    return RtM;
+  }
+
   void addArgument(Value *Arg) {
     Args.push_back(Arg);
   }
@@ -96,6 +101,10 @@ public:
     IIs.push_back(II);
   }
 
+  void addIntrinsicToRemove(IntrinsicInst *II) {
+    IIs_remove.push_back(II);
+  }
+
   IntrinsicInst *getIntrinsicInstAt(unsigned idx) {
     return IIs[idx];
   }
@@ -267,6 +276,7 @@ public:
   void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
 };
 
+
 void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
     switch (II->getIntrinsicID()) {
@@ -288,6 +298,31 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
           Mch->setCurrent(new FullyConnectedLayer_1());
         }
         break;
+
+      case Intrinsic::visc_node_id:
+        {
+
+          DEBUG(errs() << "\t: Handling __visc_node_id \n");
+          // Get uint32 node ID
+          Value *Op = II->getOperand(0);
+
+          std::vector<Value*> Args;
+          Args.push_back(Op);
+
+          Module *M = Mch->getModule();
+          Module *RtM = Mch->getRtModule();
+
+          Constant* visc_node_id_call =
+            M->getOrInsertFunction(StringRef("tensor_set_node_id"),
+              RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType());
+
+          CallInst::Create(visc_node_id_call, Args, "", II);
+
+          Mch->addIntrinsicToRemove(II);
+          Mch->setCurrent(new InitialState());
+        }
+        break;
+
       default: // Other HPVM intrinsic
         {
           Mch->addIntrinsicInst(II);
@@ -438,14 +473,15 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
           Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
           Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
           Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-//          Mch->addArgument(ConstantInt::get(
-//            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//          Mch->addArgument(ConstantInt::get(
-//            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//          Mch->addArgument(ConstantInt::get(
-//            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//          Mch->addArgument(ConstantInt::get(
-//            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+
+          // Mch->addArgument(ConstantInt::get(
+          //   Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+          // Mch->addArgument(ConstantInt::get(
+          //   Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+          // Mch->addArgument(ConstantInt::get(
+          //   Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+          // Mch->addArgument(ConstantInt::get(
+          //   Type::getInt32Ty(Mch->getModule()->getContext()), 0));
 
           // No pooling
           // 0 for unused pool arguments:
@@ -470,9 +506,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
     switch (II->getIntrinsicID()) {
       case Intrinsic::visc_tensor_tanh:
         {
-        // Type of activation : TanH
-//          Mch->addArgument(ConstantInt::get(
-//            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+          // Type of activation : TanH
+          // Mch->addArgument(ConstantInt::get(
+          //   Type::getInt32Ty(Mch->getModule()->getContext()), 0));
           Mch->addIntrinsicInst(II);
           Mch->setCurrent(new ConvolutionLayer_3());
 
@@ -480,9 +516,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         break;
       case Intrinsic::visc_tensor_relu:
         {
-        // Type of activation : ReLU
-//          Mch->addArgument(ConstantInt::get(
-//            Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+          // Type of activation : ReLU
+          // Mch->addArgument(ConstantInt::get(
+          //   Type::getInt32Ty(Mch->getModule()->getContext()), 1));
           Mch->addIntrinsicInst(II);
           Mch->setCurrent(new ConvolutionLayer_3());
 
@@ -490,9 +526,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         break;
       case Intrinsic::visc_tensor_clipped_relu:
         {
-        // Type of activation : Clipped ReLU
-//          Mch->addArgument(ConstantInt::get(
-//            Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+          // Type of activation : Clipped ReLU
+          // Mch->addArgument(ConstantInt::get(
+          //   Type::getInt32Ty(Mch->getModule()->getContext()), 2));
           Mch->addIntrinsicInst(II);
           Mch->setCurrent(new ConvolutionLayer_3());
 
@@ -613,6 +649,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
             // pool min FIXME: 2: supported?
             Mch->addArgument(ConstantInt::get(
               Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+
             // pool_size_v, pool_size_h, pool pad_v,
             // pool_pad_h, pool_stride_v, pool_stride_h
             for (int i = 1; i < 7; i++) {
@@ -737,10 +774,11 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
   assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
             (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)     ||
             (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) &&
-          "Unsupported instruction sequence for the Wrapper API.\n" );
+            "Unsupported instruction sequence for the Wrapper API.\n" );
 
   if ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
       (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)) {
+    // Layer Operation.
     DEBUG(errs() << "Layer Instruction Sequence. Validating ...\n");
 
     // We have a valid instruction sequence.
@@ -765,6 +803,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
       Constant* wrapper_ConvLayer2 =
         M->getOrInsertFunction(StringRef("wrapper_ConvLayer2"),
           RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType());
+      DEBUG(errs() << *wrapper_ConvLayer2);
 
       // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0
 
@@ -839,12 +878,14 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
 
     CI->insertBefore(IIlast);
     IIlast->replaceAllUsesWith(CI);
-  } else { // SINGLE_TENSOR_OPERATION
+  }
+  else { // SINGLE_TENSOR_OPERATION
     assert((IIs.size() == 1) && "Unexpected size of intrinsics vector in code gen state machine.\n");
     assert(Args.empty() && "Unexpected arguments found in coge gen state machine.\n");
 
     IntrinsicInst *TensorII = IIs[0];
-errs() << "TensorII: " << *TensorII << "\n";
+
+    errs() << "TensorII: " << *TensorII << "\n";
 
     switch (TensorII->getIntrinsicID()) {
       case Intrinsic::visc_tensor_group_convolution:
@@ -893,8 +934,9 @@ errs() << "TensorII: " << *TensorII << "\n";
 
       case Intrinsic::visc_tensor_batchnorm:
       { /* llvm.hpvm.tensor.batchnorm */
+        // Tensor batchnorm is not in place.
 
-	// FIXME: Add Check for InPlace Analysis
+        // FIXME: Add Check for InPlace Analysis
         DEBUG(errs() << F->getName() << "\t: Handling tensor batch normalization \n");
 
         // Argument list for the runtime call
@@ -933,18 +975,18 @@ errs() << "TensorII: " << *TensorII << "\n";
 
       case Intrinsic::visc_tensor_add:
       { /* llvm.hpvm.tensor.add */
-        DEBUG(errs() << F->getName() << "\t: Handling tensor add\n");
-        // Tensor add(a,b) is in place for argument a.
-//        Value *Op = TensorII->getOperand(0);
+        DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n");
+        // Tensor add(a,b) is in place for argument a.
+        // Value *Op = TensorII->getOperand(0);
 
         // Test the intrinsic operand for in place operation.
-//        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
+        // bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
        // Code generation will not continue if this is false, because the target
        // may provide an in place operation(safe choice)
 
        // FIXME: remove this comment - must check for in-place
-//        assert(inplace &&
-//               "Operand not valid for in place operation. Code gen aborted.\n");
+        // assert(inplace &&
+        //        "Operand not valid for in place operation. Code gen aborted.\n");
 
         // Argument list for the runtime call
 
@@ -1047,13 +1089,11 @@ errs() << "TensorII: " << *TensorII << "\n";
         Value *Op = TensorII->getOperand(0);
 
         // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
+        //-- bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
         // Code generation will not continue if this is false, because the target
         // may provide an in place operation(safe choice)
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
+        //-- assert(inplace &&
+        //--        "Operand not valid for in place operation. Code gen aborted.\n");
 
         // Create string for node name, as first argument for wrapper API call
         Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
@@ -1110,15 +1150,6 @@ errs() << "TensorII: " << *TensorII << "\n";
         // Tensor softmax(a) is in place for argument a.
         Value *Op = TensorII->getOperand(0);
 
-        // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
-        // Code generation will not continue if this is false, because the target
-        // may provide an in place operation(safe choice)
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
-
         // Create string for node name, as first argument for wrapper API call
         Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
                                                             strRef, true);
@@ -1146,62 +1177,8 @@ errs() << "TensorII: " << *TensorII << "\n";
         TensorII->replaceAllUsesWith(TensorII->getOperand(0));
       }
       break;
-/*
-      case Intrinsic::visc_image_fft_transform:
-      { // llvm.hpvm.image.fft.transform - Or another image intrinsic
-        // All will be treated as not in place
-        DEBUG(errs() << F->getName() << "\t: Handling fft transform \n");
-
-        // Create argument list for the runtime call - stored in Args
-        // All interfaces will have a string as first argument, which will be
-        // used to identify the dataflow node at runtime
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        // Here, use you will access the appropriate arruments of the intrinsic
-        // and push_back, in order to create the argument list of runtime call
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-
-        Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
-        Args.push_back(conv_mode);
-
-        Args.push_back(TensorII->getOperand(7));
-
-        // Done with argument list.
-
-        // Create wrapper API runtime function call
-        // Appropriately set the name of the function of the runtime that you
-        // want to call
-        // Note: the Constant * is what we need to pass to the callInst.
-        // This name does not have to match, but does so for similarity.
-        Constant* wrapper_tensorGroupConvolution;
-          M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"),
-            RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution,
-                                        Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.xxx with the runtime call
-        TensorII->replaceAllUsesWith(CI);
-      }
-      break;
-
-*/
+
       default:
         llvm_unreachable("Unknown VISC Intrinsic!");
         break;
 
@@ -1219,6 +1196,13 @@ errs() << "TensorII: " << *TensorII << "\n";
     (*ri)->eraseFromParent();
   }
 
+
+  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs_remove.rbegin(),
+       re = IIs_remove.rend(); ri != re; ++ri) {
+    DEBUG(errs() << "Erasing: " << **ri << "\n");
+    (*ri)->eraseFromParent();
+  }
+
 }
 
 // DFG2LLVM_WrapperAPI - The first implementation.
@@ -1226,6 +1210,8 @@
 struct DFG2LLVM_WrapperAPI : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
   DFG2LLVM_WrapperAPI() : DFG2LLVM(ID) {}
+
+
 private:
 
 public:
@@ -1380,20 +1366,13 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
     return;
   }
 
-// For wrapper API, we generate code for every leaf node.
-// No need to check for hints from frontend
-// // Generate code only if it has the right hint
-// if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) {
-//   errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-//   return;
-// }
 
   // Increment the node ID, for current node.
   ++nodeID;
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
-errs() << "Node Function: " << *F << "\n";
+  errs() << "Node Function: " << *F << "\n";
   // Look up if we have visited this function before. If we have, then just
   // get the cloned function pointer from DFNode. Otherwise, create the cloned
   // function and add it to the DFNode GenFunc.
@@ -1405,6 +1384,8 @@ errs() << "Node Function: " << *F << "\n";
   // Clone the function
   ValueToValueMapTy VMap;
   std::string FName(F->getName().data());//Twine FName = F->getName();
+
+
   F_wrapper_api = CloneFunction(F, VMap);
   F_wrapper_api->setName(FName+"_wrapper_api");
   F_wrapper_api->removeFromParent();
@@ -1461,15 +1442,12 @@ errs() << "Node Function: " << *F << "\n";
 
   //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP);
   CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP);
-//errs() << "-----------------------------------\n";
-//errs() << *F_wrapper_api << "\n";
-
   return;
 }
 
 
 bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_WrapperAPI PASS\n";
+  errs() << "\nDFG2LLVM_WrapperAPI PASS\n";
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
   BuildDFG &DFG = getAnalysis<BuildDFG>();
@@ -1477,9 +1455,8 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
 
   // Get the In Place Analysis Results
   InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
-  // Print results
-//  printInPlaceDFGParameter(IPP);
+
   std::vector<DFInternalNode*> Roots = DFG.getRoots();
 
   // Visitor for Code Generation Graph Traversal
@@ -1496,6 +1473,7 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
 
   //TODO: Edit module epilogue to remove the VISC intrinsic declarations
   delete CGTVisitor;
+
   return true;
 }
 
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
index eb72b3dd7425dd59ad2212741b78d5954d35e64c..541efe4e1dae7ec0b62fd041396cb34cd6f9e519 100644
--- a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
+++ b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
@@ -92,8 +92,7 @@ static IntrinsicInst *isValidHPVMTensorNode(DFNode *N) {
     if(dyn_cast<IntrinsicInst>(&*I)){
      II = dyn_cast<IntrinsicInst>(&*I);
      if ((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")){
-
-       errs()<<"**** WATCH *** " << *II << "\n\n\n";
+       errs()<<"** Tensor Intrinsic = " << *II << "\n";
      }
    }
 