diff --git a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp index 6d04074b96b1d5043f25c58f6be6ef018cc1edb7..07d7667fa044732293833382a9f57b05c8297509 100644 --- a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp +++ b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp @@ -74,6 +74,7 @@ public: // Generate code for this internal node now. This way all the cloned // functions for children exist. deleteNode(N); + DEBUG(errs() << "\tDone - " << "\n"); //errs() << "DONE: Generating Code for Node (I) - " << N->getFuncPointer()->getName() << "\n"; } diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index 6926e6cfed65679b455152cffc311ae946dab314..59bdb622b93ef45a4d2b01ed415ff5e86b524470 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -147,6 +147,7 @@ private: Constant* llvm_visc_ocl_clearContext; Constant* llvm_visc_ocl_argument_scalar; Constant* llvm_visc_ocl_argument_ptr; + Constant* llvm_visc_ocl_output_ptr; Constant* llvm_visc_ocl_free; Constant* llvm_visc_ocl_getOutput; Constant* llvm_visc_ocl_executeNode; @@ -245,6 +246,7 @@ void CGT_NVPTX::initRuntimeAPI() { DECLARE(llvm_visc_ocl_clearContext); DECLARE(llvm_visc_ocl_argument_scalar); DECLARE(llvm_visc_ocl_argument_ptr); + DECLARE(llvm_visc_ocl_output_ptr); DECLARE(llvm_visc_ocl_free); DECLARE(llvm_visc_ocl_getOutput); DECLARE(llvm_visc_ocl_executeNode); @@ -332,7 +334,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi // have those arguments) // Add Index and Dim arguments except for the root node - if(!N->isRoot()) + if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) addIdxDimArgs(F_X86); /* TODO: Use this code to verufy if this is a good pattern for PTX kernel @@ -480,24 +482,23 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi // Set output if struct is not an empty struct StructType* OutputTy = K->KernelLeafNode->getOutputType(); - Value *outputSize, *d_Output; + std::vector<Value*> d_Outputs; if(!OutputTy->isEmptyTy()) { switchToTimer(visc_TimerID_COPY_PTR, RI); // Not an empty struct - unsigned outputIndex = KF->getFunctionType()->getNumParams(); - outputSize = ConstantExpr::getSizeOf(OutputTy); - Value* setOutputArgs[] = {GraphID, - Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex), - ConstantExpr::getSizeOf(OutputTy), - False, - True - }; - - d_Output = CallInst::Create(llvm_visc_ocl_argument_ptr, - ArrayRef<Value*>(setOutputArgs, 6), + // Iterate over all elements of the struct and put them in + for(unsigned i=0; i < OutputTy->getNumElements(); i++) { + unsigned outputIndex = KF->getFunctionType()->getNumParams()+i; + Value* setOutputArgs[] = {GraphID, + ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex), + ConstantExpr::getSizeOf(OutputTy->getElementType(i))}; + + CallInst* d_Output = CallInst::Create(llvm_visc_ocl_output_ptr, + ArrayRef<Value*>(setOutputArgs, 3), "d_output."+KF->getName(), RI); + d_Outputs.push_back(d_Output); + } } // Enqueue kernel @@ -529,22 +530,30 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi switchToTimer(visc_TimerID_READ_OUTPUT, RI); // Read Output Struct if not empty if(!OutputTy->isEmptyTy()) { - Value* GetOutputArgs[] = {GraphID, + std::vector<Value*>h_Outputs; + Value* KernelOutput = UndefValue::get(OutputTy); + for(unsigned i=0; i < OutputTy->getNumElements(); i++) { + Value* GetOutputArgs[] = {GraphID, Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - d_Output, - outputSize + d_Outputs[i], + ConstantExpr::getSizeOf(OutputTy->getElementType(i)) }; - CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput, + CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput, ArrayRef<Value*>(GetOutputArgs, 4), "h_output."+KF->getName()+".addr", RI); - // Read each device pointer listed in output struct - // Load the output struct - CastInst* BI = BitCastInst::CreatePointerCast(h_Output, KF->getReturnType()->getPointerTo(), "output.ptr", RI); - Value* KernelOutput = new LoadInst(BI, "output."+KF->getName(), RI); + // Read each device pointer listed in output struct + // Load the output struct + CastInst* BI = BitCastInst::CreatePointerCast(h_Output, + OutputTy->getElementType(i)->getPointerTo(), "output.ptr", RI); + + Value* OutputElement = new LoadInst(BI, "output."+KF->getName(), RI); + KernelOutput = InsertValueInst::Create(KernelOutput, OutputElement, ArrayRef<unsigned>(i), + KF->getName()+"output", RI); + } OutputMap[K->KernelLeafNode] = KernelOutput; } - + // Read all the pointer arguments which had side effects i.e., had out // attribute DEBUG(errs() << "Output Pointers : " << OutputPointers.size() << "\n"); @@ -642,7 +651,14 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi // Right now, only targeting the one level case. In general, device functions // can return values so we don't need to change them void CGT_NVPTX::codeGen(DFInternalNode* N) { - + errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; + if(KernelLaunchNode == NULL) + errs () << "No kernel launch node\n"; + else { + errs() << "KernelLaunchNode is not null: "<< KernelLaunchNode<<"\n"; + errs () << "KernelLaunchNode: " << KernelLaunchNode->getFuncPointer()->getName() << "\n"; + } + if (!KernelLaunchNode) { DEBUG(errs() << "No code generated (host code for kernel launch complete).\n"); return; @@ -721,6 +737,13 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { DEBUG(errs() << "Skipping dummy node\n"); return; } + + // Generate code only if it has the right hint + if(!checkPreferredTarget(N, visc::GPU_TARGET)) { + errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; + return; + } + // Checking which node is the kernel launch DFNode* PNode = N->getParent(); @@ -1185,23 +1208,16 @@ void CGT_NVPTX::transformFunctionToVoid(Function* F) { else { // The struct has return values, thus needs to be converted to parameter - int initialNumParams = F->arg_size(); - - Type* ArgType = FRetTy->getPointerTo(GENERIC_ADDRSPACE); - new Argument(ArgType, "ret_struct_ptr", F); - DEBUG(errs() << "\tCreated parameter\n"); - - // Find where the new parameter is in the header - Function::arg_iterator ai, ae; - int check = 0; - for (ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - if (ai->getName().equals("ret_struct_ptr")) break; - check++; + // Iterate over all element types of return struct and add arguments to the + // function + std::vector<Argument*> Args; + for (unsigned i=0; i<FRetTy->getNumElements(); i++) { + Argument* RetArg = new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F); + Args.push_back(RetArg); + DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n"); } - // DEBUG(errs() << "\tcheck = " << check << "\tinitialNumParams = " << initialNumParams << "\n"); - assert(check == initialNumParams); + Function::arg_iterator ai, ae; DEBUG(errs() << "\tReplacing Return statements\n"); // Replace return statements with extractValue and store instructions @@ -1209,11 +1225,15 @@ void CGT_NVPTX::transformFunctionToVoid(Function* F) { rie = RItoRemove.end(); rii != rie; ++rii) { ReturnInst* RI = (*rii); Value* RetVal = RI->getReturnValue(); + for(unsigned i = 0; i < Args.size(); i++) { + ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i), + Args[i]->getName()+".val", RI); + new StoreInst(EI, Args[i], RI); + } // assert(RetVal && "Return value should not be null at this point"); // StructType* RetType = cast<StructType>(RetVal->getType()); // assert(RetType && "Return type is not a struct"); - new StoreInst(RetVal, &(*ai), RI); ReturnInst::Create((F->getContext()), 0, RI); RI->eraseFromParent(); @@ -1237,7 +1257,6 @@ void CGT_NVPTX::transformFunctionToVoid(Function* F) { // Change the function type F->mutateType(PTy); - } /****************************************************************************** diff --git a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp index a05fe6080262ccf4657650c6169900ac4e35d6f9..51a40baa94bae48dc4fe4b5994d3bd43f07e8e4c 100644 --- a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp @@ -151,6 +151,7 @@ private: Constant* llvm_visc_ocl_clearContext; Constant* llvm_visc_ocl_argument_scalar; Constant* llvm_visc_ocl_argument_ptr; + Constant* llvm_visc_ocl_output_ptr; Constant* llvm_visc_ocl_free; Constant* llvm_visc_ocl_getOutput; Constant* llvm_visc_ocl_executeNode; @@ -177,6 +178,7 @@ public: // Constructor CGT_SPIR(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(*CloneModule(&_M)) { + KernelLaunchNode = NULL; init(); initRuntimeAPI(); @@ -250,6 +252,7 @@ void CGT_SPIR::initRuntimeAPI() { DECLARE(llvm_visc_ocl_clearContext); DECLARE(llvm_visc_ocl_argument_scalar); DECLARE(llvm_visc_ocl_argument_ptr); + DECLARE(llvm_visc_ocl_output_ptr); DECLARE(llvm_visc_ocl_free); DECLARE(llvm_visc_ocl_getOutput); DECLARE(llvm_visc_ocl_executeNode); @@ -337,7 +340,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil // have those arguments) // Add Index and Dim arguments except for the root node - if(!N->isRoot()) + if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) addIdxDimArgs(F_X86); /* TODO: Use this code to verufy if this is a good pattern for OCL kernel @@ -485,24 +488,23 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil // Set output if struct is not an empty struct StructType* OutputTy = K->KernelLeafNode->getOutputType(); - Value *outputSize, *d_Output; + std::vector<Value*> d_Outputs; if(!OutputTy->isEmptyTy()) { switchToTimer(visc_TimerID_COPY_PTR, RI); // Not an empty struct - unsigned outputIndex = KF->getFunctionType()->getNumParams(); - outputSize = ConstantExpr::getSizeOf(OutputTy); - Value* setOutputArgs[] = {GraphID, - Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex), - ConstantExpr::getSizeOf(OutputTy), - False, - True - }; - - d_Output = CallInst::Create(llvm_visc_ocl_argument_ptr, - ArrayRef<Value*>(setOutputArgs, 6), + // Iterate over all elements of the struct and put them in + for(unsigned i=0; i < OutputTy->getNumElements(); i++) { + unsigned outputIndex = KF->getFunctionType()->getNumParams()+i; + Value* setOutputArgs[] = {GraphID, + ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex), + ConstantExpr::getSizeOf(OutputTy->getElementType(i))}; + + CallInst* d_Output = CallInst::Create(llvm_visc_ocl_output_ptr, + ArrayRef<Value*>(setOutputArgs, 3), "d_output."+KF->getName(), RI); + d_Outputs.push_back(d_Output); + } } // Enqueue kernel @@ -534,19 +536,27 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil switchToTimer(visc_TimerID_READ_OUTPUT, RI); // Read Output Struct if not empty if(!OutputTy->isEmptyTy()) { - Value* GetOutputArgs[] = {GraphID, + std::vector<Value*>h_Outputs; + Value* KernelOutput = UndefValue::get(OutputTy); + for(unsigned i=0; i < OutputTy->getNumElements(); i++) { + Value* GetOutputArgs[] = {GraphID, Constant::getNullValue(Type::getInt8PtrTy(M.getContext())), - d_Output, - outputSize + d_Outputs[i], + ConstantExpr::getSizeOf(OutputTy->getElementType(i)) }; - CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput, + CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput, ArrayRef<Value*>(GetOutputArgs, 4), "h_output."+KF->getName()+".addr", RI); - // Read each device pointer listed in output struct - // Load the output struct - CastInst* BI = BitCastInst::CreatePointerCast(h_Output, KF->getReturnType()->getPointerTo(), "output.ptr", RI); - Value* KernelOutput = new LoadInst(BI, "output."+KF->getName(), RI); + // Read each device pointer listed in output struct + // Load the output struct + CastInst* BI = BitCastInst::CreatePointerCast(h_Output, + OutputTy->getElementType(i)->getPointerTo(), "output.ptr", RI); + + Value* OutputElement = new LoadInst(BI, "output."+KF->getName(), RI); + KernelOutput = InsertValueInst::Create(KernelOutput, OutputElement, ArrayRef<unsigned>(i), + KF->getName()+"output", RI); + } OutputMap[K->KernelLeafNode] = KernelOutput; } @@ -647,6 +657,14 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil // Right now, only targeting the one level case. In general, device functions // can return values so we don't need to change them void CGT_SPIR::codeGen(DFInternalNode* N) { + errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n"; + if(KernelLaunchNode == NULL) + errs () << "No kernel launch node\n"; + else { + errs() << "KernelLaunchNode is not null: "<< KernelLaunchNode<<"\n"; + errs () << "KernelLaunchNode: " << KernelLaunchNode->getFuncPointer()->getName() << "\n"; + } + if (!KernelLaunchNode) { DEBUG(errs() << "No code generated (host code for kernel launch complete).\n"); @@ -720,6 +738,31 @@ void CGT_SPIR::codeGen(DFInternalNode* N) { } +//static bool checkPreferredTarget(DFNode* N, visc::Target T) { + //Function* F = N->getFuncPointer(); + //Module* M = F->getParent(); + //NamedMDNode* HintNode; + //switch (T) { + //case visc::GPU_TARGET: + //HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); + //break; + //case visc::SPIR_TARGET: + //HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); + //break; + //case visc::CPU_TARGET: + //HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); + //break; + //default: + //llvm_unreachable("Target Not supported yet!"); + //} + //for (unsigned i = 0; i < HintNode->getNumOperands(); i++) { + //MDNode* MetaNode = HintNode->getOperand(i); + //if(F == MetaNode->getOperand(0)) + //return true; + //} + //return false; +//} + void CGT_SPIR::codeGen(DFLeafNode* N) { // Skip code generation if it is a dummy node @@ -728,6 +771,12 @@ void CGT_SPIR::codeGen(DFLeafNode* N) { return; } + // Generate code only if it has the right hint + if(!checkPreferredTarget(N, visc::SPIR_TARGET)) { + errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n"; + return; + } + // Checking which node is the kernel launch DFNode* PNode = N->getParent(); int pLevel = PNode->getLevel(); @@ -739,6 +788,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) { // (2) Parent does not have multiple instances if (!pLevel || !pReplFactor) { KernelLaunchNode = PNode; + errs() << "Setting Kernel Launch Node\n"; kernel = new Kernel(NULL, N, N->getInArgMap(), @@ -1203,23 +1253,16 @@ void CGT_SPIR::transformFunctionToVoid(Function* F) { else { // The struct has return values, thus needs to be converted to parameter - int initialNumParams = F->arg_size(); - - Type* ArgType = FRetTy->getPointerTo(GENERIC_ADDRSPACE); - new Argument(ArgType, "ret_struct_ptr", F); - DEBUG(errs() << "\tCreated parameter\n"); - - // Find where the new parameter is in the header - Function::arg_iterator ai, ae; - int check = 0; - for (ai = F->arg_begin(), ae = F->arg_end(); - ai != ae; ++ai) { - if (ai->getName().equals("ret_struct_ptr")) break; - check++; + // Iterate over all element types of return struct and add arguments to the + // function + std::vector<Argument*> Args; + for (unsigned i=0; i<FRetTy->getNumElements(); i++) { + Argument* RetArg = new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F); + Args.push_back(RetArg); + DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n"); } - // DEBUG(errs() << "\tcheck = " << check << "\tinitialNumParams = " << initialNumParams << "\n"); - assert(check == initialNumParams); + Function::arg_iterator ai, ae; DEBUG(errs() << "\tReplacing Return statements\n"); // Replace return statements with extractValue and store instructions @@ -1227,11 +1270,15 @@ void CGT_SPIR::transformFunctionToVoid(Function* F) { rie = RItoRemove.end(); rii != rie; ++rii) { ReturnInst* RI = (*rii); Value* RetVal = RI->getReturnValue(); + for(unsigned i = 0; i < Args.size(); i++) { + ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i), + Args[i]->getName()+".val", RI); + new StoreInst(EI, Args[i], RI); + } // assert(RetVal && "Return value should not be null at this point"); // StructType* RetType = cast<StructType>(RetVal->getType()); // assert(RetType && "Return type is not a struct"); - new StoreInst(RetVal, &(*ai), RI); ReturnInst::Create((F->getContext()), 0, RI); RI->eraseFromParent(); diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp index 98f654d3da748e1250a9a36ac18d8be6a3d1cb0e..5f9f6cb12fc28f64ca1b2ba72c54b27035cdf3f5 100644 --- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp @@ -837,7 +837,7 @@ Value* CGT_X86::getInValueAt(DFNode* Child, unsigned i, Function* ParentF_X86, I // argument from argument list of this internal node Value* inputVal; if(SrcDF->isEntryNode()) { - inputVal = getArgumentAt(ParentF_X86, i); + inputVal = getArgumentAt(ParentF_X86, E->getSourcePosition()); DEBUG(errs() << "Argument "<< i<< " = " << *inputVal << "\n"); } else { @@ -881,6 +881,12 @@ void CGT_X86::invokeChild_X86(DFNode* C, Function* F_X86, for(unsigned j=0; j<6; j++) Args.push_back(I32Zero); + errs() << "Function type: " << *CF_X86->getType() << "\n"; + errs() << "Function type: " << *CF->getType() << "\n"; + errs() << "Arguments: " << Args.size() << "\n"; + for(unsigned i=0; i < Args.size(); i++) + errs() << *Args[i]->getType() << " "; + errs() << "\n"; // Call the F_X86 function associated with this node CallInst* CI = CallInst::Create(CF_X86, Args, CF_X86->getName()+"_output", diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp index 8ad0602703745e0a2c8b78fbda3a6b410d493408..3aa7735ee36720574b589d578cdd303abbf4b194 100644 --- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp +++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp @@ -120,7 +120,7 @@ static void addHint(Function* F, visc::Target T) { HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu"); } else { - HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu"); + HintNode = M->getOrInsertNamedMetadata("visc_hint_spir"); } // Create a node for the function and add it to the hint node @@ -826,11 +826,14 @@ bool GenVISC::runOnModule(Module &M) { Function* LaunchF = Intrinsic::getDeclaration(&M, Intrinsic::visc_launch); DEBUG(errs() << *LaunchF << "\n"); // Get i8* cast to function pointer - Function* graphFunc = cast<Function>(CI->getArgOperand(0)); + Function* graphFunc = cast<Function>(CI->getArgOperand(1)); Constant* F = ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx)); + ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(0)); + Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx) + : ConstantInt::getTrue(Ctx); - Value* LaunchArgs[] = {F, CI->getArgOperand(1), ConstantInt::getTrue(Ctx)}; + Value* LaunchArgs[] = {F, CI->getArgOperand(2), isStreaming}; CallInst* LaunchInst = CallInst::Create(LaunchF, ArrayRef<Value*>(LaunchArgs, 3), "graphID", CI); @@ -1038,9 +1041,12 @@ bool GenVISC::runOnModule(Module &M) { } // Erase the __visc__node calls - DEBUG(errs() << "Erase Statements:\n"); + DEBUG(errs() << "Erase " << toBeErased.size() << " Statements:\n"); for(auto I: toBeErased) { DEBUG(errs() << *I << "\n"); + } + for(auto I: toBeErased) { + DEBUG(errs() << "\tErasing " << *I << "\n"); I->eraseFromParent(); } @@ -1125,7 +1131,7 @@ void GenVISC::genKernel(Function* KernelF, CallInst* CI, StructType* RetTy) { Instruction *I = &(*i); if(isVISCattributesCall(I)) { handleVISCAttributes(KernelF, cast<CallInst>(I)); - I->eraseFromParent(); + //I->eraseFromParent(); break; } }