diff --git a/llvm/include/llvm/IR/DFGraph.h b/llvm/include/llvm/IR/DFGraph.h index d068301bc7d48e55fb78c2c10f7c10efbc797d4f..247154b6801abe7b7dffc1e90487ad872d832f9b 100644 --- a/llvm/include/llvm/IR/DFGraph.h +++ b/llvm/include/llvm/IR/DFGraph.h @@ -496,7 +496,6 @@ public: } void removeGenFuncForTarget(visc::Target T) { - errs() << "Target tag = " << T << "\n"; switch (T) { case visc::CPU_TARGET: GenFuncs.CPUGenFunc = NULL; diff --git a/llvm/include/llvm/SupportVISC/DFG2LLVM.h b/llvm/include/llvm/SupportVISC/DFG2LLVM.h index a036d255c81aeec436b1add99ac0738ec3a46860..9a48405cd5d2631905e265a547167f79dc2eb681 100644 --- a/llvm/include/llvm/SupportVISC/DFG2LLVM.h +++ b/llvm/include/llvm/SupportVISC/DFG2LLVM.h @@ -17,6 +17,7 @@ #include "llvm/BuildDFG/BuildDFG.h" #include "llvm/SupportVISC/VISCHint.h" #include "llvm/SupportVISC/VISCTimer.h" +#include "llvm/SupportVISC/VISCUtils.h" using namespace llvm; using namespace builddfg; @@ -84,8 +85,10 @@ protected: // Functions Value* getStringPointer(const Twine& S, Instruction* InsertBefore, const Twine& Name = ""); - void addArgument(Function*, Type*, const Twine& Name = ""); - void addIdxDimArgs(Function* F); +// void addArgument(Function*, Type*, const Twine& Name = ""); + Function *addArgument(Function*, Type*, const Twine& Name = ""); +// void addIdxDimArgs(Function* F); + Function *addIdxDimArgs(Function* F); std::vector<Value*> extractElements(Value*, std::vector<Type*>, std::vector<std::string>, Instruction*); Argument* getArgumentAt(Function* F, unsigned offset); @@ -223,7 +226,29 @@ Value* CodeGenTraversal::getStringPointer(const Twine& S, Instruction* IB, const } // Add an argument of type Ty to the given function F -void CodeGenTraversal::addArgument(Function* F, Type* Ty, const Twine& name) { +//void CodeGenTraversal::addArgument(Function* F, Type* Ty, const Twine& name) { +// // Add the argument to argument list +// new Argument(Ty, name, F); +// +// // Create the argument type list with added argument types +// std::vector<Type*> ArgTypes; +// for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end(); +// ai != ae; ++ai) { +// ArgTypes.push_back(ai->getType()); +// } +// // Adding new arguments to the function argument list, would not change the +// // function type. We need to change the type of this function to reflect the +// // added arguments +// FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg()); +// PointerType* PTy = PointerType::get(FTy, cast<PointerType>(F->getType())->getAddressSpace()); +// +// // Change the function type +// F->mutateType(PTy); +//} + +// Creates a function with an additional argument of the specified type and +// name. The previous function is not deleted. +Function *CodeGenTraversal::addArgument(Function* F, Type* Ty, const Twine& name) { // Add the argument to argument list new Argument(Ty, name, F); @@ -235,21 +260,43 @@ void CodeGenTraversal::addArgument(Function* F, Type* Ty, const Twine& name) { } // Adding new arguments to the function argument list, would not change the // function type. We need to change the type of this function to reflect the - // added arguments + // added arguments. So, we create a clone of this function with the correct + // type. FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg()); - PointerType* PTy = PointerType::get(FTy, cast<PointerType>(F->getType())->getAddressSpace()); + Function *newF = viscUtils::cloneFunction(F, FTy, false); + + // Check if the function is used by a metadata node + if(F->isUsedByMetadata()) { + viscUtils::fixHintMetadata(*F->getParent(), F, newF); + } - // Change the function type - F->mutateType(PTy); + return newF; } // Change the argument list of function F to add index and limit arguments -void CodeGenTraversal::addIdxDimArgs(Function* F) { +//void CodeGenTraversal::addIdxDimArgs(Function* F) { +// // Add Index and Dim arguments +// std::string names[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"}; +// for (int i = 0; i < 6; ++i) { +// addArgument(F, Type::getInt32Ty(F->getContext()), names[i]); +// } +//} + +// Return new function with additional index and limit arguments. +// The original function is removed from the module and erased. +Function *CodeGenTraversal::addIdxDimArgs(Function* F) { + errs() << "Function Type: " << *F->getFunctionType() << "\n"; // Add Index and Dim arguments std::string names[] = {"idx_x", "idx_y", "idx_z", "dim_x", "dim_y", "dim_z"}; + Function *newF; for (int i = 0; i < 6; ++i) { - addArgument(F, Type::getInt32Ty(F->getContext()), names[i]); + newF = addArgument(F, Type::getInt64Ty(F->getContext()), names[i]); + F->replaceAllUsesWith(UndefValue::get(F->getType())); + F->eraseFromParent(); + F = newF; } + errs() << "Function Type after adding args: " << *newF->getFunctionType() << "\n"; + return newF; } // Extract elements from an aggregate value. TyList contains the type of each diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index bb83544969c3fa88fb0dd10557cae734ced28786..845e5a7f9f50fcd9cf5eaa8455b5d913ab401a1c 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -354,8 +354,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); i != e; ++i) { dest_iterator->setName(i->getName()); // Copy the name over... - // Add mapping to VMap and increment dest iterator - VMap[&*i] = &*dest_iterator; + // Increment dest iterator ++dest_iterator; } @@ -364,17 +363,29 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi ReturnInst* RI = ReturnInst::Create(M.getContext(), UndefValue::get(F_X86->getReturnType()), BB); - //Add the generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::GPU_TARGET, true); - // FIXME: Adding Index and Dim arguments are probably not required except // for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do // have those arguments) // Add Index and Dim arguments except for the root node if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) - addIdxDimArgs(F_X86); + F_X86 = addIdxDimArgs(F_X86); + + BB = &*F_X86->begin(); + RI = cast<ReturnInst>(BB->getTerminator()); + + //Add the generated function info to DFNode +// N->setGenFunc(F_X86, visc::CPU_TARGET); + N->addGenFunc(F_X86, visc::GPU_TARGET, true); + + // Loop over the arguments, to create the VMap + dest_iterator = F_X86->arg_begin(); + for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); + i != e; ++i) { + // Add mapping to VMap and increment dest iterator + VMap[&*i] = &*dest_iterator; + ++dest_iterator; + } /* TODO: Use this code to verufy if this is a good pattern for PTX kernel diff --git a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp index f31e75931c4a997d8c43716e299faf323ac48477..d62d0561ac47c384c21835608a512d3c8246a022 100644 --- a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp @@ -369,7 +369,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil // Add Index and Dim arguments except for the root node if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) - addIdxDimArgs(F_X86); + F_X86 = addIdxDimArgs(F_X86); /* TODO: Use this code to verufy if this is a good pattern for OCL kernel diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp index 8794f423e03af521ab44562301924ba106454e1a..d55640dc6f875777ab3fb3fbe75763a4dd6d016d 100644 --- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp @@ -949,9 +949,9 @@ void CGT_X86::invokeChild_X86(DFNode* C, Function* F_X86, Args.push_back(getInValueAt(C, i, F_X86, IB)); } - Value* I32Zero = ConstantInt::get(Type::getInt32Ty(F_X86->getContext()), 0); + Value* I64Zero = ConstantInt::get(Type::getInt64Ty(F_X86->getContext()), 0); for(unsigned j=0; j<6; j++) - Args.push_back(I32Zero); + Args.push_back(I64Zero); errs() << "Function type: " << *CF_X86->getType() << "\n"; errs() << "Function type: " << *CF->getType() << "\n"; @@ -971,7 +971,7 @@ void CGT_X86::invokeChild_X86(DFNode* C, Function* F_X86, std::string varNames[3] = {"x", "y", "z"}; unsigned numArgs = CI->getNumArgOperands(); for(unsigned j=0; j < C->getNumOfDim(); j++) { - Value* indexLimit; + Value* indexLimit = NULL; // Limit can either be a constant or an arguement of the internal node. // In case of constant we can use that constant value directly in the // new F_X86 function. In case of an argument, we need to get the mapped @@ -1300,7 +1300,7 @@ void CGT_X86::codeGen(DFInternalNode* N) { if (!(C->hasX86GenFuncForTarget(visc::CPU_TARGET))) { errs() << "No CPU x86 version for child node " << C->getFuncPointer()->getName() - << " . Skip code gen for parent node " + << "\n Skip code gen for parent node " << N->getFuncPointer()->getName() << "\n"; codeGen = false; } @@ -1318,34 +1318,42 @@ void CGT_X86::codeGen(DFInternalNode* N) { // Create new function with the same type F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M); - errs() << "--------------" << F->getName() << "\n"; + // Loop over the arguments, copying the names of arguments over. Function::arg_iterator dest_iterator = F_X86->arg_begin(); - assert(false && "Got here\n"); for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); i != e; ++i) { dest_iterator->setName(i->getName()); // Copy the name over... - // Add mapping to VMap and increment dest iterator - VMap[&*i] = &*dest_iterator; + // Increment dest iterator ++dest_iterator; } - - assert(false && "Got here\n"); // Add a basic block to this empty function BasicBlock *BB = BasicBlock::Create(F_X86->getContext(), "entry", F_X86); ReturnInst* RI = ReturnInst::Create(F_X86->getContext(), UndefValue::get(F_X86->getReturnType()), BB); - //Add generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::CPU_TARGET, true); - // Add Index and Dim arguments except for the root node and the child graph of // parent node is not streaming if(!N->isRoot() && !N->getParent()->isChildGraphStreaming()) - addIdxDimArgs(F_X86); + F_X86 = addIdxDimArgs(F_X86); + + BB = &*F_X86->begin(); + RI = cast<ReturnInst>(BB->getTerminator()); + //Add generated function info to DFNode +// N->setGenFunc(F_X86, visc::CPU_TARGET); + N->addGenFunc(F_X86, visc::CPU_TARGET, true); + + // Loop over the arguments, to create the VMap. + dest_iterator = F_X86->arg_begin(); + for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(); + i != e; ++i) { + // Add mapping and increment dest iterator + VMap[&*i] = &*dest_iterator; + ++dest_iterator; + } + // Iterate over children in topological order for(DFGraph::children_iterator ci = N->getChildGraph()->begin(), ce = N->getChildGraph()->end(); ci != ce; ++ci) { @@ -1675,14 +1683,14 @@ void CGT_X86::codeGen(DFLeafNode* N) { // Insert the cloned function into the module M.getFunctionList().push_back(F_X86); - // Add generated function info to DFNode -// N->setGenFunc(F_X86, visc::CPU_TARGET); - N->addGenFunc(F_X86, visc::CPU_TARGET, true); - // Add the new argument to the argument list. Add arguments only if the cild // graph of parent node is not streaming if(!N->getParent()->isChildGraphStreaming()) - addIdxDimArgs(F_X86); + F_X86 = addIdxDimArgs(F_X86); + + // Add generated function info to DFNode +// N->setGenFunc(F_X86, visc::CPU_TARGET); + N->addGenFunc(F_X86, visc::CPU_TARGET, true); // Go through the arguments, and any pointer arguments with in attribute need // to have x86_argument_ptr call to get the x86 ptr of the argument diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp index e61802d25e2b5ccd4affa7044deb8712feaf9c03..8d52e471652ab5ca17e30ca9328f067ae1ab9942 100644 --- a/llvm/projects/visc-rt/visc-rt.cpp +++ b/llvm/projects/visc-rt/visc-rt.cpp @@ -99,7 +99,7 @@ void llvm_visc_x86_dstack_pop() { //DEBUG(cout << "DStack size = " << DStack.size() << flush << "\n"); } -unsigned llvm_visc_x86_getDimLimit(unsigned level, unsigned dim) { +uint64_t llvm_visc_x86_getDimLimit(unsigned level, unsigned dim) { //DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level <<flush << "\n"); //unsigned size = DStack.size(); //DEBUG(cout << "\t Return: " << DStack[size-level-1].getDimLimit(dim) <<flush << "\n"); @@ -107,7 +107,7 @@ unsigned llvm_visc_x86_getDimLimit(unsigned level, unsigned dim) { return 0; } -unsigned llvm_visc_x86_getDimInstance(unsigned level, unsigned dim) { +uint64_t llvm_visc_x86_getDimInstance(unsigned level, unsigned dim) { //DEBUG(cout << "Request instance id for dim " << dim << " of ancestor " << level <<flush << "\n"); //unsigned size = DStack.size(); //DEBUG(cout << "\t Return: " << DStack[size-level-1].getDimInstance(dim) <<flush << "\n"); diff --git a/llvm/projects/visc-rt/visc-rt.h b/llvm/projects/visc-rt/visc-rt.h index 20cc6e35a6f0b7802d4e662b61f5c6e2bf086149..69392671fb6a244922c75a6c11c82b2405787732 100644 --- a/llvm/projects/visc-rt/visc-rt.h +++ b/llvm/projects/visc-rt/visc-rt.h @@ -71,8 +71,8 @@ class DFGDepth { void llvm_visc_x86_dstack_push(unsigned n, unsigned limitX = 0, unsigned iX = 0, unsigned limitY = 0, unsigned iY = 0, unsigned limitZ = 0, unsigned iZ = 0); void llvm_visc_x86_dstack_pop(); -unsigned llvm_visc_x86_getDimLimit(unsigned level, unsigned dim); -unsigned llvm_visc_x86_getDimInstance(unsigned level, unsigned dim); +uint64_t llvm_visc_x86_getDimLimit(unsigned level, unsigned dim); +uint64_t llvm_visc_x86_getDimInstance(unsigned level, unsigned dim); /********************* Memory Tracker **********************************/