diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp index dac22fe3d84256f85f391f3fa77bc1e9b08a6914..e9660aa6e7a4aad995c6003fce9697a8304f3ccf 100644 --- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp @@ -1250,33 +1250,25 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) { Function * OpenCLFunction; int parentLevel = ParentDFNode->getLevel(); int parentReplFactor = ParentDFNode->getNumOfDim(); + FunctionType* FT = + FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/), + Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/), + false); if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) { // We only have one level in the hierarchy or the parent node is not // replicated. This indicates that the parent node is the kernel // launch, so the instances are global_size (gridDim x blockDim) - FunctionType* FT = - FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/), - std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)), - false); OpenCLFunction = cast<Function> (KernelM.getOrInsertFunction(StringRef("get_global_size"), FT)); } else if (Leaf_HandleToDFNodeMap[ArgII] == N) { // We are asking for this node's instances // this is a local size (block dim) call - FunctionType* FT = - FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/), - std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)), - false); OpenCLFunction = cast<Function> (KernelM.getOrInsertFunction(StringRef("get_local_size"), FT)); } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) { // We are asking for this node's parent's instances // this is a (global_size/local_size) (grid dim) call - FunctionType* FT = - FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/), - std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)), - false); OpenCLFunction = cast<Function> (KernelM.getOrInsertFunction(StringRef("get_num_groups"), FT)); } else { diff --git a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp index b9f8865067fa269884e42c620114d763f556eac4..31103c218bca660f11563f98093c93b2cda2b353 100644 --- a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp +++ b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp @@ -1275,9 +1275,9 @@ void CGT_SPIR::codeGen(DFLeafNode* N) { FunctionType* FT = FunctionType::get(Type::getInt64Ty(getGlobalContext() /*KernelM.getContext()*/), - ArrayRef<Type*>(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)), + Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/), false); - if (!parentLevel || !parentReplFactor) { + if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) { // We only have one level in the hierarchy or the parent node is not // replicated. This indicates that the parent node is the kernel // launch, so the instances are global_size (gridDim x blockDim) @@ -1396,6 +1396,8 @@ void CGT_SPIR::codeGen(DFLeafNode* N) { case Intrinsic::sin: case Intrinsic::cos: case Intrinsic::sqrt: + case Intrinsic::floor: + case Intrinsic::nvvm_rsqrt_approx_f: { errs() << "Found math function: " << *II << "\n"; // Get the builtin function @@ -1415,38 +1417,6 @@ void CGT_SPIR::codeGen(DFLeafNode* N) { IItoRemove.push_back(II); break; } - case Intrinsic::floor: - { - errs() << "Found floor intrinsic\n"; - F = Intrinsic::getDeclaration(&KernelM, Intrinsic::nvvm_floor_f); - FunctionType* FTy = F->getFunctionType(); - DEBUG(errs() << *F << "\n"); - - // Create argument list - std::vector<Value*> args; - assert(CI->getNumArgOperands() == FTy->getNumParams() - && "Number of arguments of call do not match with Intrinsic"); - for(unsigned i=0; i < CI->getNumArgOperands(); i++) { - Value* V = CI->getArgOperand(i); - // Either the type should match or both should be of pointer type - assert(V->getType() == FTy->getParamType(i) || - (V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy()) - && "Dummy function call argument does not match with Intrinsic argument!"); - // If the types do not match, then both must be pointer type and pointer - // cast needs to be performed - if(V->getType() != FTy->getParamType(i)) { - V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI); - } - args.push_back(V); - } - // Insert call instruction - CallInst* Inst = CallInst::Create(F, args, - F->getReturnType()->isVoidTy()? "" : CI->getName(), CI); - DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n"); - CI->replaceAllUsesWith(Inst); - IItoRemove.push_back(II); - break; - } default: errs() << "[WARNING] Found Intrinsic: " << *II << "\n" ; } @@ -2003,6 +1973,8 @@ static std::string getMathFunctionName(Intrinsic::ID ID) { case Intrinsic::sin: return "_Z3sinf"; case Intrinsic::cos: return "_Z3cosf"; case Intrinsic::sqrt: return "_Z4sqrtf"; + case Intrinsic::floor: return "_Z5floorf"; + case Intrinsic::nvvm_rsqrt_approx_f: return "_Z5rsqrtf"; default: llvm_unreachable("Unsupported math function!"); };