diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index dac22fe3d84256f85f391f3fa77bc1e9b08a6914..e9660aa6e7a4aad995c6003fce9697a8304f3ccf 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -1250,33 +1250,25 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
         Function * OpenCLFunction;
         int parentLevel = ParentDFNode->getLevel();
         int parentReplFactor = ParentDFNode->getNumOfDim();
+        FunctionType* FT =
+            FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/),
+                              Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/),
+                              false);
 
         if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) {
           // We only have one level in the hierarchy or the parent node is not
           // replicated. This indicates that the parent node is the kernel
           // launch, so the instances are global_size (gridDim x blockDim)
-          FunctionType* FT =
-            FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/),
-                              std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)),
-                              false);
           OpenCLFunction = cast<Function>
                            (KernelM.getOrInsertFunction(StringRef("get_global_size"), FT));
         } else if (Leaf_HandleToDFNodeMap[ArgII] == N) {
           // We are asking for this node's instances
           // this is a local size (block dim) call
-          FunctionType* FT =
-            FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/),
-                              std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)),
-                              false);
           OpenCLFunction = cast<Function>
                            (KernelM.getOrInsertFunction(StringRef("get_local_size"), FT));
         } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) {
           // We are asking for this node's parent's instances
           // this is a (global_size/local_size) (grid dim) call
-          FunctionType* FT =
-            FunctionType::get(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/),
-                              std::vector<Type*>(1, Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)),
-                              false);
           OpenCLFunction = cast<Function>
                            (KernelM.getOrInsertFunction(StringRef("get_num_groups"), FT));
         } else {
diff --git a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
index b9f8865067fa269884e42c620114d763f556eac4..31103c218bca660f11563f98093c93b2cda2b353 100644
--- a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
@@ -1275,9 +1275,9 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
 
         FunctionType* FT =
             FunctionType::get(Type::getInt64Ty(getGlobalContext() /*KernelM.getContext()*/),           
-                              ArrayRef<Type*>(Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/)),
+                              Type::getInt32Ty(getGlobalContext() /*KernelM.getContext()*/),
                               false);
-        if (!parentLevel || !parentReplFactor) {
+        if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) {
           // We only have one level in the hierarchy or the parent node is not
           // replicated. This indicates that the parent node is the kernel
           // launch, so the instances are global_size (gridDim x blockDim)
@@ -1396,6 +1396,8 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
           case Intrinsic::sin:
           case Intrinsic::cos:
           case Intrinsic::sqrt:
+          case Intrinsic::floor:
+          case Intrinsic::nvvm_rsqrt_approx_f:
           {
             errs() << "Found math function: " << *II << "\n";
             // Get the builtin function
@@ -1415,38 +1417,6 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
             IItoRemove.push_back(II);
             break;
           }
-          case Intrinsic::floor:
-          {
-            errs() << "Found floor intrinsic\n";
-            F = Intrinsic::getDeclaration(&KernelM, Intrinsic::nvvm_floor_f);
-            FunctionType* FTy = F->getFunctionType();
-            DEBUG(errs() << *F << "\n");
-
-            // Create argument list
-            std::vector<Value*> args;
-            assert(CI->getNumArgOperands() == FTy->getNumParams()
-                   && "Number of arguments of call do not match with Intrinsic");
-            for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
-              Value* V = CI->getArgOperand(i);
-              // Either the type should match or both should be of pointer type
-              assert(V->getType() == FTy->getParamType(i) ||
-                     (V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy())
-                     && "Dummy function call argument does not match with Intrinsic argument!");
-              // If the types do not match, then both must be pointer type and pointer
-              // cast needs to be performed
-              if(V->getType() != FTy->getParamType(i)) {
-                V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI);
-              }
-              args.push_back(V);
-            }
-            // Insert call instruction
-            CallInst* Inst = CallInst::Create(F, args,
-                  F->getReturnType()->isVoidTy()? "" : CI->getName(), CI);
-            DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n");
-            CI->replaceAllUsesWith(Inst);
-            IItoRemove.push_back(II);
-            break;
-          }
           default:
             errs() << "[WARNING] Found Intrinsic: " << *II << "\n" ;
           }
@@ -2003,6 +1973,8 @@ static std::string getMathFunctionName(Intrinsic::ID ID) {
     case Intrinsic::sin: return "_Z3sinf";
     case Intrinsic::cos: return "_Z3cosf";
     case Intrinsic::sqrt: return "_Z4sqrtf";
+    case Intrinsic::floor: return "_Z5floorf";
+    case Intrinsic::nvvm_rsqrt_approx_f: return "_Z5rsqrtf";
     default:
       llvm_unreachable("Unsupported math function!");
   };