diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 768742b53e4ad9628e040147b80a0cd039269674..ac93e5f79727523b671c03f0bd9078a42727e794 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -1242,7 +1242,7 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
         int parentLevel = ParentDFNode->getLevel();
         int parentReplFactor = ParentDFNode->getNumOfDim();
 
-        if (!parentLevel || !parentReplFactor) {
+        if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) {
           // We only have one level in the hierarchy or the parent node is not
           // replicated. This indicates that the parent node is the kernel
           // launch, so the instances are global_size (gridDim x blockDim)
diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
index 5f9f6cb12fc28f64ca1b2ba72c54b27035cdf3f5..75280bd50d674be701b7b59f36f75125ceed23e0 100644
--- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
@@ -223,6 +223,12 @@ std::vector<IntrinsicInst*>* CGT_X86::getUseList(Value* GraphID) {
     if(IntrinsicInst* waitI = dyn_cast<IntrinsicInst>(*ui)) {
       UseList->push_back(waitI);
     }
+    //else if (PHINode* PN = dyn_cast<PHINode>(*ui)){
+      //errs() << "Found PhiNode use of graphID\n";
+      //std::vector<IntrinsicInst*>* phiUseList  = getUseList(PN);
+      //UseList->insert(UseList->end(), phiUseList->begin(), phiUseList->end());
+      //free(phiUseList);
+    //}
     else {
       llvm_unreachable("Error: Operation on Graph ID not supported!\n");
     }
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index 8957ebe2c8a5b099e4f930a90e748ededb780fae..1deb088a2d7e4e98ec91ff2a483d6c3b92d7ecc9 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -52,29 +52,43 @@ static void ReplaceCallWithIntrinsic(Instruction* I, Intrinsic::ID IntrinsicID,
 
   // Find the correct intrinsic call
   Module* M = CI->getParent()->getParent()->getParent();
-  Function* F = Intrinsic::getDeclaration(M, IntrinsicID);
-  FunctionType* FTy = F->getFunctionType();
-  DEBUG(errs() << *F << "\n");
-
-  // Create argument list
-  assert(CI->getNumArgOperands() == FTy->getNumParams()
-      && "Number of arguments of call do not match with Intrinsic");
+  Function* F;
+  std::vector<Type*> ArgTypes;
   std::vector<Value*> args;
-  for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
-    Value* V = CI->getArgOperand(i);
-    // Either the type should match or both should be of pointer type
-    assert(V->getType() == FTy->getParamType(i) ||
-        (V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy())
-        && "Dummy function call argument does not match with Intrinsic argument!");
-    // If the types do not match, then both must be pointer type and pointer
-    // cast needs to be performed
-    if(V->getType() != FTy->getParamType(i)) {
-      V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI);
+  if(Intrinsic::isOverloaded(IntrinsicID)) {
+    // This is an overloaded intrinsic, so the argument types must match
+    // exactly. Collect each call argument together with its type.
+    for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
+      ArgTypes.push_back(CI->getArgOperand(i)->getType());
+      args.push_back(CI->getArgOperand(i));
+    }
+    F = Intrinsic::getDeclaration(M, IntrinsicID, ArgTypes);
+    DEBUG(errs() << *F << "\n");
+  }
+  else { // Non-overloaded intrinsic
+    F = Intrinsic::getDeclaration(M, IntrinsicID);
+    FunctionType* FTy = F->getFunctionType();
+    DEBUG(errs() << *F << "\n");
+
+    // Create argument list
+    assert(CI->getNumArgOperands() == FTy->getNumParams()
+        && "Number of arguments of call do not match with Intrinsic");
+    for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
+      Value* V = CI->getArgOperand(i);
+      // Either the type should match or both should be of pointer type
+      assert(V->getType() == FTy->getParamType(i) ||
+          (V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy())
+          && "Dummy function call argument does not match with Intrinsic argument!");
+      // If the types do not match, then both must be pointer type and pointer
+      // cast needs to be performed
+      if(V->getType() != FTy->getParamType(i)) {
+        V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI);
+      }
+      args.push_back(V);
     }
-    args.push_back(V);
   }
   // Insert call instruction
-  CallInst* Inst = CallInst::Create(F, args, CI->getName(), CI);
+  CallInst* Inst = CallInst::Create(F, args, F->getReturnType()->isVoidTy()? "" : CI->getName(), CI);
 
   DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n");
 
@@ -118,6 +132,11 @@ IS_VISC_CALL(atomic_max)
 IS_VISC_CALL(atomic_and)
 IS_VISC_CALL(atomic_or)
 IS_VISC_CALL(atomic_xor)
+// Miscellaneous math functions
+IS_VISC_CALL(floor)
+IS_VISC_CALL(rsqrt)
+IS_VISC_CALL(sqrt)
+
 
 IS_VISC_CALL(init)
 IS_VISC_CALL(node)
@@ -336,6 +355,12 @@ static std::vector<CallInst*>* getWaitList(Value* GraphID) {
              && "GraphID can only be used by __visc__wait call");
       WaitList->push_back(waitI);
     }
+    //else if (PHINode* PN = dyn_cast<PHINode>(*ui)){
+      //errs() << "Found PhiNode use of graphID\n";
+      //std::vector<CallInst*>* phiWaitList  = getWaitList(PN);
+      //WaitList->insert(WaitList->end(), phiWaitList->begin(), phiWaitList->end());
+      //free(phiWaitList);
+    //}
     else {
       DEBUG(errs() << *(*ui) << "\n");
       llvm_unreachable("Error: Operation on Graph ID not supported!\n");
@@ -976,7 +1001,7 @@ bool GenVISC::runOnModule(Module &M) {
                                      CI->getArgOperand(2), CI->getArgOperand(3)
                                     };
         CallInst* CreateNode3DInst = CallInst::Create(CreateNode3DF,
-                                     ArrayRef<Value*>(CreateNode3DArgs, 2),
+                                     ArrayRef<Value*>(CreateNode3DArgs, 4),
                                      graphFunc->getName()+".node", CI);
         DEBUG(errs() << "Found visc createNode3D call: " << *CI << "\n");
         DEBUG(errs() << "\tSubstitute with: " << *CreateNode3DInst << "\n");
@@ -1146,6 +1171,15 @@ bool GenVISC::runOnModule(Module &M) {
       if (isVISCCall_atomic_xor(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_xor, &toBeErased);
       }
+      if (isVISCCall_floor(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::nvvm_floor_f, &toBeErased);
+      }
+      if (isVISCCall_rsqrt(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::nvvm_rsqrt_approx_f, &toBeErased);
+      }
+      if (isVISCCall_sqrt(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::nvvm_sqrt_f, &toBeErased);
+      }
     }
   }