diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index dd4cc894e4429251c6fbf8b339885fcecdc249d9..6926e6cfed65679b455152cffc311ae946dab314 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -141,15 +141,15 @@ private:
   Kernel* kernel;
 
   // VISC Runtime API
-  Constant* llvm_visc_ptx_launch;
-  Constant* llvm_visc_ptx_wait;
-  Constant* llvm_visc_ptx_initContext;
-  Constant* llvm_visc_ptx_clearContext;
-  Constant* llvm_visc_ptx_argument_scalar;
-  Constant* llvm_visc_ptx_argument_ptr;
-  Constant* llvm_visc_ptx_free;
-  Constant* llvm_visc_ptx_getOutput;
-  Constant* llvm_visc_ptx_executeNode;
+  Constant* llvm_visc_ocl_launch;
+  Constant* llvm_visc_ocl_wait;
+  Constant* llvm_visc_ocl_initContext;
+  Constant* llvm_visc_ocl_clearContext;
+  Constant* llvm_visc_ocl_argument_scalar;
+  Constant* llvm_visc_ocl_argument_ptr;
+  Constant* llvm_visc_ocl_free;
+  Constant* llvm_visc_ocl_getOutput;
+  Constant* llvm_visc_ocl_executeNode;
 
   //Functions
   std::string getKernelsModuleName(Module &M);
@@ -239,15 +239,15 @@ void CGT_NVPTX::initRuntimeAPI() {
     DEBUG(errs() << "Successfully loaded visc-rt API module\n");
 
   // Get or insert the global declarations for launch/wait functions
-  DECLARE(llvm_visc_ptx_launch);
-  DECLARE(llvm_visc_ptx_wait);
-  DECLARE(llvm_visc_ptx_initContext);
-  DECLARE(llvm_visc_ptx_clearContext);
-  DECLARE(llvm_visc_ptx_argument_scalar);
-  DECLARE(llvm_visc_ptx_argument_ptr);
-  DECLARE(llvm_visc_ptx_free);
-  DECLARE(llvm_visc_ptx_getOutput);
-  DECLARE(llvm_visc_ptx_executeNode);
+  DECLARE(llvm_visc_ocl_launch);
+  DECLARE(llvm_visc_ocl_wait);
+  DECLARE(llvm_visc_ocl_initContext);
+  DECLARE(llvm_visc_ocl_clearContext);
+  DECLARE(llvm_visc_ocl_argument_scalar);
+  DECLARE(llvm_visc_ocl_argument_ptr);
+  DECLARE(llvm_visc_ocl_free);
+  DECLARE(llvm_visc_ocl_getOutput);
+  DECLARE(llvm_visc_ocl_executeNode);
 
   // Get or insert timerAPI functions as well if you plan to use timers
   initTimerAPI();
@@ -260,7 +260,9 @@ void CGT_NVPTX::initRuntimeAPI() {
   InitCall = cast<Instruction>(*VI->use_begin());
   initializeTimerSet(InitCall);
   switchToTimer(visc_TimerID_INIT_CTX, InitCall);
-  CallInst::Create(llvm_visc_ptx_initContext, None, "", InitCall);
+  CallInst::Create(llvm_visc_ocl_initContext,
+                  ArrayRef<Value*>(getTargetID(M, visc::GPU_TARGET)),
+                  "", InitCall);
   switchToTimer(visc_TimerID_NONE, InitCall);
 
   // Insert print instruction at visc exit
@@ -362,7 +364,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
   Function* KF = K->KernelLeafNode->getFuncPointer();
   // Initialize context
   //DEBUG(errs() << "Initializing context" << "\n");
-  //CallInst::Create(llvm_visc_ptx_initContext, None, "", RI);
+  //CallInst::Create(llvm_visc_ocl_initContext, None, "", RI);
 
   DEBUG(errs() << "Initializing commandQ" << "\n");
   // Initialize command queue
@@ -375,7 +377,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
   Value* LaunchInstArgs[] = {fileStr, kernelStr};
 
   DEBUG(errs() << "Inserting launch call" << "\n");
-  CallInst* NVPTX_Ctx = CallInst::Create(llvm_visc_ptx_launch,
+  CallInst* NVPTX_Ctx = CallInst::Create(llvm_visc_ocl_launch,
                                          ArrayRef<Value*>(LaunchInstArgs, 2),
                                          "graph"+KF->getName(),
                                          InitCall);
@@ -444,7 +446,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
                                isInput,
                                isOutput
                               };
-      Value* d_ptr = CallInst::Create(llvm_visc_ptx_argument_ptr,
+      Value* d_ptr = CallInst::Create(llvm_visc_ocl_argument_ptr,
                                       ArrayRef<Value*>(setInputArgs, 6), "", RI);
       DevicePointers.push_back(d_ptr);
       // If this has out attribute, store the returned device pointer in
@@ -469,7 +471,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
                                ConstantInt::get(Type::getInt32Ty(M.getContext()),i),
                                ConstantExpr::getSizeOf(inputVal->getType())
                               };
-      CallInst::Create(llvm_visc_ptx_argument_scalar,
+      CallInst::Create(llvm_visc_ocl_argument_scalar,
                        ArrayRef<Value*>(setInputArgs, 4), "", RI);
     }
 
@@ -492,7 +494,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
                               True
                              };
 
-    d_Output = CallInst::Create(llvm_visc_ptx_argument_ptr,
+    d_Output = CallInst::Create(llvm_visc_ocl_argument_ptr,
                                 ArrayRef<Value*>(setOutputArgs, 6),
                                 "d_output."+KF->getName(),
                                 RI);
@@ -512,14 +514,14 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
                            LocalWGPtr,
                            GlobalWGPtr
                           };
-  CallInst* Event = CallInst::Create(llvm_visc_ptx_executeNode,
+  CallInst* Event = CallInst::Create(llvm_visc_ocl_executeNode,
                                      ArrayRef<Value*>(ExecNodeArgs, 4),
                                      "event."+KF->getName(),
                                      RI);
   DEBUG(errs() << "Execute Node Call: " << *Event << "\n");
 
   // Wait for Kernel to Finish
-  CallInst::Create(llvm_visc_ptx_wait,
+  CallInst::Create(llvm_visc_ocl_wait,
                    ArrayRef<Value*>(GraphID),
                    "",
                    RI);
@@ -532,7 +534,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
                               d_Output,
                               outputSize
                              };
-    CallInst* h_Output = CallInst::Create(llvm_visc_ptx_getOutput,
+    CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput,
                                           ArrayRef<Value*>(GetOutputArgs, 4),
                                           "h_output."+KF->getName()+".addr",
                                           RI);
@@ -554,7 +556,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
     DEBUG(errs() << "\t#bytes: " << *output.bytes << "\n");
 
     Value* GetOutputArgs[] = {GraphID, output.h_ptr, output.d_ptr, output.bytes};
-    CallInst* CI = CallInst::Create(llvm_visc_ptx_getOutput,
+    CallInst* CI = CallInst::Create(llvm_visc_ocl_getOutput,
                                     ArrayRef<Value*>(GetOutputArgs, 4),
                                     "", RI);
   }*/
@@ -563,12 +565,12 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fi
   DEBUG(errs() << "Clearing context" << "\n");
   // Free Device Memory
   for(auto d_ptr: DevicePointers) {
-    CallInst::Create(llvm_visc_ptx_free, ArrayRef<Value*>(d_ptr), "", RI);
+    CallInst::Create(llvm_visc_ocl_free, ArrayRef<Value*>(d_ptr), "", RI);
   }
   switchToTimer(visc_TimerID_CLEAR_CTX, CleanupCall);
   // Clear Context
   LoadInst* LI = new LoadInst(GraphIDAddr, "", CleanupCall);
-  CallInst::Create(llvm_visc_ptx_clearContext, ArrayRef<Value*>(LI), "", CleanupCall);
+  CallInst::Create(llvm_visc_ocl_clearContext, ArrayRef<Value*>(LI), "", CleanupCall);
   switchToTimer(visc_TimerID_NONE, CleanupCall);
 
   switchToTimer(visc_TimerID_MISC, RI);
@@ -660,12 +662,11 @@ void CGT_NVPTX::codeGen(DFInternalNode* N) {
     // Keep track of the arguments order.
     std::vector<unsigned> inmap1 = N->getInArgMap();
     std::vector<unsigned> inmap2 = kernel->getInArgMap();
-    // TODO: Verify when we have incoming edges from more than one nodes
-    // The limit is the size of inmap2, because this is the number of kernel arguments
-    for (unsigned i = 0; i < inmap2.size(); i++) {
-      inmap2[i] = inmap1[inmap2[i]];
-    }
-    kernel->setInArgMap(inmap2);
+    // TODO: Verify when we have incoming edges from more than one nodes The
+    // limit is the size of inmap2, because this is the number of kernel
+    // arguments
+    for (unsigned i = 0; i < inmap2.size(); i++) { inmap2[i] =
+      inmap1[inmap2[i]]; } kernel->setInArgMap(inmap2);
 
     // Keep track of the output arguments order.
     std::vector<unsigned> outmap1 = N->getOutArgMap();
@@ -931,10 +932,10 @@ void CGT_NVPTX::codeGen(DFLeafNode* N) {
       case Intrinsic::visc_getNumNodeInstances_x:
       case Intrinsic::visc_getNumNodeInstances_y:
       case Intrinsic::visc_getNumNodeInstances_z: {
-//TODO: think about whether this is the best way to go
-// there are hw specific registers. therefore it is good to have the intrinsic
-// but then, why do we need to keep that info in the graph?
-// (only for the kernel configuration during the call)
+        // TODO: think about whether this is the best way to go there are hw
+        // specific registers. therefore it is good to have the intrinsic but
+        // then, why do we need to keep that info in the graph?  (only for the
+        // kernel configuration during the call)
 
         DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumNodeInstances\n");
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
diff --git a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
index b29274d4e3a53b8745e7a6b916b303ff4441e1f6..a05fe6080262ccf4657650c6169900ac4e35d6f9 100644
--- a/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
@@ -27,6 +27,7 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Bitcode/ReaderWriter.h"
 #include "llvm/IR/Attributes.h"
+#include "llvm/SupportVISC/VISCHint.h"
 #include "llvm/SupportVISC/VISCTimer.h"
 #include "llvm/SupportVISC/DFG2LLVM.h"
 
@@ -144,15 +145,15 @@ private:
   Kernel* kernel;
 
   // VISC Runtime API
-  Constant* llvm_visc_ptx_launch;
-  Constant* llvm_visc_ptx_wait;
-  Constant* llvm_visc_ptx_initContext;
-  Constant* llvm_visc_ptx_clearContext;
-  Constant* llvm_visc_ptx_argument_scalar;
-  Constant* llvm_visc_ptx_argument_ptr;
-  Constant* llvm_visc_ptx_free;
-  Constant* llvm_visc_ptx_getOutput;
-  Constant* llvm_visc_ptx_executeNode;
+  Constant* llvm_visc_ocl_launch;
+  Constant* llvm_visc_ocl_wait;
+  Constant* llvm_visc_ocl_initContext;
+  Constant* llvm_visc_ocl_clearContext;
+  Constant* llvm_visc_ocl_argument_scalar;
+  Constant* llvm_visc_ocl_argument_ptr;
+  Constant* llvm_visc_ocl_free;
+  Constant* llvm_visc_ocl_getOutput;
+  Constant* llvm_visc_ocl_executeNode;
 
   //Functions
   std::string getKernelsModuleName(Module &M);
@@ -243,15 +244,15 @@ void CGT_SPIR::initRuntimeAPI() {
     DEBUG(errs() << "Successfully loaded visc-rt API module\n");
 
   // Get or insert the global declarations for launch/wait functions
-  DECLARE(llvm_visc_ptx_launch);
-  DECLARE(llvm_visc_ptx_wait);
-  DECLARE(llvm_visc_ptx_initContext);
-  DECLARE(llvm_visc_ptx_clearContext);
-  DECLARE(llvm_visc_ptx_argument_scalar);
-  DECLARE(llvm_visc_ptx_argument_ptr);
-  DECLARE(llvm_visc_ptx_free);
-  DECLARE(llvm_visc_ptx_getOutput);
-  DECLARE(llvm_visc_ptx_executeNode);
+  DECLARE(llvm_visc_ocl_launch);
+  DECLARE(llvm_visc_ocl_wait);
+  DECLARE(llvm_visc_ocl_initContext);
+  DECLARE(llvm_visc_ocl_clearContext);
+  DECLARE(llvm_visc_ocl_argument_scalar);
+  DECLARE(llvm_visc_ocl_argument_ptr);
+  DECLARE(llvm_visc_ocl_free);
+  DECLARE(llvm_visc_ocl_getOutput);
+  DECLARE(llvm_visc_ocl_executeNode);
 
   // Get or insert timerAPI functions as well if you plan to use timers
   initTimerAPI();
@@ -264,7 +265,9 @@ void CGT_SPIR::initRuntimeAPI() {
   InitCall = cast<Instruction>(*VI->use_begin());
   initializeTimerSet(InitCall);
   switchToTimer(visc_TimerID_INIT_CTX, InitCall);
-  CallInst::Create(llvm_visc_ptx_initContext, None, "", InitCall);
+  CallInst::Create(llvm_visc_ocl_initContext,
+                  ArrayRef<Value*>(getTargetID(M, visc::SPIR_TARGET)),
+                  "", InitCall);
   switchToTimer(visc_TimerID_NONE, InitCall);
 
   // Insert print instruction at visc exit
@@ -337,13 +340,13 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
   if(!N->isRoot())
     addIdxDimArgs(F_X86);
 
-  /* TODO: Use this code to verufy if this is a good pattern for PTX kernel
+  /* TODO: Use this code to verufy if this is a good pattern for OCL kernel
 
   // Sort children in topological order before code generation for kernel call
   N->getChildGraph()->sortChildren();
 
   // The DFNode N has the property that it has only one child (leaving Entry
-  // and Exit dummy nodes). This child is the PTX kernel. This simplifies code
+  // and Exit dummy nodes). This child is the OCL kernel. This simplifies code
   // generation for kernel calls significantly. All the inputs to this child
   // node would either be constants or from the parent node N.
 
@@ -366,7 +369,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
   Function* KF = K->KernelLeafNode->getFuncPointer();
   // Initialize context
   //DEBUG(errs() << "Initializing context" << "\n");
-  //CallInst::Create(llvm_visc_ptx_initContext, None, "", RI);
+  //CallInst::Create(llvm_visc_ocl_initContext, None, "", RI);
 
   DEBUG(errs() << "Initializing commandQ" << "\n");
   // Initialize command queue
@@ -379,7 +382,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
   Value* LaunchInstArgs[] = {fileStr, kernelStr};
 
   DEBUG(errs() << "Inserting launch call" << "\n");
-  CallInst* SPIR_Ctx = CallInst::Create(llvm_visc_ptx_launch,
+  CallInst* SPIR_Ctx = CallInst::Create(llvm_visc_ocl_launch,
                                          ArrayRef<Value*>(LaunchInstArgs, 2),
                                          "graph"+KF->getName(),
                                          InitCall);
@@ -448,7 +451,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
                                isInput,
                                isOutput
                               };
-      Value* d_ptr = CallInst::Create(llvm_visc_ptx_argument_ptr,
+      Value* d_ptr = CallInst::Create(llvm_visc_ocl_argument_ptr,
                                       ArrayRef<Value*>(setInputArgs, 6), "", RI);
       DevicePointers.push_back(d_ptr);
       // If this has out attribute, store the returned device pointer in
@@ -473,7 +476,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
                                ConstantInt::get(Type::getInt32Ty(M.getContext()),i),
                                ConstantExpr::getSizeOf(inputVal->getType())
                               };
-      CallInst::Create(llvm_visc_ptx_argument_scalar,
+      CallInst::Create(llvm_visc_ocl_argument_scalar,
                        ArrayRef<Value*>(setInputArgs, 4), "", RI);
     }
 
@@ -496,7 +499,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
                               True
                              };
 
-    d_Output = CallInst::Create(llvm_visc_ptx_argument_ptr,
+    d_Output = CallInst::Create(llvm_visc_ocl_argument_ptr,
                                 ArrayRef<Value*>(setOutputArgs, 6),
                                 "d_output."+KF->getName(),
                                 RI);
@@ -516,14 +519,14 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
                            LocalWGPtr,
                            GlobalWGPtr
                           };
-  CallInst* Event = CallInst::Create(llvm_visc_ptx_executeNode,
+  CallInst* Event = CallInst::Create(llvm_visc_ocl_executeNode,
                                      ArrayRef<Value*>(ExecNodeArgs, 4),
                                      "event."+KF->getName(),
                                      RI);
   DEBUG(errs() << "Execute Node Call: " << *Event << "\n");
 
   // Wait for Kernel to Finish
-  CallInst::Create(llvm_visc_ptx_wait,
+  CallInst::Create(llvm_visc_ocl_wait,
                    ArrayRef<Value*>(GraphID),
                    "",
                    RI);
@@ -536,7 +539,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
                               d_Output,
                               outputSize
                              };
-    CallInst* h_Output = CallInst::Create(llvm_visc_ptx_getOutput,
+    CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput,
                                           ArrayRef<Value*>(GetOutputArgs, 4),
                                           "h_output."+KF->getName()+".addr",
                                           RI);
@@ -558,7 +561,7 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
     DEBUG(errs() << "\t#bytes: " << *output.bytes << "\n");
 
     Value* GetOutputArgs[] = {GraphID, output.h_ptr, output.d_ptr, output.bytes};
-    CallInst* CI = CallInst::Create(llvm_visc_ptx_getOutput,
+    CallInst* CI = CallInst::Create(llvm_visc_ocl_getOutput,
                                     ArrayRef<Value*>(GetOutputArgs, 4),
                                     "", RI);
   }*/
@@ -567,12 +570,12 @@ void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& Fil
   DEBUG(errs() << "Clearing context" << "\n");
   // Free Device Memory
   for(auto d_ptr: DevicePointers) {
-    CallInst::Create(llvm_visc_ptx_free, ArrayRef<Value*>(d_ptr), "", RI);
+    CallInst::Create(llvm_visc_ocl_free, ArrayRef<Value*>(d_ptr), "", RI);
   }
   switchToTimer(visc_TimerID_CLEAR_CTX, CleanupCall);
   // Clear Context
   LoadInst* LI = new LoadInst(GraphIDAddr, "", CleanupCall);
-  CallInst::Create(llvm_visc_ptx_clearContext, ArrayRef<Value*>(LI), "", CleanupCall);
+  CallInst::Create(llvm_visc_ocl_clearContext, ArrayRef<Value*>(LI), "", CleanupCall);
   switchToTimer(visc_TimerID_NONE, CleanupCall);
 
   switchToTimer(visc_TimerID_MISC, RI);
@@ -769,33 +772,33 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
   // Look up if we have visited this function before. If we have, then just
   // get the cloned function pointer from DFNode. Otherwise, create the cloned
   // function and add it to the DFNode GenFunc.
-  Function *F_nvptx = N->getGenFunc();
-  if(F_nvptx == NULL) {
+  Function *F_spir = N->getGenFunc();
+  if(F_spir == NULL) {
     // Clone the function
     ValueToValueMapTy VMap;
 
-    F_nvptx = CloneFunction(F, VMap, true);
+    F_spir = CloneFunction(F, VMap, true);
 
     // Insert the cloned function into the kernels module
-    KernelM.getFunctionList().push_back(F_nvptx);
+    KernelM.getFunctionList().push_back(F_spir);
 
 
-    //TODO: Iterate over all the instructions of F_nvptx and identify the
+    //TODO: Iterate over all the instructions of F_spir and identify the
     //callees and clone them into this module.
-    DEBUG(errs() << *F_nvptx->getType());
-    DEBUG(errs() << *F_nvptx);
+    DEBUG(errs() << *F_spir->getType());
+    DEBUG(errs() << *F_spir);
 
     //Add generated function info to DFNode
-    N->setGenFunc(F_nvptx, visc::GPU_TARGET);
+    N->setGenFunc(F_spir, visc::SPIR_TARGET);
   } else {
     errs() << "WARNING: Visiting a node for which code already generated!\n";
   }
 
-  transformFunctionToVoid(F_nvptx);
-  removeInOutAttributes(F_nvptx);
+  transformFunctionToVoid(F_spir);
+  removeInOutAttributes(F_spir);
 
   // Go through all the instructions
-  for (inst_iterator i = inst_begin(F_nvptx), e = inst_end(F_nvptx); i != e; ++i) {
+  for (inst_iterator i = inst_begin(F_spir), e = inst_end(F_spir); i != e; ++i) {
     Instruction *I = &(*i);
     // Leaf nodes should not contain VISC graph intrinsics or launch
     assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!");
@@ -811,7 +814,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
       switch (II->getIntrinsicID()) {
       /**************************** llvm.visc.getNode() *****************************/
       case Intrinsic::visc_getNode: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNode\n");
+        DEBUG(errs() << F_spir->getName() << "\t: Handling getNode\n");
         // add mapping <intrinsic, this node> to the node-specific map
         Leaf_HandleToDFNodeMap[II] = N;
         IItoRemove.push_back(II);
@@ -819,7 +822,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
       break;
       /************************* llvm.visc.getParentNode() **************************/
       case Intrinsic::visc_getParentNode: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getParentNode\n");
+        DEBUG(errs() << F_spir->getName() << "\t: Handling getParentNode\n");
         // get the parent node of the arg node
         // get argument node
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
@@ -835,7 +838,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
       break;
       /*************************** llvm.visc.getNumDims() ***************************/
       case Intrinsic::visc_getNumDims: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumDims\n");
+        DEBUG(errs() << F_spir->getName() << "\t: Handling getNumDims\n");
         // get node from map
         // get the appropriate field
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
@@ -856,7 +859,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
       case Intrinsic::visc_getNodeInstanceID_x:
       case Intrinsic::visc_getNodeInstanceID_y:
       case Intrinsic::visc_getNodeInstanceID_z: {
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNodeInstanceID\n");
+        DEBUG(errs() << F_spir->getName() << "\t: Handling getNodeInstanceID\n");
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
         ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
         assert(ArgDFNode && "Arg node is NULL");
@@ -936,7 +939,7 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
 // but then, why do we need to keep that info in the graph?
 // (only for the kernel configuration during the call)
 
-        DEBUG(errs() << F_nvptx->getName() << "\t: Handling getNumNodeInstances\n");
+        DEBUG(errs() << F_spir->getName() << "\t: Handling getNumNodeInstances\n");
         ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
         ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
         // A leaf node always has a parent
@@ -1026,8 +1029,8 @@ void CGT_SPIR::codeGen(DFLeafNode* N) {
        re = IItoRemove.rend(); ri != re; ++ri)
     (*ri)->eraseFromParent();
 
-  addCLMetadata(F_nvptx);
-  kernel->KernelFunction = F_nvptx;
+  addCLMetadata(F_spir);
+  kernel->KernelFunction = F_spir;
   errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n";
   DEBUG(errs() << KernelM);
 
diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp
index 6170edf20bdd0ac76268fc666a3d08ae7a134372..f713105b917ecb71b3f3eacf9fe8b9fb6ba58c41 100644
--- a/llvm/projects/visc-rt/visc-rt.cpp
+++ b/llvm/projects/visc-rt/visc-rt.cpp
@@ -30,7 +30,7 @@ typedef struct {
   cl_command_queue clCommandQue;
   cl_program clProgram;
   cl_kernel clKernel;
-} DFNodeContext_PTX;
+} DFNodeContext_OCL;
 
 cl_context globalGPUContext;
 cl_command_queue globalCommandQue;
@@ -106,8 +106,8 @@ void llvm_visc_untrack_mem(void* ptr) {
 }
 
 
-static void* llvm_visc_ptx_request_mem(void* ptr, size_t size, DFNodeContext_PTX* Context, bool isInput, bool isOutput) {
-  DEBUG(cout << "[PTX] Request memory: " << ptr << " for context: " << Context->clGPUContext << "\n");
+static void* llvm_visc_ocl_request_mem(void* ptr, size_t size, DFNodeContext_OCL* Context, bool isInput, bool isOutput) {
+  DEBUG(cout << "[OCL] Request memory: " << ptr << " for context: " << Context->clGPUContext << "\n");
   MemTrackerEntry* MTE = MTracker.lookup(ptr);
   if (MTE == NULL) {
     MTracker.print();
@@ -116,7 +116,7 @@ static void* llvm_visc_ptx_request_mem(void* ptr, size_t size, DFNodeContext_PTX
   }
   // If already on device
   if (MTE->getLocation() == MemTrackerEntry::DEVICE &&
-      ((DFNodeContext_PTX*)MTE->getContext())->clGPUContext == Context->clGPUContext) {
+      ((DFNodeContext_OCL*)MTE->getContext())->clGPUContext == Context->clGPUContext) {
     DEBUG(cout << "\tMemory found on device at: " << MTE->getAddress() << "\n"); 
     return MTE->getAddress();
   }
@@ -159,7 +159,7 @@ void* llvm_visc_request_mem(void* ptr, size_t size) {
   // Else copy from device and update table
   DEBUG(cout << "\tMemory found on device at: " << MTE->getAddress() << "\n");
   DEBUG(cout << "\tCopying ...");
-  cl_int errcode = clEnqueueReadBuffer(((DFNodeContext_PTX*)MTE->getContext())->clCommandQue,
+  cl_int errcode = clEnqueueReadBuffer(((DFNodeContext_OCL*)MTE->getContext())->clCommandQue,
                                       (cl_mem) MTE->getAddress(),
                                       CL_TRUE,
                                       0,
@@ -951,7 +951,9 @@ void llvm_visc_x86_wait(void* graphID) {
   DEBUG(cout << "\t... pthread Done!\n");
 }
 
-void* llvm_visc_ptx_initContext() {
+void* llvm_visc_ocl_initContext(enum visc::Target T) {
+  DEBUG(std::string Target = T == visc::GPU_TARGET? "GPU" : "SPIR");
+  DEBUG(cout << "Initializing Context for " << Target << " device\n");
   cl_uint numPlatforms;
   cl_int errcode;
   errcode = clGetPlatformIDs(0, NULL, &numPlatforms);
@@ -985,9 +987,11 @@ void* llvm_visc_ptx_initContext() {
   assert(numPlatforms >= 2 && "Expecting two OpenCL platforms");
   // Choose second one which is X86 AVX
   cl_context_properties properties[] = {CL_CONTEXT_PLATFORM,
-                                        (long) platforms[1],
+                                        (long) platforms[T == visc::GPU_TARGET? 0 : 1],
                                         0};
-  globalGPUContext = clCreateContextFromType(properties, CL_DEVICE_TYPE_CPU,
+  globalGPUContext = clCreateContextFromType(properties,
+                                            T == visc::GPU_TARGET?
+                                              CL_DEVICE_TYPE_GPU : CL_DEVICE_TYPE_CPU,
                                          NULL, NULL, &errcode);
   free(platforms);
   DEBUG(cout << "\tContext " << globalGPUContext << "\n");
@@ -996,9 +1000,9 @@ void* llvm_visc_ptx_initContext() {
   return globalGPUContext;
 }
 
-void llvm_visc_ptx_clearContext(void* graphID) {
+void llvm_visc_ocl_clearContext(void* graphID) {
   DEBUG(cout << "Clear Context\n");
-  DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
+  DFNodeContext_OCL* Context = (DFNodeContext_OCL*) graphID;
   // FIXME: Have separate function to release command queue and clear context.
   // Would be useful when a context has multiple command queues
   clReleaseKernel(Context->clKernel);
@@ -1007,30 +1011,30 @@ void llvm_visc_ptx_clearContext(void* graphID) {
   //clReleaseContext(globalGPUContext);
   //DEBUG(cout << "Released context at: " << globalGPUContext);
   free(Context);
-  DEBUG(cout << "Done with PTX kernel\n");
+  DEBUG(cout << "Done with OCL kernel\n");
 
 }
 
-void llvm_visc_ptx_argument_scalar(void* graphID, void* input, int arg_index, size_t size) {
+void llvm_visc_ocl_argument_scalar(void* graphID, void* input, int arg_index, size_t size) {
   DEBUG(cout << "Set Scalar Input:");
   DEBUG(cout << "\tArgument Index = " << arg_index << ", Size = " << size << "\n");
-  DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
+  DFNodeContext_OCL* Context = (DFNodeContext_OCL*) graphID;
   DEBUG(cout << "Using Context: " << Context << "\n");
   DEBUG(cout << "Using clKernel: " << Context->clKernel << "\n");
   cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, size, input);
   checkErr(errcode, CL_SUCCESS, "Failure to set constant input argument");
 }
 
-void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size_t size, bool isInput, bool isOutput) {
+void* llvm_visc_ocl_argument_ptr(void* graphID, void* input, int arg_index, size_t size, bool isInput, bool isOutput) {
   DEBUG(cout << "Set Pointer Input:");
   DEBUG(cout << "\tArgument Index = " << arg_index << ", Ptr = " << input << ", Size = "<< size << "\n");
   // Size should be non-zero
   assert(size != 0 && "Size of data pointed to has to be non-zero!");
   DEBUG(cout << "\tInput = "<< isInput << "\tOutput = " << isOutput << "\n");
-  DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
+  DFNodeContext_OCL* Context = (DFNodeContext_OCL*) graphID;
   
   // Check with runtime the location of this memory 
-  cl_mem d_input = (cl_mem) llvm_visc_ptx_request_mem(input, size, Context, isInput, isOutput);
+  cl_mem d_input = (cl_mem) llvm_visc_ocl_request_mem(input, size, Context, isInput, isOutput);
   
   // Set Kernel Argument
   cl_int errcode = clSetKernelArg(Context->clKernel, arg_index, sizeof(cl_mem), (void*)&d_input);
@@ -1039,25 +1043,25 @@ void* llvm_visc_ptx_argument_ptr(void* graphID, void* input, int arg_index, size
   return d_input;
 }
 
-void llvm_visc_ptx_free(void* ptr) {
+void llvm_visc_ocl_free(void* ptr) {
   DEBUG(cout << "Release Device Pointer: " << ptr << "\n");
   //cl_mem d_ptr = (cl_mem) ptr;
   //clReleaseMemObject(d_ptr);
 }
 
-void* llvm_visc_ptx_getOutput(void* graphID, void* h_output, void* d_output, size_t size) {
+void* llvm_visc_ocl_getOutput(void* graphID, void* h_output, void* d_output, size_t size) {
   DEBUG(cout << "Get Output:\n");
   DEBUG(cout << "\tHostPtr = " << h_output << ", DevicePtr = " << d_output << ", Size = "<< size << "\n");
   if(h_output == NULL)
     h_output = malloc(size);
-  DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
+  DFNodeContext_OCL* Context = (DFNodeContext_OCL*) graphID;
   cl_int errcode = clEnqueueReadBuffer(Context->clCommandQue, (cl_mem)d_output, CL_TRUE, 0, size,
                                 h_output, 0, NULL, NULL);
   checkErr(errcode, CL_SUCCESS, "Failure to read output");
   return h_output;
 }
 
-void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
+void* llvm_visc_ocl_executeNode(void* graphID, unsigned workDim , const size_t*
                                 localWorkSize, const size_t* globalWorkSize) {
 
   size_t GlobalWG[3];
@@ -1077,7 +1081,7 @@ void* llvm_visc_ptx_executeNode(void* graphID, unsigned workDim , const size_t*
     }
   }
 
-  DFNodeContext_PTX* Context = (DFNodeContext_PTX*) graphID;
+  DFNodeContext_OCL* Context = (DFNodeContext_OCL*) graphID;
   // TODO: Would like to use event to ensure better scheduling of kernels.
   // Currently passing the event paratemeter results in seg fault with
   // clEnqueueNDRangeKernel.
@@ -1152,12 +1156,12 @@ static char* LoadProgSource(const char* Filename, size_t* szFinalLength)
   return cSourceString;
 }
 
-void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
-  DEBUG(cout << "Launch PTX Kernel\n");
+void* llvm_visc_ocl_launch(const char* FileName, const char* KernelName) {
+  DEBUG(cout << "Launch OCL Kernel\n");
   // Initialize OpenCL
 
   // OpenCL specific variables
-  DFNodeContext_PTX *Context = (DFNodeContext_PTX *) malloc(sizeof(DFNodeContext_PTX));
+  DFNodeContext_OCL *Context = (DFNodeContext_OCL *) malloc(sizeof(DFNodeContext_OCL));
 
   size_t dataBytes;
   size_t kernelLength;
@@ -1222,9 +1226,9 @@ void* llvm_visc_ptx_launch(const char* FileName, const char* KernelName) {
 }
 
 
-void llvm_visc_ptx_wait(void* graphID) {
+void llvm_visc_ocl_wait(void* graphID) {
   DEBUG(cout << "Wait\n");
-  DFNodeContext_PTX *Context = (DFNodeContext_PTX*) graphID;
+  DFNodeContext_OCL *Context = (DFNodeContext_OCL*) graphID;
   clFinish(Context->clCommandQue);
 }
 
diff --git a/llvm/projects/visc-rt/visc-rt.h b/llvm/projects/visc-rt/visc-rt.h
index db999bd6c5bd91725ac7ba979efbb1b5f9fca847..799e3618b9eccd5ec84e6ef1a345dd7fd20ff4ab 100644
--- a/llvm/projects/visc-rt/visc-rt.h
+++ b/llvm/projects/visc-rt/visc-rt.h
@@ -1,4 +1,5 @@
 /*
+ *
  * (c) 2010 The Board of Trustees of the University of Illinois.
  */
 #ifndef VISC_RT_HEADER
@@ -8,6 +9,7 @@
 #include <map>
 #include <ctime>
 #include "llvm/Support/CommandLine.h"
+#include "llvm/SupportVISC/VISCHint.h"
 #include "llvm/SupportVISC/VISCTimer.h"
 
 using namespace std;
@@ -157,16 +159,16 @@ void* llvm_visc_request_mem(void*, size_t);
 /*********************** OPENCL & PTHREAD API **************************/
 void* llvm_visc_x86_launch(void* (void*), void*);
 void llvm_visc_x86_wait(void*);
-void* llvm_visc_ptx_initContext();
-
-void llvm_visc_ptx_clearContext(void*);
-void llvm_visc_ptx_argument_scalar(void*, void*, int, size_t);
-void* llvm_visc_ptx_argument_ptr(void*, void*, int, size_t, bool, bool);
-void llvm_visc_ptx_free(void*);
-void* llvm_visc_ptx_getOutput(void*, void*, void*, size_t);
-void* llvm_visc_ptx_executeNode(void*, unsigned, const size_t*, const size_t*);
-void* llvm_visc_ptx_launch(const char*, const char*);
-void llvm_visc_ptx_wait(void*);
+void* llvm_visc_ocl_initContext(enum visc::Target);
+
+void llvm_visc_ocl_clearContext(void*);
+void llvm_visc_ocl_argument_scalar(void*, void*, int, size_t);
+void* llvm_visc_ocl_argument_ptr(void*, void*, int, size_t, bool, bool);
+void llvm_visc_ocl_free(void*);
+void* llvm_visc_ocl_getOutput(void*, void*, void*, size_t);
+void* llvm_visc_ocl_executeNode(void*, unsigned, const size_t*, const size_t*);
+void* llvm_visc_ocl_launch(const char*, const char*);
+void llvm_visc_ocl_wait(void*);
 
 void llvm_visc_switchToTimer(void** timerSet, enum visc_TimerID);
 void llvm_visc_printTimerSet(void** timerSet, char* timerName = NULL);