diff --git a/hpvm/docs/compilation.md b/hpvm/docs/compilation.md
index 8e68d00174b6fb63bfb647a0dbee1aa5dbd10b6a..6381fec7d856c79fdd2ed31bc23fe02990c9e38d 100644
--- a/hpvm/docs/compilation.md
+++ b/hpvm/docs/compilation.md
@@ -5,11 +5,11 @@ Compilation of an HPVM program involves the following steps:
 2. `opt` takes (`main.ll`) and invoke the GenHPVM pass on it, which converts the HPVM-C function calls to HPVM intrinsics. This generates the HPVM textual representation (`main.hpvm.ll`).
 3. `opt` takes the HPVM textual representation (`main.hpvm.ll`) and invokes the following passes in sequence: 
     * BuildDFG: Converts the textual representation to the internal HPVM representation.
-    * LocalMem and DFG2LLVM_NVPTX: Invoked only when GPU target is selected. Generates the kernel module (`main.kernels.ll`) and the portion of the host code that invokes the kernel into the host module (`main.host.ll`).
-    * DFG2LLVM_X86: Generates either all, or the remainder of the host module (`main.host.ll`) depending on the chosen target.
+    * LocalMem and DFG2LLVM_OpenCL: Invoked only when GPU target is selected. Generates the kernel module (`main.kernels.ll`) and the portion of the host code that invokes the kernel into the host module (`main.host.ll`).
+    * DFG2LLVM_CPU: Generates either all, or the remainder of the host module (`main.host.ll`) depending on the chosen target.
     * ClearDFG: Deletes the internal HPVM representation from memory.
 4. `clang` is used to to compile any remaining project files that would be later linked with the host module.
 5. `llvm-link` takes the host module and all the other generate `ll` files, and links them with the HPVM runtime module (`hpvm-rt.bc`), to generate the linked host module (`main.host.linked.ll`). 
 6. Generate the executable code from the generated `ll` files for all parts of the program:
     * GPU target: `llvm-cbe` takes the kernel module (`main.kernels.ll`) and generates an OpenCL representation of the kernels that will be invoked by the host.
-    * X86 target: `clang` takes the linked  host module (`main.host.linked.ll`) and generates the X86 binary.
+    * CPU target: `clang` takes the linked host module (`main.host.linked.ll`) and generates the CPU binary.
diff --git a/hpvm/include/SupportHPVM/DFGraph.h b/hpvm/include/SupportHPVM/DFGraph.h
index d904e2401d7e9a58a38e9bca024de1a437cd56d1..2deb2ca8f5c17620da0ddf60e1ef269acde52235 100644
--- a/hpvm/include/SupportHPVM/DFGraph.h
+++ b/hpvm/include/SupportHPVM/DFGraph.h
@@ -51,11 +51,11 @@ struct TargetGenFunctions {
 };
 
 struct TargetGenFuncInfo {
-  bool cpu_hasX86Func;
-  bool gpu_hasX86Func;
-  bool spir_hasX86Func;
-  bool cudnn_hasX86Func;
-  bool promise_hasX86Func;
+  bool cpu_hasCPUFunc;
+  bool gpu_hasCPUFunc;
+  bool spir_hasCPUFunc;
+  bool cudnn_hasCPUFunc;
+  bool promise_hasCPUFunc;
 };
 
 class DFGraph {
@@ -191,7 +191,7 @@ private:
   ///< (if multiple are available)
   struct TargetGenFuncInfo GenFuncInfo;
   ///< True for each target generated function
-  ///< if the associated genFunc is an x86 function
+  ///< if the associated genFunc is a CPU function
   DFInternalNode *Parent;         ///< Pointer to parent dataflow Node
   unsigned NumOfDim;              ///< Number of dimensions
   std::vector<Value *> DimLimits; ///< Number of instances in each dimension
@@ -349,15 +349,15 @@ public:
 
   Function *getGenFunc() const { return GenFunc; }
 
-  void setHasX86FuncForTarget(hpvm::Target T, bool isX86Func) {
+  void setHasCPUFuncForTarget(hpvm::Target T, bool isCPUFunc) {
     switch (T) {
     case hpvm::None:
       return; // Do nothing.
     case hpvm::CPU_TARGET:
-      GenFuncInfo.cpu_hasX86Func = isX86Func;
+      GenFuncInfo.cpu_hasCPUFunc = isCPUFunc;
       break;
     case hpvm::GPU_TARGET:
-      GenFuncInfo.gpu_hasX86Func = isX86Func;
+      GenFuncInfo.gpu_hasCPUFunc = isCPUFunc;
       break;
     case hpvm::CPU_OR_GPU_TARGET:
       break;
@@ -368,14 +368,14 @@ public:
     return;
   }
 
-  bool hasX86GenFuncForTarget(hpvm::Target T) const {
+  bool hasCPUGenFuncForTarget(hpvm::Target T) const {
     switch (T) {
     case hpvm::None:
       return false;
     case hpvm::CPU_TARGET:
-      return GenFuncInfo.cpu_hasX86Func;
+      return GenFuncInfo.cpu_hasCPUFunc;
     case hpvm::GPU_TARGET:
-      return GenFuncInfo.gpu_hasX86Func;
+      return GenFuncInfo.gpu_hasCPUFunc;
     case hpvm::CPU_OR_GPU_TARGET:
       assert(false && "Single target expected (CPU/GPU/SPIR/CUDNN/PROMISE)\n");
     default:
@@ -384,7 +384,7 @@ public:
     return false;
   }
 
-  void addGenFunc(Function *F, hpvm::Target T, bool isX86Func) {
+  void addGenFunc(Function *F, hpvm::Target T, bool isCPUFunc) {
 
     switch (T) {
     case hpvm::CPU_TARGET:
@@ -393,7 +393,7 @@ public:
                      << FuncPointer->getName() << "\n");
       }
       GenFuncs.CPUGenFunc = F;
-      GenFuncInfo.cpu_hasX86Func = isX86Func;
+      GenFuncInfo.cpu_hasCPUFunc = isCPUFunc;
       break;
     case hpvm::GPU_TARGET:
       if (GenFuncs.GPUGenFunc != NULL) {
@@ -401,7 +401,7 @@ public:
                      << FuncPointer->getName() << "\n");
       }
       GenFuncs.GPUGenFunc = F;
-      GenFuncInfo.gpu_hasX86Func = isX86Func;
+      GenFuncInfo.gpu_hasCPUFunc = isCPUFunc;
       break;
     case hpvm::CPU_OR_GPU_TARGET:
       assert(false && "A node function should be set with a tag specifying its \
@@ -437,11 +437,11 @@ public:
       return;
     case hpvm::CPU_TARGET:
       GenFuncs.CPUGenFunc = NULL;
-      GenFuncInfo.cpu_hasX86Func = false;
+      GenFuncInfo.cpu_hasCPUFunc = false;
       break;
     case hpvm::GPU_TARGET:
       GenFuncs.GPUGenFunc = NULL;
-      GenFuncInfo.gpu_hasX86Func = false;
+      GenFuncInfo.gpu_hasCPUFunc = false;
       break;
     case hpvm::CPU_OR_GPU_TARGET:
       assert(false &&
@@ -690,11 +690,11 @@ DFNode::DFNode(IntrinsicInst *_II, Function *_FuncPointer, hpvm::Target _Hint,
   GenFuncs.CUDNNGenFunc = NULL;
   GenFuncs.PROMISEGenFunc = NULL;
 
-  GenFuncInfo.cpu_hasX86Func = false;
-  GenFuncInfo.gpu_hasX86Func = false;
-  GenFuncInfo.spir_hasX86Func = false;
-  GenFuncInfo.cudnn_hasX86Func = false;
-  GenFuncInfo.cudnn_hasX86Func = false;
+  GenFuncInfo.cpu_hasCPUFunc = false;
+  GenFuncInfo.gpu_hasCPUFunc = false;
+  GenFuncInfo.spir_hasCPUFunc = false;
+  GenFuncInfo.cudnn_hasCPUFunc = false;
+  GenFuncInfo.promise_hasCPUFunc = false;
 }
 
 void DFNode::setRank(unsigned r) {
diff --git a/hpvm/lib/Transforms/CMakeLists.txt b/hpvm/lib/Transforms/CMakeLists.txt
index 5c9b8b9fe026ea5612caa124535e02d28d619c53..74917773b04146456b84db9b2bbf0814cd9bf387 100644
--- a/hpvm/lib/Transforms/CMakeLists.txt
+++ b/hpvm/lib/Transforms/CMakeLists.txt
@@ -1,6 +1,6 @@
 add_subdirectory(BuildDFG)
 add_subdirectory(ClearDFG)
-add_subdirectory(DFG2LLVM_NVPTX)
-add_subdirectory(DFG2LLVM_X86)
+add_subdirectory(DFG2LLVM_OpenCL)
+add_subdirectory(DFG2LLVM_CPU)
 add_subdirectory(GenHPVM)
 add_subdirectory(LocalMem)
diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_CPU/CMakeLists.txt
similarity index 79%
rename from hpvm/lib/Transforms/DFG2LLVM_X86/CMakeLists.txt
rename to hpvm/lib/Transforms/DFG2LLVM_CPU/CMakeLists.txt
index 0a3a225f1967dd73d44d1401a2bc45cb8d43ee69..b4e129ba01837cf328912f7787b861f843f4f581 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_X86/CMakeLists.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/CMakeLists.txt
@@ -4,9 +4,9 @@ endif()
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLLVM_BUILD_DIR=${PROJECT_BINARY_DIR}")
 
-add_llvm_library( LLVMDFG2LLVM_X86
+add_llvm_library( LLVMDFG2LLVM_CPU
   MODULE
-  DFG2LLVM_X86.cpp
+  DFG2LLVM_CPU.cpp
 
   DEPENDS intrinsics_gen
   PLUGIN_TOOL
diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
similarity index 88%
rename from hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
rename to hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
index 633afee593d01d6a579905cfd0f85f66e3060968..3f9f3101a3b0025a67ff432684163d6b859c6eb8 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
@@ -1,4 +1,4 @@
-//===-------------------------- DFG2LLVM_X86.cpp --------------------------===//
+//===-------------------------- DFG2LLVM_CPU.cpp --------------------------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -7,7 +7,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "DFG2LLVM_X86"
+#define DEBUG_TYPE "DFG2LLVM_CPU"
 #include "SupportHPVM/DFG2LLVM.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
@@ -34,15 +34,15 @@ using namespace builddfg;
 using namespace dfg2llvm;
 
 // HPVM Command line option to use timer or not
-static cl::opt<bool> HPVMTimer_X86("hpvm-timers-x86",
+static cl::opt<bool> HPVMTimer_CPU("hpvm-timers-cpu",
                                    cl::desc("Enable hpvm timers"));
 
 namespace {
 
-// DFG2LLVM_X86 - The first implementation.
-struct DFG2LLVM_X86 : public DFG2LLVM {
+// DFG2LLVM_CPU - The first implementation.
+struct DFG2LLVM_CPU : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_X86() : DFG2LLVM(ID) {}
+  DFG2LLVM_CPU() : DFG2LLVM(ID) {}
 
 private:
   // Member variables
@@ -54,16 +54,16 @@ public:
 };
 
 // Visitor for Code generation traversal (tree traversal for now)
-class CGT_X86 : public CodeGenTraversal {
+class CGT_CPU : public CodeGenTraversal {
 
 private:
   // Member variables
 
   FunctionCallee malloc;
   // HPVM Runtime API
-  FunctionCallee llvm_hpvm_x86_launch;
-  FunctionCallee llvm_hpvm_x86_wait;
-  FunctionCallee llvm_hpvm_x86_argument_ptr;
+  FunctionCallee llvm_hpvm_cpu_launch;
+  FunctionCallee llvm_hpvm_cpu_wait;
+  FunctionCallee llvm_hpvm_cpu_argument_ptr;
 
   FunctionCallee llvm_hpvm_streamLaunch;
   FunctionCallee llvm_hpvm_streamPush;
@@ -76,10 +76,10 @@ private:
   FunctionCallee llvm_hpvm_createThread;
   FunctionCallee llvm_hpvm_bufferPush;
   FunctionCallee llvm_hpvm_bufferPop;
-  FunctionCallee llvm_hpvm_x86_dstack_push;
-  FunctionCallee llvm_hpvm_x86_dstack_pop;
-  FunctionCallee llvm_hpvm_x86_getDimLimit;
-  FunctionCallee llvm_hpvm_x86_getDimInstance;
+  FunctionCallee llvm_hpvm_cpu_dstack_push;
+  FunctionCallee llvm_hpvm_cpu_dstack_pop;
+  FunctionCallee llvm_hpvm_cpu_getDimLimit;
+  FunctionCallee llvm_hpvm_cpu_getDimInstance;
 
   // Functions
   std::vector<IntrinsicInst *> *getUseList(Value *LI);
@@ -87,11 +87,11 @@ private:
   void addWhileLoop(Instruction *, Instruction *, Instruction *, Value *);
   Instruction *addWhileLoopCounter(BasicBlock *, BasicBlock *, BasicBlock *);
   Argument *getArgumentFromEnd(Function *F, unsigned offset);
-  Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
+  Value *getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
                       Instruction *InsertBefore);
-  void invokeChild_X86(DFNode *C, Function *F_X86, ValueToValueMapTy &VMap,
+  void invokeChild_CPU(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap,
                        Instruction *InsertBefore);
-  void invokeChild_PTX(DFNode *C, Function *F_X86, ValueToValueMapTy &VMap,
+  void invokeChild_PTX(DFNode *C, Function *F_CPU, ValueToValueMapTy &VMap,
                        Instruction *InsertBefore);
   StructType *getArgumentListStructTy(DFNode *);
   Function *createFunctionFilter(DFNode *C);
@@ -102,8 +102,8 @@ private:
 
   // Virtual Functions
   void init() {
-    HPVMTimer = HPVMTimer_X86;
-    TargetName = "X86";
+    HPVMTimer = HPVMTimer_CPU;
+    TargetName = "CPU";
   }
   void initRuntimeAPI();
   void codeGen(DFInternalNode *N);
@@ -113,7 +113,7 @@ private:
 
 public:
   // Constructor
-  CGT_X86(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG) {
+  CGT_CPU(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG) {
     init();
     initRuntimeAPI();
   }
@@ -122,8 +122,8 @@ public:
   void codeGenLaunchStreaming(DFInternalNode *Root);
 };
 
-bool DFG2LLVM_X86::runOnModule(Module &M) {
-  DEBUG(errs() << "\nDFG2LLVM_X86 PASS\n");
+bool DFG2LLVM_CPU::runOnModule(Module &M) {
+  DEBUG(errs() << "\nDFG2LLVM_CPU PASS\n");
 
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
@@ -136,7 +136,7 @@ bool DFG2LLVM_X86::runOnModule(Module &M) {
   // BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();
 
   // Visitor for Code Generation Graph Traversal
-  CGT_X86 *CGTVisitor = new CGT_X86(M, DFG);
+  CGT_CPU *CGTVisitor = new CGT_CPU(M, DFG);
 
   // Iterate over all the DFGs and produce code for each one of them
   for (auto &rootNode : Roots) {
@@ -160,7 +160,7 @@ bool DFG2LLVM_X86::runOnModule(Module &M) {
 }
 
 // Initialize the HPVM runtime API. This makes it easier to insert these calls
-void CGT_X86::initRuntimeAPI() {
+void CGT_CPU::initRuntimeAPI() {
 
   // Load Runtime API Module
   SMDiagnostic Err;
@@ -176,10 +176,10 @@ void CGT_X86::initRuntimeAPI() {
     DEBUG(errs() << "Successfully loaded hpvm-rt API module\n");
 
   // Get or insert the global declarations for launch/wait functions
-  DECLARE(llvm_hpvm_x86_launch);
+  DECLARE(llvm_hpvm_cpu_launch);
   DECLARE(malloc);
-  DECLARE(llvm_hpvm_x86_wait);
-  DECLARE(llvm_hpvm_x86_argument_ptr);
+  DECLARE(llvm_hpvm_cpu_wait);
+  DECLARE(llvm_hpvm_cpu_argument_ptr);
   DECLARE(llvm_hpvm_streamLaunch);
   DECLARE(llvm_hpvm_streamPush);
   DECLARE(llvm_hpvm_streamPop);
@@ -191,10 +191,10 @@ void CGT_X86::initRuntimeAPI() {
   DECLARE(llvm_hpvm_createThread);
   DECLARE(llvm_hpvm_bufferPush);
   DECLARE(llvm_hpvm_bufferPop);
-  DECLARE(llvm_hpvm_x86_dstack_push);
-  DECLARE(llvm_hpvm_x86_dstack_pop);
-  DECLARE(llvm_hpvm_x86_getDimLimit);
-  DECLARE(llvm_hpvm_x86_getDimInstance);
+  DECLARE(llvm_hpvm_cpu_dstack_push);
+  DECLARE(llvm_hpvm_cpu_dstack_pop);
+  DECLARE(llvm_hpvm_cpu_getDimLimit);
+  DECLARE(llvm_hpvm_cpu_getDimInstance);
 
   // Get or insert timerAPI functions as well if you plan to use timers
   initTimerAPI();
@@ -202,7 +202,7 @@ void CGT_X86::initRuntimeAPI() {
   // Insert init context in main
   Function *VI = M.getFunction("llvm.hpvm.init");
   assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once");
-  DEBUG(errs() << "Inserting x86 timer initialization\n");
+  DEBUG(errs() << "Inserting cpu timer initialization\n");
   Instruction *I = cast<Instruction>(*VI->user_begin());
   initializeTimerSet(I);
   switchToTimer(hpvm_TimerID_NONE, I);
@@ -210,13 +210,13 @@ void CGT_X86::initRuntimeAPI() {
   Function *VC = M.getFunction("llvm.hpvm.cleanup");
   assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once");
 
-  DEBUG(errs() << "Inserting x86 timer print\n");
+  DEBUG(errs() << "Inserting cpu timer print\n");
   printTimerSet(I);
 }
 
 /* Returns vector of all wait instructions
  */
-std::vector<IntrinsicInst *> *CGT_X86::getUseList(Value *GraphID) {
+std::vector<IntrinsicInst *> *CGT_CPU::getUseList(Value *GraphID) {
   std::vector<IntrinsicInst *> *UseList = new std::vector<IntrinsicInst *>();
   // It must have been loaded from memory somewhere
   for (Value::user_iterator ui = GraphID->user_begin(),
@@ -234,7 +234,7 @@ std::vector<IntrinsicInst *> *CGT_X86::getUseList(Value *GraphID) {
 /* Traverse the function argument list in reverse order to get argument at a
  * distance offset fromt he end of argument list of function F
  */
-Argument *CGT_X86::getArgumentFromEnd(Function *F, unsigned offset) {
+Argument *CGT_CPU::getArgumentFromEnd(Function *F, unsigned offset) {
   assert((F->getFunctionType()->getNumParams() >= offset && offset > 0) &&
          "Invalid offset to access arguments!");
   Function::arg_iterator e = F->arg_end();
@@ -259,7 +259,7 @@ Argument *CGT_X86::getArgumentFromEnd(Function *F, unsigned offset) {
  * which loops over bidy if true and goes to end if false
  * (5) Update phi node of body
  */
-void CGT_X86::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
+void CGT_CPU::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
                            Instruction *BodyEnd, Value *TerminationCond) {
   BasicBlock *Entry = CondBlockStart->getParent();
   BasicBlock *CondBlock = Entry->splitBasicBlock(CondBlockStart, "condition");
@@ -276,7 +276,7 @@ void CGT_X86::addWhileLoop(Instruction *CondBlockStart, Instruction *BodyStart,
   ReplaceInstWithInst(WhileBody->getTerminator(), UnconditionalBranch);
 }
 
-Instruction *CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
+Instruction *CGT_CPU::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
                                           BasicBlock *Body) {
   Module *M = Entry->getParent()->getParent();
   Type *Int64Ty = Type::getInt64Ty(M->getContext());
@@ -311,7 +311,7 @@ Instruction *CGT_X86::addWhileLoopCounter(BasicBlock *Entry, BasicBlock *Cond,
  * which loops over bidy if true and goes to end if false
  * (5) Update phi node of body
  */
-Value *CGT_X86::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
+Value *CGT_CPU::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
   BasicBlock *Entry = I->getParent();
   BasicBlock *ForBody = Entry->splitBasicBlock(I, "for.body");
 
@@ -356,7 +356,7 @@ Value *CGT_X86::addLoop(Instruction *I, Value *limit, const Twine &indexName) {
 // types, output types and isLastInput buffer type. All the streaming
 // inputs/outputs are converted to i8*, since this is the type of buffer
 // handles.
-StructType *CGT_X86::getArgumentListStructTy(DFNode *C) {
+StructType *CGT_CPU::getArgumentListStructTy(DFNode *C) {
   std::vector<Type *> TyList;
   // Input types
   Function *CF = C->getFuncPointer();
@@ -384,7 +384,7 @@ StructType *CGT_X86::getArgumentListStructTy(DFNode *C) {
   return STy;
 }
 
-void CGT_X86::startNodeThread(DFNode *C, std::vector<Value *> Args,
+void CGT_CPU::startNodeThread(DFNode *C, std::vector<Value *> Args,
                               DenseMap<DFEdge *, Value *> EdgeBufferMap,
                               Value *isLastInputBuffer, Value *graphID,
                               Instruction *IB) {
@@ -495,7 +495,7 @@ void CGT_X86::startNodeThread(DFNode *C, std::vector<Value *> Args,
                    ArrayRef<Value *>(CreateThreadArgs, 3), "", IB);
 }
 
-Function *CGT_X86::createLaunchFunction(DFInternalNode *N) {
+Function *CGT_CPU::createLaunchFunction(DFInternalNode *N) {
   DEBUG(errs() << "Generating Streaming Launch Function\n");
   // Get Function associated with Node N
   Function *NF = N->getFuncPointer();
@@ -643,7 +643,7 @@ Function *CGT_X86::createLaunchFunction(DFInternalNode *N) {
  * Modify each of the instrinsic in host code
  * Launch, Push, Pop, Wait
  */
-void CGT_X86::codeGenLaunchStreaming(DFInternalNode *Root) {
+void CGT_CPU::codeGenLaunchStreaming(DFInternalNode *Root) {
   IntrinsicInst *LI = Root->getInstruction();
   Function *RootLaunch = createLaunchFunction(Root);
   // Substitute launch intrinsic main
@@ -654,7 +654,7 @@ void CGT_X86::codeGenLaunchStreaming(DFInternalNode *Root) {
       "graph" + Root->getFuncPointer()->getName(), LI);
 
   DEBUG(errs() << *LaunchInst << "\n");
-  // Replace all wait instructions with x86 specific wait instructions
+  // Replace all wait instructions with cpu specific wait instructions
   DEBUG(errs() << "Substitute wait, push, pop intrinsics\n");
   std::vector<IntrinsicInst *> *UseList = getUseList(LI);
   for (unsigned i = 0; i < UseList->size(); ++i) {
@@ -684,7 +684,7 @@ void CGT_X86::codeGenLaunchStreaming(DFInternalNode *Root) {
   }
 }
 
-void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
+void CGT_CPU::codeGenLaunch(DFInternalNode *Root) {
   // TODO: Place an assert to check if the constant passed by launch intrinsic
   // as the number of arguments to DFG is same as the number of arguments of the
   // root of DFG
@@ -725,28 +725,28 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   switchToTimer(hpvm_TimerID_ARG_UNPACK, RI);
 
   DEBUG(errs() << "Created Empty Launch Function\n");
-  // Find the X86 function generated for Root and
-  //  Function* RootF_X86 = Root->getGenFunc();
-  Function *RootF_X86 = Root->getGenFuncForTarget(hpvm::CPU_TARGET);
-  assert(RootF_X86 && "Error: No generated CPU function for Root node\n");
-  assert(Root->hasX86GenFuncForTarget(hpvm::CPU_TARGET) &&
-         "Error: Generated Function for Root node with no x86 wrapper\n");
-
-  // Generate a call to RootF_X86 with null parameters for now
+  // Find the CPU function generated for Root and
+  //  Function* RootF_CPU = Root->getGenFunc();
+  Function *RootF_CPU = Root->getGenFuncForTarget(hpvm::CPU_TARGET);
+  assert(RootF_CPU && "Error: No generated CPU function for Root node\n");
+  assert(Root->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
+         "Error: Generated Function for Root node with no cpu wrapper\n");
+
+  // Generate a call to RootF_CPU with null parameters for now
   std::vector<Value *> Args;
-  for (unsigned i = 0; i < RootF_X86->getFunctionType()->getNumParams(); i++) {
+  for (unsigned i = 0; i < RootF_CPU->getFunctionType()->getNumParams(); i++) {
     Args.push_back(
-        Constant::getNullValue(RootF_X86->getFunctionType()->getParamType(i)));
+        Constant::getNullValue(RootF_CPU->getFunctionType()->getParamType(i)));
   }
   CallInst *CI =
-      CallInst::Create(RootF_X86, Args, RootF_X86->getName() + ".output", RI);
+      CallInst::Create(RootF_CPU, Args, RootF_CPU->getName() + ".output", RI);
 
   // Extract input data from i8* data.addr and patch them to correct argument of
-  // call to RootF_X86. For each argument
+  // call to RootF_CPU. For each argument
   std::vector<Type *> TyList;
   std::vector<std::string> names;
-  for (Function::arg_iterator ai = RootF_X86->arg_begin(),
-                              ae = RootF_X86->arg_end();
+  for (Function::arg_iterator ai = RootF_CPU->arg_begin(),
+                              ae = RootF_CPU->arg_end();
        ai != ae; ++ai) {
     TyList.push_back(ai->getType());
     names.push_back(ai->getName());
@@ -756,19 +756,19 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   for (unsigned i = 0; i < CI->getNumArgOperands(); i++)
     CI->setArgOperand(i, elements[i]);
 
-  // Add timers around Call to RootF_X86 function
+  // Add timers around Call to RootF_CPU function
   switchToTimer(hpvm_TimerID_COMPUTATION, CI);
   switchToTimer(hpvm_TimerID_OUTPUT_PACK, RI);
 
   StructType *RootRetTy =
-      cast<StructType>(RootF_X86->getFunctionType()->getReturnType());
+      cast<StructType>(RootF_CPU->getFunctionType()->getReturnType());
 
   // if Root has non empty return
   if (RootRetTy->getNumElements()) {
     // We can't access the type of the arg struct - build it
     std::vector<Type *> TyList;
-    for (Function::arg_iterator ai = RootF_X86->arg_begin(),
-                                ae = RootF_X86->arg_end();
+    for (Function::arg_iterator ai = RootF_CPU->arg_begin(),
+                                ae = RootF_CPU->arg_end();
          ai != ae; ++ai) {
       TyList.push_back(ai->getType());
     }
@@ -776,7 +776,7 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
 
     StructType *ArgStructTy = StructType::create(
         M.getContext(), ArrayRef<Type *>(TyList),
-        (RootF_X86->getName() + ".arg.struct.ty").str(), true);
+        (RootF_CPU->getName() + ".arg.struct.ty").str(), true);
 
     // Cast the data pointer to the type of the arg struct
     CastInst *OutputAddrCast = CastInst::CreatePointerCast(
@@ -816,19 +816,19 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   // Substitute launch intrinsic main
   Value *LaunchInstArgs[] = {AppFunc, LI->getArgOperand(1)};
   CallInst *LaunchInst = CallInst::Create(
-      llvm_hpvm_x86_launch, ArrayRef<Value *>(LaunchInstArgs, 2),
+      llvm_hpvm_cpu_launch, ArrayRef<Value *>(LaunchInstArgs, 2),
       "graph" + Root->getFuncPointer()->getName(), LI);
   // ReplaceInstWithInst(LI, LaunchInst);
 
   DEBUG(errs() << *LaunchInst << "\n");
-  // Replace all wait instructions with x86 specific wait instructions
+  // Replace all wait instructions with cpu specific wait instructions
   std::vector<IntrinsicInst *> *UseList = getUseList(LI);
   for (unsigned i = 0; i < UseList->size(); ++i) {
     IntrinsicInst *II = UseList->at(i);
     CallInst *CI;
     switch (II->getIntrinsicID()) {
     case Intrinsic::hpvm_wait:
-      CI = CallInst::Create(llvm_hpvm_x86_wait, ArrayRef<Value *>(LaunchInst),
+      CI = CallInst::Create(llvm_hpvm_cpu_wait, ArrayRef<Value *>(LaunchInst),
                             "");
       break;
     case Intrinsic::hpvm_push:
@@ -848,7 +848,7 @@ void CGT_X86::codeGenLaunch(DFInternalNode *Root) {
   }
 }
 
-Value *CGT_X86::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
+Value *CGT_CPU::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_CPU,
                              Instruction *InsertBefore) {
   // TODO: Assumption is that each input port of a node has just one
   // incoming edge. May change later on.
@@ -863,7 +863,7 @@ Value *CGT_X86::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
   // argument from argument list of this internal node
   Value *inputVal;
   if (SrcDF->isEntryNode()) {
-    inputVal = getArgumentAt(ParentF_X86, E->getSourcePosition());
+    inputVal = getArgumentAt(ParentF_CPU, E->getSourcePosition());
     DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
   } else {
     // edge is from a sibling
@@ -885,38 +885,38 @@ Value *CGT_X86::getInValueAt(DFNode *Child, unsigned i, Function *ParentF_X86,
   return inputVal;
 }
 
-void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
+void CGT_CPU::invokeChild_CPU(DFNode *C, Function *F_CPU,
                               ValueToValueMapTy &VMap, Instruction *IB) {
   Function *CF = C->getFuncPointer();
 
-  //  Function* CF_X86 = C->getGenFunc();
-  Function *CF_X86 = C->getGenFuncForTarget(hpvm::CPU_TARGET);
-  assert(CF_X86 != NULL &&
+  //  Function* CF_CPU = C->getGenFunc();
+  Function *CF_CPU = C->getGenFuncForTarget(hpvm::CPU_TARGET);
+  assert(CF_CPU != NULL &&
          "Found leaf node for which code generation has not happened yet!\n");
-  assert(C->hasX86GenFuncForTarget(hpvm::CPU_TARGET) &&
-         "The generated function to be called from x86 backend is not an x86 "
+  assert(C->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) &&
+         "The generated function to be called from the CPU backend is not a CPU "
          "function\n");
-  DEBUG(errs() << "Invoking child node" << CF_X86->getName() << "\n");
+  DEBUG(errs() << "Invoking child node" << CF_CPU->getName() << "\n");
 
   std::vector<Value *> Args;
   // Create argument list to pass to call instruction
   // First find the correct values using the edges
   // The remaing six values are inserted as constants for now.
   for (unsigned i = 0; i < CF->getFunctionType()->getNumParams(); i++) {
-    Args.push_back(getInValueAt(C, i, F_X86, IB));
+    Args.push_back(getInValueAt(C, i, F_CPU, IB));
   }
 
-  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_X86->getContext()), 0);
+  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(F_CPU->getContext()), 0);
   for (unsigned j = 0; j < 6; j++)
     Args.push_back(I64Zero);
 
-  DEBUG(errs() << "Gen Function type: " << *CF_X86->getType() << "\n");
+  DEBUG(errs() << "Gen Function type: " << *CF_CPU->getType() << "\n");
   DEBUG(errs() << "Node Function type: " << *CF->getType() << "\n");
   DEBUG(errs() << "Arguments: " << Args.size() << "\n");
 
-  // Call the F_X86 function associated with this node
+  // Call the F_CPU function associated with this node
   CallInst *CI =
-      CallInst::Create(CF_X86, Args, CF_X86->getName() + "_output", IB);
+      CallInst::Create(CF_CPU, Args, CF_CPU->getName() + "_output", IB);
   DEBUG(errs() << *CI << "\n");
   OutputMap[C] = CI;
 
@@ -928,7 +928,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
     Value *indexLimit = NULL;
     // Limit can either be a constant or an arguement of the internal node.
     // In case of constant we can use that constant value directly in the
-    // new F_X86 function. In case of an argument, we need to get the mapped
+    // new F_CPU function. In case of an argument, we need to get the mapped
     // value using VMap
     if (isa<Constant>(C->getDimLimits()[j])) {
       indexLimit = C->getDimLimits()[j];
@@ -960,7 +960,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
       CI->getArgOperand(numArgs - 6 + 2)  // iZ
   };
 
-  CallInst *Push = CallInst::Create(llvm_hpvm_x86_dstack_push,
+  CallInst *Push = CallInst::Create(llvm_hpvm_cpu_dstack_push,
                                     ArrayRef<Value *>(args, 7), "", CI);
   DEBUG(errs() << "Push on stack: " << *Push << "\n");
   // Insert call to runtime to pop the dim limits and instanceID from the depth
@@ -973,7 +973,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
   assert(NextI->getParent() == CI->getParent() &&
          "Next Instruction should also belong to the same basic block!");
 
-  CallInst *Pop = CallInst::Create(llvm_hpvm_x86_dstack_pop, None, "", NextI);
+  CallInst *Pop = CallInst::Create(llvm_hpvm_cpu_dstack_pop, None, "", NextI);
   DEBUG(errs() << "Pop from stack: " << *Pop << "\n");
   DEBUG(errs() << *CI->getParent()->getParent());
 }
@@ -994,7 +994,7 @@ void CGT_X86::invokeChild_X86(DFNode *C, Function *F_X86,
 // Add runtime API calls to push output for each of the streaming outputs
 // Add loop around the basic block, which exits the loop if isLastInput is false
 
-Function *CGT_X86::createFunctionFilter(DFNode *C) {
+Function *CGT_CPU::createFunctionFilter(DFNode *C) {
   DEBUG(errs() << "*********Creating Function filter for "
                << C->getFuncPointer()->getName() << "*****\n");
 
@@ -1160,7 +1160,7 @@ Function *CGT_X86::createFunctionFilter(DFNode *C) {
   return CF_Pipeline;
 }
 
-void CGT_X86::codeGen(DFInternalNode *N) {
+void CGT_CPU::codeGen(DFInternalNode *N) {
   // Check if N is root node and its graph is streaming. We do not do codeGen
   // for Root in such a case
   if (N->isRoot() && N->isChildGraphStreaming())
@@ -1182,7 +1182,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   // Sort children in topological order before code generation
   N->getChildGraph()->sortChildren();
 
-  // Only process if all children have a CPU x86 function
+    // Only process if all children have a CPU function
   // Otherwise skip to end
   bool codeGen = true;
   for (DFGraph::children_iterator ci = N->getChildGraph()->begin(),
@@ -1193,8 +1193,8 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     if (C->isDummyNode())
       continue;
 
-    if (!(C->hasX86GenFuncForTarget(hpvm::CPU_TARGET))) {
-      DEBUG(errs() << "No CPU x86 version for child node "
+    if (!(C->hasCPUGenFuncForTarget(hpvm::CPU_TARGET))) {
+      DEBUG(errs() << "No CPU version for child node "
                    << C->getFuncPointer()->getName()
                    << "\n  Skip code gen for parent node "
                    << N->getFuncPointer()->getName() << "\n");
@@ -1206,18 +1206,18 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     Function *F = N->getFuncPointer();
     // Create of clone of F with no instructions. Only the type is the same as F
     // without the extra arguments.
-    Function *F_X86;
+    Function *F_CPU;
 
     // Clone the function, if we are seeing this function for the first time. We
     // only need a clone in terms of type.
     ValueToValueMapTy VMap;
 
     // Create new function with the same type
-    F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(),
+    F_CPU = Function::Create(F->getFunctionType(), F->getLinkage(),
                              F->getName(), &M);
 
     // Loop over the arguments, copying the names of arguments over.
-    Function::arg_iterator dest_iterator = F_X86->arg_begin();
+    Function::arg_iterator dest_iterator = F_CPU->arg_begin();
     for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
          i != e; ++i) {
       dest_iterator->setName(i->getName()); // Copy the name over...
@@ -1226,24 +1226,24 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     }
 
     // Add a basic block to this empty function
-    BasicBlock *BB = BasicBlock::Create(F_X86->getContext(), "entry", F_X86);
+    BasicBlock *BB = BasicBlock::Create(F_CPU->getContext(), "entry", F_CPU);
     ReturnInst *RI = ReturnInst::Create(
-        F_X86->getContext(), UndefValue::get(F_X86->getReturnType()), BB);
+        F_CPU->getContext(), UndefValue::get(F_CPU->getReturnType()), BB);
 
     // Add Index and Dim arguments except for the root node and the child graph
     // of parent node is not streaming
     if (!N->isRoot() && !N->getParent()->isChildGraphStreaming())
-      F_X86 = addIdxDimArgs(F_X86);
+      F_CPU = addIdxDimArgs(F_CPU);
 
-    BB = &*F_X86->begin();
+    BB = &*F_CPU->begin();
     RI = cast<ReturnInst>(BB->getTerminator());
 
     // Add generated function info to DFNode
-    //    N->setGenFunc(F_X86, hpvm::CPU_TARGET);
-    N->addGenFunc(F_X86, hpvm::CPU_TARGET, true);
+    //    N->setGenFunc(F_CPU, hpvm::CPU_TARGET);
+    N->addGenFunc(F_CPU, hpvm::CPU_TARGET, true);
 
     // Loop over the arguments, to create the VMap.
-    dest_iterator = F_X86->arg_begin();
+    dest_iterator = F_CPU->arg_begin();
     for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
          i != e; ++i) {
       // Add mapping and increment dest iterator
@@ -1261,7 +1261,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
         continue;
 
       // Create calls to CPU function of child node
-      invokeChild_X86(C, F_X86, VMap, RI);
+      invokeChild_CPU(C, F_CPU, VMap, RI);
     }
 
     DEBUG(errs() << "*** Generating epilogue code for the function****\n");
@@ -1270,7 +1270,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     DFNode *C = N->getChildGraph()->getExit();
     // Get OutputType of this node
     StructType *OutTy = N->getOutputType();
-    Value *retVal = UndefValue::get(F_X86->getReturnType());
+    Value *retVal = UndefValue::get(F_CPU->getReturnType());
     // Find all the input edges to exit node
     for (unsigned i = 0; i < OutTy->getNumElements(); i++) {
       DEBUG(errs() << "Output Edge " << i << "\n");
@@ -1288,7 +1288,7 @@ void CGT_X86::codeGen(DFInternalNode *N) {
       // argument from argument list of this internal node
       Value *inputVal;
       if (SrcDF->isEntryNode()) {
-        inputVal = getArgumentAt(F_X86, i);
+        inputVal = getArgumentAt(F_CPU, i);
         DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
       } else {
         // edge is from a internal node
@@ -1313,14 +1313,14 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     }
     DEBUG(errs() << "Extracted all\n");
     retVal->setName("output");
-    ReturnInst *newRI = ReturnInst::Create(F_X86->getContext(), retVal);
+    ReturnInst *newRI = ReturnInst::Create(F_CPU->getContext(), retVal);
     ReplaceInstWithInst(RI, newRI);
   }
 
   //-------------------------------------------------------------------------//
   // Here, we need to check if this node (N) has more than one versions
   // If so, we query the policy and have a call to each version
-  // If not, we see which version exists, check that it is in fact an x86
+  // If not, we see which version exists, check that it is in fact a CPU
   // function and save it as the CPU_TARGET function
 
   // TODO: hpvm_id per node, so we can use this for id for policies
@@ -1328,16 +1328,16 @@ void CGT_X86::codeGen(DFInternalNode *N) {
   Function *CF = N->getGenFuncForTarget(hpvm::CPU_TARGET);
   Function *GF = N->getGenFuncForTarget(hpvm::GPU_TARGET);
 
-  bool CFx86 = N->hasX86GenFuncForTarget(hpvm::CPU_TARGET);
-  bool GFx86 = N->hasX86GenFuncForTarget(hpvm::GPU_TARGET);
+  bool CFcpu = N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET);
+  bool GFcpu = N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET);
 
   DEBUG(errs() << "Before editing\n");
   DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() << " with tag "
                << N->getTag() << "\n");
   DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null") << "\n");
-  DEBUG(errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n");
+  DEBUG(errs() << "hasCPUGenFuncForCPU : " << CFcpu << "\n");
   DEBUG(errs() << "GPU Fun: " << (GF ? GF->getName() : "null") << "\n");
-  DEBUG(errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n");
+  DEBUG(errs() << "hasCPUGenFuncForGPU : " << GFcpu << "\n");
 
   if (N->getTag() == hpvm::None) {
     // No code is available for this node. This (usually) means that this
@@ -1357,15 +1357,15 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     switch (N->getTag()) {
     case hpvm::CPU_TARGET:
       assert(N->getGenFuncForTarget(hpvm::CPU_TARGET) && "");
-      assert(N->hasX86GenFuncForTarget(hpvm::CPU_TARGET) && "");
+      assert(N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET) && "");
       assert(!(N->getGenFuncForTarget(hpvm::GPU_TARGET)) && "");
-      assert(!(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET)) && "");
+      assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) && "");
       break;
     case hpvm::GPU_TARGET:
       assert(!(N->getGenFuncForTarget(hpvm::CPU_TARGET)) && "");
-      assert(!(N->hasX86GenFuncForTarget(hpvm::CPU_TARGET)) && "");
+      assert(!(N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET)) && "");
       assert(N->getGenFuncForTarget(hpvm::GPU_TARGET) && "");
-      assert(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET) && "");
+      assert(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET) && "");
       break;
     default:
       assert(false && "Unreachable: we checked that tag was single target!\n");
@@ -1380,16 +1380,16 @@ void CGT_X86::codeGen(DFInternalNode *N) {
     CF = N->getGenFuncForTarget(hpvm::CPU_TARGET);
     GF = N->getGenFuncForTarget(hpvm::GPU_TARGET);
 
-    CFx86 = N->hasX86GenFuncForTarget(hpvm::CPU_TARGET);
-    GFx86 = N->hasX86GenFuncForTarget(hpvm::GPU_TARGET);
+    CFcpu = N->hasCPUGenFuncForTarget(hpvm::CPU_TARGET);
+    GFcpu = N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET);
 
     DEBUG(errs() << "After editing\n");
     DEBUG(errs() << "Node: " << N->getFuncPointer()->getName() << " with tag "
                  << N->getTag() << "\n");
     DEBUG(errs() << "CPU Fun: " << (CF ? CF->getName() : "null") << "\n");
-    DEBUG(errs() << "hasx86GenFuncForCPU : " << CFx86 << "\n");
+    DEBUG(errs() << "hasCPUGenFuncForCPU : " << CFcpu << "\n");
     DEBUG(errs() << "GPU Fun: " << (GF ? GF->getName() : "null") << "\n");
-    DEBUG(errs() << "hasx86GenFuncForGPU : " << GFx86 << "\n");
+    DEBUG(errs() << "hasCPUGenFuncForGPU : " << GFcpu << "\n");
 
   } else {
     assert(false && "Multiple tags unsupported!");
@@ -1397,14 +1397,14 @@ void CGT_X86::codeGen(DFInternalNode *N) {
 }
 
 // Code generation for leaf nodes
-void CGT_X86::codeGen(DFLeafNode *N) {
+void CGT_CPU::codeGen(DFLeafNode *N) {
   // Skip code generation if it is a dummy node
   if (N->isDummyNode()) {
     DEBUG(errs() << "Skipping dummy node\n");
     return;
   }
 
-  // At this point, the X86 backend does not support code generation for
+  // At this point, the CPU backend does not support code generation for
   // the case where allocation node is used, so we skip. This means that a
   // CPU version will not be created, and therefore code generation will
   // only succeed if another backend (nvptx or spir) has been invoked to
@@ -1425,9 +1425,9 @@ void CGT_X86::codeGen(DFLeafNode *N) {
 
     switch (N->getTag()) {
     case hpvm::GPU_TARGET:
-      // A leaf node should not have an x86 function for GPU
-      // by design of DFG2LLVM_NVPTX backend
-      assert(!(N->hasX86GenFuncForTarget(hpvm::GPU_TARGET)) &&
+      // A leaf node should not have a CPU function for GPU
+      // by design of DFG2LLVM_OpenCL backend
+      assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) &&
              "Leaf node not expected to have GPU GenFunc");
       break;
     default:
@@ -1448,34 +1448,34 @@ void CGT_X86::codeGen(DFLeafNode *N) {
   Function *F = N->getFuncPointer();
 
   // Clone the function, if we are seeing this function for the first time.
-  Function *F_X86;
+  Function *F_CPU;
   ValueToValueMapTy VMap;
-  F_X86 = CloneFunction(F, VMap);
-  F_X86->removeFromParent();
+  F_CPU = CloneFunction(F, VMap);
+  F_CPU->removeFromParent();
   // Insert the cloned function into the module
-  M.getFunctionList().push_back(F_X86);
+  M.getFunctionList().push_back(F_CPU);
 
   // Add the new argument to the argument list. Add arguments only if the cild
   // graph of parent node is not streaming
   if (!N->getParent()->isChildGraphStreaming())
-    F_X86 = addIdxDimArgs(F_X86);
+    F_CPU = addIdxDimArgs(F_CPU);
 
   // Add generated function info to DFNode
-  //  N->setGenFunc(F_X86, hpvm::CPU_TARGET);
-  N->addGenFunc(F_X86, hpvm::CPU_TARGET, true);
+  //  N->setGenFunc(F_CPU, hpvm::CPU_TARGET);
+  N->addGenFunc(F_CPU, hpvm::CPU_TARGET, true);
 
   // Go through the arguments, and any pointer arguments with in attribute need
-  // to have x86_argument_ptr call to get the x86 ptr of the argument
+  // to have a cpu_argument_ptr call to get the CPU pointer of the argument
   // Insert these calls in a new BB which would dominate all other BBs
   // Create new BB
-  BasicBlock *EntryBB = &*F_X86->begin();
+  BasicBlock *EntryBB = &*F_CPU->begin();
   BasicBlock *BB =
-      BasicBlock::Create(M.getContext(), "getHPVMPtrArgs", F_X86, EntryBB);
+      BasicBlock::Create(M.getContext(), "getHPVMPtrArgs", F_CPU, EntryBB);
   BranchInst *Terminator = BranchInst::Create(EntryBB, BB);
   // Insert calls
-  for (Function::arg_iterator ai = F_X86->arg_begin(), ae = F_X86->arg_end();
+  for (Function::arg_iterator ai = F_CPU->arg_begin(), ae = F_CPU->arg_end();
        ai != ae; ++ai) {
-    if (F_X86->getAttributes().hasAttribute(ai->getArgNo() + 1,
+    if (F_CPU->getAttributes().hasAttribute(ai->getArgNo() + 1,
                                             Attribute::In)) {
       assert(ai->getType()->isPointerTy() &&
              "Only pointer arguments can have hpvm in/out attributes ");
@@ -1488,14 +1488,14 @@ void CGT_X86::codeGen(DFLeafNode *N) {
           &*ai, Type::getInt8PtrTy(M.getContext()), ai->getName() + ".i8ptr",
           Terminator);
       Value *ArgPtrCallArgs[] = {BI, size};
-      CallInst::Create(llvm_hpvm_x86_argument_ptr,
+      CallInst::Create(llvm_hpvm_cpu_argument_ptr,
                        ArrayRef<Value *>(ArgPtrCallArgs, 2), "", Terminator);
     }
   }
   DEBUG(errs() << *BB << "\n");
 
   // Go through all the instructions
-  for (inst_iterator i = inst_begin(F_X86), e = inst_end(F_X86); i != e; ++i) {
+  for (inst_iterator i = inst_begin(F_CPU), e = inst_end(F_CPU); i != e; ++i) {
     Instruction *I = &(*i);
     DEBUG(errs() << *I << "\n");
     // Leaf nodes should not contain HPVM graph intrinsics or launch
@@ -1572,19 +1572,19 @@ void CGT_X86::codeGen(DFLeafNode *N) {
                "ID!");
 
         // For immediate ancestor, use the extra argument introduced in
-        // F_X86
+        // F_CPU
         int numParamsF = F->getFunctionType()->getNumParams();
-        int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
+        int numParamsF_CPU = F_CPU->getFunctionType()->getNumParams();
         assert(
-            (numParamsF_X86 - numParamsF == 6) &&
+            (numParamsF_CPU - numParamsF == 6) &&
             "Difference of arguments between function and its clone is not 6!");
 
         if (parentLevel == 0) {
           // Case when the query is for this node itself
           unsigned offset = 3 + (3 - dim);
-          // Traverse argument list of F_X86 in reverse order to find the
+          // Traverse argument list of F_CPU in reverse order to find the
           // correct index or dim argument.
-          Argument *indexVal = getArgumentFromEnd(F_X86, offset);
+          Argument *indexVal = getArgumentFromEnd(F_CPU, offset);
           assert(indexVal && "Index argument not found. Invalid offset!");
 
           DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
@@ -1596,7 +1596,7 @@ void CGT_X86::codeGen(DFLeafNode *N) {
           Value *args[] = {
               ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel),
               ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)};
-          CallInst *CI = CallInst::Create(llvm_hpvm_x86_getDimInstance,
+          CallInst *CI = CallInst::Create(llvm_hpvm_cpu_getDimInstance,
                                           ArrayRef<Value *>(args, 2),
                                           "nodeInstanceID", II);
           DEBUG(errs() << *II << " replaced with " << *CI << "\n");
@@ -1630,19 +1630,19 @@ void CGT_X86::codeGen(DFLeafNode *N) {
                "Intrinsic ID!");
 
         // For immediate ancestor, use the extra argument introduced in
-        // F_X86
+        // F_CPU
         int numParamsF = F->getFunctionType()->getNumParams();
-        int numParamsF_X86 = F_X86->getFunctionType()->getNumParams();
+        int numParamsF_CPU = F_CPU->getFunctionType()->getNumParams();
         assert(
-            (numParamsF_X86 - numParamsF == 6) &&
+            (numParamsF_CPU - numParamsF == 6) &&
             "Difference of arguments between function and its clone is not 6!");
 
         if (parentLevel == 0) {
           // Case when the query is for this node itself
           unsigned offset = 3 - dim;
-          // Traverse argument list of F_X86 in reverse order to find the
+          // Traverse argument list of F_CPU in reverse order to find the
           // correct index or dim argument.
-          Argument *limitVal = getArgumentFromEnd(F_X86, offset);
+          Argument *limitVal = getArgumentFromEnd(F_CPU, offset);
           assert(limitVal && "Limit argument not found. Invalid offset!");
 
           DEBUG(errs() << *II << " replaced with " << *limitVal << "\n");
@@ -1654,7 +1654,7 @@ void CGT_X86::codeGen(DFLeafNode *N) {
           Value *args[] = {
               ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel),
               ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)};
-          CallInst *CI = CallInst::Create(llvm_hpvm_x86_getDimLimit,
+          CallInst *CI = CallInst::Create(llvm_hpvm_cpu_getDimLimit,
                                           ArrayRef<Value *>(args, 2),
                                           "numNodeInstances", II);
           DEBUG(errs() << *II << " replaced with " << *CI << "\n");
@@ -1682,13 +1682,13 @@ void CGT_X86::codeGen(DFLeafNode *N) {
     (*i)->eraseFromParent();
   }
 
-  DEBUG(errs() << *F_X86);
+  DEBUG(errs() << *F_CPU);
 }
 
 } // End of namespace
 
-char DFG2LLVM_X86::ID = 0;
-static RegisterPass<DFG2LLVM_X86>
-    X("dfg2llvm-x86", "Dataflow Graph to LLVM for X86 backend",
+char DFG2LLVM_CPU::ID = 0;
+static RegisterPass<DFG2LLVM_CPU>
+    X("dfg2llvm-cpu", "Dataflow Graph to LLVM for CPU backend",
       false /* does not modify the CFG */,
       true /* transformation, not just analysis */);
diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.exports b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.exports
similarity index 100%
rename from hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.exports
rename to hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.exports
diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt b/hpvm/lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt
similarity index 87%
rename from hpvm/lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt
rename to hpvm/lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt
index 1e82065bf06fe059cbd081b42a9f83e37352b703..30ba8a76365d02ca8fcdcb34948442ef89f5755e 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Transforms/DFG2LLVM_X86/LLVMBuild.txt --------------*- Conf -*--===;
+;===- ./lib/Transforms/DFG2LLVM_CPU/LLVMBuild.txt --------------*- Conf -*--===;
 ;
 ;                     The LLVM Compiler Infrastructure
 ;
@@ -17,5 +17,5 @@
 
 [component_0]
 type = Library
-name = DFG2LLVM_X86
+name = DFG2LLVM_CPU
 parent = Transforms
diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/CMakeLists.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
similarity index 78%
rename from hpvm/lib/Transforms/DFG2LLVM_NVPTX/CMakeLists.txt
rename to hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
index 832f6334a4bc048992ee545844941f44ef2c8fe0..00c651eaa250fc114f229f30e0cb7c121154ff96 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/CMakeLists.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/CMakeLists.txt
@@ -4,9 +4,9 @@ endif()
 
 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DLLVM_BUILD_DIR=${PROJECT_BINARY_DIR}")
 
-add_llvm_library( LLVMDFG2LLVM_NVPTX
+add_llvm_library( LLVMDFG2LLVM_OpenCL
   MODULE
-  DFG2LLVM_NVPTX.cpp
+  DFG2LLVM_OpenCL.cpp
 
   DEPENDS
   intrinsics_gen
diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
similarity index 96%
rename from hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
rename to hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
index d250562043b633aa69b4ac6bf77ba2bf51167093..b3ad2794b94614e9e866933151817942177c2589 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.cpp
@@ -1,4 +1,4 @@
-//=== DFG2LLVM_NVPTX.cpp ===//
+//=== DFG2LLVM_OpenCL.cpp ===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -14,7 +14,7 @@
 #define CONSTANT_ADDRSPACE 4
 #define SHARED_ADDRSPACE 3
 
-#define DEBUG_TYPE "DFG2LLVM_NVPTX"
+#define DEBUG_TYPE "DFG2LLVM_OpenCL"
 #include "SupportHPVM/DFG2LLVM.h"
 #include "SupportHPVM/HPVMTimer.h"
 #include "SupportHPVM/HPVMUtils.h"
@@ -54,8 +54,8 @@ using namespace dfg2llvm;
 using namespace hpvmUtils;
 
 // HPVM Command line option to use timer or not
-static cl::opt<bool> HPVMTimer_NVPTX("hpvm-timers-ptx",
-                                     cl::desc("Enable hpvm timers"));
+static cl::opt<bool> HPVMTimer_OpenCL("hpvm-timers-ptx",
+                                      cl::desc("Enable hpvm timers"));
 
 namespace {
 // Helper class declarations
@@ -149,10 +149,10 @@ static void findIntrinsicInst(Function *, Intrinsic::ID,
 static AtomicRMWInst::BinOp getAtomicOp(Intrinsic::ID);
 static std::string getAtomicOpName(Intrinsic::ID);
 
-// DFG2LLVM_NVPTX - The first implementation.
-struct DFG2LLVM_NVPTX : public DFG2LLVM {
+// DFG2LLVM_OpenCL - The first implementation.
+struct DFG2LLVM_OpenCL : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_NVPTX() : DFG2LLVM(ID) {}
+  DFG2LLVM_OpenCL() : DFG2LLVM(ID) {}
 
 private:
 public:
@@ -160,7 +160,7 @@ public:
 };
 
 // Visitor for Code generation traversal (tree traversal for now)
-class CGT_NVPTX : public CodeGenTraversal {
+class CGT_OpenCL : public CodeGenTraversal {
 
 private:
   // Member variables
@@ -194,8 +194,8 @@ private:
 
   // Virtual Functions
   void init() {
-    HPVMTimer = HPVMTimer_NVPTX;
-    TargetName = "NVPTX";
+    HPVMTimer = HPVMTimer_OpenCL;
+    TargetName = "OpenCL";
   }
   void initRuntimeAPI();
   void codeGen(DFInternalNode *N);
@@ -203,7 +203,7 @@ private:
 
 public:
   // Constructor
-  CGT_NVPTX(Module &_M, BuildDFG &_DFG)
+  CGT_OpenCL(Module &_M, BuildDFG &_DFG)
       : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(_M)) {
     init();
     initRuntimeAPI();
@@ -257,7 +257,7 @@ public:
 };
 
 // Initialize the HPVM runtime API. This makes it easier to insert these calls
-void CGT_NVPTX::initRuntimeAPI() {
+void CGT_OpenCL::initRuntimeAPI() {
 
   // Load Runtime API Module
   SMDiagnostic Err;
@@ -289,7 +289,7 @@ void CGT_NVPTX::initRuntimeAPI() {
   initTimerAPI();
 
   // Insert init context in main
-  DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n");
+  DEBUG(errs() << "Gen Code to initialize OpenCL Timer\n");
   Function *VI = M.getFunction("llvm.hpvm.init");
   assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once");
 
@@ -302,7 +302,7 @@ void CGT_NVPTX::initRuntimeAPI() {
   switchToTimer(hpvm_TimerID_NONE, InitCall);
 
   // Insert print instruction at hpvm exit
-  DEBUG(errs() << "Gen Code to print NVPTX Timer\n");
+  DEBUG(errs() << "Gen Code to print OpenCL Timer\n");
   Function *VC = M.getFunction("llvm.hpvm.cleanup");
   DEBUG(errs() << *VC << "\n");
   assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once");
@@ -316,8 +316,8 @@ void CGT_NVPTX::initRuntimeAPI() {
 // used to generate a function to associate with this leaf node. The function
 // is responsible for all the memory allocation/transfer and invoking the
 // kernel call on the device
-void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
-                                   const Twine &FileName) {
+void CGT_OpenCL::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
+                                    const Twine &FileName) {
   // Check if clone already exists. If it does, it means we have visited this
   // function before.
   //  assert(N->getGenFunc() == NULL && "Code already generated for this node");
@@ -338,18 +338,18 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
 
   // Create of clone of F with no instructions. Only the type is the same as F
   // without the extra arguments.
-  Function *F_X86;
+  Function *F_CPU;
 
   // Clone the function, if we are seeing this function for the first time. We
   // only need a clone in terms of type.
   ValueToValueMapTy VMap;
 
   // Create new function with the same type
-  F_X86 =
+  F_CPU =
       Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M);
 
   // Loop over the arguments, copying the names of arguments over.
-  Function::arg_iterator dest_iterator = F_X86->arg_begin();
+  Function::arg_iterator dest_iterator = F_CPU->arg_begin();
   for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
        i != e; ++i) {
     dest_iterator->setName(i->getName()); // Copy the name over...
@@ -358,29 +358,29 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   }
 
   // Add a basic block to this empty function
-  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_X86);
+  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_CPU);
   ReturnInst *RI = ReturnInst::Create(
-      M.getContext(), UndefValue::get(F_X86->getReturnType()), BB);
+      M.getContext(), UndefValue::get(F_CPU->getReturnType()), BB);
 
   // FIXME: Adding Index and Dim arguments are probably not required except
-  // for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do
+  // for consistency purpose (DFG2LLVM_CPU does assume that all leaf nodes do
   // have those arguments)
 
   // Add Index and Dim arguments except for the root node
   if (!N->isRoot() && !N->getParent()->isChildGraphStreaming())
-    F_X86 = addIdxDimArgs(F_X86);
+    F_CPU = addIdxDimArgs(F_CPU);
 
-  BB = &*F_X86->begin();
+  BB = &*F_CPU->begin();
   RI = cast<ReturnInst>(BB->getTerminator());
 
   // Add the generated function info to DFNode
-  //  N->setGenFunc(F_X86, hpvm::CPU_TARGET);
-  N->addGenFunc(F_X86, hpvm::GPU_TARGET, true);
-  DEBUG(errs() << "Added GPUGenFunc: " << F_X86->getName() << " for node "
+  //  N->setGenFunc(F_CPU, hpvm::CPU_TARGET);
+  N->addGenFunc(F_CPU, hpvm::GPU_TARGET, true);
+  DEBUG(errs() << "Added GPUGenFunc: " << F_CPU->getName() << " for node "
                << N->getFuncPointer()->getName() << "\n");
 
   // Loop over the arguments, to create the VMap
-  dest_iterator = F_X86->arg_begin();
+  dest_iterator = F_CPU->arg_begin();
   for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
        i != e; ++i) {
     // Add mapping to VMap and increment dest iterator
@@ -435,16 +435,16 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
 
   DEBUG(errs() << "Inserting launch call"
                << "\n");
-  CallInst *NVPTX_Ctx = CallInst::Create(llvm_hpvm_ocl_launch,
-                                         ArrayRef<Value *>(LaunchInstArgs, 2),
-                                         "graph" + KF->getName(), InitCall);
-  DEBUG(errs() << *NVPTX_Ctx << "\n");
-  GraphIDAddr = new GlobalVariable(M, NVPTX_Ctx->getType(), false,
-                                   GlobalValue::CommonLinkage,
-                                   Constant::getNullValue(NVPTX_Ctx->getType()),
-                                   "graph" + KF->getName() + ".addr");
+  CallInst *OpenCL_Ctx = CallInst::Create(llvm_hpvm_ocl_launch,
+                                          ArrayRef<Value *>(LaunchInstArgs, 2),
+                                          "graph" + KF->getName(), InitCall);
+  DEBUG(errs() << *OpenCL_Ctx << "\n");
+  GraphIDAddr = new GlobalVariable(
+      M, OpenCL_Ctx->getType(), false, GlobalValue::CommonLinkage,
+      Constant::getNullValue(OpenCL_Ctx->getType()),
+      "graph" + KF->getName() + ".addr");
   DEBUG(errs() << "Store at: " << *GraphIDAddr << "\n");
-  StoreInst *SI = new StoreInst(NVPTX_Ctx, GraphIDAddr, InitCall);
+  StoreInst *SI = new StoreInst(OpenCL_Ctx, GraphIDAddr, InitCall);
   DEBUG(errs() << *SI << "\n");
   switchToTimer(hpvm_TimerID_NONE, InitCall);
   switchToTimer(hpvm_TimerID_SETUP, RI);
@@ -463,14 +463,14 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
     for(unsigned i=0; i<KF->getFunctionType()->getNumParams(); i++) {
 
       // The kernel object gives us the mapping of arguments from kernel launch
-      // node function (F_X86) to kernel (kernel->KF)
-      Value* inputVal = getArgumentAt(F_X86, K->getInArgMap()[i]);
+      // node function (F_CPU) to kernel (kernel->KF)
+      Value* inputVal = getArgumentAt(F_CPU, K->getInArgMap()[i]);
 
   */
 
   for (auto &InArgMapPair : kernelInArgMap) {
     unsigned i = InArgMapPair.first;
-    Value *inputVal = getArgumentAt(F_X86, InArgMapPair.second);
+    Value *inputVal = getArgumentAt(F_CPU, InArgMapPair.second);
     DEBUG(errs() << "\tArgument " << i << " = " << *inputVal << "\n");
 
     // input value has been obtained.
@@ -504,7 +504,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
       // Assert that the pointer argument size (next argument) is in the map
       assert(kernelInArgMap.find(i + 1) != kernelInArgMap.end());
 
-      Value *inputSize = getArgumentAt(F_X86, kernelInArgMap[i + 1]);
+      Value *inputSize = getArgumentAt(F_CPU, kernelInArgMap[i + 1]);
       assert(
           inputSize->getType() == Type::getInt64Ty(M.getContext()) &&
           "Pointer type input must always be followed by size (integer type)");
@@ -606,7 +606,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
 
     std::vector<Value *> AllocInputArgs;
     for (unsigned i = 0; i < K->allocInArgMap.size(); i++) {
-      AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i)));
+      AllocInputArgs.push_back(getArgumentAt(F_CPU, K->allocInArgMap.at(i)));
     }
 
     CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI);
@@ -759,7 +759,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   DFNode *C = N->getChildGraph()->getExit();
   // Get OutputType of this node
   StructType *OutTy = N->getOutputType();
-  Value *retVal = UndefValue::get(F_X86->getReturnType());
+  Value *retVal = UndefValue::get(F_CPU->getReturnType());
   // Find the kernel's output arg map, to use instead of the bindings
   std::vector<unsigned> outArgMap = kernel->getOutArgMap();
   // Find all the input edges to exit node
@@ -779,7 +779,7 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
     // argument from argument list of this internal node
     Value *inputVal;
     if (SrcDF->isEntryNode()) {
-      inputVal = getArgumentAt(F_X86, i);
+      inputVal = getArgumentAt(F_CPU, i);
       DEBUG(errs() << "Argument " << i << " = " << *inputVal << "\n");
     } else {
       // edge is from a internal node
@@ -812,13 +812,13 @@ void CGT_NVPTX::insertRuntimeCalls(DFInternalNode *N, Kernel *K,
   DEBUG(errs() << "Extracted all\n");
   switchToTimer(hpvm_TimerID_NONE, RI);
   retVal->setName("output");
-  ReturnInst *newRI = ReturnInst::Create(F_X86->getContext(), retVal);
+  ReturnInst *newRI = ReturnInst::Create(F_CPU->getContext(), retVal);
   ReplaceInstWithInst(RI, newRI);
 }
 
 // Right now, only targeting the one level case. In general, device functions
 // can return values so we don't need to change them
-void CGT_NVPTX::codeGen(DFInternalNode *N) {
+void CGT_OpenCL::codeGen(DFInternalNode *N) {
   DEBUG(errs() << "Inside internal node: " << N->getFuncPointer()->getName()
                << "\n");
   if (KernelLaunchNode == NULL)
@@ -910,7 +910,7 @@ void CGT_NVPTX::codeGen(DFInternalNode *N) {
   }
 }
 
-void CGT_NVPTX::codeGen(DFLeafNode *N) {
+void CGT_OpenCL::codeGen(DFLeafNode *N) {
   DEBUG(errs() << "Inside leaf node: " << N->getFuncPointer()->getName()
                << "\n");
 
@@ -1625,7 +1625,7 @@ void CGT_NVPTX::codeGen(DFLeafNode *N) {
     // check that addressspace is 1
     //	  if (GEPIaddrspace != 1) {
     //			// does not fit this pattern - addrspace of pointer
-    //argument is not global 			continue;
+    // argument is not global 			continue;
     //		}
     if (!(GEPI->hasOneUse())) {
       // does not fit this pattern - more than one uses
@@ -1876,8 +1876,8 @@ void CGT_NVPTX::codeGen(DFLeafNode *N) {
   return;
 }
 
-bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
-  DEBUG(errs() << "\nDFG2LLVM_NVPTX PASS\n");
+bool DFG2LLVM_OpenCL::runOnModule(Module &M) {
+  DEBUG(errs() << "\nDFG2LLVM_OpenCL PASS\n");
 
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
@@ -1891,7 +1891,7 @@ bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
   //    = DFG.getHandleToDFEdgeMap();
 
   // Visitor for Code Generation Graph Traversal
-  CGT_NVPTX *CGTVisitor = new CGT_NVPTX(M, DFG);
+  CGT_OpenCL *CGTVisitor = new CGT_OpenCL(M, DFG);
 
   // Iterate over all the DFGs and produce code for each one of them
   for (auto rootNode : Roots) {
@@ -1907,7 +1907,7 @@ bool DFG2LLVM_NVPTX::runOnModule(Module &M) {
   return true;
 }
 
-std::string CGT_NVPTX::getKernelsModuleName(Module &M) {
+std::string CGT_OpenCL::getKernelsModuleName(Module &M) {
   /*SmallString<128> currentDir;
           llvm::sys::fs::current_path(currentDir);
           std::string fileName = getFilenameFromModule(M);
@@ -1917,7 +1917,7 @@ std::string CGT_NVPTX::getKernelsModuleName(Module &M) {
   return mid.append(".kernels.ll");
 }
 
-void CGT_NVPTX::fixValueAddrspace(Value *V, unsigned addrspace) {
+void CGT_OpenCL::fixValueAddrspace(Value *V, unsigned addrspace) {
   assert(isa<PointerType>(V->getType()) && "Value should be of Pointer Type!");
   PointerType *OldTy = cast<PointerType>(V->getType());
   PointerType *NewTy = PointerType::get(OldTy->getElementType(), addrspace);
@@ -1935,8 +1935,8 @@ void CGT_NVPTX::fixValueAddrspace(Value *V, unsigned addrspace) {
 }
 
 std::vector<unsigned>
-CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
-                                     Function *F) {
+CGT_OpenCL::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
+                                      Function *F) {
   std::vector<unsigned> ConstantMemArgs;
   for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
        ++ai) {
@@ -1959,9 +1959,9 @@ CGT_NVPTX::globalToConstantMemoryOpt(std::vector<unsigned> *GlobalMemArgs,
   return ConstantMemArgs;
 }
 
-Function *CGT_NVPTX::changeArgAddrspace(Function *F,
-                                        std::vector<unsigned> &Args,
-                                        unsigned addrspace) {
+Function *CGT_OpenCL::changeArgAddrspace(Function *F,
+                                         std::vector<unsigned> &Args,
+                                         unsigned addrspace) {
   unsigned idx = 0;
   std::vector<Type *> ArgTypes;
   for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
@@ -1986,7 +1986,7 @@ Function *CGT_NVPTX::changeArgAddrspace(Function *F,
 }
 
 /* Add metadata to module KernelM, for OpenCL kernels */
-void CGT_NVPTX::addCLMetadata(Function *F) {
+void CGT_OpenCL::addCLMetadata(Function *F) {
 
   IRBuilder<> Builder(&*F->begin());
 
@@ -2013,7 +2013,7 @@ void CGT_NVPTX::addCLMetadata(Function *F) {
   MDN_annotations->addOperand(MDNvvmAnnotationsNode);
 }
 
-void CGT_NVPTX::writeKernelsModule() {
+void CGT_OpenCL::writeKernelsModule() {
 
   // In addition to deleting all other functions, we also want to spiff it
   // up a little bit.  Do this now.
@@ -2035,7 +2035,7 @@ void CGT_NVPTX::writeKernelsModule() {
   Out.keep();
 }
 
-Function *CGT_NVPTX::transformFunctionToVoid(Function *F) {
+Function *CGT_OpenCL::transformFunctionToVoid(Function *F) {
 
   DEBUG(errs() << "Transforming function to void: " << F->getName() << "\n");
   // FIXME: Maybe do that using the Node?
@@ -2361,7 +2361,7 @@ static std::string getFilenameFromModule(const Module &M) {
   return moduleID.substr(moduleID.find_last_of("/") + 1);
 }
 
-// Changes the data layout of the Module to be compiled with NVPTX backend
+// Changes the data layout of the Module to be compiled with OpenCL backend
 // TODO: Figure out when to call it, probably after duplicating the modules
 static void changeDataLayout(Module &M) {
   std::string nvptx32_layoutStr = "e-p:32:32-i64:64-v16:16-v32:32-n16:32:64";
@@ -2464,9 +2464,9 @@ static std::string getAtomicOpName(Intrinsic::ID ID) {
 
 } // End of namespace
 
-char DFG2LLVM_NVPTX::ID = 0;
-static RegisterPass<DFG2LLVM_NVPTX> X("dfg2llvm-nvptx",
-		"Dataflow Graph to LLVM for NVPTX Pass",
+char DFG2LLVM_OpenCL::ID = 0;
+static RegisterPass<DFG2LLVM_OpenCL> X("dfg2llvm-nvptx",
+		"Dataflow Graph to LLVM for OpenCL Pass",
 		false /* does not modify the CFG */,
 		true /* transformation,   *
 					* not just analysis */);
diff --git a/hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.exports b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.exports
similarity index 100%
rename from hpvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.exports
rename to hpvm/lib/Transforms/DFG2LLVM_OpenCL/DFG2LLVM_OpenCL.exports
diff --git a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/LLVMBuild.txt
similarity index 84%
rename from hpvm/lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt
rename to hpvm/lib/Transforms/DFG2LLVM_OpenCL/LLVMBuild.txt
index fb7cae49f8452ee6f207e6f0ed87d9ea9d3e65e6..08d8b9d98d66c63cb02b4be8395b57c448482906 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt
+++ b/hpvm/lib/Transforms/DFG2LLVM_OpenCL/LLVMBuild.txt
@@ -1,4 +1,4 @@
-;===- ./lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt ------------*- Conf -*--===;
+;===- ./lib/Transforms/DFG2LLVM_OpenCL/LLVMBuild.txt ------------*- Conf -*--===;
 ;
 ;                     The LLVM Compiler Infrastructure
 ;
@@ -17,5 +17,5 @@
 
 [component_0]
 type = Library
-name = DFG2LLVM_NVPTX
+name = DFG2LLVM_OpenCL
 parent = Transforms
diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.cpp b/hpvm/projects/hpvm-rt/hpvm-rt.cpp
index e0e017c03e017edef7e6c1dfed17ceb8db9d2ba5..b6273ec2cca712469269f68f538ce437e9b062ec 100644
--- a/hpvm/projects/hpvm-rt/hpvm-rt.cpp
+++ b/hpvm/projects/hpvm-rt/hpvm-rt.cpp
@@ -39,7 +39,7 @@ typedef struct {
   std::vector<CircularBuffer<uint64_t> *> *BindOutputBuffers;
   std::vector<CircularBuffer<uint64_t> *> *EdgeBuffers;
   std::vector<CircularBuffer<uint64_t> *> *isLastInputBuffers;
-} DFNodeContext_X86;
+} DFNodeContext_CPU;
 
 typedef struct {
   cl_context clOCLContext;
@@ -212,7 +212,7 @@ static inline void checkErr(cl_int err, cl_int success, const char *name) {
 
 /************************* Depth Stack Routines ***************************/
 
-void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
+void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
                                uint64_t limitY, uint64_t iY, uint64_t limitZ,
                                uint64_t iZ) {
   DEBUG(cout << "Pushing node information on stack:\n");
@@ -226,7 +226,7 @@ void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX, uint64_t iX,
   pthread_mutex_unlock(&ocl_mtx);
 }
 
-void llvm_hpvm_x86_dstack_pop() {
+void llvm_hpvm_cpu_dstack_pop() {
   DEBUG(cout << "Popping from depth stack\n");
   pthread_mutex_lock(&ocl_mtx);
   DStack.pop_back();
@@ -234,7 +234,7 @@ void llvm_hpvm_x86_dstack_pop() {
   pthread_mutex_unlock(&ocl_mtx);
 }
 
-uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim) {
+uint64_t llvm_hpvm_cpu_getDimLimit(unsigned level, unsigned dim) {
   DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level
              << flush << "\n");
   pthread_mutex_lock(&ocl_mtx);
@@ -246,7 +246,7 @@ uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim) {
   return result;
 }
 
-uint64_t llvm_hpvm_x86_getDimInstance(unsigned level, unsigned dim) {
+uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim) {
   DEBUG(cout << "Request instance id for dim " << dim << " of ancestor "
              << level << flush << "\n");
   pthread_mutex_lock(&ocl_mtx);
@@ -350,13 +350,13 @@ static void *llvm_hpvm_ocl_request_mem(void *ptr, size_t size,
   return d_input;
 }
 
-void *llvm_hpvm_x86_argument_ptr(void *ptr, size_t size) {
+void *llvm_hpvm_cpu_argument_ptr(void *ptr, size_t size) {
   return llvm_hpvm_request_mem(ptr, size);
 }
 
 void *llvm_hpvm_request_mem(void *ptr, size_t size) {
   pthread_mutex_lock(&ocl_mtx);
-  DEBUG(cout << "[X86] Request memory: " << ptr << flush << "\n");
+  DEBUG(cout << "[CPU] Request memory: " << ptr << flush << "\n");
   MemTrackerEntry *MTE = MTracker.lookup(ptr);
   if (MTE == NULL) {
     cout << "ERROR: Requesting memory not present in Table\n";
@@ -1152,8 +1152,8 @@ void hpvm_DestroyTimerSet(struct hpvm_TimerSet *timers) {
 
 // Launch API for a streaming dataflow graph
 void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) {
-  DFNodeContext_X86 *Context =
-      (DFNodeContext_X86 *)malloc(sizeof(DFNodeContext_X86));
+  DFNodeContext_CPU *Context =
+      (DFNodeContext_CPU *)malloc(sizeof(DFNodeContext_CPU));
 
   Context->threads = new std::vector<pthread_t>();
   Context->ArgInPortSizeMap = new std::map<unsigned, uint64_t>();
@@ -1176,7 +1176,7 @@ void *llvm_hpvm_streamLaunch(void (*LaunchFunc)(void *, void *), void *args) {
 void llvm_hpvm_streamPush(void *graphID, void *args) {
   DEBUG(cout << "StreamPush -- Graph: " << graphID << ", Arguments: " << args
              << flush << "\n");
-  DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID;
   unsigned offset = 0;
   for (unsigned i = 0; i < Ctx->ArgInPortSizeMap->size(); i++) {
     uint64_t element;
@@ -1198,7 +1198,7 @@ void llvm_hpvm_streamPush(void *graphID, void *args) {
 // Pop API for a streaming dataflow graph
 void *llvm_hpvm_streamPop(void *graphID) {
   DEBUG(cout << "StreamPop -- Graph: " << graphID << flush << "\n");
-  DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID;
   unsigned totalBytes = 0;
   for (uint64_t size : *(Ctx->BindOutSizes))
     totalBytes += size;
@@ -1216,7 +1216,7 @@ void *llvm_hpvm_streamPop(void *graphID) {
 // Wait API for a streaming dataflow graph
 void llvm_hpvm_streamWait(void *graphID) {
   DEBUG(cout << "StreamWait -- Graph: " << graphID << flush << "\n");
-  DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID;
   // Push garbage to all other input buffers
   for (unsigned i = 0; i < Ctx->BindInputBuffers->size(); i++) {
     uint64_t element = 0;
@@ -1235,7 +1235,7 @@ void *llvm_hpvm_createBindInBuffer(void *graphID, uint64_t size,
                                    unsigned inArgPort) {
   DEBUG(cout << "Create BindInBuffer -- Graph: " << graphID
              << ", Size: " << size << flush << "\n");
-  DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
   CircularBuffer<uint64_t> *bufferID =
       new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindIn");
   DEBUG(cout << "\tNew Buffer: " << bufferID << flush << "\n");
@@ -1249,7 +1249,7 @@ void *llvm_hpvm_createBindInBuffer(void *graphID, uint64_t size,
 void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) {
   DEBUG(cout << "Create BindOutBuffer -- Graph: " << graphID
              << ", Size: " << size << flush << "\n");
-  DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
   // Twine name = Twine("Bind.Out.")+Twine(Context->BindOutputBuffers->size());
   CircularBuffer<uint64_t> *bufferID =
       new CircularBuffer<uint64_t>(BUFFER_SIZE, "BindOut");
@@ -1261,7 +1261,7 @@ void *llvm_hpvm_createBindOutBuffer(void *graphID, uint64_t size) {
 void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) {
   DEBUG(cout << "Create EdgeBuffer -- Graph: " << graphID << ", Size: " << size
              << flush << "\n");
-  DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
   // Twine name = Twine("Edge.")+Twine(Context->EdgeBuffers->size());
   CircularBuffer<uint64_t> *bufferID =
       new CircularBuffer<uint64_t>(BUFFER_SIZE, "Edge");
@@ -1274,7 +1274,7 @@ void *llvm_hpvm_createEdgeBuffer(void *graphID, uint64_t size) {
 void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) {
   DEBUG(cout << "Create isLastInputBuffer -- Graph: " << graphID
              << ", Size: " << size << flush << "\n");
-  DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
   // Twine name = Twine("isLastInput.")+Twine(Context->EdgeBuffers->size());
   CircularBuffer<uint64_t> *bufferID =
       new CircularBuffer<uint64_t>(BUFFER_SIZE, "LastInput");
@@ -1286,7 +1286,7 @@ void *llvm_hpvm_createLastInputBuffer(void *graphID, uint64_t size) {
 // Free buffers
 void llvm_hpvm_freeBuffers(void *graphID) {
   DEBUG(cout << "Free all buffers -- Graph: " << graphID << flush << "\n");
-  DFNodeContext_X86 *Context = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Context = (DFNodeContext_CPU *)graphID;
   for (CircularBuffer<uint64_t> *bufferID : *(Context->BindInputBuffers))
     delete bufferID;
   for (CircularBuffer<uint64_t> *bufferID : *(Context->BindOutputBuffers))
@@ -1314,7 +1314,7 @@ void llvm_hpvm_createThread(void *graphID, void *(*Func)(void *),
                             void *arguments) {
   DEBUG(cout << "Create Thread -- Graph: " << graphID << ", Func: " << Func
              << ", Args: " << arguments << flush << "\n");
-  DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID;
   int err;
   pthread_t threadID;
   if ((err = pthread_create(&threadID, NULL, Func, arguments)) != 0)
@@ -1326,16 +1326,16 @@ void llvm_hpvm_createThread(void *graphID, void *(*Func)(void *),
 // Wait for thread to finish
 void llvm_hpvm_freeThreads(void *graphID) {
   DEBUG(cout << "Free Threads -- Graph: " << graphID << flush << "\n");
-  DFNodeContext_X86 *Ctx = (DFNodeContext_X86 *)graphID;
+  DFNodeContext_CPU *Ctx = (DFNodeContext_CPU *)graphID;
   for (pthread_t thread : *(Ctx->threads))
     pthread_join(thread, NULL);
 }
 
 /************************ OPENCL & PTHREAD API ********************************/
 
-void *llvm_hpvm_x86_launch(void *(*rootFunc)(void *), void *arguments) {
-  DFNodeContext_X86 *Context =
-      (DFNodeContext_X86 *)malloc(sizeof(DFNodeContext_X86));
+void *llvm_hpvm_cpu_launch(void *(*rootFunc)(void *), void *arguments) {
+  DFNodeContext_CPU *Context =
+      (DFNodeContext_CPU *)malloc(sizeof(DFNodeContext_CPU));
   // int err;
   // if((err = pthread_create(&Context->threadID, NULL, rootFunc, arguments)) !=
   // 0) cout << "Failed to create pthread. Error code = " << err << flush <<
@@ -1344,9 +1344,9 @@ void *llvm_hpvm_x86_launch(void *(*rootFunc)(void *), void *arguments) {
   return Context;
 }
 
-void llvm_hpvm_x86_wait(void *graphID) {
+void llvm_hpvm_cpu_wait(void *graphID) {
   DEBUG(cout << "Waiting for pthread to finish ...\n");
-  // DFNodeContext_X86* Context = (DFNodeContext_X86*) graphID;
+  // DFNodeContext_CPU* Context = (DFNodeContext_CPU*) graphID;
   // pthread_join(Context->threadID, NULL);
   free(graphID);
   DEBUG(cout << "\t... pthread Done!\n");
@@ -1451,8 +1451,7 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
     DEBUG(cout << "\tNAME = " << buffer << flush << "\n");
     clGetPlatformInfo(platformId, CL_PLATFORM_VENDOR, 10240, buffer, NULL);
     DEBUG(cout << "\tVENDOR = " << buffer << flush << "\n");
-    clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer,
-                      NULL);
+    clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
     DEBUG(cout << "\tEXTENSIONS = " << buffer << flush << "\n");
   } else {
     platformId = findPlatform("intel");
@@ -1466,8 +1465,7 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
     DEBUG(cout << "\tNAME = " << buffer << flush << "\n");
     clGetPlatformInfo(platformId, CL_PLATFORM_VENDOR, 10240, buffer, NULL);
     DEBUG(cout << "\tVENDOR = " << buffer << flush << "\n");
-    clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer,
-                      NULL);
+    clGetPlatformInfo(platformId, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
     DEBUG(cout << "\tEXTENSIONS = " << buffer << flush << "\n");
   }
   DEBUG(cout << "Found plarform with id: " << platformId << "\n");
@@ -1483,7 +1481,7 @@ void *llvm_hpvm_ocl_initContext(enum hpvm::Target T) {
   errcode = clGetContextInfo(globalOCLContext, CL_CONTEXT_DEVICES, 0, NULL,
                              &dataBytes);
   checkErr(errcode, CL_SUCCESS, "Failure to get context info length");
-  
+
   DEBUG(cout << "Got databytes: " << dataBytes << "\n");
 
   clDevices = (cl_device_id *)malloc(dataBytes);
diff --git a/hpvm/projects/hpvm-rt/hpvm-rt.h b/hpvm/projects/hpvm-rt/hpvm-rt.h
index 519b467c9047fbbdeea3a4610bedda3a77c36fe2..94fe5b5ef0d82aca9f7556f7022aa513b9d2cc28 100644
--- a/hpvm/projects/hpvm-rt/hpvm-rt.h
+++ b/hpvm/projects/hpvm-rt/hpvm-rt.h
@@ -64,12 +64,12 @@ public:
   unsigned getNumDim() const { return numDim; }
 };
 
-void llvm_hpvm_x86_dstack_push(unsigned n, uint64_t limitX = 0, uint64_t iX = 0,
+void llvm_hpvm_cpu_dstack_push(unsigned n, uint64_t limitX = 0, uint64_t iX = 0,
                                uint64_t limitY = 0, uint64_t iY = 0,
                                uint64_t limitZ = 0, uint64_t iZ = 0);
-void llvm_hpvm_x86_dstack_pop();
-uint64_t llvm_hpvm_x86_getDimLimit(unsigned level, unsigned dim);
-uint64_t llvm_hpvm_x86_getDimInstance(unsigned level, unsigned dim);
+void llvm_hpvm_cpu_dstack_pop();
+uint64_t llvm_hpvm_cpu_getDimLimit(unsigned level, unsigned dim);
+uint64_t llvm_hpvm_cpu_getDimInstance(unsigned level, unsigned dim);
 
 /********************* Memory Tracker **********************************/
 class MemTrackerEntry {
@@ -148,11 +148,11 @@ void llvm_hpvm_untrack_mem(void *);
 void *llvm_hpvm_request_mem(void *, size_t);
 
 /*********************** OPENCL & PTHREAD API **************************/
-void *llvm_hpvm_x86_launch(void *(void *), void *);
-void llvm_hpvm_x86_wait(void *);
+void *llvm_hpvm_cpu_launch(void *(void *), void *);
+void llvm_hpvm_cpu_wait(void *);
 void *llvm_hpvm_ocl_initContext(enum hpvm::Target);
 
-void *llvm_hpvm_x86_argument_ptr(void *, size_t);
+void *llvm_hpvm_cpu_argument_ptr(void *, size_t);
 
 void llvm_hpvm_ocl_clearContext(void *);
 void llvm_hpvm_ocl_argument_shared(void *, int, size_t);
diff --git a/hpvm/test/benchmarks/hpvm-cava/Makefile b/hpvm/test/benchmarks/hpvm-cava/Makefile
index 07bb7f06c0544dc87c8c4947bf04501e5e410e29..d7caf6688370b137e90ad300830bddd6cb8eacd4 100644
--- a/hpvm/test/benchmarks/hpvm-cava/Makefile
+++ b/hpvm/test/benchmarks/hpvm-cava/Makefile
@@ -61,12 +61,12 @@ TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
 
 ifeq ($(TARGET),seq)
   DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-x86
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
 
diff --git a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk
index 9e0318600a3a2d43ed60922e2f48e7e23ea290a7..8e3ab8e65856d7a80c8477748c2eccfcf7c78219 100755
--- a/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk
+++ b/hpvm/test/benchmarks/parboil/common/mk/hpvm.mk
@@ -19,10 +19,10 @@ KERNEL_GEN_FLAGS = -O3 -target nvptx64-nvidia-nvcl
 
 ifeq ($(TARGET),seq)
   DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
 endif
 
 CFLAGS += -DDEVICE=$(DEVICE)
@@ -30,16 +30,16 @@ CXXFLAGS += -DDEVICE=$(DEVICE)
 
 HOST_LINKFLAGS =
 
-ifeq ($(TIMER),x86)
-  HPVM_OPTFLAGS += -hpvm-timers-x86
+ifeq ($(TIMER),cpu)
+  HPVM_OPTFLAGS += -hpvm-timers-cpu
 else ifeq ($(TIMER),gen)
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
 else ifeq ($(TIMER),no)
 else
   ifeq ($(TARGET),seq)
-    HPVM_OPTFLAGS += -hpvm-timers-x86
+    HPVM_OPTFLAGS += -hpvm-timers-cpu
   else
-    HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx
+    HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
   endif
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
 endif
diff --git a/hpvm/test/benchmarks/pipeline/Makefile b/hpvm/test/benchmarks/pipeline/Makefile
index 7a246a651a06ea67246578371d8797682aea5bfd..36f6a1f9005f3dadcf2a3a97c0ba27d6fb6f0ab2 100644
--- a/hpvm/test/benchmarks/pipeline/Makefile
+++ b/hpvm/test/benchmarks/pipeline/Makefile
@@ -48,12 +48,12 @@ TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
 
 ifeq ($(TARGET),seq)
   DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-x86
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
 
diff --git a/hpvm/test/benchmarks/pipeline/src/main.cc b/hpvm/test/benchmarks/pipeline/src/main.cc
index cda1d975a63fc07c174ed57ddef1e72f0973f033..057c13b62745ba618b13b9f2c1443fb41ca45bdb 100644
--- a/hpvm/test/benchmarks/pipeline/src/main.cc
+++ b/hpvm/test/benchmarks/pipeline/src/main.cc
@@ -143,7 +143,7 @@ void packData(struct InStruct *args, float *I, size_t bytesI, float *Is,
  * Need 2D grid, a thread per pixel
  * No use of separable algorithm because we need to do this in one kernel
  * No use of shared memory because
- * - we don't handle it in the X86 pass
+ * - we don't handle it in the CPU pass
  */
 
 #define GAUSSIAN_SIZE 7
@@ -452,7 +452,7 @@ void WrapperComputeZeroCrossings(float *L, size_t bytesL, float *B,
  * Need 2D grid, a thread per pixel
  * No use of separable algorithm because we need to do this in one kernel
  * No use of shared memory because
- * - we don't handle it in the X86 pass
+ * - we don't handle it in the CPU pass
  */
 
 #define SOBEL_SIZE 3
@@ -834,7 +834,7 @@ int main(int argc, char *argv[]) {
   resize(E, out, Size(HEIGHT, WIDTH));
   imshow(input_window, in);
   imshow(output_window, out);
-//  waitKey(0);
+  //  waitKey(0);
 
   struct InStruct *args = (struct InStruct *)malloc(sizeof(InStruct));
   packData(args, (float *)src.data, I_sz, (float *)Is.data, I_sz,
@@ -873,7 +873,7 @@ int main(int argc, char *argv[]) {
         __hpvm__push(DFG, args);
         void *ret = __hpvm__pop(DFG);
         // This is reading the result of the streaming graph
-        size_t framesize =  ((OutStruct *)ret)->ret;
+        size_t framesize = ((OutStruct *)ret)->ret;
 
         llvm_hpvm_request_mem(maxG, bytesMaxG);
         llvm_hpvm_request_mem(E.data, I_sz);
diff --git a/hpvm/test/benchmarks/template/Makefile b/hpvm/test/benchmarks/template/Makefile
index 5524f05286be7fb8bea1aac163f5732e1f31c966..46b1afe95df2f7f3a3f7e3a71b7952d744da5b65 100644
--- a/hpvm/test/benchmarks/template/Makefile
+++ b/hpvm/test/benchmarks/template/Makefile
@@ -52,12 +52,12 @@ TESTGEN_OPTFLAGS = -load LLVMGenHPVM.so -genhpvm -globaldce
 
 ifeq ($(TARGET),seq)
   DEVICE = CPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-x86
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS += -hpvm-timers-cpu
 else
   DEVICE = GPU_TARGET
-  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
-  HPVM_OPTFLAGS += -hpvm-timers-x86 -hpvm-timers-ptx
+  HPVM_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -load LLVMDFG2LLVM_CPU.so -load LLVMClearDFG.so -localmem -dfg2llvm-nvptx -dfg2llvm-cpu -clearDFG
+  HPVM_OPTFLAGS += -hpvm-timers-cpu -hpvm-timers-ptx
 endif
   TESTGEN_OPTFLAGS += -hpvm-timers-gen
 
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
index 451035b21ede68a4796ebd1a0baa3645a77a31ef..e3570bcb664811af5e07539d93d19cf8fc2bcddf 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.atomic.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.atomic.ll'
 source_filename = "ThreeLevel.constmem.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
index ed99bee9f704b3dff96abcbd50982ec64a38c2d5..b08b951800a0871b3eeb14d61246bfd032ad88e4 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.ll'
 source_filename = "ThreeLevel.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
index 060608fdc5ae28ff52382fd722e7288c5531874f..b3cb659f9600cbbd4b12e9b1131e5c2f5112eb67 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_NVPTX/ThreeLevel.opt.genvisc.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_NVPTX.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMLocalMem.so -load LLVMDFG2LLVM_OpenCL.so -S -localmem -dfg2llvm-nvptx <  %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.opt.ll'
 source_filename = "ThreeLevel.opt.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll
index 1373d13159ee90421d75a2f16e99e3d4a9a24bdd..f7f943a2550cb6745f0146d97ba6cea5cf5d9a6e 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/CreateNode.dfg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu <  %s | FileCheck %s
 ; ModuleID = 'CreateNode.ll'
 source_filename = "CreateNode.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -10,9 +10,9 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: i32 @main(
 ; CHECK: call void @llvm.hpvm.init()
-; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
 ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
-; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
 
 ; CHECK-LABEL: @PipeRoot_cloned(
 ; CHECK: call i8* @llvm.hpvm.createNode(
@@ -23,12 +23,12 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func_cloned.node
 
 ; CHECK-LABEL: @Func_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned
-; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr(
 
 ; CHECK-LABEL: @PipeRoot_cloned.2(
-; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @LaunchDataflowGraph(i8*
 ; call %struct.out.PipeRoot @PipeRoot_cloned.2(
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll
index a60f28a08a3bad2272687169bb1f4778f1bb8b6e..35d63f16ef3ccde72b8827ba63770c1e1afadd9f 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/ThreeLevel.dfg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu <  %s | FileCheck %s
 ; ModuleID = 'ThreeLevel.ll'
 source_filename = "ThreeLevel.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -13,9 +13,9 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: i32 @main(
 ; CHECK: call void @llvm.hpvm.init()
-; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
 ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
-; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
 
 ; CHECK-LABEL: @Func3_cloned(
 ; CHECK: call i8* @llvm.hpvm.createNode2D(
@@ -42,26 +42,26 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node
 
 ; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned
-; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr(
 
 ; CHECK-LABEL: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
 ; CHECK-LABEL: for.body1:
 ; CHECK: %index.y = phi i64 [ 0, %for.body ], [ %index.y.inc, %for.body1 ]
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
 ; CHECK-LABEL: for.body:
 ; CHECK-NEXT: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func3_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @PipeRoot_cloned.4(
-; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func2_cloned.3_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @LaunchDataflowGraph(
 ; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.4(
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll
index 5ce7a58e2189d1a00806979af6bab0cbe1029852..3f74a190f31e707e17f939d9639814443aef642c 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLaunch.dfg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu <  %s | FileCheck %s
 ; ModuleID = 'TwoLaunch.ll'
 source_filename = "TwoLaunch.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -11,12 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: i32 @main(
 ; CHECK: call void @llvm.hpvm.init()
-; CHECK: @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK: @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
 ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
-; CHECK: @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph.7, i8*
+; CHECK: @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph.7, i8*
 ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
-; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
-; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
 
 ; CHECK-LABEL: @Func2_cloned(
 ; CHECK: call i8* @llvm.hpvm.createNode1D(
@@ -35,30 +35,30 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node
 
 ; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr(
 
 ; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
 ; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @PipeRoot_cloned.3(
-; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @LaunchDataflowGraph(i8*
 ; CHECK: call %struct.out.PipeRoot @PipeRoot_cloned.3(
 
 ; CHECK-LABEL: @Func1_cloned.4_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK: @llvm_hpvm_x86_argument_ptr(
+; CHECK: @llvm_hpvm_cpu_argument_ptr(
 
 ; CHECK-LABEL: @Func2_cloned.5_cloned_cloned_cloned_cloned_cloned_cloned(
 ; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func1_cloned.4_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @LaunchDataflowGraph.7(i8*
 ; call %struct.out.PipeRoot @PipeRoot_cloned.6(
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll
index b218b70fd0e32b6e6222e7a14e88ab3a09f57977..f8ee61f1a70120a4e57bb94e272912083b7b3c1a 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/TwoLevel.dfg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu <  %s | FileCheck %s
 ; ModuleID = 'TwoLevel.ll'
 source_filename = "TwoLevel.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -11,9 +11,9 @@ target triple = "x86_64-unknown-linux-gnu"
 
 ; CHECK-LABEL: i32 @main(
 ; CHECK: call void @llvm.hpvm.init()
-; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
 ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8* 
-; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
 
 ; CHECK-LABEL: @Func2_cloned(
 ; CHECK: call i8* @llvm.hpvm.createNode1D(
@@ -32,19 +32,19 @@ target triple = "x86_64-unknown-linux-gnu"
 ; CHECK-NEXT: call void @llvm.hpvm.bind.output(i8* %Func2_cloned.node
 
 ; CHECK-LABEL: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK: call i8* @llvm_hpvm_x86_argument_ptr(
+; CHECK: call i8* @llvm_hpvm_cpu_argument_ptr(
 
 ; CHECK-LABEL: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
 ; CHECK-LABEL: for.body
 ; CHECK: %index.x = phi i64 [ 0, %entry ], [ %index.x.inc, %for.body ]
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_push(
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func1_cloned.1_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @PipeRoot_cloned.3(
-; CHECK: call void @llvm_hpvm_x86_dstack_push(
+; CHECK: call void @llvm_hpvm_cpu_dstack_push(
 ; CHECK-NEXT: @Func2_cloned.2_cloned_cloned_cloned_cloned_cloned_cloned(
-; CHECK-NEXT: call void @llvm_hpvm_x86_dstack_pop()
+; CHECK-NEXT: call void @llvm_hpvm_cpu_dstack_pop()
 
 ; CHECK-LABEL: @LaunchDataflowGraph(i8*
 ; call %struct.out.PipeRoot @PipeRoot_cloned.3(
diff --git a/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll b/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll
index a0f0f6ecfc4b68cbc3f86272fb11cf3702f9b54e..1bfa5f0c0b3eb9237c242d0ba56ee6f17960dfec 100644
--- a/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll
+++ b/hpvm/test/regressionTests/DFG2LLVM_X86/oneLaunchAlloca.dfg.ll
@@ -1,4 +1,4 @@
-; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -S -dfg2llvm-x86 <  %s | FileCheck %s
+; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_CPU.so -S -dfg2llvm-cpu <  %s | FileCheck %s
 ; ModuleID = 'oneLaunchAlloca.ll'
 source_filename = "oneLaunchAlloca.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -13,9 +13,9 @@ declare dso_local void @__hpvm__attributes(i32, ...) local_unnamed_addr #0
 
 ; CHECK-LABEL: i32 @main(
 ; CHECK: call void @llvm.hpvm.init()
-; CHECK: call i8* @llvm_hpvm_x86_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
+; CHECK: call i8* @llvm_hpvm_cpu_launch(i8* (i8*)* @LaunchDataflowGraph, i8*
 ; CHECK-NEXT: call i8* @llvm.hpvm.launch(i8*
-; CHECK-NEXT: call void @llvm_hpvm_x86_wait(i8*
+; CHECK-NEXT: call void @llvm_hpvm_cpu_wait(i8*
 
 ; CHECK-LABEL: @PipeRoot_cloned.1(
 
diff --git a/hpvm/test/unitTests/ThreeLevel.ll b/hpvm/test/unitTests/ThreeLevel.ll
index d8bf050234264e55be6af269e40ab5f2ef36a03b..840a2b5685d33d02584b72d96482fedda9a52fb6 100644
--- a/hpvm/test/unitTests/ThreeLevel.ll
+++ b/hpvm/test/unitTests/ThreeLevel.ll
@@ -1,4 +1,4 @@
-; RUN: opt - load LLVMGenHPVM.so -S -genhpvm < %s
+; RUN: opt -load LLVMGenHPVM.so -S -genhpvm < %s
 ; ModuleID = 'TwoLevel.c'
 source_filename = "TwoLevel.c"
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu"
 define dso_local void @Func1(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 {
 entry:
   tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32* %Out, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #3
   %0 = load i32, i32* %In, align 4, !tbaa !2
   store i32 %0, i32* %Out, align 4, !tbaa !2
   tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3
diff --git a/hpvm/test/unitTests/TwoLevel.ll b/hpvm/test/unitTests/TwoLevel.ll
index 0289319517b7d6a1f83f7b64d615bcbd72630821..840a2b5685d33d02584b72d96482fedda9a52fb6 100644
--- a/hpvm/test/unitTests/TwoLevel.ll
+++ b/hpvm/test/unitTests/TwoLevel.ll
@@ -10,7 +10,7 @@ target triple = "x86_64-unknown-linux-gnu"
 define dso_local void @Func1(i32* %In, i64 %Insize, i32* %Out, i64 %Outsize) #0 {
 entry:
   tail call void @__hpvm__hint(i32 1) #3
-  tail call void (i32, ...) @__hpvm__attributes(i32 1, i32* %In, i32* %Out, i32 1, i32* %Out) #3
+  tail call void (i32, ...) @__hpvm__attributes(i32 2, i32* %In, i32* %Out, i32 1, i32* %Out) #3
   %0 = load i32, i32* %In, align 4, !tbaa !2
   store i32 %0, i32* %Out, align 4, !tbaa !2
   tail call void (i32, ...) @__hpvm__return(i32 1, i32* %Out) #3