diff --git a/hpvm/lib/Transforms/GenVISC/GenHPVM.cpp b/hpvm/lib/Transforms/GenVISC/GenHPVM.cpp
index a4d9f2c2a44da99750b5915b8197b06793e75fd9..2d62ffda0e483687eebe8e7b5d8bc5d4d4fb9466 100644
--- a/hpvm/lib/Transforms/GenVISC/GenHPVM.cpp
+++ b/hpvm/lib/Transforms/GenVISC/GenHPVM.cpp
@@ -1,4 +1,4 @@
-//=== GenVISC.cpp - Implements "Hierarchical Dataflow Graph Builder Pass" ===//
+//=== GenHPVM.cpp - Implements "Hierarchical Dataflow Graph Builder Pass" ===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,841 +6,338 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
+//
+// This pass takes LLVM IR containing HPVM-C function calls and generates the
+// textual representation of HPVM IR built from HPVM intrinsics. The mem2reg
+// (memory-to-register) pass is expected to run before this pass.
+//
+//===----------------------------------------------------------------------===//
 
-#define DEBUG_TYPE "genvisc"
-#include "llvm/GenVISC/GenVISC.h"
+#define DEBUG_TYPE "genhpvm"
+#include "GenHPVM/GenHPVM.h"
 
+#include "SupportHPVM/HPVMHint.h"
+#include "SupportHPVM/HPVMUtils.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/IR/CallSite.h"
+#include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Support/SourceMgr.h"
+#include "llvm/IR/Instructions.h"
 #include "llvm/IRReader/IRReader.h"
-#include "llvm/IR/DerivedTypes.h"
-#include "llvm/SupportVISC/VISCHint.h"
-#include "llvm/SupportVISC/VISCUtils.h"
-#include "llvm/Support/raw_ostream.h"
 #include "llvm/Support/Debug.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/IR/Instructions.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/SupportVISC/VISCUtils.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+
+#define TIMER(X)                                                               \
+  do {                                                                         \
+    if (HPVMTimer) {                                                           \
+      X;                                                                       \
+    }                                                                          \
+  } while (0)
 
+#ifndef LLVM_BUILD_DIR
+#error LLVM_BUILD_DIR is not defined
+#endif
+
+#define STR_VALUE(X) #X
+#define STRINGIFY(X) STR_VALUE(X)
+#define LLVM_BUILD_DIR_STR STRINGIFY(LLVM_BUILD_DIR)
 
 using namespace llvm;
-using namespace viscUtils;
+using namespace hpvmUtils;
 
+// Command-line option "hpvm-timers-gen": enables code wrapped in TIMER(...)
+static cl::opt<bool> HPVMTimer("hpvm-timers-gen",
+                               cl::desc("Enable GenHPVM timer"));
 
-namespace genvisc {
+namespace genhpvm {
 
 // Helper Functions
 
-static Function* transformReturnTypeToStruct(Function* F);
-static Type* getReturnTypeFromReturnInst(Function* F);
-
-// Check if the dummy function call is a __visc__node call
-#define IS_VISC_CALL(callName) \
-  static bool isVISCCall_##callName(Instruction* I) { \
-    if(!isa<CallInst>(I)) \
-      return false; \
-    CallInst* CI = cast<CallInst>(I); \
-    return (CI->getCalledValue()->stripPointerCasts()->getName()).equals("__visc__"#callName); \
+static inline ConstantInt *getTimerID(Module &, enum hpvm_TimerID);
+static Function *transformReturnTypeToStruct(Function *F);
+static Type *getReturnTypeFromReturnInst(Function *F);
+
+// Defines isHPVMCall_<name>(I): true iff I is a call to "__hpvm__<name>"
+#define IS_HPVM_CALL(callName)                                                 \
+  static bool isHPVMCall_##callName(Instruction *I) {                          \
+    if (!isa<CallInst>(I))                                                     \
+      return false;                                                            \
+    CallInst *CI = cast<CallInst>(I);                                          \
+    return (CI->getCalledValue()->stripPointerCasts()->getName())              \
+        .equals("__hpvm__" #callName);                                         \
   }
 
-static void ReplaceCallWithIntrinsic(Instruction* I, Intrinsic::ID IntrinsicID, std::vector<Instruction*>* Erase) {
+// Replace dummy call I by a call to IntrinsicID; I is queued in Erase if set.
+static void ReplaceCallWithIntrinsic(Instruction *I, Intrinsic::ID IntrinsicID,
+                                     std::vector<Instruction *> *Erase) {
   // Check if the instruction is Call Instruction
   assert(isa<CallInst>(I) && "Expecting CallInst");
-  CallInst* CI = cast<CallInst>(I);
+  CallInst *CI = cast<CallInst>(I);
   DEBUG(errs() << "Found call: " << *CI << "\n");
 
   // Find the correct intrinsic call
-  Module* M = CI->getParent()->getParent()->getParent();
-  Function* F;
-  std::vector<Type*> ArgTypes;
-  std::vector<Value*> args;
-  if(Intrinsic::isOverloaded(IntrinsicID)) {
+  Module *M = CI->getModule();
+  Function *F;
+  std::vector<Type *> ArgTypes;
+  std::vector<Value *> args;
+  if (Intrinsic::isOverloaded(IntrinsicID)) {
     // This is an overloaded intrinsic. The types must exactly match. Get the
     // argument types
-    for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
+    for (unsigned i = 0; i < CI->getNumArgOperands(); i++) {
       ArgTypes.push_back(CI->getArgOperand(i)->getType());
       args.push_back(CI->getArgOperand(i));
     }
     F = Intrinsic::getDeclaration(M, IntrinsicID, ArgTypes);
     DEBUG(errs() << *F << "\n");
-  }
-  else { // Non-overloaded intrinsic
+  } else { // Non-overloaded intrinsic
     F = Intrinsic::getDeclaration(M, IntrinsicID);
-    FunctionType* FTy = F->getFunctionType();
+    FunctionType *FTy = F->getFunctionType();
     DEBUG(errs() << *F << "\n");
 
     // Create argument list
-    assert(CI->getNumArgOperands() == FTy->getNumParams()
-        && "Number of arguments of call do not match with Intrinsic");
-    for(unsigned i=0; i < CI->getNumArgOperands(); i++) {
-      Value* V = CI->getArgOperand(i);
+    assert(CI->getNumArgOperands() == FTy->getNumParams() &&
+           "Number of arguments of call do not match with Intrinsic");
+    for (unsigned i = 0; i < CI->getNumArgOperands(); i++) {
+      Value *V = CI->getArgOperand(i);
       // Either the type should match or both should be of pointer type
       assert((V->getType() == FTy->getParamType(i) ||
-          (V->getType()->isPointerTy() && FTy->getParamType(i)->isPointerTy()))
-          && "Dummy function call argument does not match with Intrinsic argument!");
+              (V->getType()->isPointerTy() &&
+               FTy->getParamType(i)->isPointerTy())) &&
+             "Dummy function call argument does not match with Intrinsic "
+             "argument!");
       // If the types do not match, then both must be pointer type and pointer
       // cast needs to be performed
-      if(V->getType() != FTy->getParamType(i)) {
+      if (V->getType() != FTy->getParamType(i)) {
         V = CastInst::CreatePointerCast(V, FTy->getParamType(i), "", CI);
       }
       args.push_back(V);
     }
   }
   // Insert call instruction
-  CallInst* Inst = CallInst::Create(F, args, F->getReturnType()->isVoidTy()? "" : CI->getName(), CI);
+  CallInst *Inst = CallInst::Create(
+      F, args, F->getReturnType()->isVoidTy() ? "" : CI->getName(), CI);
 
   DEBUG(errs() << "\tSubstitute with: " << *Inst << "\n");
 
   CI->replaceAllUsesWith(Inst);
-  // If the previous instruction needs to be erased, insert it in the vector
-  // Erased
-  if(Erase != NULL)
+  // The old call is not erased here; it is only recorded when Erase is given
+  if (Erase != nullptr)
     Erase->push_back(CI);
 }
 
-IS_VISC_CALL(launch) /* Exists but not required */
-IS_VISC_CALL(edge) /* Exists but not required */
-IS_VISC_CALL(createNodeND)
-//IS_VISC_CALL(createNode)
-//IS_VISC_CALL(createNode1D)
-//IS_VISC_CALL(createNode2D)
-//IS_VISC_CALL(createNode3D)
-IS_VISC_CALL(bindIn)
-IS_VISC_CALL(bindOut)
-IS_VISC_CALL(push)
-IS_VISC_CALL(pop)
-IS_VISC_CALL(getNode)
-IS_VISC_CALL(getParentNode)
-IS_VISC_CALL(barrier)
-IS_VISC_CALL(malloc)
-IS_VISC_CALL(return)
-IS_VISC_CALL(getNodeInstanceID_x)
-IS_VISC_CALL(getNodeInstanceID_y)
-IS_VISC_CALL(getNodeInstanceID_z)
-IS_VISC_CALL(getNumNodeInstances_x)
-IS_VISC_CALL(getNumNodeInstances_y)
-IS_VISC_CALL(getNumNodeInstances_z)
+IS_HPVM_CALL(launch) /* Exists but not required */
+IS_HPVM_CALL(edge)   /* Exists but not required */
+IS_HPVM_CALL(createNodeND)
+IS_HPVM_CALL(bindIn)
+IS_HPVM_CALL(bindOut)
+IS_HPVM_CALL(push)
+IS_HPVM_CALL(pop)
+IS_HPVM_CALL(getNode)
+IS_HPVM_CALL(getParentNode)
+IS_HPVM_CALL(barrier)
+IS_HPVM_CALL(malloc)
+IS_HPVM_CALL(return )
+IS_HPVM_CALL(getNodeInstanceID_x)
+IS_HPVM_CALL(getNodeInstanceID_y)
+IS_HPVM_CALL(getNodeInstanceID_z)
+IS_HPVM_CALL(getNumNodeInstances_x)
+IS_HPVM_CALL(getNumNodeInstances_y)
+IS_HPVM_CALL(getNumNodeInstances_z)
 // Atomics
-IS_VISC_CALL(atomic_cmpxchg)
-IS_VISC_CALL(atomic_add)
-IS_VISC_CALL(atomic_sub)
-IS_VISC_CALL(atomic_xchg)
-IS_VISC_CALL(atomic_inc)
-IS_VISC_CALL(atomic_dec)
-IS_VISC_CALL(atomic_min)
-IS_VISC_CALL(atomic_max)
-IS_VISC_CALL(atomic_umin)
-IS_VISC_CALL(atomic_umax)
-IS_VISC_CALL(atomic_and)
-IS_VISC_CALL(atomic_or)
-IS_VISC_CALL(atomic_xor)
+IS_HPVM_CALL(atomic_add)
+IS_HPVM_CALL(atomic_sub)
+IS_HPVM_CALL(atomic_xchg)
+IS_HPVM_CALL(atomic_min)
+IS_HPVM_CALL(atomic_max)
+IS_HPVM_CALL(atomic_and)
+IS_HPVM_CALL(atomic_or)
+IS_HPVM_CALL(atomic_xor)
 // Misc Fn
-IS_VISC_CALL(floor)
-IS_VISC_CALL(rsqrt)
-IS_VISC_CALL(sqrt)
-IS_VISC_CALL(sin)
-IS_VISC_CALL(cos)
-
-
-IS_VISC_CALL(init)
-IS_VISC_CALL(node)
-IS_VISC_CALL(cleanup)
-IS_VISC_CALL(wait)
-IS_VISC_CALL(trackMemory)
-IS_VISC_CALL(untrackMemory)
-IS_VISC_CALL(requestMemory)
-IS_VISC_CALL(attributes)
-IS_VISC_CALL(hint)
+IS_HPVM_CALL(sin)
+IS_HPVM_CALL(cos)
+
+IS_HPVM_CALL(init)
+IS_HPVM_CALL(cleanup)
+IS_HPVM_CALL(wait)
+IS_HPVM_CALL(trackMemory)
+IS_HPVM_CALL(untrackMemory)
+IS_HPVM_CALL(requestMemory)
+IS_HPVM_CALL(attributes)
+IS_HPVM_CALL(hint)
 
 // Tensor Operators
-IS_VISC_CALL(tensor_mul)
-IS_VISC_CALL(tensor_convolution)
-IS_VISC_CALL(tensor_group_convolution)
-IS_VISC_CALL(tensor_batchnorm)
-IS_VISC_CALL(tensor_add)
-IS_VISC_CALL(tensor_pool_max)
-IS_VISC_CALL(tensor_pool_min)
-IS_VISC_CALL(tensor_pool_mean)
-IS_VISC_CALL(tensor_relu)
-IS_VISC_CALL(tensor_clipped_relu)
-IS_VISC_CALL(tensor_tanh)
-IS_VISC_CALL(tensor_sigmoid)
-IS_VISC_CALL(tensor_softmax)
-
-IS_VISC_CALL(node_id)
-
+IS_HPVM_CALL(tensor_mul)
+IS_HPVM_CALL(tensor_convolution)
+IS_HPVM_CALL(tensor_group_convolution)
+IS_HPVM_CALL(tensor_batchnorm)
+IS_HPVM_CALL(tensor_add)
+IS_HPVM_CALL(tensor_pool_max)
+IS_HPVM_CALL(tensor_pool_min)
+IS_HPVM_CALL(tensor_pool_mean)
+IS_HPVM_CALL(tensor_relu)
+IS_HPVM_CALL(tensor_clipped_relu)
+IS_HPVM_CALL(tensor_tanh)
+IS_HPVM_CALL(tensor_sigmoid)
+IS_HPVM_CALL(tensor_softmax)
 
 // Return the constant integer represented by value V
-static unsigned getNumericValue(Value* V) {
-  assert(isa<ConstantInt>(V)
-         && "Value indicating the number of arguments should be a constant integer");
+static unsigned getNumericValue(Value *V) {
+  assert(isa<ConstantInt>(V) &&
+      "Value indicating the number of arguments should be a constant integer");
   return cast<ConstantInt>(V)->getZExtValue();
 }
 
-
-
-// Add <numArgs> to the argument list of Function <F>. The names for these arguments
-// should be put in the string array <names>. Ideally the length of <names>
-// array should be numArgs. But, even when the length is not numArgs the
-// arguments would be added correctly. The names however would not be as
-// intuitive.
-static Function* addArgs(Function* F, unsigned numArgs, std::string names[]) {
-  if(numArgs == 0) return F; // Return if no arguments are to be added.
-
-  // Create the argument type list with added argument types
-  std::vector<Type*> ArgTypes;
-  for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-      ai != ae; ++ai) {
-    ArgTypes.push_back(ai->getType());
-  }
-  // Adding new arguments to the function argument list, would not change the
-  // function type. We need to change the type of this function to reflect the
-  // added arguments
-  for(unsigned i = 0; i < numArgs; ++i) {
-//    ArgTypes.push_back(Type::getInt32Ty(F->getContext()));
-    ArgTypes.push_back(Type::getInt64Ty(F->getContext()));
-  }
-  FunctionType* newFT = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg());
-
-  // Change the function type
-  Function* newF = cloneFunction(F, newFT, false);
-
-  // Add names to the extra arguments to the Function argument list
-  unsigned numOldArgs = F->getFunctionType()->getNumParams();
-  for(Function::arg_iterator ai = newF->arg_begin(), ae = newF->arg_end();
-      ai != ae; ++ai) {
-    if (ai->getArgNo() < numOldArgs)
-      continue;
-    ai->setName(names[(ai->getArgNo() - numOldArgs) % names->size()]);
-  }
-
-  replaceNodeFunctionInIR(*F->getParent(), F, newF);
-  return newF;
-}
-
-
-// Take the __visc__return instruction and generate code for combining the
-// values being returned into a struct and returning it.
-// The first operand is the number of returned values
+// Lower a __hpvm__return call: combine the returned values into one packed
+// struct value (a chain of insertvalue instructions placed before CI) and
+// return it. The first operand is the number of returned values (non-zero).
-static Value* genCodeForReturn(CallInst* CI) {
-  LLVMContext& Ctx = CI->getContext();
-  assert(isVISCCall_return(CI)
-      && "__visc__return instruction expected!");
+static Value *genCodeForReturn(CallInst *CI) {
+  LLVMContext &Ctx = CI->getContext();
+  assert(isHPVMCall_return(CI) && "__hpvm__return instruction expected!");
 
   // Parse the dummy function call here
-  assert(CI->getNumArgOperands() > 0 && "Too few arguments for __visc_return call!\n");
+  assert(CI->getNumArgOperands() > 0 &&
+         "Too few arguments for __hpvm__return call!");
   unsigned numRetVals = getNumericValue(CI->getArgOperand(0));
 
-  assert(CI->getNumArgOperands()-1 == numRetVals &&
-         "Too few arguments for __visc_return call!\n");
+  assert(numRetVals > 0 && CI->getNumArgOperands() - 1 == numRetVals &&
+         "__hpvm__return needs >= 1 value and a matching value count!");
   DEBUG(errs() << "\tNum of return values = " << numRetVals << "\n");
 
-  std::vector<Type*> ArgTypes;
-  for(unsigned i=1; i < CI->getNumArgOperands(); i++) {
+  std::vector<Type *> ArgTypes;
+  for (unsigned i = 1; i < CI->getNumArgOperands(); i++)
     ArgTypes.push_back(CI->getArgOperand(i)->getType());
-  }
-  Twine outTyName = "struct.out." + CI->getParent()->getParent()->getName();
-  StructType* RetTy = StructType::create(Ctx, ArgTypes, outTyName.str(), true);
+  // Build the name eagerly: a Twine must not be stored in a local variable.
+  std::string outTyName = ("struct.out." + CI->getFunction()->getName()).str();
+  StructType *RetTy = StructType::create(Ctx, ArgTypes, outTyName, true);
 
-  InsertValueInst* IV = InsertValueInst::Create(UndefValue::get(RetTy),
-                                                CI->getArgOperand(1),
-                                                0,
-                                                "returnStruct",
-                                                CI);
+  InsertValueInst *IV = InsertValueInst::Create(
+      UndefValue::get(RetTy), CI->getArgOperand(1), 0, "returnStruct", CI);
   DEBUG(errs() << "Code generation for return:\n");
   DEBUG(errs() << *IV << "\n");
 
-  for(unsigned i=2; i < CI->getNumArgOperands(); i++) {
-    IV = InsertValueInst::Create(IV,
-                                 CI->getArgOperand(i),
-                                 i-1,
-                                 IV->getName(),
+  for (unsigned i = 2; i < CI->getNumArgOperands(); i++) {
+    IV = InsertValueInst::Create(IV, CI->getArgOperand(i), i - 1, IV->getName(),
                                  CI);
     DEBUG(errs() << *IV << "\n");
   }
-  
-  return IV;
-}
 
-// The visc launch intrinsic requires all the input parameters to the kernel
-// function be placed in contiguous memory and pointer to that input be passed
-// as the second argument to the launch intrinsic. This generates code to bring
-// together all the input and dimension arguments in one packed struct
-// <InStruct>. First pack the arguments to the kernel function and then add the
-// dimension arguments depending on the hierarchy of DFG user wants to generate.
-static void marshallArguments(unsigned levels, unsigned numArgs, unsigned argOffset, unsigned numDims, unsigned dimOffset, Value* InStruct, CallInst* CI, Function* KernelF) {
-  DEBUG(errs() << "Kernel Function = " << KernelF->getName() << "\n");
-
-  // Get module context and i32 0 constant, as they would be frequently used in
-  // this function.
-  LLVMContext& Ctx = CI->getParent()->getContext();
-  Constant* IntZero = ConstantInt::get(Type::getInt32Ty(Ctx), 0);
-
-  // Find the arguments to be passed to kernel function and pack them in a
-  // struct. Specifically first generate a GEP instruction to find the correct
-  // memory location in InStruct and then generate Store instruction to store
-  // the argument in that location.
-  Function::arg_iterator ai = KernelF->arg_begin();
-  Function::arg_iterator ae = KernelF->arg_end();
-
-  for(unsigned i = 0; i < numArgs && ai != ae; i++, ai++) {
-    Value* arg = CI->getArgOperand(i+argOffset);
-    DEBUG(errs() << "Argument: " << ai->getName() << "\n");
-    DEBUG(errs() << "Passing: " << *arg << "\n");
-    // Create constant int (i)
-    Constant* Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i);
-    // Get Element pointer instruction
-    Value* GEPIndices[] = { IntZero, Int_i };
-    GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, InStruct,
-                             ArrayRef<Value*>(GEPIndices, 2),
-                             InStruct->getName()+"."+ai->getName(),
-                             CI);
-    // Store instruction
-    if(GEP->getType()->getPointerElementType() != arg->getType()) {
-      // Arguments type might not match with the kernel function definition
-      // One reason might be because of default argument promotions, where all
-      // arguments of type float are always promoted to double and types char,
-      // short int are promoted to int.
-      // LLVM 4.0 also promotes pointers to i8*. In case both are pointer types,
-      // we just issue a warning and cast it to appropriate type
-      if(arg->getType() == Type::getDoubleTy(Ctx)) {
-        DEBUG(errs() << "Cast from " << *arg->getType() << " To " <<
-            *GEP->getType()->getPointerElementType() << "\n");
-        CastInst* CastI = BitCastInst::CreateFPCast(arg,
-            GEP->getType()->getPointerElementType(), GEP->getName()+".cast",
-            CI);
-        new StoreInst(CastI, GEP, CI);
-      } else if (arg->getType() == Type::getInt32Ty(Ctx)) {
-        CastInst* CastI = BitCastInst::CreateIntegerCast(arg,
-            GEP->getType()->getPointerElementType(), false,
-            GEP->getName()+".cast", CI);
-        new StoreInst(CastI, GEP, CI);
-      } else if (arg->getType()->isPointerTy() && GEP->getType()->getPointerElementType()->isPointerTy()) {
-        errs() << "WARNING: Argument type mismatch between kernel and __visc__node call. Forcing cast\n";
-        CastInst* CastI = CastInst::CreatePointerCast(arg,
-            GEP->getType()->getPointerElementType(), GEP->getName()+".cast",
-            CI);
-        new StoreInst(CastI, GEP, CI);
-      } else {
-        errs() << "Error: Mismatch in argument types\n";
-        errs() << "__visc__node call: " << *CI << "\n";
-        errs() << "Argument: " << *arg << "\n";
-        errs() << "Expected: " << *ai << "\n";
-        llvm_unreachable("Mismatch in argument types of kernel function and __visc__node call");
-      }
-    } else {
-      new StoreInst(arg, GEP, CI);
-    }
-  }
-
-  // Based on the hierarchy of the DFG we want, we need to pass the dimension
-  // for each level. The number of dimensions we need to pass to the launch
-  // intrinsic is the product of the number of levels and dimesions at each
-  // level.
-  // Marshall dim arguments
-  DEBUG(errs() << *CI << "\n");
-  std::string names[] = {"dimX", "dimY", "dimZ"};
-  for(unsigned i=0; i< numDims*levels; i++) {
-    Value* arg = CI->getArgOperand(i+dimOffset);
-    DEBUG(errs() << "Passing: " << *arg << "\n");
-    // Create constant int (i)
-    Constant* Int_i = ConstantInt::get(Type::getInt32Ty(Ctx), i+numArgs);
-    // Get Element pointer instruction
-    Value* GEPIndices[] = { IntZero, Int_i };
-    GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, InStruct,
-                             ArrayRef<Value*>(GEPIndices, 2),
-                             InStruct->getName()+"."+names[i%numDims]+Twine(i/levels),
-                             CI);
-    // Store instruction
-    DEBUG(errs() << *arg << " " << *GEP << "\n");
-    StoreInst* SI = new StoreInst(arg, GEP, CI);
-    DEBUG(errs() << *SI << "\n");
-
-  }
-}
-
-// Returns vector of all wait instructions, waiting on the passed graphID value
-static std::vector<CallInst*>* getWaitList(Value* GraphID) {
-  DEBUG(errs() << "Getting Uses of: " << *GraphID << "\n");
-  std::vector<CallInst*>* WaitList = new std::vector<CallInst*>();
-  // It must have been loaded from memory somewhere
-  for(Value::user_iterator ui = GraphID->user_begin(),
-      ue = GraphID->user_end(); ui!=ue; ++ui) {
-    if(CallInst* waitI = dyn_cast<CallInst>(*ui)) {
-      DEBUG(errs() << "Use: " << *waitI << "\n");
-      assert(isVISCCall_wait(waitI)
-             && "GraphID can only be used by __visc__wait call");
-      WaitList->push_back(waitI);
-    }
-    //else if (PHINode* PN = dyn_cast<PHINode>(*ui)){
-      //errs() << "Found PhiNode use of graphID\n";
-      //std::vector<CallInst*>* phiWaitList  = getWaitList(PN);
-      //WaitList->insert(WaitList->end(), phiWaitList->begin(), phiWaitList->end());
-      //free(phiWaitList);
-    //}
-    else {
-      DEBUG(errs() << *(*ui) << "\n");
-      llvm_unreachable("Error: Operation on Graph ID not supported!\n");
-    }
-  }
-  return WaitList;
+  return IV;
 }
 
 // Analyse the attribute call for this function. Add the in and out
 // attributes to pointer parameters.
-static void handleVISCAttributes(Function* F, CallInst* CI) {
-  DEBUG(errs() << "Kernel before adding In/Out VISC attributes:\n" << *F << "\n");
+static void handleHPVMAttributes(Function *F, CallInst *CI) {
+  DEBUG(errs() << "Kernel before adding In/Out HPVM attributes:\n"
+               << *F << "\n");
   // Parse the dummy function call here
   unsigned offset = 0;
   // Find number of In pointers
-  assert(CI->getNumArgOperands() > offset
-         && "Too few arguments for __visc__attributes call!");
+  assert(CI->getNumArgOperands() > offset &&
+         "Too few arguments for __hpvm__attributes call!");
   unsigned numInPtrs = getNumericValue(CI->getArgOperand(offset));
   DEBUG(errs() << "\tNum of in pointers = " << numInPtrs << "\n");
 
-  for(unsigned i = offset+1; i< offset+1+numInPtrs; i++) {
-    Value* V = CI->getArgOperand(i);
-    if(Argument* arg = dyn_cast<Argument>(V)) {
-      F->addAttribute(1+arg->getArgNo(), Attribute::In);
-    }
-    else {
-      errs() << "Invalid argument to __visc__attribute: " << *V << "\n";
-      llvm_unreachable("Only pointer arguments can be passed to __visc__attributes call");
+  for (unsigned i = offset + 1; i < offset + 1 + numInPtrs; i++) {
+    Value *V = CI->getArgOperand(i);
+    if (Argument *arg = dyn_cast<Argument>(V)) {
+      F->addAttribute(1 + arg->getArgNo(), Attribute::In);
+    } else {
+      DEBUG(errs() << "Invalid argument to __hpvm__attribute: " << *V << "\n");
+      llvm_unreachable(
+          "Only pointer arguments can be passed to __hpvm__attributes call");
     }
   }
   // Find number of Out Pointers
   offset += 1 + numInPtrs;
-  assert(CI->getNumArgOperands() > offset
-         && "Too few arguments for __visc__attributes call!");
+  assert(CI->getNumArgOperands() > offset &&
+         "Too few arguments for __hpvm__attributes call!");
   unsigned numOutPtrs = getNumericValue(CI->getOperand(offset));
   DEBUG(errs() << "\tNum of out Pointers = " << numOutPtrs << "\n");
-  for(unsigned i = offset+1; i< offset+1+numOutPtrs; i++) {
-    Value* V = CI->getArgOperand(i);
-    if(Argument* arg = dyn_cast<Argument>(V)) {
-      F->addAttribute(1+arg->getArgNo(), Attribute::Out);
-    }
-    else {
-      errs() << "Invalid argument to __visc__attribute: " << *V << "\n";
-      llvm_unreachable("Only pointer arguments can be passed to __visc__attributes call");
+  for (unsigned i = offset + 1; i < offset + 1 + numOutPtrs; i++) {
+    Value *V = CI->getArgOperand(i);
+    if (Argument *arg = dyn_cast<Argument>(V)) {
+      F->addAttribute(1 + arg->getArgNo(), Attribute::Out);
+    } else {
+      DEBUG(errs() << "Invalid argument to __hpvm__attribute: " << *V << "\n");
+      llvm_unreachable(
+          "Only pointer arguments can be passed to __hpvm__attributes call");
     }
   }
-  DEBUG(errs() << "Kernel after adding In/Out VISC attributes:\n" << *F << "\n");
+  DEBUG(errs() << "Kernel after adding In/Out HPVM attributes:\n"
+               << *F << "\n");
 }
 
-// Recursively generate internal nodes for all the levels. Node at each level
-// will create the appropriate instances of the child node at that level using
-// the visc createNode intrinsic, and pass on the remaining dimensions to the
-// child node.
-static Function* genInternalNode(Function* KernelF, unsigned level,
-                                 unsigned numArgs, unsigned numDims, unsigned dimOffset, CallInst* CI) {
-  // Create new function with the same type
-  Module* module = KernelF->getParent();
-  Function* ChildNodeF;
-
-  // Recursively generate node for lower level
-  if(level > 1) {
-    ChildNodeF = genInternalNode(KernelF, level-1, numArgs, numDims, dimOffset, CI);
-    addHint(ChildNodeF, getPreferredTarget(KernelF));
-//    Internal nodes always get a CPU hint. If code geneation for them is not
-//     needed and can be skipped, this is handled by the accelerator backends
-//    addHint(ChildNodeF, visc::CPU_TARGET);
-  } else {
-    ChildNodeF = KernelF;
-  }
-
-  // Generate Internal node for current level
-  Function* InternalF = Function::Create(ChildNodeF->getFunctionType(),
-                                         ChildNodeF->getLinkage(),
-                                         KernelF->getName()+"Internal_level"+Twine(level),
-                                         module);
-  // Create a basic block in this function
-  BasicBlock *BB = BasicBlock::Create(InternalF->getContext(), "entry", InternalF);
-  ReturnInst* RI = ReturnInst::Create(InternalF->getContext(),
-                                      UndefValue::get(InternalF->getReturnType()), BB);
-  // Copy correct attributes
-  InternalF->setAttributes(ChildNodeF->getAttributes());
-  // Loop over the arguments, copying the names of arguments over.
-  Function::arg_iterator dest_iterator = InternalF->arg_begin();
-  for (Function::const_arg_iterator i = ChildNodeF->arg_begin(), e = ChildNodeF->arg_end();
-       i != e; ++i, ++dest_iterator) {
-    DEBUG(errs() << "Copying argument: " << i->getName() << "\n");
-    dest_iterator->setName(i->getName()); // Copy the name over...
-    DEBUG(errs() << "New Argument: " << *dest_iterator << "\n");
-  }
-
-  // Add extra dimesnion arguments
-  std::string dimNames[] = {"dimX", "dimY", "dimZ"};
-  DEBUG(errs() << "Adding extra args to function Function:\n" << *InternalF << "\n");
-  InternalF = addArgs(InternalF, numDims, dimNames);
-  // update RI
-  RI = cast<ReturnInst>(InternalF->getEntryBlock().getTerminator());
-  DEBUG(errs() << "After Adding extra args to function Function:\n" << *InternalF << "\n");
-
-  // Insert createNode intrinsic
-  // First generate constant expression to bitcast the function pointer to
-  // internal node to i8*
-  Value* NodeF = ConstantExpr::getPointerCast(ChildNodeF, Type::getInt8PtrTy(module->getContext()));
-
-  // Use args vectors to get the arguments for visc createNode
-  // intrinsic
-  std::vector<Value*> args;
-
-  // Push the i8* pointer to internal node into the args vector
-  args.push_back(NodeF);
-
-  // Traverse the argument list of internal node function in reverse to get the
-  // dimesnions to be used to create instances of child node at this level
-  Function::arg_iterator ai = InternalF->arg_end();
-  for(unsigned i=0; i<numDims; i++, ai--);
-  DEBUG(errs() << "Iterator at: " << *ai << "\n");
-
-  // ai now points to the first dimension argument to be passed to the
-  // createNode intrinsic. Follow it to push the dim argument into
-  // the args vector
-  for(unsigned i=0; i < numDims; i++, ai++) {
-    args.push_back(&*ai);
-  }
-
-  // Based on the number of dimensions choose the appropriate visc createNode
-  // intrinsic
-  DEBUG(errs() << "Number of dims = " << numDims << "\n");
-  Intrinsic::ID createNodeXD;
-  switch(numDims) {
-  case 0:
-    createNodeXD = Intrinsic::visc_createNode;
-    break;
-  case 1:
-    createNodeXD = Intrinsic::visc_createNode1D;
-    break;
-  case 2:
-    createNodeXD = Intrinsic::visc_createNode2D;
-    break;
-  case 3:
-    createNodeXD = Intrinsic::visc_createNode3D;
-    break;
-  default:
-    llvm_unreachable("Invalid number of dimensions!");
-    break;
-  };
-
-  // Generate the visc createNode intrinsic, using the args vector as parameter
-  Function* CreateNodeF = Intrinsic::getDeclaration(module, createNodeXD);
-  DEBUG(errs() << "Function chosen:\n" << *CreateNodeF << "\n");
-  CallInst *CreateNodeCall = CallInst::Create(CreateNodeF, args, ChildNodeF->getName()+".node", RI);
-  DEBUG(errs() << "Generate call: " << *CreateNodeCall << "\n");
-
-  // Generate Bind intrinsics
-  Function* bindInputF = Intrinsic::getDeclaration(module, Intrinsic::visc_bind_input);
-  DEBUG(errs() << "Generating input binding:\n" << *bindInputF << "\n");
-  for(unsigned i=0; i < ChildNodeF->getArgumentList().size(); i++) {
-    std::vector<Value*> bindArgs;
-    bindArgs.push_back(CreateNodeCall);
-    bindArgs.push_back(ConstantInt::get(Type::getInt32Ty(module->getContext()), i));
-    bindArgs.push_back(ConstantInt::get(Type::getInt32Ty(module->getContext()), i));
-    bindArgs.push_back(ConstantInt::getFalse(module->getContext()));
-    CallInst* bindInputCall = CallInst::Create(bindInputF, bindArgs, "", RI);
-    DEBUG(errs() << *bindInputCall << "\n");
-  }
-
-  // Print the generated internal node for debugging
-  DEBUG(errs() << "Generated Function:\n" << *InternalF << "\n");
-
-  return InternalF;
-}
-
-// Change the OpenCL query function calls with visc intrinsics in function F.
-static void replaceOpenCLCallsWithVISCIntrinsics(Function *F) {
-  Module* module = F->getParent();
-  std::vector<CallInst *> IItoRemove;
-
-  // Get first instruction
-  inst_iterator i = inst_begin(F);
-  Instruction *FI = &(*i);
-
-  // Insert getNode intrinsic
-  Intrinsic::ID getNodeID = Intrinsic::visc_getNode;
-  Function* GetNodeF = Intrinsic::getDeclaration(module, getNodeID);
-  std::vector<Value*> args;
-  CallInst *GetNodeCall = CallInst::Create(GetNodeF, args, F->getName()+".node", FI);
-  DEBUG(errs() << "Generate getNode intrinsic: " << *GetNodeCall << "\n");
-
-  // Insert getParentNode intrinsic
-  Intrinsic::ID getParentNodeID = Intrinsic::visc_getParentNode;
-  Function* GetParentNodeF = Intrinsic::getDeclaration(module, getParentNodeID);
-  args.push_back(GetNodeCall);
-  CallInst *GetParentNodeCall = CallInst::Create(GetParentNodeF, args, F->getName()+".parentNode", FI);
-  DEBUG(errs() << "Generate getParentNode intrinsic: " << *GetParentNodeCall << "\n");
-
-  // Iterate through all instructions
-  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-    Instruction *I = &(*i);
-    CallInst *CI;
-
-    // Find OpenCL function calls
-    if ((CI = dyn_cast<CallInst>(I))) {
-      if ((CI->getCalledValue()->stripPointerCasts()->getName()).equals("get_global_id")) {
-        DEBUG(errs() << "Found get_global_id call: " << *CI << "\n");
-        CallSite OpenCLCallSite(CI);
-        Value *arg0 = OpenCLCallSite.getArgument(0);
-        // Find the intrinsic function to be called
-        unsigned dim = getNumericValue(arg0);
-        Intrinsic::ID getNodeInstanceID;
-        Intrinsic::ID getNumNodeInstancesID;
-        switch (dim) {
-        case 0:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_x;
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_x;
-          break;
-        case 1:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_y;
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_y;
-          break;
-        case 2:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_z;
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_z;
-          break;
-        default:
-          assert(false && "Invalid dimension from valid OpenCL source!");
-          break;
-        }
-
-
-        // Creating getNodeInstanceID intrinsic for parent node
-        ArrayRef<Value *> Args0(GetParentNodeCall);
-        Function* GetNodeInstanceIDF = Intrinsic::getDeclaration(module, getNodeInstanceID);
-        CallInst* ParentIDIntrinsic = CallInst::Create(GetNodeInstanceIDF, Args0, "", CI);
-
-        // Creating getNumNodeInstances intrinsic for this node
-        ArrayRef<Value *> Args1(GetNodeCall);
-        Function* GetNumNodeInstancesF = Intrinsic::getDeclaration(module, getNumNodeInstancesID);
-        CallInst* InstancesIntrinsic = CallInst::Create(GetNumNodeInstancesF, Args1, "", CI);
-        // Creating mul instruction
-        BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul,
-                                  ParentIDIntrinsic,
-                                  InstancesIntrinsic,
-                                  "", CI);
-        // Creating getNodeInstanceID intrinsic for this node
-        CallInst* LocalIDIntrinsic = CallInst::Create(GetNodeInstanceIDF, Args1, "", CI);
-        // Creating add instruction
-        BinaryOperator* AddInst = BinaryOperator::Create(Instruction::Add,
-                                  MulInst,
-                                  LocalIDIntrinsic,
-                                  "", CI);
-        CI->replaceAllUsesWith(AddInst);
-        IItoRemove.push_back(CI);
-      }
-      if ((CI->getCalledValue()->stripPointerCasts()->getName()).equals("get_local_id")) {
-        DEBUG(errs() << "Found get_local_id call: " << *CI << "\n");
-        // Value *arg0 = CI->getOperand(0);
-        CallSite OpenCLCallSite(CI);
-        Value *arg0 = OpenCLCallSite.getArgument(0);
-
-        // Argument of the function to be called
-        ArrayRef<Value *> Args(GetNodeCall);
-
-        // Find the intrinsic function to be called
-        unsigned dim = getNumericValue(arg0);
-        Intrinsic::ID getNodeInstanceID;
-        switch (dim) {
-        case 0:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_x;
-          break;
-        case 1:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_y;
-          break;
-        case 2:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_z;
-          break;
-        default:
-          assert(false && "Invalid dimension from valid OpenCL source!");
-          break;
-        }
-        Function* GetNodeInstanceIDF = Intrinsic::getDeclaration(module, getNodeInstanceID);
-        CallInst* VI = CallInst::Create(GetNodeInstanceIDF, Args, "", CI);
-        CI->replaceAllUsesWith(VI);
-        IItoRemove.push_back(CI);
-      }
-      if ((CI->getCalledValue()->stripPointerCasts()->getName()).equals("get_group_id")) {
-        DEBUG(errs() << "Found get_group_id call: " << *CI << "\n");
-        // Value *arg0 = CI->getOperand(0);
-        CallSite OpenCLCallSite(CI);
-        Value *arg0 = OpenCLCallSite.getArgument(0);
-
-        // Argument of the function to be called
-        ArrayRef<Value *> Args(GetParentNodeCall);
-
-        // Find the intrinsic function to be called
-        unsigned dim = getNumericValue(arg0);
-        Intrinsic::ID getNodeInstanceID;
-        switch (dim) {
-        case 0:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_x;
-          break;
-        case 1:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_y;
-          break;
-        case 2:
-          getNodeInstanceID = Intrinsic::visc_getNodeInstanceID_z;
-          break;
-        default:
-          assert(false && "Invalid dimension from valid OpenCL source!");
-          break;
-        }
-        Function* GetNodeInstanceIDF = Intrinsic::getDeclaration(module, getNodeInstanceID);
-        CallInst* VI = CallInst::Create(GetNodeInstanceIDF, Args, "", CI);
-        CI->replaceAllUsesWith(VI);
-        IItoRemove.push_back(CI);
-      }
-      if ((CI->getCalledValue()->stripPointerCasts()->getName()).equals("get_global_size")) {
-        DEBUG(errs() << "Found get_global_size call: " << *CI << "\n");
-        CallSite OpenCLCallSite(CI);
-        Value *arg0 = OpenCLCallSite.getArgument(0);
-        // Find the intrinsic function to be called
-        unsigned dim = getNumericValue(arg0);
-        Intrinsic::ID getNumNodeInstancesID;
-        switch (dim) {
-        case 0:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_x;
-          break;
-        case 1:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_y;
-          break;
-        case 2:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_z;
-          break;
-        default:
-          assert(false && "Invalid dimension from valid OpenCL source!");
-          break;
-        }
-
+// Public Functions of GenHPVM pass
+bool GenHPVM::runOnModule(Module &M) {
+  DEBUG(errs() << "\nGENHPVM PASS\n");
+  this->M = &M;
 
-        // Creating getNumNodeInstances intrinsic for parent node
-        ArrayRef<Value *> Args0(GetParentNodeCall);
-        Function* GetNumNodeInstancesF = Intrinsic::getDeclaration(module, getNumNodeInstancesID);
-        CallInst* ParentInstancesIntrinsic = CallInst::Create(GetNumNodeInstancesF, Args0, "", CI);
-        // Creating getNumNodeInstances intrinsic for this node
-        ArrayRef<Value *> Args1(GetNodeCall);
-        CallInst* InstancesIntrinsic = CallInst::Create(GetNumNodeInstancesF, Args1, "", CI);
-        // Creating mul instruction
-        BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul,
-                                  ParentInstancesIntrinsic,
-                                  InstancesIntrinsic,
-                                  "", CI);
-        CI->replaceAllUsesWith(MulInst);
-        IItoRemove.push_back(CI);
+  // Load Runtime API Module
+  SMDiagnostic Err;
 
-      }
-      if ((CI->getCalledValue()->stripPointerCasts()->getName()).equals("get_local_size")) {
-        DEBUG(errs() << "Found get_local_size call: " << *CI << "\n");
-        CallSite OpenCLCallSite(CI);
-        Value *arg0 = OpenCLCallSite.getArgument(0);
-
-        // Argument of the function to be called
-        ArrayRef<Value *> Args(GetNodeCall);
-
-        // Find the intrinsic function to be called
-        unsigned dim = getNumericValue(arg0);
-        Intrinsic::ID getNumNodeInstancesID;
-        switch (dim) {
-        case 0:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_x;
-          break;
-        case 1:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_y;
-          break;
-        case 2:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_z;
-          break;
-        default:
-          assert(false && "Invalid dimension from valid OpenCL source!");
-          break;
-        }
-        Function* GetNumNodeInstancesF = Intrinsic::getDeclaration(module, getNumNodeInstancesID);
-        CallInst* VI = CallInst::Create(GetNumNodeInstancesF, Args, "", CI);
-        CI->replaceAllUsesWith(VI);
-        IItoRemove.push_back(CI);
-      }
-      if ((CI->getCalledValue()->stripPointerCasts()->getName()).equals("get_num_groups")) {
-        DEBUG(errs() << "Found get_num_groups call: " << *CI << "\n");
-        CallSite OpenCLCallSite(CI);
-        Value *arg0 = OpenCLCallSite.getArgument(0);
-
-        // Argument of the function to be called
-        ArrayRef<Value *> Args(GetParentNodeCall);
-
-        // Find the intrinsic function to be called
-        unsigned dim = getNumericValue(arg0);
-        Intrinsic::ID getNumNodeInstancesID;
-        switch (dim) {
-        case 0:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_x;
-          break;
-        case 1:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_y;
-          break;
-        case 2:
-          getNumNodeInstancesID = Intrinsic::visc_getNumNodeInstances_z;
-          break;
-        default:
-          assert(false && "Invalid dimension from valid OpenCL source!");
-          break;
-        }
-        Function* GetNumNodeInstancesF = Intrinsic::getDeclaration(module, getNumNodeInstancesID);
-        CallInst* VI = CallInst::Create(GetNumNodeInstancesF, Args, "", CI);
-        CI->replaceAllUsesWith(VI);
-        IItoRemove.push_back(CI);
-      }
-    }
-  }
+  //std::string runtimeAPI = std::string(LLVM_BUILD_DIR_STR) +
+    //                       "/tools/hpvm/projects/hpvm-rt/hpvm-rt.bc";
 
-  for (std::vector<CallInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri)
-    (*ri)->eraseFromParent();
+  //std::unique_ptr<Module> runtimeModule =
+  //    parseIRFile(runtimeAPI, Err, M.getContext());
 
-}
+  //if (runtimeModule == NULL) {
+    //DEBUG(errs() << Err.getMessage() << " " << runtimeAPI << "\n");
+   // assert(false && "couldn't parse runtime");
+  //} else
+   // DEBUG(errs() << "Successfully loaded hpvm-rt API module\n");
 
+  //llvm_hpvm_initializeTimerSet = M.getOrInsertFunction(
+    //  "llvm_hpvm_initializeTimerSet",
+     // runtimeModule->getFunction("llvm_hpvm_initializeTimerSet")
+       //   ->getFunctionType());
+  // DEBUG(errs() << *llvm_hpvm_initializeTimerSet);
 
-// Public Functions of GenVISC pass
-bool GenVISC::runOnModule(Module &M) {
-  errs() << "\nGENVISC PASS\n";
-  this->M = &M;
+  //llvm_hpvm_switchToTimer = M.getOrInsertFunction(
+    //  "llvm_hpvm_switchToTimer",
+     // runtimeModule->getFunction("llvm_hpvm_switchToTimer")->getFunctionType());
+  // DEBUG(errs() << *llvm_hpvm_switchToTimer);
 
-  // Load Runtime API Module
-  SMDiagnostic Err;
+  //llvm_hpvm_printTimerSet = M.getOrInsertFunction(
+    //  "llvm_hpvm_printTimerSet",
+     // runtimeModule->getFunction("llvm_hpvm_printTimerSet")->getFunctionType());
+  // DEBUG(errs() << *llvm_hpvm_printTimerSet);
 
   // Insert init context in main
-  DEBUG(errs() << "Locate __visc__init()\n");
-  Function* VI = M.getFunction("__visc__init");
-  assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
-  Instruction* I = cast<Instruction>(*VI->user_begin());
-
-  // Insert print instruction at visc exit
-  DEBUG(errs() << "Locate __visc__cleanup()\n");
-  Function* VC = M.getFunction("__visc__cleanup");
-  assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once");
+  DEBUG(errs() << "Locate __hpvm__init()\n");
+  Function *VI = M.getFunction("__hpvm__init");
+  assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once");
+  Instruction *I = cast<Instruction>(*VI->user_begin());
+
+  //DEBUG(errs() << "Initialize Timer Set\n");
+  //initializeTimerSet(I);
+  //switchToTimer(hpvm_TimerID_NONE, I);
+
+  // Insert print instruction at hpvm exit
+  DEBUG(errs() << "Locate __hpvm__cleanup()\n");
+  Function *VC = M.getFunction("__hpvm__cleanup");
+  assert(VC->getNumUses() == 1 && "__hpvm__cleanup should only be used once");
   I = cast<Instruction>(*VC->user_begin());
+  //printTimerSet(I);
 
   DEBUG(errs() << "-------- Searching for launch sites ----------\n");
 
-  std::vector<Instruction*> toBeErased;
-  std::vector<Function*> functions;
+  std::vector<Instruction *> toBeErased;
+  std::vector<Function *> functions;
 
-  for (Module::iterator mi = M.begin(), me = M.end(); mi != me; ++mi) {
-    Function* f = &*mi;
-    functions.push_back(f);
-  }
+  for (auto &F : M)
+    functions.push_back(&F);
 
   // Iterate over all functions in the module
-  for (unsigned i = 0; i < functions.size(); i++) {
-    Function* f = functions[i];
+  for (Function *f : functions) {
     DEBUG(errs() << "Function: " << f->getName() << "\n");
 
     // List with the required additions in the function's return type
-    std::vector<Type*> FRetTypes;
+    std::vector<Type *> FRetTypes;
 
     enum mutateTypeCause {
       mtc_None,
@@ -851,100 +348,106 @@ bool GenVISC::runOnModule(Module &M) {
     bind = mutateTypeCause::mtc_None;
 
     // Iterate over all the instructions in this function
-    for (inst_iterator i = inst_begin(f), e = inst_end(f); i != e ; ++i) {
-      Instruction* I = &*i; // Grab pointer to Instruction
+    for (inst_iterator i = inst_begin(f), e = inst_end(f); i != e; ++i) {
+      Instruction *I = &*i; // Grab pointer to Instruction
       // If not a call instruction, move to next instruction
-      if(!isa<CallInst>(I))
+      if (!isa<CallInst>(I))
         continue;
 
-      CallInst* CI = cast<CallInst>(I);
-      LLVMContext& Ctx = CI->getContext();
-      // If __visc__node call found, generate the test case
+      CallInst *CI = cast<CallInst>(I);
+      LLVMContext &Ctx = CI->getContext();
 
-      if(isVISCCall_node(I)) {
-        errs() << "Found visc node call in Function: " << f->getName() << "\n";
-        assert(CI->getNumArgOperands() >= 5
-               && "__visc__node call should have atleast 5 arguments!");
-        generateTest(CI);
-        // Place this call in the list of instructions to be erased.
-        toBeErased.push_back(CI);
-      }
-      if(isVISCCall_init(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_init, &toBeErased);
+      if (isHPVMCall_init(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_init, &toBeErased);
       }
-      if(isVISCCall_cleanup(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_cleanup, &toBeErased);
+      if (isHPVMCall_cleanup(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_cleanup, &toBeErased);
       }
-      if(isVISCCall_wait(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_wait, &toBeErased);
+      if (isHPVMCall_wait(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_wait, &toBeErased);
       }
-      if(isVISCCall_trackMemory(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_trackMemory, &toBeErased);
+      if (isHPVMCall_trackMemory(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_trackMemory, &toBeErased);
       }
-      if(isVISCCall_untrackMemory(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_untrackMemory, &toBeErased);
+      if (isHPVMCall_untrackMemory(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_untrackMemory, &toBeErased);
       }
-      if(isVISCCall_requestMemory(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_requestMemory, &toBeErased);
+      if (isHPVMCall_requestMemory(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_requestMemory, &toBeErased);
       }
-      if(isVISCCall_hint(I)) {
-        assert(isa<ConstantInt>(CI->getArgOperand(0))
-               && "Argument to hint must be constant integer!");
-        ConstantInt* hint = cast<ConstantInt>(CI->getArgOperand(0));
+      if (isHPVMCall_hint(I)) {
+        assert(isa<ConstantInt>(CI->getArgOperand(0)) &&
+               "Argument to hint must be constant integer!");
+        ConstantInt *hint = cast<ConstantInt>(CI->getArgOperand(0));
 
-        visc::Target t = (visc::Target) hint->getZExtValue();
+        hpvm::Target t = (hpvm::Target)hint->getZExtValue();
         addHint(CI->getParent()->getParent(), t);
-        DEBUG(errs() << "Found visc hint call: " << *CI << "\n");
+        DEBUG(errs() << "Found hpvm hint call: " << *CI << "\n");
         toBeErased.push_back(CI);
       }
-      if(isVISCCall_launch(I)) {
-        Function* LaunchF = Intrinsic::getDeclaration(&M, Intrinsic::visc_launch);
+      if (isHPVMCall_launch(I)) {
+        Function *LaunchF =
+            Intrinsic::getDeclaration(&M, Intrinsic::hpvm_launch);
         DEBUG(errs() << *LaunchF << "\n");
         // Get i8* cast to function pointer
-        Function* graphFunc = cast<Function>(CI->getArgOperand(1));
+        Function *graphFunc = cast<Function>(CI->getArgOperand(1));
         graphFunc = transformReturnTypeToStruct(graphFunc);
-        Constant* F = ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx));
-
-        ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(0));
-        Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx)
-                             : ConstantInt::getTrue(Ctx);
-
-        Value* LaunchArgs[] = {F, CI->getArgOperand(2), isStreaming};
-        CallInst* LaunchInst = CallInst::Create(LaunchF,
-                                                ArrayRef<Value*>(LaunchArgs, 3),
-                                                "graphID", CI);
-        DEBUG(errs() << "Found visc launch call: " << *CI << "\n");
+        Constant *F =
+            ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx));
+        assert(
+            F &&
+            "Function invoked by HPVM launch has to be define and constant.");
+
+        ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(0));
+        assert(Op && "HPVM launch's streaming argument is a constant value.");
+        Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx)
+                                          : ConstantInt::getTrue(Ctx);
+
+        auto *ArgTy = dyn_cast<PointerType>(CI->getArgOperand(2)->getType());
+        assert(ArgTy && "HPVM launch argument should be pointer type.");
+        Value *Arg = CI->getArgOperand(2);
+        if (!ArgTy->getElementType()->isIntegerTy(8))
+          Arg = BitCastInst::CreatePointerCast(CI->getArgOperand(2),
+                                               Type::getInt8PtrTy(Ctx), "", CI);
+        Value *LaunchArgs[] = {F, Arg, isStreaming};
+        CallInst *LaunchInst = CallInst::Create(
+            LaunchF, ArrayRef<Value *>(LaunchArgs, 3), "graphID", CI);
+        DEBUG(errs() << "Found hpvm launch call: " << *CI << "\n");
         DEBUG(errs() << "\tSubstitute with: " << *LaunchInst << "\n");
         CI->replaceAllUsesWith(LaunchInst);
         toBeErased.push_back(CI);
       }
-      if(isVISCCall_push(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_push, &toBeErased);
+      if (isHPVMCall_push(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_push, &toBeErased);
       }
-      if(isVISCCall_pop(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_pop, &toBeErased);
+      if (isHPVMCall_pop(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_pop, &toBeErased);
       }
-      if(isVISCCall_createNodeND(I)) {
+      if (isHPVMCall_createNodeND(I)) {
         assert(CI->getNumArgOperands() > 0 &&
-               "Too few arguments for __visc__createNodeND call");
+               "Too few arguments for __hpvm__createNodeND call");
         unsigned numDims = getNumericValue(CI->getArgOperand(0));
         // We need as meny dimension argments are there are dimensions
-        assert(CI->getNumArgOperands()-2 == numDims &&
-              "Too few arguments for __visc_createNodeND call!\n");
+        assert(CI->getNumArgOperands() - 2 == numDims &&
+               "Too few arguments for __hpvm_createNodeND call!\n");
 
-        Function* CreateNodeF;
+        Function *CreateNodeF;
         switch (numDims) {
         case 0:
-          CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode);
+          CreateNodeF =
+              Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode);
           break;
         case 1:
-          CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode1D);
+          CreateNodeF =
+              Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode1D);
           break;
         case 2:
-          CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode2D);
+          CreateNodeF =
+              Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode2D);
           break;
         case 3:
-          CreateNodeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createNode3D);
+          CreateNodeF =
+              Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createNode3D);
           break;
         default:
           llvm_unreachable("Unsupported number of dimensions\n");
@@ -952,619 +455,466 @@ bool GenVISC::runOnModule(Module &M) {
         }
         DEBUG(errs() << *CreateNodeF << "\n");
         DEBUG(errs() << *I << "\n");
-        DEBUG(errs() << "in " << I->getParent()->getParent()->getName() << "\n");
+        DEBUG(errs() << "in " << I->getParent()->getParent()->getName()
+                     << "\n");
 
         // Get i8* cast to function pointer
-        Function* graphFunc = cast<Function>(CI->getArgOperand(1));
+        Function *graphFunc = cast<Function>(CI->getArgOperand(1));
         graphFunc = transformReturnTypeToStruct(graphFunc);
-        Constant* F = ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx));
+        Constant *F =
+            ConstantExpr::getPointerCast(graphFunc, Type::getInt8PtrTy(Ctx));
 
-        CallInst* CreateNodeInst;
+        CallInst *CreateNodeInst;
         switch (numDims) {
         case 0:
-          CreateNodeInst = CallInst::Create(CreateNodeF,
-                                            ArrayRef<Value*>(F),
-                                            graphFunc->getName()+".node", CI);
+          CreateNodeInst = CallInst::Create(CreateNodeF, ArrayRef<Value *>(F),
+                                            graphFunc->getName() + ".node", CI);
           break;
-        case 1:
-          {
+        case 1: {
           assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) &&
                  "CreateNodeND dimension argument, 2, expected to be i64\n");
-          Value* CreateNodeArgs[] = {F, CI->getArgOperand(2)};
-          CreateNodeInst = CallInst::Create(CreateNodeF,
-                                            ArrayRef<Value*>(CreateNodeArgs, 2),
-                                            graphFunc->getName()+".node", CI);
-          }
-          break;
-        case 2:
-          {
+          Value *CreateNodeArgs[] = {F, CI->getArgOperand(2)};
+          CreateNodeInst = CallInst::Create(
+              CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 2),
+              graphFunc->getName() + ".node", CI);
+        } break;
+        case 2: {
           assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) &&
                  "CreateNodeND dimension argument, 2, expected to be i64\n");
           assert((CI->getArgOperand(3)->getType() == Type::getInt64Ty(Ctx)) &&
                  "CreateNodeND dimension argument, 3, expected to be i64\n");
-          Value* CreateNodeArgs[] = {F,
-                                     CI->getArgOperand(2),
+          Value *CreateNodeArgs[] = {F, CI->getArgOperand(2),
                                      CI->getArgOperand(3)};
-          CreateNodeInst = CallInst::Create(CreateNodeF,
-                                            ArrayRef<Value*>(CreateNodeArgs, 3),
-                                            graphFunc->getName()+".node", CI);
-          }
-          break;
-        case 3:
-          {
+          CreateNodeInst = CallInst::Create(
+              CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 3),
+              graphFunc->getName() + ".node", CI);
+        } break;
+        case 3: {
           assert((CI->getArgOperand(2)->getType() == Type::getInt64Ty(Ctx)) &&
                  "CreateNodeND dimension argument, 2, expected to be i64\n");
           assert((CI->getArgOperand(3)->getType() == Type::getInt64Ty(Ctx)) &&
                  "CreateNodeND dimension argument, 3, expected to be i64\n");
           assert((CI->getArgOperand(4)->getType() == Type::getInt64Ty(Ctx)) &&
                  "CreateNodeND dimension argument, 4, expected to be i64\n");
-          Value* CreateNodeArgs[] = {F,
-                                     CI->getArgOperand(2),
+          Value *CreateNodeArgs[] = {F, CI->getArgOperand(2),
                                      CI->getArgOperand(3),
                                      CI->getArgOperand(4)};
-          CreateNodeInst = CallInst::Create(CreateNodeF,
-                                            ArrayRef<Value*>(CreateNodeArgs, 4),
-                                            graphFunc->getName()+".node", CI);
-          }
-          break;
+          CreateNodeInst = CallInst::Create(
+              CreateNodeF, ArrayRef<Value *>(CreateNodeArgs, 4),
+              graphFunc->getName() + ".node", CI);
+        } break;
         default:
-          llvm_unreachable("Impossible path: number of dimensions is 0, 1, 2, 3\n");
+          llvm_unreachable(
+              "Impossible path: number of dimensions is 0, 1, 2, 3\n");
           break;
         }
 
-        DEBUG(errs() << "Found visc createNode call: " << *CI << "\n");
+        DEBUG(errs() << "Found hpvm createNode call: " << *CI << "\n");
         DEBUG(errs() << "\tSubstitute with: " << *CreateNodeInst << "\n");
         CI->replaceAllUsesWith(CreateNodeInst);
         toBeErased.push_back(CI);
       }
 
-      if(isVISCCall_edge(I)) {
-        Function* EdgeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createEdge);
+      if (isHPVMCall_edge(I)) {
+        Function *EdgeF =
+            Intrinsic::getDeclaration(&M, Intrinsic::hpvm_createEdge);
         DEBUG(errs() << *EdgeF << "\n");
-        ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(5));
-        ConstantInt* EdgeTypeOp = cast<ConstantInt>(CI->getArgOperand(2));
-        Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx)
-                             : ConstantInt::getTrue(Ctx);
-        Value* isAllToAll = EdgeTypeOp->isZero()? ConstantInt::getFalse(Ctx)
-                                                : ConstantInt::getTrue(Ctx);
-        Value* EdgeArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1),
-                             isAllToAll, CI->getArgOperand(3), CI->getArgOperand(4),
-                             isStreaming
-                            };
-        CallInst* EdgeInst = CallInst::Create(EdgeF,
-                                              ArrayRef<Value*>(EdgeArgs, 6),
-                                              "output", CI);
-        DEBUG(errs() << "Found visc edge call: " << *CI << "\n");
+        ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(5));
+        ConstantInt *EdgeTypeOp = cast<ConstantInt>(CI->getArgOperand(2));
+        assert(Op && EdgeTypeOp &&
+               "Arguments of CreateEdge are not constant integers.");
+        Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx)
+                                          : ConstantInt::getTrue(Ctx);
+        Value *isAllToAll = EdgeTypeOp->isZero() ? ConstantInt::getFalse(Ctx)
+                                                 : ConstantInt::getTrue(Ctx);
+        Value *EdgeArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1),
+                             isAllToAll,           CI->getArgOperand(3),
+                             CI->getArgOperand(4), isStreaming};
+        CallInst *EdgeInst = CallInst::Create(
+            EdgeF, ArrayRef<Value *>(EdgeArgs, 6), "output", CI);
+        DEBUG(errs() << "Found hpvm edge call: " << *CI << "\n");
         DEBUG(errs() << "\tSubstitute with: " << *EdgeInst << "\n");
         CI->replaceAllUsesWith(EdgeInst);
         toBeErased.push_back(CI);
       }
-      if(isVISCCall_bindIn(I)) {
-        Function* BindInF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_input);
+      if (isHPVMCall_bindIn(I)) {
+        Function *BindInF =
+            Intrinsic::getDeclaration(&M, Intrinsic::hpvm_bind_input);
         DEBUG(errs() << *BindInF << "\n");
         // Check if this is a streaming bind or not
-        ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(3));
-        Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx)
-                             : ConstantInt::getTrue(Ctx);
-        Value* BindInArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1),
-                               CI->getArgOperand(2), isStreaming
-                              };
-        CallInst* BindInInst = CallInst::Create(BindInF,
-                                                ArrayRef<Value*>(BindInArgs, 4),
-                                                "", CI);
-        DEBUG(errs() << "Found visc bindIn call: " << *CI << "\n");
+        ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(3));
+        assert(Op && "Streaming argument for bind in intrinsic should be a "
+                     "constant integer.");
+        Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx)
+                                          : ConstantInt::getTrue(Ctx);
+        Value *BindInArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1),
+                               CI->getArgOperand(2), isStreaming};
+        CallInst *BindInInst =
+            CallInst::Create(BindInF, ArrayRef<Value *>(BindInArgs, 4), "", CI);
+        DEBUG(errs() << "Found hpvm bindIn call: " << *CI << "\n");
         DEBUG(errs() << "\tSubstitute with: " << *BindInInst << "\n");
         CI->replaceAllUsesWith(BindInInst);
         toBeErased.push_back(CI);
       }
-      if(isVISCCall_bindOut(I)) {
-        Function* BindOutF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_output);
+      if (isHPVMCall_bindOut(I)) {
+        Function *BindOutF =
+            Intrinsic::getDeclaration(&M, Intrinsic::hpvm_bind_output);
         DEBUG(errs() << *BindOutF << "\n");
         // Check if this is a streaming bind or not
-        ConstantInt* Op = cast<ConstantInt>(CI->getArgOperand(3));
-        Value* isStreaming = Op->isZero()? ConstantInt::getFalse(Ctx)
-                             : ConstantInt::getTrue(Ctx);
-        Value* BindOutArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1),
-                                CI->getArgOperand(2), isStreaming
-                               };
-        CallInst* BindOutInst = CallInst::Create(BindOutF,
-                                ArrayRef<Value*>(BindOutArgs, 4),
-                                "", CI);
-        DEBUG(errs() << "Found visc bindOut call: " << *CI << "\n");
+        ConstantInt *Op = cast<ConstantInt>(CI->getArgOperand(3));
+        assert(Op && "Streaming argument for bind out intrinsic should be a "
+                     "constant integer.");
+        Value *isStreaming = Op->isZero() ? ConstantInt::getFalse(Ctx)
+                                          : ConstantInt::getTrue(Ctx);
+        Value *BindOutArgs[] = {CI->getArgOperand(0), CI->getArgOperand(1),
+                                CI->getArgOperand(2), isStreaming};
+        CallInst *BindOutInst = CallInst::Create(
+            BindOutF, ArrayRef<Value *>(BindOutArgs, 4), "", CI);
+        DEBUG(errs() << "Found hpvm bindOut call: " << *CI << "\n");
         DEBUG(errs() << "\tSubstitute with: " << *BindOutInst << "\n");
 
         DEBUG(errs() << "Fixing the return type of the function\n");
         // FIXME: What if the child node function has not been visited already.
         // i.e., it's return type has not been fixed.
-        Function* F = I->getParent()->getParent();
+        Function *F = I->getParent()->getParent();
         DEBUG(errs() << F->getName() << "\n";);
-        IntrinsicInst* NodeIntrinsic = cast<IntrinsicInst>(CI->getArgOperand(0));
+        IntrinsicInst *NodeIntrinsic =
+            cast<IntrinsicInst>(CI->getArgOperand(0));
+        assert(NodeIntrinsic &&
+               "Instruction value in bind out is not a create node intrinsic.");
         DEBUG(errs() << "Node intrinsic: " << *NodeIntrinsic << "\n");
-        Function* ChildF = cast<Function>(NodeIntrinsic->getArgOperand(0)->stripPointerCasts());
+        assert(
+            (NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode ||
+             NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode1D ||
+             NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode2D ||
+             NodeIntrinsic->getIntrinsicID() == Intrinsic::hpvm_createNode3D) &&
+            "Instruction value in bind out is not a create node intrinsic.");
+        Function *ChildF = cast<Function>(
+            NodeIntrinsic->getArgOperand(0)->stripPointerCasts());
         DEBUG(errs() << ChildF->getName() << "\n";);
         int srcpos = cast<ConstantInt>(CI->getArgOperand(1))->getSExtValue();
         int destpos = cast<ConstantInt>(CI->getArgOperand(2))->getSExtValue();
-        StructType* ChildReturnTy = cast<StructType>(ChildF->getReturnType());
+        StructType *ChildReturnTy = cast<StructType>(ChildF->getReturnType());
 
-        Type* ReturnType = F->getReturnType();
+        Type *ReturnType = F->getReturnType();
         DEBUG(errs() << *ReturnType << "\n";);
-        assert((ReturnType->isVoidTy() || isa<StructType>(ReturnType))
-            && "Return type should either be a struct or void type!");
+        assert((ReturnType->isVoidTy() || isa<StructType>(ReturnType)) &&
+               "Return type should either be a struct or void type!");
 
-        FRetTypes.insert(FRetTypes.begin()+destpos, ChildReturnTy->getElementType(srcpos));
+        FRetTypes.insert(FRetTypes.begin() + destpos,
+                         ChildReturnTy->getElementType(srcpos));
         assert(((bind == mutateTypeCause::mtc_BIND) ||
                 (bind == mutateTypeCause::mtc_None)) &&
-                "Both bind_out and visc_return detected");
+               "Both bind_out and hpvm_return detected");
         bind = mutateTypeCause::mtc_BIND;
 
         CI->replaceAllUsesWith(BindOutInst);
         toBeErased.push_back(CI);
       }
-      if(isVISCCall_attributes(I)) {
-        Function* F = CI->getParent()->getParent();
-        handleVISCAttributes(F, CI);
+      if (isHPVMCall_attributes(I)) {
+        Function *F = CI->getParent()->getParent();
+        handleHPVMAttributes(F, CI);
         toBeErased.push_back(CI);
       }
-      if (isVISCCall_getNode(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNode, &toBeErased);
+      if (isHPVMCall_getNode(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNode, &toBeErased);
       }
-      if (isVISCCall_getParentNode(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getParentNode, &toBeErased);
+      if (isHPVMCall_getParentNode(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getParentNode, &toBeErased);
       }
-      if (isVISCCall_barrier(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_barrier, &toBeErased);
+      if (isHPVMCall_barrier(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_barrier, &toBeErased);
       }
-      if (isVISCCall_malloc(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_malloc, &toBeErased);
+      if (isHPVMCall_malloc(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_malloc, &toBeErased);
       }
-      if (isVISCCall_return(I)) {
-        DEBUG(errs() << "Function before visc return processing\n" << *I->getParent()->getParent() << "\n");
+      if (isHPVMCall_return(I)) {
+        DEBUG(errs() << "Function before hpvm return processing\n"
+                     << *I->getParent()->getParent() << "\n");
         // The operands to this call are the values to be returned by the node
-        Value* ReturnVal = genCodeForReturn(CI);
+        Value *ReturnVal = genCodeForReturn(CI);
         DEBUG(errs() << *ReturnVal << "\n");
-        Type* ReturnType = ReturnVal->getType();
-        assert(isa<StructType>(ReturnType)
-               && "Return type should be a struct type!");
+        Type *ReturnType = ReturnVal->getType();
+        assert(isa<StructType>(ReturnType) &&
+               "Return type should be a struct type!");
 
         assert(((bind == mutateTypeCause::mtc_RETURN) ||
                 (bind == mutateTypeCause::mtc_None)) &&
-                "Both bind_out and visc_return detected");
+               "Both bind_out and hpvm_return detected");
 
         if (bind == mutateTypeCause::mtc_None) {
-          // If this is None, this is the first __visc__return
+          // If this is None, this is the first __hpvm__return
           // instruction we have come upon. Place the return type of the
           // function in the return type vector
           bind = mutateTypeCause::mtc_RETURN;
-          StructType* ReturnStructTy = cast<StructType>(ReturnType);
+          StructType *ReturnStructTy = cast<StructType>(ReturnType);
           for (unsigned i = 0; i < ReturnStructTy->getNumElements(); i++)
             FRetTypes.push_back(ReturnStructTy->getElementType(i));
         } else { // bind == mutateTypeCause::mtc_RETURN
-          // This is not the first __visc__return
-          // instruction we have come upon. 
+          // This is not the first __hpvm__return
+          // instruction we have come upon.
           // Check that the return types are the same
-          assert((ReturnType == FRetTypes[0])
-                 && "Multiple returns with mismatching types");
+          assert((ReturnType == FRetTypes[0]) &&
+                 "Multiple returns with mismatching types");
         }
 
-        ReturnInst* RetInst = ReturnInst::Create(Ctx, ReturnVal);
-        DEBUG(errs() << "Found visc return call: " << *CI << "\n");
-        Instruction* oldReturn = CI->getParent()->getTerminator();
-        assert(isa<ReturnInst>(oldReturn)
-                && "Expecting a return to be the terminator of this BB!");
+        ReturnInst *RetInst = ReturnInst::Create(Ctx, ReturnVal);
+        DEBUG(errs() << "Found hpvm return call: " << *CI << "\n");
+        Instruction *oldReturn = CI->getParent()->getTerminator();
+        assert(isa<ReturnInst>(oldReturn) &&
+               "Expecting a return to be the terminator of this BB!");
         DEBUG(errs() << "Found return statement of BB: " << *oldReturn << "\n");
         DEBUG(errs() << "\tSubstitute return with: " << *RetInst << "\n");
-        //CI->replaceAllUsesWith(RetInst);
+        // CI->replaceAllUsesWith(RetInst);
         toBeErased.push_back(CI);
         ReplaceInstWithInst(oldReturn, RetInst);
-        DEBUG(errs() << "Function after visc return processing\n" << *I->getParent()->getParent() << "\n");
-
+        DEBUG(errs() << "Function after hpvm return processing\n"
+                     << *I->getParent()->getParent() << "\n");
       }
 
-      if (isVISCCall_getNodeInstanceID_x(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_x, &toBeErased);
-      }
-      if (isVISCCall_getNodeInstanceID_y(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_y, &toBeErased);
+      if (isHPVMCall_getNodeInstanceID_x(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNodeInstanceID_x,
+                                 &toBeErased);
       }
-      if (isVISCCall_getNodeInstanceID_z(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNodeInstanceID_z, &toBeErased);
+      if (isHPVMCall_getNodeInstanceID_y(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNodeInstanceID_y,
+                                 &toBeErased);
       }
-      if (isVISCCall_getNumNodeInstances_x(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_x, &toBeErased);
+      if (isHPVMCall_getNodeInstanceID_z(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNodeInstanceID_z,
+                                 &toBeErased);
       }
-      if (isVISCCall_getNumNodeInstances_y(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_y, &toBeErased);
+      if (isHPVMCall_getNumNodeInstances_x(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNumNodeInstances_x,
+                                 &toBeErased);
       }
-      if (isVISCCall_getNumNodeInstances_z(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_getNumNodeInstances_z, &toBeErased);
+      if (isHPVMCall_getNumNodeInstances_y(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNumNodeInstances_y,
+                                 &toBeErased);
       }
-      if (isVISCCall_atomic_cmpxchg(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_cmpxchg, &toBeErased);
+      if (isHPVMCall_getNumNodeInstances_z(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_getNumNodeInstances_z,
+                                 &toBeErased);
       }
-      if (isVISCCall_atomic_add(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_add, &toBeErased);
+      if (isHPVMCall_atomic_add(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_add, &toBeErased);
       }
-      if (isVISCCall_atomic_sub(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_sub, &toBeErased);
+      if (isHPVMCall_atomic_sub(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_sub, &toBeErased);
       }
-      if (isVISCCall_atomic_xchg(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_xchg, &toBeErased);
+      if (isHPVMCall_atomic_xchg(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_xchg, &toBeErased);
       }
-      if (isVISCCall_atomic_inc(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_inc, &toBeErased);
+      if (isHPVMCall_atomic_min(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_min, &toBeErased);
       }
-      if (isVISCCall_atomic_dec(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_dec, &toBeErased);
+      if (isHPVMCall_atomic_max(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_max, &toBeErased);
       }
-      if (isVISCCall_atomic_min(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_min, &toBeErased);
+      if (isHPVMCall_atomic_and(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_and, &toBeErased);
       }
-      if (isVISCCall_atomic_umin(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_umin, &toBeErased);
+      if (isHPVMCall_atomic_or(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_or, &toBeErased);
       }
-      if (isVISCCall_atomic_max(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_max, &toBeErased);
+      if (isHPVMCall_atomic_xor(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_atomic_xor, &toBeErased);
       }
-      if (isVISCCall_atomic_umax(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_umax, &toBeErased);
-      }
-      if (isVISCCall_atomic_and(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_and, &toBeErased);
-      }
-      if (isVISCCall_atomic_or(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_or, &toBeErased);
-      }
-      if (isVISCCall_atomic_xor(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_atomic_xor, &toBeErased);
-      }
-      if (isVISCCall_floor(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::floor, &toBeErased);
-      }
-      if (isVISCCall_rsqrt(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::nvvm_rsqrt_approx_f, &toBeErased);
-      }
-      if (isVISCCall_sqrt(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::sqrt, &toBeErased);
-      }
-      if (isVISCCall_sin(I)) {
+      if (isHPVMCall_sin(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::sin, &toBeErased);
       }
-      if (isVISCCall_cos(I)) {
+      if (isHPVMCall_cos(I)) {
         ReplaceCallWithIntrinsic(I, Intrinsic::cos, &toBeErased);
       }
-      if (isVISCCall_tensor_convolution(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_convolution, &toBeErased);
+      if (isHPVMCall_tensor_convolution(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_convolution, &toBeErased);
       }
-      if (isVISCCall_tensor_group_convolution(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_group_convolution, &toBeErased);
+      if (isHPVMCall_tensor_group_convolution(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_group_convolution, &toBeErased);
       }
-      if (isVISCCall_tensor_add(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_add, &toBeErased);
+      if (isHPVMCall_tensor_add(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_add, &toBeErased);
       }
-      if (isVISCCall_tensor_batchnorm(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_batchnorm, &toBeErased);
+      if (isHPVMCall_tensor_batchnorm(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_batchnorm, &toBeErased);
       }
-      if (isVISCCall_tensor_mul(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_mul, &toBeErased);
+      if (isHPVMCall_tensor_mul(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_mul, &toBeErased);
       }
-      if (isVISCCall_tensor_pool_max(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_max, &toBeErased);
+      if (isHPVMCall_tensor_pool_max(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_pool_max, &toBeErased);
       }
-      if (isVISCCall_tensor_pool_min(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_min, &toBeErased);
+      if (isHPVMCall_tensor_pool_min(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_pool_min, &toBeErased);
       }
-      if (isVISCCall_tensor_pool_mean(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_pool_mean, &toBeErased);
+      if (isHPVMCall_tensor_pool_mean(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_pool_mean, &toBeErased);
       }
-      if (isVISCCall_tensor_relu(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_relu, &toBeErased);
+      if (isHPVMCall_tensor_relu(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_relu, &toBeErased);
       }
-      if (isVISCCall_tensor_tanh(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_tanh, &toBeErased);
+      if (isHPVMCall_tensor_tanh(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_tanh, &toBeErased);
       }
-      if (isVISCCall_tensor_clipped_relu(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_clipped_relu, &toBeErased);
+      if (isHPVMCall_tensor_clipped_relu(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_clipped_relu, &toBeErased);
       }
-      if (isVISCCall_tensor_softmax(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_tensor_softmax, &toBeErased);
+      if (isHPVMCall_tensor_softmax(I)) {
+        ReplaceCallWithIntrinsic(I, Intrinsic::hpvm_tensor_softmax, &toBeErased);
       }
-
-      // New Intrinsic to set Node ID
-      if (isVISCCall_node_id(I)) {
-        ReplaceCallWithIntrinsic(I, Intrinsic::visc_node_id, &toBeErased);
-      }
-      
     }
 
-    // Erase the __visc__node calls
+    // Erase the __hpvm__node calls
     DEBUG(errs() << "Erase " << toBeErased.size() << " Statements:\n");
-    for(auto I: toBeErased) {
+    for (auto I : toBeErased) {
       DEBUG(errs() << *I << "\n");
     }
-    while(!toBeErased.empty()) {
-      Instruction* I = toBeErased.back(); 
+    while (!toBeErased.empty()) {
+      Instruction *I = toBeErased.back();
       DEBUG(errs() << "\tErasing " << *I << "\n");
       I->eraseFromParent();
-      toBeErased.pop_back(); 
+      toBeErased.pop_back();
     }
 
-    if(bind == mutateTypeCause::mtc_BIND || bind == mutateTypeCause::mtc_RETURN) {
-        DEBUG(errs() << "Function before fixing return type\n" << *f << "\n");
-        // Argument type list.
-        std::vector<Type*> FArgTypes;
-        for(Function::const_arg_iterator ai = f->arg_begin(), ae = f->arg_end();
-            ai != ae; ++ai) {
-          FArgTypes.push_back(ai->getType());
-        }
+    if (bind == mutateTypeCause::mtc_BIND ||
+        bind == mutateTypeCause::mtc_RETURN) {
+      DEBUG(errs() << "Function before fixing return type\n" << *f << "\n");
+      // Argument type list.
+      std::vector<Type *> FArgTypes;
+      for (Function::const_arg_iterator ai = f->arg_begin(), ae = f->arg_end();
+           ai != ae; ++ai) {
+        FArgTypes.push_back(ai->getType());
+      }
 
-        // Find new return type of function
-        Type* NewReturnTy;
-        if(bind == mutateTypeCause::mtc_BIND) {
+      // Find new return type of function
+      Type *NewReturnTy;
+      if (bind == mutateTypeCause::mtc_BIND) {
 
-          std::vector<Type*> TyList;
-          for (unsigned i = 0; i < FRetTypes.size(); i++)
-            TyList.push_back(FRetTypes[i]);
+        std::vector<Type *> TyList;
+        for (unsigned i = 0; i < FRetTypes.size(); i++)
+          TyList.push_back(FRetTypes[i]);
 
-          NewReturnTy = StructType::create(f->getContext(), TyList, Twine("struct.out."+f->getName()).str(), true);
-        }
-        else {
-          NewReturnTy = getReturnTypeFromReturnInst(f);
-          assert(NewReturnTy->isStructTy() && "Expecting a struct type!");
-        }
+        NewReturnTy =
+            StructType::create(f->getContext(), TyList,
+                               Twine("struct.out." + f->getName()).str(), true);
+      } else {
+        NewReturnTy = getReturnTypeFromReturnInst(f);
+        assert(NewReturnTy->isStructTy() && "Expecting a struct type!");
+      }
 
-        FunctionType* FTy = FunctionType::get(NewReturnTy, FArgTypes, f->isVarArg());
+      FunctionType *FTy =
+          FunctionType::get(NewReturnTy, FArgTypes, f->isVarArg());
 
-        // Change the function type
-        Function* newF = cloneFunction(f, FTy, false);
-        DEBUG(errs() << *newF << "\n");
+      // Change the function type
+      Function *newF = cloneFunction(f, FTy, false);
+      DEBUG(errs() << *newF << "\n");
 
-        if (bind == mutateTypeCause::mtc_BIND) {
-          // This is certainly an internal node, and hence just one BB with one
-          // return terminator instruction. Change return statement
-          ReturnInst* RI = cast<ReturnInst>(newF->getEntryBlock().getTerminator());
-          ReturnInst* newRI = ReturnInst::Create(newF->getContext(), UndefValue::get(NewReturnTy));
-          ReplaceInstWithInst(RI, newRI);        
-        }
-        if (bind == mutateTypeCause::mtc_RETURN) {
-          // Nothing
-        }
-        replaceNodeFunctionInIR(*f->getParent(), f, newF);
-        DEBUG(errs() << "Function after fixing return type\n" << *newF << "\n");
+      if (bind == mutateTypeCause::mtc_BIND) {
+        // This is certainly an internal node, and hence just one BB with one
+        // return terminator instruction. Change return statement
+        ReturnInst *RI =
+            cast<ReturnInst>(newF->getEntryBlock().getTerminator());
+        ReturnInst *newRI = ReturnInst::Create(newF->getContext(),
+                                               UndefValue::get(NewReturnTy));
+        ReplaceInstWithInst(RI, newRI);
+      }
+      if (bind == mutateTypeCause::mtc_RETURN) {
+        // Nothing
+      }
+      replaceNodeFunctionInIR(*f->getParent(), f, newF);
+      DEBUG(errs() << "Function after fixing return type\n" << *newF << "\n");
     }
-
-
   }
-  return false; //TODO: What does returning "false" mean?
+  return false; // TODO: What does returning "false" mean?
 }
 
 // Generate Code for declaring a constant string [L x i8] and return a pointer
 // to the start of it.
-Value* GenVISC::getStringPointer(const Twine& S, Instruction* IB, const Twine& Name) {
-  Constant* SConstant = ConstantDataArray::getString(M->getContext(), S.str(), true);
-  Value* SGlobal = new GlobalVariable(*M, SConstant->getType(), true,
-                                      GlobalValue::InternalLinkage, SConstant, Name);
-  Value* Zero = ConstantInt::get(Type::getInt64Ty(M->getContext()), 0);
-  Value* GEPArgs[] = {Zero, Zero};
-  GetElementPtrInst* SPtr = GetElementPtrInst::Create(nullptr, SGlobal,
-                            ArrayRef<Value*>(GEPArgs, 2), Name+"Ptr", IB);
+Value *GenHPVM::getStringPointer(const Twine &S, Instruction *IB,
+                                 const Twine &Name) {
+  LLVMContext &Ctx = M->getContext();
+  // Materialise the text of S as an internal-linkage constant global array.
+  Constant *StrData = ConstantDataArray::getString(Ctx, S.str(), true);
+  Value *GV = new GlobalVariable(*M, StrData->getType(), true,
+                                 GlobalValue::InternalLinkage, StrData, Name);
+  // GEP indices {0, 0} address the first character; the GEP goes before IB.
+  Value *I64Zero = ConstantInt::get(Type::getInt64Ty(Ctx), 0);
+  Value *Idx[] = {I64Zero, I64Zero};
+  Value *SPtr = GetElementPtrInst::Create(nullptr, GV, Idx, Name + "Ptr", IB);
   return SPtr;
 }
 
-
-
-// Generate the test case using the dummy __visc__node call CI
-// First parse the arguments to find the kernel function, num of levels,
-// dimensions, arguments, inputs and outputs. Pass this information to genKernel
-// and genInternalNode functions to generate the test case.
-void GenVISC::generateTest(CallInst* CI) {
-  // Parse the dummy function call here
-  LLVMContext& Ctx = CI->getParent()->getContext();
-
-  unsigned offset = 1; // argument at offset 1 is the number of dimensions
-  // Find number of arguments
-  assert(CI->getNumArgOperands() > offset
-         && "Too few arguments for __visc__node call!");
-  unsigned levels = getNumericValue(CI->getArgOperand(offset));
-  errs() << "\tNum of levels = " << levels << "\n";
-
-  // Find number of dimensions
-  offset += 1;
-  assert(CI->getNumArgOperands() > offset
-         && "Too few arguments for __visc__node call!");
-  unsigned numDims = getNumericValue(CI->getOperand(offset));
-  errs() << "\tNum of dimensions = " << numDims << "\n";
-
-
-  // Find number of arguments
-  offset += numDims*levels + 1; // skip the dimesnions
-  assert(CI->getNumArgOperands() > offset
-         && "Too few arguments for __visc__node call!");
-  unsigned numArgs = getNumericValue(CI->getArgOperand(offset));
-  errs() << "\tNum of kernel arguments = " << numArgs << "\n";
-
-  // Find number of outputs
-  offset += numArgs + 1; // skip the kernel arguments
-  assert(CI->getNumArgOperands() > offset
-         && "Too few arguments for __visc__node call!");
-  unsigned numOutputs = getNumericValue(CI->getArgOperand(offset));
-  errs() << "\tNum of kernel outputs = " << numOutputs << "\n";
-
-  // Find return struct type
-  assert(numOutputs == 0 && "Not handled case where number of outputs is non-zero!");
-  // This is always zero. One should look at the number of struct elements of
-  // kernel function
-  StructType* RetTy = StructType::create(Ctx, None, "rtype");
-
-  Function* KernelF = genKernel(cast<Function>(CI->getArgOperand(0)->stripPointerCasts()), CI, RetTy);
-  genHost(CI, KernelF, levels, numDims, numArgs, numOutputs, RetTy);
+void GenHPVM::initializeTimerSet(Instruction *InsertBefore) {
+  Value *TimerSetAddr;
+  StoreInst *SI;
+  TIMER(TimerSet = new GlobalVariable(
+            *M, Type::getInt8PtrTy(M->getContext()), false,
+            GlobalValue::CommonLinkage,
+            Constant::getNullValue(Type::getInt8PtrTy(M->getContext())),
+            "hpvmTimerSet_GenHPVM"));
+  DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet
+               << "\n");
+  // Call llvm_hpvm_initializeTimerSet before InsertBefore and store the result
+  // in TimerSet. NOTE(review): TimerSetAddr and SI are assigned only inside
+  // TIMER(...); confirm the DEBUG reads below are safe if TIMER skips its body.
+  TIMER(TimerSetAddr = CallInst::Create(llvm_hpvm_initializeTimerSet, None, "",
+                                        InsertBefore));
+  DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n");
+  TIMER(SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore));
+  DEBUG(errs() << "Store Timer Address in Global variable: " << *SI << "\n");
 }
 
-
-
-// Make all the required changes to the kernel function. This would include
-// changing the function signature by adding any extra arguments required.
-// Changing the return type. Changing all the OpenCL query intrinsics with the
-// visc intrinsics.
-Function* GenVISC::genKernel(Function* KernelF, CallInst* CI, StructType* RetTy) {
-  // Make changes to kernel here
-  DEBUG(errs() << "Modifying Node Function: " << KernelF->getName() << "\n");
-
-  // Find dummy __visc__attribute call in this function and add visc attributes
-  // in/out to pointer arguments
-  for (inst_iterator i = inst_begin(KernelF), e = inst_end(KernelF); i != e; ++i) {
-    Instruction *I = &(*i);
-    if(isVISCCall_attributes(I)) {
-      handleVISCAttributes(KernelF, cast<CallInst>(I));
-      //I->eraseFromParent();
-      break;
-    }
-  }
-
-  // Change arguments and types
-  // Create the argument type list with added argument types
-  //Function::ArgumentListType& argList = KernelF->getArgumentList();
-  std::vector<Type*> argTypes;
-  // Insert an i32 argument after every pointer argument. However adding an
-  // argument does not change the attribute list of function and so the
-  // arguments need to be shifted accordingly.
-  //bool shiftAttr = false;
-  for(Function::arg_iterator ai = KernelF->arg_begin(), ae = KernelF->arg_end();
-      ai != ae; ++ai) {
-
-    argTypes.push_back(ai->getType());
-    if(ai->getType()->isPointerTy()) {
-      // If it is a pointer argument, add an i64 type next
-      argTypes.push_back(Type::getInt64Ty(KernelF->getContext()));
-    }
-
-  }
-  // Adding new arguments to the function argument list, would not change the
-  // function type. We need to change the type of this function to reflect the
-  // added arguments
-  FunctionType* newFT = FunctionType::get(RetTy, argTypes, KernelF->isVarArg());
-
-  // Change the function type
-  SmallVector<ReturnInst*, 8> Returns;
-  Function* newKernelF = cloneFunction(KernelF, newFT, true, &Returns);
-  DEBUG(errs() << *newKernelF << "\n");
-
-  // Replace ret void instruction with ret %RetTy undef
-  for(auto RI: Returns) {
-    DEBUG(errs() << "Found return inst: "<< *RI << "\n");
-    ReturnInst* newRI = ReturnInst::Create(KernelF->getContext(), UndefValue::get(RetTy));
-    ReplaceInstWithInst(RI, newRI);
-  }
-
-  replaceNodeFunctionInIR(*KernelF->getParent(), KernelF, newKernelF);
-  // Replace opencl query intrinsics with visc query intrinsics
-  replaceOpenCLCallsWithVISCIntrinsics(newKernelF);
-  return newKernelF;
+// Emits (wrapped in TIMER) a llvm_hpvm_switchToTimer call before InsertBefore.
+void GenHPVM::switchToTimer(enum hpvm_TimerID timer,
+                            Instruction *InsertBefore) {
+  Value *Args[] = {TimerSet, getTimerID(*M, timer)};
+  TIMER(CallInst::Create(llvm_hpvm_switchToTimer, Args, "", InsertBefore));
 }
 
-// Generate the code replacing the dummy __visc__node call with visc launch
-// intrinsic and also generate the internal nodes required at each level
-// depending on the hierarchy of DFG needed. This would also involve marhsalling
-// all the input arguments to the kernel function in memory. Replaceing CI with
-// launch intrinsic, and all the dummy __visc__wait calls with the visc wait
-// intrinsic.
-void GenVISC::genHost(CallInst* CI, Function* KernelF, unsigned levels, unsigned numDims, unsigned numArgs, unsigned numOutputs, StructType* RetTy) {
-  // Make host code changes here
-  DEBUG(errs() << "Modifying Host code for __visc__node call site: " << *CI << "\n");
-  DEBUG(errs() << "Kernel Function: " << KernelF->getName() << "\n");
-  LLVMContext& Ctx = CI->getParent()->getContext();
-
-  // Create a root funtion which has this as internal node
-  Function* Root = genInternalNode(KernelF, levels, numArgs, numDims, 3, CI);
-
-  // Add hint to compile root for CPU. This is always true.
-  addHint(Root, visc::CPU_TARGET);
-
-  // Generate argument struct type (All arguments followed by return struct type)
-  std::vector<Type*> ArgList;
-  unsigned offset = numDims*levels + 2 + 1 + 1;
-  for(Function::arg_iterator ai=KernelF->arg_begin(), ae=KernelF->arg_end();
-      ai!=ae; ai++) {
-    Type* Ty = ai->getType();
-    ArgList.push_back(Ty);
-  }
-  // Add the dimesnions arguments
-  for(unsigned i=0; i<numDims*levels; i++) {
-//    ArgList.push_back(Type::getInt32Ty(Ctx));
-    ArgList.push_back(Type::getInt64Ty(Ctx));
-  }
-  ArgList.push_back(RetTy);
-  StructType* ArgStructTy = StructType::create(ArgList, "struct.arg", true);
-  DEBUG(errs() << *ArgStructTy << "\n");
-
-  // Insert alloca inst for this argument struct type
-  AllocaInst* AI = new AllocaInst(ArgStructTy, "in.addr", CI);
-
-  // Marshall all input arguments and dimension arguments into argument struct
-  // type
-  marshallArguments(levels, numArgs, offset, numDims, 3, AI, CI, KernelF);
-
-  // Type cast argument struct to i8*
-  CastInst* BI = BitCastInst::CreatePointerCast(AI,
-                 Type::getInt8PtrTy(Ctx),
-                 "args",
-                 CI);
-
-  // Bitcast Root function to i8*
-  Constant* Root_i8ptr = ConstantExpr::getPointerCast(Root, Type::getInt8PtrTy(Ctx));
-  // Replace CI with launch call to a Root function
-  Function* LaunchF = Intrinsic::getDeclaration(Root->getParent(), Intrinsic::visc_launch);
-  DEBUG(errs() << "Intrinsic for launch: " << *LaunchF << "\n");
-
-  Value* LaunchInstArgs[] = {Root_i8ptr, BI, ConstantInt::getFalse(Ctx)};
-  CallInst* LaunchInst = CallInst::Create(LaunchF,
-                                          ArrayRef<Value*>(LaunchInstArgs,3),
-                                          "graph"+Root->getName(), CI);
-  //ReplaceInstWithInst(LI, LaunchInst);
-
-  DEBUG(errs() << *LaunchInst << "\n");
-  // Add wait call
-  // Replace all wait instructions with visc wait intrinsic instructions
-  Function* WaitF = Intrinsic::getDeclaration(Root->getParent(), Intrinsic::visc_wait);
-  std::vector<CallInst*>* WaitList = getWaitList(CI);
-  for(unsigned i=0; i < WaitList->size(); ++i) {
-    CallInst* waitCall = WaitList->at(i);
-    CallInst* waitInst = CallInst::Create(WaitF,
-                                          ArrayRef<Value*>(LaunchInst),
-                                          "", CI);
-    DEBUG(errs() << *waitInst << "\n");
-    waitCall->eraseFromParent();
-  }
+// Emits a llvm_hpvm_printTimerSet(TimerSet, "GenHPVM_Timer") call via TIMER.
+void GenHPVM::printTimerSet(Instruction *InsertBefore) {
+  Value *TimerName = nullptr; // defined even if TIMER(...) skips the assignment
+  TIMER(TimerName = getStringPointer("GenHPVM_Timer", InsertBefore));
+  Value *printArgs[] = {TimerSet, TimerName};
+  TIMER(CallInst::Create(llvm_hpvm_printTimerSet, printArgs, "", InsertBefore));
+}
 
-  // Get result (optional)
+static inline ConstantInt *getTimerID(Module &M, enum hpvm_TimerID timer) {
+  return ConstantInt::get(Type::getInt32Ty(M.getContext()), timer); // as an i32
 }
 
-static Function* transformReturnTypeToStruct(Function* F) {
+static Function *transformReturnTypeToStruct(Function *F) {
   // Currently only works for void return types
-  DEBUG(errs() << "Transforming return type of function to Struct: " << F->getName() << "\n");
+  DEBUG(errs() << "Transforming return type of function to Struct: "
+               << F->getName() << "\n");
 
   if (isa<StructType>(F->getReturnType())) {
-    DEBUG(errs() << "Return type is already a Struct: " << F->getName() << ": " << *F->getReturnType() << "\n");
+    DEBUG(errs() << "Return type is already a Struct: " << F->getName() << ": "
+                 << *F->getReturnType() << "\n");
     return F;
   }
 
-  assert(F->getReturnType()->isVoidTy() && "Unhandled case - Only void return type handled\n");
+  assert(F->getReturnType()->isVoidTy() &&
+         "Unhandled case - Only void return type handled\n");
 
   // Create the argument type list with added argument types
-  std::vector<Type*> ArgTypes;
-  for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-      ai != ae; ++ai) {
+  std::vector<Type *> ArgTypes;
+  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
+       ai != ae; ++ai) {
     ArgTypes.push_back(ai->getType());
   }
-  
-  StructType* RetTy = StructType::create(F->getContext(), None, "emptyStruct", true);
-  FunctionType* FTy = FunctionType::get(RetTy, ArgTypes, F->isVarArg());
-  
-  SmallVector<ReturnInst*, 8> Returns;
-  Function* newF = cloneFunction(F, FTy, false, &Returns);
+
+  StructType *RetTy =
+      StructType::create(F->getContext(), None, "emptyStruct", true);
+  FunctionType *FTy = FunctionType::get(RetTy, ArgTypes, F->isVarArg());
+
+  SmallVector<ReturnInst *, 8> Returns;
+  Function *newF = cloneFunction(F, FTy, false, &Returns);
   // Replace ret void instruction with ret %RetTy undef
-  for(auto RI: Returns) {
-    DEBUG(errs() << "Found return inst: "<< *RI << "\n");
-    ReturnInst* newRI = ReturnInst::Create(newF->getContext(), UndefValue::get(RetTy));
+  for (auto &RI : Returns) {
+    DEBUG(errs() << "Found return inst: " << *RI << "\n");
+    ReturnInst *newRI =
+        ReturnInst::Create(newF->getContext(), UndefValue::get(RetTy));
     ReplaceInstWithInst(RI, newRI);
   }
 
@@ -1572,19 +922,21 @@ static Function* transformReturnTypeToStruct(Function* F) {
   return newF;
 }
 
-static Type* getReturnTypeFromReturnInst(Function* F) {
-  for(BasicBlock &BB: *F) {
-    if(ReturnInst* RI = dyn_cast<ReturnInst>(BB.getTerminator())) {
-      DEBUG(errs() << "Return type value: " << *RI->getReturnValue()->getType() << "\n");
-      return RI->getReturnValue()->getType();
-    }
-  }
+// Type of the first value-carrying return in F; nullptr if there is none.
+static Type *getReturnTypeFromReturnInst(Function *F) {
+  for (BasicBlock &BB : *F)
+    if (ReturnInst *RI = dyn_cast<ReturnInst>(BB.getTerminator()))
+      if (Value *RetVal = RI->getReturnValue()) { // null for `ret void`
+        DEBUG(errs() << "Return type value: " << *RetVal->getType() << "\n");
+        return RetVal->getType();
+      }
+  return nullptr;
 }
 
+char genhpvm::GenHPVM::ID = 0;
+static RegisterPass<genhpvm::GenHPVM>
+    X("genhpvm",
+      "Pass to generate HPVM IR from LLVM IR (with dummy function calls)",
+      false, false);
 
-char genvisc::GenVISC::ID = 0;
-static RegisterPass<genvisc::GenVISC> X("genvisc", "Pass to generate VISC IR from LLVM IR (with dummy function calls)", false, false);
-
-} // End of namespace genvisc
-
-
+} // End of namespace genhpvm