diff --git a/llvm/include/llvm/IR/DFGraph.h b/llvm/include/llvm/IR/DFGraph.h
index a1ac2cb45e216acb643cab0cd29b0b5e26d2d5e3..4e213c08c5d04d1e9705b27adbf653fc85737f47 100644
--- a/llvm/include/llvm/IR/DFGraph.h
+++ b/llvm/include/llvm/IR/DFGraph.h
@@ -603,8 +603,8 @@ std::vector<unsigned> DFNode::getOutArgMap() {
 }
 
 int DFNode::getAncestorHops(DFNode* N) {
-  DFNode* temp = this->getParent();
-  int hops = 1;
+  DFNode* temp = this;
+  int hops = 0;
   while (temp != NULL) {
     if(temp == N)
       return hops;
@@ -612,8 +612,8 @@ int DFNode::getAncestorHops(DFNode* N) {
     hops++;
   }
   // N not found among the ancestors
-  // Return 0 to indicate that N is not an ancestor.
-  return 0;
+  // Return -1 to indicate that N is not an ancestor.
+  return -1;
 }
 //===--------------------- DFInternalNode Outlined Functions --------------===//
 void DFInternalNode::addEdgeToDFGraph(DFEdge* E) {
diff --git a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp
index a6dd1d66282acb4f7f4218f5b768f98ec7c609c7..b3ea7d17b29a30d8e81cbda9f4a6ca89e98bfd4c 100644
--- a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp
+++ b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp
@@ -100,9 +100,11 @@ bool ClearDFG::runOnModule(Module &M) {
   Function* VI = M.getFunction("llvm.visc.init");
   VI->replaceAllUsesWith(UndefValue::get(VI->getType()));
   VI->eraseFromParent();
+
   Function* VC = M.getFunction("llvm.visc.cleanup");
   VC->replaceAllUsesWith(UndefValue::get(VC->getType()));
   VC->eraseFromParent();
+
   // Visitor for Code Generation Graph Traversal
   TreeTraversal *Visitor = new TreeTraversal(M, DFG);
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 0db9f231342fc400795b0ae67e8467e62a8f7812..c094c3db76dad53f3bcafa6ee682334efc96b29d 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -344,10 +344,10 @@ void CodeGenTraversal::initRuntimeAPI() {
   DEBUG(errs() << *llvm_visc_printTimerSet);
 
   // Insert init context in main
+  DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n");
   Function* VI = M.getFunction("llvm.visc.init");
   assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
 
-  DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n");
   InitCall = cast<Instruction>(*VI->use_begin());
   initializeTimerSet(InitCall);
   switchToTimer(visc_TimerID_INIT_CTX, InitCall);
@@ -355,11 +355,11 @@ void CodeGenTraversal::initRuntimeAPI() {
   switchToTimer(visc_TimerID_NONE, InitCall);
 
   // Insert print instruction at visc exit
+  DEBUG(errs() << "Gen Code to print NVPTX Timer\n");
   Function* VC = M.getFunction("llvm.visc.cleanup");
   errs() << *VC << "\n";
   assert(VC->getNumUses() == 1 && "__visc__clear should only be used once");
 
-  DEBUG(errs() << "Gen Code to print NVPTX Timer\n");
   CleanupCall = cast<Instruction>(*VC->use_begin());
   printTimerSet(CleanupCall);
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
index ef63ceea0663112348df6f28e94f92f6e0217dff..774b558ca7a9865bc351919e98385b21e3a943aa 100644
--- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
@@ -73,6 +73,10 @@ private:
   Module* runtimeModule;
   Constant* llvm_visc_x86_launch;
   Constant* llvm_visc_x86_wait;
+  Constant* llvm_visc_x86_push;
+  Constant* llvm_visc_x86_pop;
+  Constant* llvm_visc_x86_getDimLimit;
+  Constant* llvm_visc_x86_getDimInstance;
 
   Constant* llvm_visc_initializeTimerSet;
   Constant* llvm_visc_switchToTimer;
@@ -196,6 +200,22 @@ void CodeGenTraversal::initRuntimeAPI() {
                        runtimeModule->getFunction("llvm_visc_x86_wait")->getFunctionType());
   DEBUG(errs() << *llvm_visc_x86_wait);
 
+  llvm_visc_x86_push = M.getOrInsertFunction("llvm_visc_x86_push",
+                       runtimeModule->getFunction("llvm_visc_x86_push")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_push);
+
+  llvm_visc_x86_pop = M.getOrInsertFunction("llvm_visc_x86_pop",
+                       runtimeModule->getFunction("llvm_visc_x86_pop")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_pop);
+
+  llvm_visc_x86_getDimLimit = M.getOrInsertFunction("llvm_visc_x86_getDimLimit",
+                       runtimeModule->getFunction("llvm_visc_x86_getDimLimit")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_getDimLimit);
+
+  llvm_visc_x86_getDimInstance = M.getOrInsertFunction("llvm_visc_x86_getDimInstance",
+                       runtimeModule->getFunction("llvm_visc_x86_getDimInstance")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_getDimInstance);
+
   llvm_visc_initializeTimerSet = M.getOrInsertFunction("llvm_visc_initializeTimerSet",
                                  runtimeModule->getFunction("llvm_visc_initializeTimerSet")->getFunctionType());
   DEBUG(errs() << *llvm_visc_initializeTimerSet);
@@ -211,7 +231,7 @@ void CodeGenTraversal::initRuntimeAPI() {
   // Insert init context in main
   Function* VI = M.getFunction("llvm.visc.init");
   assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
-
+  DEBUG(errs() << "Inserting x86 timer initialization\n");
   Instruction* I = cast<Instruction>(*VI->use_begin());
   initializeTimerSet(I);
   switchToTimer(visc_TimerID_NONE, I);
@@ -220,6 +240,7 @@ void CodeGenTraversal::initRuntimeAPI() {
   Function* VC = M.getFunction("llvm.visc.cleanup");
   assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once");
 
+  DEBUG(errs() << "Inserting x86 timer print\n");
   I = cast<Instruction>(*VC->use_begin());
   printTimerSet(I);
 
@@ -551,6 +572,7 @@ void CodeGenTraversal::invokeChild_X86(DFNode* C, Function* F_X86,
   // Find num of dimensions this node is replicated in.
   // Based on number of dimensions, insert loop instructions
   std::string varNames[3] = {"x", "y", "z"};
+  unsigned numArgs = CI->getNumArgOperands();
   for(unsigned j=0; j < C->getNumOfDim(); j++) {
     Value* indexLimit;
     // Limit can either be a constant or an arguement of the internal node.
@@ -564,12 +586,36 @@ void CodeGenTraversal::invokeChild_X86(DFNode* C, Function* F_X86,
     assert(indexLimit && "Invalid dimension limit!");
     // Insert loop
     Value* indexVar = addLoop(CI, indexLimit, varNames[j]);
-    unsigned numArgs = CI->getNumArgOperands();
     // Insert index variable and limit arguments
     CI->setArgOperand(numArgs-6+j, indexVar);
     CI->setArgOperand(numArgs-3+j, indexLimit);
   }
-
+  // Insert call to runtime to push the dim limits and instanceID on the depth
+  // stack
+  Value* args[] = {
+                  ConstantInt::get(Type::getInt32Ty(CI->getContext()), C->getNumOfDim()), // numDim
+                  CI->getArgOperand(numArgs-3+0), // limitX
+                  CI->getArgOperand(numArgs-6+0), // iX
+                  CI->getArgOperand(numArgs-3+1), // limitY
+                  CI->getArgOperand(numArgs-6+1), // iY
+                  CI->getArgOperand(numArgs-3+2), // limitZ
+                  CI->getArgOperand(numArgs-6+2)  // iZ
+  };
+
+  CallInst* Push = CallInst::Create(llvm_visc_x86_push, ArrayRef<Value*>(args, 7), "", CI);
+  DEBUG(errs() << "Push on stack: " << *Push << "\n");
+  // Insert call to runtime to pop the dim limits and instanceID from the depth
+  // stack
+  BasicBlock::iterator i = CI;
+  Instruction* NextI = ++i;
+  // Next Instruction should also belong to the same basic block as the basic
+  // block will have a terminator instruction
+  assert(NextI->getParent() == CI->getParent()
+         && "Next Instruction should also belong to the same basic block!");
+  // The pop is inserted before NextI, i.e. directly after the call CI.
+  CallInst* Pop = CallInst::Create(llvm_visc_x86_pop, None, "", NextI);
+  DEBUG(errs() << "Pop from stack: " << *Pop << "\n");
+  DEBUG(errs() << *CI->getParent()->getParent()); // dump the enclosing function only in debug output
 }
 
 void CodeGenTraversal::codeGen(DFInternalNode* N) {
@@ -773,14 +819,9 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         // The dfnode argument should be an ancestor of this leaf node or
         // the leaf node itself
         int parentLevel = N->getAncestorHops(ArgDFNode);
-        assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
+        assert(( parentLevel >= 0 || ArgDFNode == (DFNode*)N )
                && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
 
-        //FIXME: Not handling cases where the arg node is an ancestor
-        // To be removed later
-        assert((parentLevel == 0)
-               && "Currently not handling cases other than immediate ancestor!");
-
         // Get specified dimension
         // (dim = 0) => x
         // (dim = 1) => y
@@ -797,16 +838,32 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         assert((numParamsF_X86 - numParamsF == 6)
                && "Difference of arguments between function and its clone is not 6!");
 
-        unsigned offset = 3 + (3-dim);
-        // Traverse argument list of F_X86 in reverse order to find the
-        // correct index or dim argument.
-        Argument* indexVal = getArgumentFromEnd(F_X86, offset);
-        assert(indexVal && "Index argument not found. Invalid offset!");
-
-        DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
-
-        II->replaceAllUsesWith(indexVal);
-        IItoRemove.push_back(II);
+        if(parentLevel == 0) {
+          // Case when the query is for this node itself
+          unsigned offset = 3 + (3-dim);
+          // Traverse argument list of F_X86 in reverse order to find the
+          // correct index or dim argument.
+          Argument* indexVal = getArgumentFromEnd(F_X86, offset);
+          assert(indexVal && "Index argument not found. Invalid offset!");
+
+          DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
+
+          II->replaceAllUsesWith(indexVal);
+          IItoRemove.push_back(II);
+        }
+        else {
+          // Case when query is for an ancestor
+          Value* args[] = {
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel),
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)
+                          };
+          CallInst* CI = CallInst::Create(llvm_visc_x86_getDimInstance,
+                                          ArrayRef<Value*>(args, 2),
+                                          "nodeInstanceID", II);
+          DEBUG(errs() << *II << " replaced with " << *CI << "\n");
+          II->replaceAllUsesWith(CI);
+          IItoRemove.push_back(II);
+        }
         break;
       }
       /********************** llvm.visc.getNumNodeInstances() *************/
@@ -820,14 +877,9 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         // The dfnode argument should be an ancestor of this leaf node or
         // the leaf node itself
         int parentLevel = N->getAncestorHops(ArgDFNode);
-        assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
+        assert(( parentLevel >= 0 || ArgDFNode == (DFNode*)N )
                && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
 
-        //FIXME: Not handling cases where the arg node is an ancestor
-        // To be removed later
-        assert((parentLevel == 0)
-               && "Currently not handling cases other than immediate ancestor!");
-
         // Get specified dimension
         // (dim = 0) => x
         // (dim = 1) => y
@@ -844,16 +896,32 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         assert((numParamsF_X86 - numParamsF == 6)
                && "Difference of arguments between function and its clone is not 6!");
 
-        unsigned offset = 3 - dim;
-        // Traverse argument list of F_X86 in reverse order to find the
-        // correct index or dim argument.
-        Argument* limitVal = getArgumentFromEnd(F_X86, offset);
-        assert(limitVal && "Limit argument not found. Invalid offset!");
-
-        DEBUG(errs() << *II << " replaced with " <<  *limitVal << "\n");
-
-        II->replaceAllUsesWith(limitVal);
-        IItoRemove.push_back(II);
+        if(parentLevel == 0) {
+          // Case when the query is for this node itself
+          unsigned offset = 3 - dim;
+          // Traverse argument list of F_X86 in reverse order to find the
+          // correct index or dim argument.
+          Argument* limitVal = getArgumentFromEnd(F_X86, offset);
+          assert(limitVal && "Limit argument not found. Invalid offset!");
+
+          DEBUG(errs() << *II << " replaced with " <<  *limitVal << "\n");
+
+          II->replaceAllUsesWith(limitVal);
+          IItoRemove.push_back(II);
+        }
+        else {
+          // Case when query is for an ancestor
+           Value* args[] = {
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel),
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)
+                          };
+          CallInst* CI = CallInst::Create(llvm_visc_x86_getDimLimit,
+                                          ArrayRef<Value*>(args, 2),
+                                          "numNodeInstances", II);
+          DEBUG(errs() << *II << " replaced with " << *CI << "\n");
+          II->replaceAllUsesWith(CI);
+          IItoRemove.push_back(II);
+        }
 
         break;
       }
@@ -890,24 +958,20 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
 }
 
 void CodeGenTraversal::initializeTimerSet(Instruction* InsertBefore) {
-  Value* TimerSetAddr;
-  StoreInst* SI;
+  DEBUG(errs() << "Inserting call to: " << *llvm_visc_initializeTimerSet << "\n");
   TIMER(TimerSet = new GlobalVariable(M,
                                       Type::getInt8PtrTy(M.getContext()),
                                       false,
                                       GlobalValue::CommonLinkage,
                                       Constant::getNullValue(Type::getInt8PtrTy(M.getContext())),
-                                      "viscTimerSet_X86"));
-  DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet << "\n");
-  DEBUG(errs() << "Inserting call to: " << *llvm_visc_initializeTimerSet << "\n");
-
-  TIMER(TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet,
-                                        None,
-                                        "",
-                                        InsertBefore));
-  DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n");
-  TIMER(SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore));
-  DEBUG(errs() << "Store Timer Address in Global variable: " << *SI << "\n");
+                                      "viscTimerSet_X86");
+    // Call the runtime initializer before InsertBefore and store its result in the TimerSet global.
+    Value* TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet,
+                                          None,
+                                          "",
+                                          InsertBefore);
+    new StoreInst(TimerSetAddr, TimerSet, InsertBefore);
+  );
 }
 
 void CodeGenTraversal::switchToTimer(enum visc_TimerID timer, Instruction* InsertBefore) {
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index deafb9e359df2d32d4ebe0ed9890a5eac3eb07d0..75197cb1f5e50287964625bc3d969de52559af81 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -409,22 +409,22 @@ bool GenVISC::runOnModule(Module &M) {
     DEBUG(errs() << *llvm_visc_printTimerSet);
 
 
-  DEBUG(errs() << "-------- Searching for launch sites ----------\n");
   // Insert init context in main
-    Function* VI = M.getFunction("__visc__init");
-    assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
-    Instruction* I = cast<Instruction>(*VI->use_begin());
-    initializeTimerSet(I);
-    switchToTimer(visc_TimerID_NONE, I);
-  
-    // Insert print instruction at visc exit
-    Function* VC = M.getFunction("__visc__cleanup");
-    assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once"); 
-    I = cast<Instruction>(*VC->use_begin());
-    printTimerSet(I);
+  Function* VI = M.getFunction("__visc__init");
+  assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
+  Instruction* I = cast<Instruction>(*VI->use_begin());
+  initializeTimerSet(I);
+  switchToTimer(visc_TimerID_NONE, I);
 
+  // Insert print instruction at visc exit
+  Function* VC = M.getFunction("__visc__cleanup");
+  assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once"); 
+  I = cast<Instruction>(*VC->use_begin());
+  printTimerSet(I);
 
 
+  DEBUG(errs() << "-------- Searching for launch sites ----------\n");
+
   std::vector<Instruction*> toBeErased;
   // Iterate over all functions in the module
   for (Module::iterator mi = M.begin(), me = M.end(); mi != me; ++mi) {
diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp
index bf9ab3898809cb45d5ff07eeba74507a1074e7c2..2a6676c1f456eae79b8c580761467fa353be9739 100644
--- a/llvm/projects/visc-rt/visc-rt.cpp
+++ b/llvm/projects/visc-rt/visc-rt.cpp
@@ -36,6 +36,7 @@ cl_context globalGPUContext;
 cl_command_queue globalCommandQue;
 
 MemTracker MTracker;
+vector<DFGDepth> DStack;
 
 static inline void checkErr(cl_int err, cl_int success, const char * name) {
   if (err != success) {
@@ -44,6 +45,38 @@ static inline void checkErr(cl_int err, cl_int success, const char * name) {
   }
 }
 
+/************************* Depth Stack Routines ***************************/
+// Push one entry (dimension count, per-dimension limits and instance ids) onto DStack.
+void llvm_visc_x86_push(unsigned n, unsigned limitX, unsigned iX, unsigned limitY,
+    unsigned iY, unsigned limitZ, unsigned iZ) {
+  DEBUG(cout << "Pushing node information on stack:\n");
+  DEBUG(cout << "\tNumDim = " << n << "\t Limit(" << limitX << ", " << limitY << ", "<< limitZ <<")\n");
+  DEBUG(cout << "\tInstance(" << iX << ", " << iY << ", "<< iZ <<")\n");
+  DStack.push_back(DFGDepth(n, limitX, iX, limitY, iY, limitZ, iZ));
+  DEBUG(cout << "DStack size = " << DStack.size() << "\n");
+}
+// Remove the most recently pushed entry; DStack must not be empty (pop_back on an empty vector is undefined).
+void llvm_visc_x86_pop() {
+  DEBUG(cout << "Popping from depth stack\n");
+  assert(!DStack.empty() && "Error! Popping from an empty depth stack");
+  DStack.pop_back();
+  DEBUG(cout << "DStack size = " << DStack.size() << "\n");
+}
+// Limit of dimension dim for the entry `level` positions below the top of DStack (0 = top); level must be < DStack.size().
+unsigned llvm_visc_x86_getDimLimit(unsigned level, unsigned dim) {
+  DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level <<"\n");
+  assert(level < DStack.size() && "Error! Requested ancestor is not on the depth stack");
+  DEBUG(cout << "\t Return: " << DStack[DStack.size()-level-1].getDimLimit(dim) <<"\n");
+  return DStack[DStack.size()-level-1].getDimLimit(dim);
+}
+// Instance id of dimension dim for the entry `level` positions below the top of DStack (0 = top); level must be < DStack.size().
+unsigned llvm_visc_x86_getDimInstance(unsigned level, unsigned dim) {
+  DEBUG(cout << "Request instance id for dim " << dim << " of ancestor " << level <<"\n");
+  assert(level < DStack.size() && "Error! Requested ancestor is not on the depth stack");
+  DEBUG(cout << "\t Return: " << DStack[DStack.size()-level-1].getDimInstance(dim) <<"\n");
+  return DStack[DStack.size()-level-1].getDimInstance(dim);
+}
+
 /********************** Memory Tracking Routines **************************/
 
 void llvm_visc_track_mem(void* ptr, size_t size) {
diff --git a/llvm/projects/visc-rt/visc-rt.h b/llvm/projects/visc-rt/visc-rt.h
index 1608ed00bf73f7378a87f6fde127260c1d9269c2..db999bd6c5bd91725ac7ba979efbb1b5f9fca847 100644
--- a/llvm/projects/visc-rt/visc-rt.h
+++ b/llvm/projects/visc-rt/visc-rt.h
@@ -10,11 +10,66 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/SupportVISC/VISCTimer.h"
 
-/********************* Memory Tracker **********************************/
 using namespace std;
 
 extern "C" {
 
+/********************* DFG Depth Stack **********************************/
+// One depth-stack entry: number of dimensions plus, for x/y/z (index 0/1/2), a limit and an instance id.
+class DFGDepth {
+  private:
+    unsigned numDim;
+    unsigned dimLimit[3];
+    unsigned dimInstance[3];
+  public:
+    DFGDepth() : numDim(0), dimLimit(), dimInstance() {} // zero-initialize instead of leaving members indeterminate
+    DFGDepth(unsigned n, unsigned dimX = 0, unsigned iX = 0, unsigned dimY = 0, unsigned iY = 0,
+        unsigned dimZ = 0, unsigned iZ = 0) {
+      assert(n <= 3 && "Error! More than 3 dimensions not supported");
+      numDim = n;
+      dimLimit[0] = dimX;
+      dimLimit[1] = dimY;
+      dimLimit[2] = dimZ;
+      dimInstance[0] = iX;
+      dimInstance[1] = iY;
+      dimInstance[2] = iZ;
+    }
+    // dim < 3 keeps the array index in bounds. NOTE(review): dim == numDim is still accepted as before; confirm whether dim < numDim was intended.
+    // Returns the limit stored for dimension dim (0 = x, 1 = y, 2 = z).
+    unsigned getDimLimit(unsigned dim) const {
+      assert(dim < 3 && dim <= numDim && "Error! Requested dimension limit is not specified");
+      return dimLimit[dim];
+    }
+    // Returns the instance id stored for dimension dim (0 = x, 1 = y, 2 = z); same bounds rule as getDimLimit.
+    unsigned getDimInstance(unsigned dim) const {
+      assert(dim < 3 && dim <= numDim && "Error! Requested dimension instance is not specified");
+      return dimInstance[dim];
+    }
+    unsigned getNumDim() const { return numDim; } // dimension count passed at construction (0 when default-constructed)
+};
+
+void llvm_visc_x86_push(unsigned n, unsigned limitX = 0, unsigned iX = 0,
+    unsigned limitY = 0, unsigned iY = 0, unsigned limitZ = 0, unsigned iZ = 0);
+void llvm_visc_x86_pop();
+unsigned llvm_visc_x86_getDimLimit(unsigned level, unsigned dim);
+unsigned llvm_visc_x86_getDimInstance(unsigned level, unsigned dim);
+
+//class DFGDepthStack {
+  //private:
+    //vector<DFGDepth> Stack;
+  //public:
+    //DFGDepthStack() {
+    //}
+
+    //void push(DFGDepth D) {
+      //Stack.push_back(D);
+    //}
+    //void pop() {
+      //Stack.pop_back();
+    //}
+//};
+
+/********************* Memory Tracker **********************************/
 class MemTrackerEntry {
 public:
   enum Location {HOST, DEVICE};
diff --git a/llvm/test/VISC/parboil/common/mk/visc.mk b/llvm/test/VISC/parboil/common/mk/visc.mk
index 58f032a734b4a674af7100a2d077c031c9c36456..8e14b9cb57ca4b2b0ece54a0d97222261b22a323 100644
--- a/llvm/test/VISC/parboil/common/mk/visc.mk
+++ b/llvm/test/VISC/parboil/common/mk/visc.mk
@@ -19,7 +19,12 @@ LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx--nvidiacl.bc
 TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce
 KERNEL_GEN_FLAGS = -O3 -target nvptx
 
-VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
+ifeq ($(TARGET),x86)
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
+else
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
+endif
+
 HOST_LINKFLAGS =
 
 ifeq ($(TIMER),x86)
@@ -30,7 +35,11 @@ else ifeq ($(TIMER),gen)
   TESTGEN_OPTFLAGS += -visc-timers-gen
 else ifeq ($(TIMER),no)
 else
-  VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx
+  ifeq ($(TARGET),x86)
+    VISC_OPTFLAGS += -visc-timers-x86
+  else
+    VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx
+  endif
   TESTGEN_OPTFLAGS += -visc-timers-gen
 endif
 
@@ -79,9 +88,11 @@ endif
 OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
 TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS))
 PARBOIL_OBJS = $(call INBUILDDIR,parboil.ll)
-KERNEL_LINKED = $(BUILDDIR)/$(APP).kernels.linked.ll
-KERNEL = $(TEST_OBJS).kernels.ll
-PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s
+ifneq ($(TARGET),x86)
+  KERNEL_LINKED = $(BUILDDIR)/$(APP).kernels.linked.ll
+  KERNEL = $(TEST_OBJS).kernels.ll
+  PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s
+endif
 HOST_LINKED = $(BUILDDIR)/$(APP).linked.ll
 HOST = $(BUILDDIR)/$(APP).host.ll
 APP_BINS = $(PTX_ASSEMBLY) $(BIN)
@@ -95,6 +106,7 @@ endif
 ########################################
 
 default: $(FAILSAFE) $(BUILDDIR) $(PTX_ASSEMBLY) $(BIN)
+#default: $(FAILSAFE) $(BUILDDIR) $(BIN)
 
 run : $(RUNDIR)
 	echo "Resolving OpenCL library..."
diff --git a/llvm/test/VISC/unitTests/3level.ll b/llvm/test/VISC/unitTests/3level.ll
index 340020ee54a3fd6fd265240ddad1948e2ebc08b8..168e7b42322c8f7fa4be83a64cbd06d44dd9e428 100644
--- a/llvm/test/VISC/unitTests/3level.ll
+++ b/llvm/test/VISC/unitTests/3level.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/3level.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -12,6 +12,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -41,6 +47,7 @@ entry:
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
+  call void @llvm.visc.init()
   %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
   %args = bitcast %struct.arg* %in.addr to i8*
@@ -51,6 +58,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output1 = extractvalue %rtype %outputstruct, 0
   %output2 = extractvalue %rtype %outputstruct, 1
+  call void @llvm.visc.cleanup()
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output1) #0
   %call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output2) #0
   ret i32 0
diff --git a/llvm/test/VISC/unitTests/query2D.ll b/llvm/test/VISC/unitTests/query2D.ll
index 6d2bb9cea2649cee3b2955c3c5744520f60944cf..c994c2a3ff5b166b2f192f4b900982b3b7afc508 100644
--- a/llvm/test/VISC/unitTests/query2D.ll
+++ b/llvm/test/VISC/unitTests/query2D.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/query2D.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -55,6 +61,7 @@ entry:
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
+  call void @llvm.visc.init()
   %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
   %args = bitcast %struct.arg* %in.addr to i8*
@@ -64,6 +71,7 @@ entry:
   %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
+  call void @llvm.visc.cleanup()
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
diff --git a/llvm/test/VISC/unitTests/query3D.ll b/llvm/test/VISC/unitTests/query3D.ll
index 923a29c51ec847b50a9ff76ae077777d19de9e29..438fe60a3bc6c2dfe718da76d55041addc47367f 100644
--- a/llvm/test/VISC/unitTests/query3D.ll
+++ b/llvm/test/VISC/unitTests/query3D.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/query3D.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -56,6 +62,7 @@ declare i32 @llvm.visc.getNumNodeInstances.y(i8*) #0
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -71,6 +78,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/queryNodeInst.ll b/llvm/test/VISC/unitTests/queryNodeInst.ll
index 6572ae36339f596685c24d2537d6477eec3ca754..24d6a3f0d30e6661c0f1396e082f889d54dc50be 100644
--- a/llvm/test/VISC/unitTests/queryNodeInst.ll
+++ b/llvm/test/VISC/unitTests/queryNodeInst.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -39,6 +45,7 @@ declare i32 @llvm.visc.getNumDims(i8*) #0
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -54,6 +61,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/queryNumDim.ll b/llvm/test/VISC/unitTests/queryNumDim.ll
index 21de1ded737277e5071a60d0630a9023ccf8948c..500e2ff41bd52f29a56cfd49563927bf6323482b 100644
--- a/llvm/test/VISC/unitTests/queryNumDim.ll
+++ b/llvm/test/VISC/unitTests/queryNumDim.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -41,6 +47,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -56,6 +63,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/queryNumNodeInst.ll b/llvm/test/VISC/unitTests/queryNumNodeInst.ll
index ae96791491d0663e9055857c897ed4d5153b6c12..48add92f16125bdf33c9691896a8b7259339fe78 100644
--- a/llvm/test/VISC/unitTests/queryNumNodeInst.ll
+++ b/llvm/test/VISC/unitTests/queryNumNodeInst.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -47,6 +53,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -62,6 +69,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/singleNode.ll b/llvm/test/VISC/unitTests/singleNode.ll
index 94bf7314f20f61c23fa359e9c606a500b3345986..20713e955fb457acec2e2968d1b4a2ae61396fe0 100644
--- a/llvm/test/VISC/unitTests/singleNode.ll
+++ b/llvm/test/VISC/unitTests/singleNode.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -32,6 +38,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -41,6 +48,7 @@ entry:
   %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
   call void @llvm.visc.wait(i8* %graphID)
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/twoLaunch.ll b/llvm/test/VISC/unitTests/twoLaunch.ll
index e28b8677d7e757fc59d19ea1ec9d803b552da5e1..48c973a7e6f1cc5422fffd8d9e4ae0a0e1a06bf9 100644
--- a/llvm/test/VISC/unitTests/twoLaunch.ll
+++ b/llvm/test/VISC/unitTests/twoLaunch.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -32,6 +38,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr_1 = alloca %struct.arg
   %in.addr_2= alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
@@ -45,6 +52,7 @@ entry:
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
   call void @llvm.visc.wait(i8* %graphID_1)
   call void @llvm.visc.wait(i8* %graphID_2)
+  call void @llvm.visc.cleanup()
 
   ret i32 0
 }
diff --git a/llvm/test/VISC/unitTests/twoNode.ll b/llvm/test/VISC/unitTests/twoNode.ll
index 3fcc9a353206c4ce84fb6558b6751437227bf981..5e2899830b835ff50c9d2d8e4157451d4bd26f7f 100644
--- a/llvm/test/VISC/unitTests/twoNode.ll
+++ b/llvm/test/VISC/unitTests/twoNode.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,6 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -31,6 +37,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -42,6 +49,7 @@ entry:
   %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
   call void @llvm.visc.wait(i8* %graphID)
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/twoNodeConnect.ll b/llvm/test/VISC/unitTests/twoNodeConnect.ll
index e61636c71ab3fd8dbf7a6dca210bdaa04636048e..06652b94e02c2cac66ab4a07e88dec0a04da49f8 100644
--- a/llvm/test/VISC/unitTests/twoNodeConnect.ll
+++ b/llvm/test/VISC/unitTests/twoNodeConnect.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,6 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -31,6 +37,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -46,6 +53,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/twoNodeQuery.ll b/llvm/test/VISC/unitTests/twoNodeQuery.ll
index a9df546b24c7d788144fc983a794acba068881e1..2e1ea0dba4659d92b9c1b0600732748c87571671 100644
--- a/llvm/test/VISC/unitTests/twoNodeQuery.ll
+++ b/llvm/test/VISC/unitTests/twoNodeQuery.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeQuery.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,6 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -40,6 +46,7 @@ declare i32 @llvm.visc.getNumDims(i8*) #0
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -55,6 +62,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }