From 65414066711aa303595b8355348a0208f3f99501 Mon Sep 17 00:00:00 2001
From: Prakalp Srivastava <prakalps@gmail.com>
Date: Thu, 18 Jun 2015 11:44:05 -0500
Subject: [PATCH] (1) Implemented a stack in the runtime to keep dynamic node
 instance info, in case the code queries one of its ancestors. This enables
 running the visc parboil benchmarks on x86 alone

(2) Modified the unit tests to call the llvm.visc.init and llvm.visc.cleanup
intrinsics. They now all pass.

(3) Modified visc.mk so the visc version can easily be compiled for x86 only:
pass TARGET=x86 as an option to the make command.
---
 llvm/include/llvm/IR/DFGraph.h                |   8 +-
 llvm/lib/Transforms/ClearDFG/ClearDFG.cpp     |   2 +
 .../DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp         |   4 +-
 .../Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp  | 160 ++++++++++++------
 llvm/lib/Transforms/GenVISC/GenVISC.cpp       |  24 +--
 llvm/projects/visc-rt/visc-rt.cpp             |  33 ++++
 llvm/projects/visc-rt/visc-rt.h               |  57 ++++++-
 llvm/test/VISC/parboil/common/mk/visc.mk      |  22 ++-
 llvm/test/VISC/unitTests/3level.ll            |  10 +-
 llvm/test/VISC/unitTests/query2D.ll           |  10 +-
 llvm/test/VISC/unitTests/query3D.ll           |  10 +-
 llvm/test/VISC/unitTests/queryNodeInst.ll     |  10 +-
 llvm/test/VISC/unitTests/queryNumDim.ll       |  10 +-
 llvm/test/VISC/unitTests/queryNumNodeInst.ll  |  10 +-
 llvm/test/VISC/unitTests/singleNode.ll        |  10 +-
 llvm/test/VISC/unitTests/twoLaunch.ll         |  10 +-
 llvm/test/VISC/unitTests/twoNode.ll           |  10 +-
 llvm/test/VISC/unitTests/twoNodeConnect.ll    |  10 +-
 llvm/test/VISC/unitTests/twoNodeQuery.ll      |  10 +-
 19 files changed, 337 insertions(+), 83 deletions(-)

diff --git a/llvm/include/llvm/IR/DFGraph.h b/llvm/include/llvm/IR/DFGraph.h
index a1ac2cb45e..4e213c08c5 100644
--- a/llvm/include/llvm/IR/DFGraph.h
+++ b/llvm/include/llvm/IR/DFGraph.h
@@ -603,8 +603,8 @@ std::vector<unsigned> DFNode::getOutArgMap() {
 }
 
 int DFNode::getAncestorHops(DFNode* N) {
-  DFNode* temp = this->getParent();
-  int hops = 1;
+  DFNode* temp = this;
+  int hops = 0;
   while (temp != NULL) {
     if(temp == N)
       return hops;
@@ -612,8 +612,8 @@ int DFNode::getAncestorHops(DFNode* N) {
     hops++;
   }
   // N not found among the ancestors
-  // Return 0 to indicate that N is not an ancestor.
-  return 0;
+  // Return -1 to indicate that N is not an ancestor.
+  return -1;
 }
 //===--------------------- DFInternalNode Outlined Functions --------------===//
 void DFInternalNode::addEdgeToDFGraph(DFEdge* E) {
diff --git a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp
index a6dd1d6628..b3ea7d17b2 100644
--- a/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp
+++ b/llvm/lib/Transforms/ClearDFG/ClearDFG.cpp
@@ -100,9 +100,11 @@ bool ClearDFG::runOnModule(Module &M) {
   Function* VI = M.getFunction("llvm.visc.init");
   VI->replaceAllUsesWith(UndefValue::get(VI->getType()));
   VI->eraseFromParent();
+
   Function* VC = M.getFunction("llvm.visc.cleanup");
   VC->replaceAllUsesWith(UndefValue::get(VC->getType()));
   VC->eraseFromParent();
+
   // Visitor for Code Generation Graph Traversal
   TreeTraversal *Visitor = new TreeTraversal(M, DFG);
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
index 0db9f23134..c094c3db76 100644
--- a/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_NVPTX/DFG2LLVM_NVPTX.cpp
@@ -344,10 +344,10 @@ void CodeGenTraversal::initRuntimeAPI() {
   DEBUG(errs() << *llvm_visc_printTimerSet);
 
   // Insert init context in main
+  DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n");
   Function* VI = M.getFunction("llvm.visc.init");
   assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
 
-  DEBUG(errs() << "Gen Code to initialize NVPTX Timer\n");
   InitCall = cast<Instruction>(*VI->use_begin());
   initializeTimerSet(InitCall);
   switchToTimer(visc_TimerID_INIT_CTX, InitCall);
@@ -355,11 +355,11 @@ void CodeGenTraversal::initRuntimeAPI() {
   switchToTimer(visc_TimerID_NONE, InitCall);
 
   // Insert print instruction at visc exit
+  DEBUG(errs() << "Gen Code to print NVPTX Timer\n");
   Function* VC = M.getFunction("llvm.visc.cleanup");
   errs() << *VC << "\n";
   assert(VC->getNumUses() == 1 && "__visc__clear should only be used once");
 
-  DEBUG(errs() << "Gen Code to print NVPTX Timer\n");
   CleanupCall = cast<Instruction>(*VC->use_begin());
   printTimerSet(CleanupCall);
 
diff --git a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
index ef63ceea06..774b558ca7 100644
--- a/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_X86/DFG2LLVM_X86.cpp
@@ -73,6 +73,10 @@ private:
   Module* runtimeModule;
   Constant* llvm_visc_x86_launch;
   Constant* llvm_visc_x86_wait;
+  Constant* llvm_visc_x86_push;
+  Constant* llvm_visc_x86_pop;
+  Constant* llvm_visc_x86_getDimLimit;
+  Constant* llvm_visc_x86_getDimInstance;
 
   Constant* llvm_visc_initializeTimerSet;
   Constant* llvm_visc_switchToTimer;
@@ -196,6 +200,22 @@ void CodeGenTraversal::initRuntimeAPI() {
                        runtimeModule->getFunction("llvm_visc_x86_wait")->getFunctionType());
   DEBUG(errs() << *llvm_visc_x86_wait);
 
+  llvm_visc_x86_push = M.getOrInsertFunction("llvm_visc_x86_push",
+                       runtimeModule->getFunction("llvm_visc_x86_push")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_push);
+
+  llvm_visc_x86_pop = M.getOrInsertFunction("llvm_visc_x86_pop",
+                       runtimeModule->getFunction("llvm_visc_x86_pop")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_pop);
+
+  llvm_visc_x86_getDimLimit = M.getOrInsertFunction("llvm_visc_x86_getDimLimit",
+                       runtimeModule->getFunction("llvm_visc_x86_getDimLimit")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_getDimLimit);
+
+  llvm_visc_x86_getDimInstance = M.getOrInsertFunction("llvm_visc_x86_getDimInstance",
+                       runtimeModule->getFunction("llvm_visc_x86_getDimInstance")->getFunctionType());
+  DEBUG(errs() << *llvm_visc_x86_getDimInstance);
+
   llvm_visc_initializeTimerSet = M.getOrInsertFunction("llvm_visc_initializeTimerSet",
                                  runtimeModule->getFunction("llvm_visc_initializeTimerSet")->getFunctionType());
   DEBUG(errs() << *llvm_visc_initializeTimerSet);
@@ -211,7 +231,7 @@ void CodeGenTraversal::initRuntimeAPI() {
   // Insert init context in main
   Function* VI = M.getFunction("llvm.visc.init");
   assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
-
+  DEBUG(errs() << "Inserting x86 timer initialization\n");
   Instruction* I = cast<Instruction>(*VI->use_begin());
   initializeTimerSet(I);
   switchToTimer(visc_TimerID_NONE, I);
@@ -220,6 +240,7 @@ void CodeGenTraversal::initRuntimeAPI() {
   Function* VC = M.getFunction("llvm.visc.cleanup");
   assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once");
 
+  DEBUG(errs() << "Inserting x86 timer print\n");
   I = cast<Instruction>(*VC->use_begin());
   printTimerSet(I);
 
@@ -551,6 +572,7 @@ void CodeGenTraversal::invokeChild_X86(DFNode* C, Function* F_X86,
   // Find num of dimensions this node is replicated in.
   // Based on number of dimensions, insert loop instructions
   std::string varNames[3] = {"x", "y", "z"};
+  unsigned numArgs = CI->getNumArgOperands();
   for(unsigned j=0; j < C->getNumOfDim(); j++) {
     Value* indexLimit;
     // Limit can either be a constant or an arguement of the internal node.
@@ -564,12 +586,36 @@ void CodeGenTraversal::invokeChild_X86(DFNode* C, Function* F_X86,
     assert(indexLimit && "Invalid dimension limit!");
     // Insert loop
     Value* indexVar = addLoop(CI, indexLimit, varNames[j]);
-    unsigned numArgs = CI->getNumArgOperands();
     // Insert index variable and limit arguments
     CI->setArgOperand(numArgs-6+j, indexVar);
     CI->setArgOperand(numArgs-3+j, indexLimit);
   }
-
+  // Insert call to runtime to push the dim limits and instanceID on the depth
+  // stack
+  Value* args[] = {
+                  ConstantInt::get(Type::getInt32Ty(CI->getContext()), C->getNumOfDim()), // numDim
+                  CI->getArgOperand(numArgs-3+0), // limitX
+                  CI->getArgOperand(numArgs-6+0), // iX
+                  CI->getArgOperand(numArgs-3+1), // limitY
+                  CI->getArgOperand(numArgs-6+1), // iY
+                  CI->getArgOperand(numArgs-3+2), // limitZ
+                  CI->getArgOperand(numArgs-6+2)  // iZ
+  };
+
+  CallInst* Push = CallInst::Create(llvm_visc_x86_push, ArrayRef<Value*>(args, 7), "", CI);
+  DEBUG(errs() << "Push on stack: " << *Push << "\n");
+  // Insert call to runtime to pop the dim limits and instanceID from the depth
+  // stack
+  BasicBlock::iterator i = CI;
+  Instruction* NextI = ++i;
+  // Next Instruction should also belong to the same basic block as the basic
+  // block will have a terminator instruction
+  assert(NextI->getParent() == CI->getParent()
+         && "Next Instruction should also belong to the same basic block!");
+ 
+  CallInst* Pop = CallInst::Create(llvm_visc_x86_pop, None, "", NextI);
+  DEBUG(errs() << "Pop from stack: " << *Pop << "\n");
+  errs() << *CI->getParent()->getParent();
 }
 
 void CodeGenTraversal::codeGen(DFInternalNode* N) {
@@ -773,14 +819,9 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         // The dfnode argument should be an ancestor of this leaf node or
         // the leaf node itself
         int parentLevel = N->getAncestorHops(ArgDFNode);
-        assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
+        assert(( parentLevel >= 0 || ArgDFNode == (DFNode*)N )
                && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
 
-        //FIXME: Not handling cases where the arg node is an ancestor
-        // To be removed later
-        assert((parentLevel == 0)
-               && "Currently not handling cases other than immediate ancestor!");
-
         // Get specified dimension
         // (dim = 0) => x
         // (dim = 1) => y
@@ -797,16 +838,32 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         assert((numParamsF_X86 - numParamsF == 6)
                && "Difference of arguments between function and its clone is not 6!");
 
-        unsigned offset = 3 + (3-dim);
-        // Traverse argument list of F_X86 in reverse order to find the
-        // correct index or dim argument.
-        Argument* indexVal = getArgumentFromEnd(F_X86, offset);
-        assert(indexVal && "Index argument not found. Invalid offset!");
-
-        DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
-
-        II->replaceAllUsesWith(indexVal);
-        IItoRemove.push_back(II);
+        if(parentLevel == 0) {
+          // Case when the query is for this node itself
+          unsigned offset = 3 + (3-dim);
+          // Traverse argument list of F_X86 in reverse order to find the
+          // correct index or dim argument.
+          Argument* indexVal = getArgumentFromEnd(F_X86, offset);
+          assert(indexVal && "Index argument not found. Invalid offset!");
+
+          DEBUG(errs() << *II << " replaced with " << *indexVal << "\n");
+
+          II->replaceAllUsesWith(indexVal);
+          IItoRemove.push_back(II);
+        }
+        else {
+          // Case when query is for an ancestor
+          Value* args[] = {
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel),
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)
+                          };
+          CallInst* CI = CallInst::Create(llvm_visc_x86_getDimInstance,
+                                          ArrayRef<Value*>(args, 2),
+                                          "nodeInstanceID", II);
+          DEBUG(errs() << *II << " replaced with " << *CI << "\n");
+          II->replaceAllUsesWith(CI);
+          IItoRemove.push_back(II);
+        }
         break;
       }
       /********************** llvm.visc.getNumNodeInstances() *************/
@@ -820,14 +877,9 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         // The dfnode argument should be an ancestor of this leaf node or
         // the leaf node itself
         int parentLevel = N->getAncestorHops(ArgDFNode);
-        assert(( parentLevel != 0 || ArgDFNode == (DFNode*)N )
+        assert(( parentLevel >= 0 || ArgDFNode == (DFNode*)N )
                && "Invalid DFNode argument to getNodeInstanceID_[xyz]!");
 
-        //FIXME: Not handling cases where the arg node is an ancestor
-        // To be removed later
-        assert((parentLevel == 0)
-               && "Currently not handling cases other than immediate ancestor!");
-
         // Get specified dimension
         // (dim = 0) => x
         // (dim = 1) => y
@@ -844,16 +896,32 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
         assert((numParamsF_X86 - numParamsF == 6)
                && "Difference of arguments between function and its clone is not 6!");
 
-        unsigned offset = 3 - dim;
-        // Traverse argument list of F_X86 in reverse order to find the
-        // correct index or dim argument.
-        Argument* limitVal = getArgumentFromEnd(F_X86, offset);
-        assert(limitVal && "Limit argument not found. Invalid offset!");
-
-        DEBUG(errs() << *II << " replaced with " <<  *limitVal << "\n");
-
-        II->replaceAllUsesWith(limitVal);
-        IItoRemove.push_back(II);
+        if(parentLevel == 0) {
+          // Case when the query is for this node itself
+          unsigned offset = 3 - dim;
+          // Traverse argument list of F_X86 in reverse order to find the
+          // correct index or dim argument.
+          Argument* limitVal = getArgumentFromEnd(F_X86, offset);
+          assert(limitVal && "Limit argument not found. Invalid offset!");
+
+          DEBUG(errs() << *II << " replaced with " <<  *limitVal << "\n");
+
+          II->replaceAllUsesWith(limitVal);
+          IItoRemove.push_back(II);
+        }
+        else {
+          // Case when query is for an ancestor
+           Value* args[] = {
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), parentLevel),
+                          ConstantInt::get(Type::getInt32Ty(II->getContext()), dim)
+                          };
+          CallInst* CI = CallInst::Create(llvm_visc_x86_getDimLimit,
+                                          ArrayRef<Value*>(args, 2),
+                                          "numNodeInstances", II);
+          DEBUG(errs() << *II << " replaced with " << *CI << "\n");
+          II->replaceAllUsesWith(CI);
+          IItoRemove.push_back(II);
+        }
 
         break;
       }
@@ -890,24 +958,20 @@ void CodeGenTraversal::codeGen(DFLeafNode* N) {
 }
 
 void CodeGenTraversal::initializeTimerSet(Instruction* InsertBefore) {
-  Value* TimerSetAddr;
-  StoreInst* SI;
+  DEBUG(errs() << "Inserting call to: " << *llvm_visc_initializeTimerSet << "\n");
   TIMER(TimerSet = new GlobalVariable(M,
                                       Type::getInt8PtrTy(M.getContext()),
                                       false,
                                       GlobalValue::CommonLinkage,
                                       Constant::getNullValue(Type::getInt8PtrTy(M.getContext())),
-                                      "viscTimerSet_X86"));
-  DEBUG(errs() << "Inserting GV: " << *TimerSet->getType() << *TimerSet << "\n");
-  DEBUG(errs() << "Inserting call to: " << *llvm_visc_initializeTimerSet << "\n");
-
-  TIMER(TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet,
-                                        None,
-                                        "",
-                                        InsertBefore));
-  DEBUG(errs() << "TimerSetAddress = " << *TimerSetAddr << "\n");
-  TIMER(SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore));
-  DEBUG(errs() << "Store Timer Address in Global variable: " << *SI << "\n");
+                                      "viscTimerSet_X86");
+
+    Value* TimerSetAddr = CallInst::Create(llvm_visc_initializeTimerSet,
+                                          None,
+                                          "",
+                                          InsertBefore);
+    StoreInst* SI = new StoreInst(TimerSetAddr, TimerSet, InsertBefore);
+  );
 }
 
 void CodeGenTraversal::switchToTimer(enum visc_TimerID timer, Instruction* InsertBefore) {
diff --git a/llvm/lib/Transforms/GenVISC/GenVISC.cpp b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
index deafb9e359..75197cb1f5 100644
--- a/llvm/lib/Transforms/GenVISC/GenVISC.cpp
+++ b/llvm/lib/Transforms/GenVISC/GenVISC.cpp
@@ -409,22 +409,22 @@ bool GenVISC::runOnModule(Module &M) {
     DEBUG(errs() << *llvm_visc_printTimerSet);
 
 
-  DEBUG(errs() << "-------- Searching for launch sites ----------\n");
   // Insert init context in main
-    Function* VI = M.getFunction("__visc__init");
-    assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
-    Instruction* I = cast<Instruction>(*VI->use_begin());
-    initializeTimerSet(I);
-    switchToTimer(visc_TimerID_NONE, I);
-  
-    // Insert print instruction at visc exit
-    Function* VC = M.getFunction("__visc__cleanup");
-    assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once"); 
-    I = cast<Instruction>(*VC->use_begin());
-    printTimerSet(I);
+  Function* VI = M.getFunction("__visc__init");
+  assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
+  Instruction* I = cast<Instruction>(*VI->use_begin());
+  initializeTimerSet(I);
+  switchToTimer(visc_TimerID_NONE, I);
 
+  // Insert print instruction at visc exit
+  Function* VC = M.getFunction("__visc__cleanup");
+  assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once"); 
+  I = cast<Instruction>(*VC->use_begin());
+  printTimerSet(I);
 
 
+  DEBUG(errs() << "-------- Searching for launch sites ----------\n");
+
   std::vector<Instruction*> toBeErased;
   // Iterate over all functions in the module
   for (Module::iterator mi = M.begin(), me = M.end(); mi != me; ++mi) {
diff --git a/llvm/projects/visc-rt/visc-rt.cpp b/llvm/projects/visc-rt/visc-rt.cpp
index bf9ab38988..2a6676c1f4 100644
--- a/llvm/projects/visc-rt/visc-rt.cpp
+++ b/llvm/projects/visc-rt/visc-rt.cpp
@@ -36,6 +36,7 @@ cl_context globalGPUContext;
 cl_command_queue globalCommandQue;
 
 MemTracker MTracker;
+vector<DFGDepth> DStack;
 
 static inline void checkErr(cl_int err, cl_int success, const char * name) {
   if (err != success) {
@@ -44,6 +45,38 @@ static inline void checkErr(cl_int err, cl_int success, const char * name) {
   }
 }
 
+/************************* Depth Stack Routines ***************************/
+// DStack holds one DFGDepth record per pushed node; the newest record is last.
+void llvm_visc_x86_push(unsigned n, unsigned limitX, unsigned iX, unsigned limitY,
+    unsigned iY, unsigned limitZ, unsigned iZ) {
+  DEBUG(cout << "Pushing node information on stack:\n");
+  DEBUG(cout << "\tNumDim = " << n << "\t Limit(" << limitX << ", " << limitY << ", "<< limitZ <<")\n");
+  DEBUG(cout << "\tInstance(" << iX << ", " << iY << ", "<< iZ <<")\n");
+  DStack.push_back(DFGDepth(n, limitX, iX, limitY, iY, limitZ, iZ));
+  DEBUG(cout << "DStack size = " << DStack.size() << "\n");
+}
+
+void llvm_visc_x86_pop() {
+  DEBUG(cout << "Popping from depth stack\n");
+  assert(!DStack.empty() && "Error! Pop requested on an empty depth stack");
+  DStack.pop_back();
+  DEBUG(cout << "DStack size = " << DStack.size() << "\n");
+}
+
+unsigned llvm_visc_x86_getDimLimit(unsigned level, unsigned dim) {
+  DEBUG(cout << "Request limit for dim " << dim << " of ancestor " << level <<"\n");
+  assert(level < DStack.size() && "Error! Requested ancestor is not on the depth stack");
+  DEBUG(cout << "\t Return: " << DStack[DStack.size()-level-1].getDimLimit(dim) <<"\n");
+  return DStack[DStack.size()-level-1].getDimLimit(dim);
+}
+
+unsigned llvm_visc_x86_getDimInstance(unsigned level, unsigned dim) {
+  DEBUG(cout << "Request instance id for dim " << dim << " of ancestor " << level <<"\n");
+  assert(level < DStack.size() && "Error! Requested ancestor is not on the depth stack");
+  DEBUG(cout << "\t Return: " << DStack[DStack.size()-level-1].getDimInstance(dim) <<"\n");
+  return DStack[DStack.size()-level-1].getDimInstance(dim);
+}
+
 /********************** Memory Tracking Routines **************************/
 
 void llvm_visc_track_mem(void* ptr, size_t size) {
diff --git a/llvm/projects/visc-rt/visc-rt.h b/llvm/projects/visc-rt/visc-rt.h
index 1608ed00bf..db999bd6c5 100644
--- a/llvm/projects/visc-rt/visc-rt.h
+++ b/llvm/projects/visc-rt/visc-rt.h
@@ -10,11 +10,66 @@
 #include "llvm/Support/CommandLine.h"
 #include "llvm/SupportVISC/VISCTimer.h"
 
-/********************* Memory Tracker **********************************/
 using namespace std;
 
 extern "C" {
 
+/********************* DFG Depth Stack **********************************/
+// One depth-stack entry: the number of dimensions of a node together with the
+// limit and the instance id recorded for each of its x, y and z dimensions.
+class DFGDepth {
+  private:
+    unsigned numDim;
+    unsigned dimLimit[3];     // indexed by dimension: 0 = x, 1 = y, 2 = z
+    unsigned dimInstance[3];  // same indexing as dimLimit
+  public:
+    // Zero every field so that a default-constructed entry never exposes
+    // indeterminate values through the accessors below.
+    DFGDepth() : numDim(0) {
+      for (unsigned d = 0; d < 3; ++d)
+        dimLimit[d] = dimInstance[d] = 0;
+    }
+    DFGDepth(unsigned n, unsigned dimX = 0, unsigned iX = 0, unsigned dimY = 0, unsigned iY = 0,
+        unsigned dimZ = 0, unsigned iZ = 0) {
+      assert(n <= 3 && "Error! More than 3 dimensions not supported");
+      numDim = n;
+      dimLimit[0] = dimX;
+      dimLimit[1] = dimY;
+      dimLimit[2] = dimZ;
+      dimInstance[0] = iX;
+      dimInstance[1] = iY;
+      dimInstance[2] = iZ;
+    }
+
+    // Limit recorded for dimension dim. dim must index the 3-element array.
+    // NOTE(review): dim == numDim is still accepted, as before - confirm
+    // whether dim < numDim was intended.
+    unsigned getDimLimit(unsigned dim) const {
+      assert(dim < 3 && dim <= numDim && "Error! Requested dimension limit is not specified");
+      return dimLimit[dim];
+    }
+
+    // Instance id recorded for dimension dim; same bounds as getDimLimit.
+    unsigned getDimInstance(unsigned dim) const {
+      assert(dim < 3 && dim <= numDim && "Error! Requested dimension instance is not specified");
+      return dimInstance[dim];
+    }
+
+    unsigned getNumDim() const {
+      return numDim;
+    }
+};
+
+// Runtime entry points operating on the depth stack. push records a node's
+// dimension count, limits and instance ids; pop discards the newest record.
+void llvm_visc_x86_push(unsigned n, unsigned limitX = 0, unsigned iX = 0,
+    unsigned limitY = 0, unsigned iY = 0, unsigned limitZ = 0, unsigned iZ = 0);
+void llvm_visc_x86_pop();
+// Query dimension dim of the record `level` entries below the newest one.
+unsigned llvm_visc_x86_getDimLimit(unsigned level, unsigned dim);
+unsigned llvm_visc_x86_getDimInstance(unsigned level, unsigned dim);
+
+/********************* Memory Tracker **********************************/
 class MemTrackerEntry {
 public:
   enum Location {HOST, DEVICE};
diff --git a/llvm/test/VISC/parboil/common/mk/visc.mk b/llvm/test/VISC/parboil/common/mk/visc.mk
index 58f032a734..8e14b9cb57 100644
--- a/llvm/test/VISC/parboil/common/mk/visc.mk
+++ b/llvm/test/VISC/parboil/common/mk/visc.mk
@@ -19,7 +19,12 @@ LIBCLC_NVPTX_LIB = $(LIBCLC_LIB_PATH)/nvptx--nvidiacl.bc
 TESTGEN_OPTFLAGS = -load LLVMGenVISC.so -genvisc -globaldce
 KERNEL_GEN_FLAGS = -O3 -target nvptx
 
-VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
+ifeq ($(TARGET),x86)
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG
+else
+  VISC_OPTFLAGS = -load LLVMBuildDFG.so -load LLVMDFG2LLVM_NVPTX.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-nvptx -dfg2llvm-x86 -clearDFG
+endif
+
 HOST_LINKFLAGS =
 
 ifeq ($(TIMER),x86)
@@ -30,7 +35,11 @@ else ifeq ($(TIMER),gen)
   TESTGEN_OPTFLAGS += -visc-timers-gen
 else ifeq ($(TIMER),no)
 else
-  VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx
+  ifeq ($(TARGET),x86)
+    VISC_OPTFLAGS += -visc-timers-x86
+  else
+    VISC_OPTFLAGS += -visc-timers-x86 -visc-timers-ptx
+  endif
   TESTGEN_OPTFLAGS += -visc-timers-gen
 endif
 
@@ -79,9 +88,11 @@ endif
 OBJS = $(call INBUILDDIR,$(SRCDIR_OBJS))
 TEST_OBJS = $(call INBUILDDIR,$(VISC_OBJS))
 PARBOIL_OBJS = $(call INBUILDDIR,parboil.ll)
-KERNEL_LINKED = $(BUILDDIR)/$(APP).kernels.linked.ll
-KERNEL = $(TEST_OBJS).kernels.ll
-PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s
+ifneq ($(TARGET),x86)
+  KERNEL_LINKED = $(BUILDDIR)/$(APP).kernels.linked.ll
+  KERNEL = $(TEST_OBJS).kernels.ll
+  PTX_ASSEMBLY = $(TEST_OBJS).nvptx.s
+endif
 HOST_LINKED = $(BUILDDIR)/$(APP).linked.ll
 HOST = $(BUILDDIR)/$(APP).host.ll
 APP_BINS = $(PTX_ASSEMBLY) $(BIN)
@@ -95,6 +106,7 @@ endif
 ########################################
 
 default: $(FAILSAFE) $(BUILDDIR) $(PTX_ASSEMBLY) $(BIN)
+#default: $(FAILSAFE) $(BUILDDIR) $(BIN)
 
 run : $(RUNDIR)
 	echo "Resolving OpenCL library..."
diff --git a/llvm/test/VISC/unitTests/3level.ll b/llvm/test/VISC/unitTests/3level.ll
index 340020ee54..168e7b4232 100644
--- a/llvm/test/VISC/unitTests/3level.ll
+++ b/llvm/test/VISC/unitTests/3level.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/3level.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -12,6 +12,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -41,6 +47,7 @@ entry:
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
+  call void @llvm.visc.init()
   %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
   %args = bitcast %struct.arg* %in.addr to i8*
@@ -51,6 +58,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output1 = extractvalue %rtype %outputstruct, 0
   %output2 = extractvalue %rtype %outputstruct, 1
+  call void @llvm.visc.cleanup()
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output1) #0
   %call3 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output2) #0
   ret i32 0
diff --git a/llvm/test/VISC/unitTests/query2D.ll b/llvm/test/VISC/unitTests/query2D.ll
index 6d2bb9cea2..c994c2a3ff 100644
--- a/llvm/test/VISC/unitTests/query2D.ll
+++ b/llvm/test/VISC/unitTests/query2D.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/query2D.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -55,6 +61,7 @@ entry:
   %0 = load i8** %arrayidx, align 8, !tbaa !0
   %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #0
   %conv.i = trunc i64 %call.i to i32
+  call void @llvm.visc.init()
   %1 = bitcast %struct.arg* %in.addr to i32*
   store i32 %conv.i, i32* %1
   %args = bitcast %struct.arg* %in.addr to i8*
@@ -64,6 +71,7 @@ entry:
   %2 = getelementptr %struct.arg* %in.addr, i32 0, i32 1
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
+  call void @llvm.visc.cleanup()
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
   ret i32 0
 }
diff --git a/llvm/test/VISC/unitTests/query3D.ll b/llvm/test/VISC/unitTests/query3D.ll
index 923a29c51e..438fe60a3b 100644
--- a/llvm/test/VISC/unitTests/query3D.ll
+++ b/llvm/test/VISC/unitTests/query3D.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/query3D.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -56,6 +62,7 @@ declare i32 @llvm.visc.getNumNodeInstances.y(i8*) #0
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -71,6 +78,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/queryNodeInst.ll b/llvm/test/VISC/unitTests/queryNodeInst.ll
index 6572ae3633..24d6a3f0d3 100644
--- a/llvm/test/VISC/unitTests/queryNodeInst.ll
+++ b/llvm/test/VISC/unitTests/queryNodeInst.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -39,6 +45,7 @@ declare i32 @llvm.visc.getNumDims(i8*) #0
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -54,6 +61,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/queryNumDim.ll b/llvm/test/VISC/unitTests/queryNumDim.ll
index 21de1ded73..500e2ff41b 100644
--- a/llvm/test/VISC/unitTests/queryNumDim.ll
+++ b/llvm/test/VISC/unitTests/queryNumDim.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -41,6 +47,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -56,6 +63,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/queryNumNodeInst.ll b/llvm/test/VISC/unitTests/queryNumNodeInst.ll
index ae96791491..48add92f16 100644
--- a/llvm/test/VISC/unitTests/queryNumNodeInst.ll
+++ b/llvm/test/VISC/unitTests/queryNumNodeInst.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -47,6 +53,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -62,6 +69,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/singleNode.ll b/llvm/test/VISC/unitTests/singleNode.ll
index 94bf7314f2..20713e955f 100644
--- a/llvm/test/VISC/unitTests/singleNode.ll
+++ b/llvm/test/VISC/unitTests/singleNode.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -32,6 +38,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -41,6 +48,7 @@ entry:
   %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype ()* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
   call void @llvm.visc.wait(i8* %graphID)
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/twoLaunch.ll b/llvm/test/VISC/unitTests/twoLaunch.ll
index e28b8677d7..48c973a7e6 100644
--- a/llvm/test/VISC/unitTests/twoLaunch.ll
+++ b/llvm/test/VISC/unitTests/twoLaunch.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/singleNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -11,6 +11,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -32,6 +38,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr_1 = alloca %struct.arg
   %in.addr_2= alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
@@ -45,6 +52,7 @@ entry:
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
   call void @llvm.visc.wait(i8* %graphID_1)
   call void @llvm.visc.wait(i8* %graphID_2)
+  call void @llvm.visc.cleanup()
 
   ret i32 0
 }
diff --git a/llvm/test/VISC/unitTests/twoNode.ll b/llvm/test/VISC/unitTests/twoNode.ll
index 3fcc9a3532..5e2899830b 100644
--- a/llvm/test/VISC/unitTests/twoNode.ll
+++ b/llvm/test/VISC/unitTests/twoNode.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNode.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,6 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -31,6 +37,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -42,6 +49,7 @@ entry:
   %graphID = call i8* @llvm.visc.launch(i8* bitcast (%rtype (i32)* @Root to i8*), i8* %args)
   %call1 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %conv.i) #0
   call void @llvm.visc.wait(i8* %graphID)
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/twoNodeConnect.ll b/llvm/test/VISC/unitTests/twoNodeConnect.ll
index e61636c71a..06652b94e0 100644
--- a/llvm/test/VISC/unitTests/twoNodeConnect.ll
+++ b/llvm/test/VISC/unitTests/twoNodeConnect.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeConnect.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,6 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -31,6 +37,7 @@ declare void @llvm.visc.bind.output(i8*, i32, i32)
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -46,6 +53,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
diff --git a/llvm/test/VISC/unitTests/twoNodeQuery.ll b/llvm/test/VISC/unitTests/twoNodeQuery.ll
index a9df546b24..2e1ea0dba4 100644
--- a/llvm/test/VISC/unitTests/twoNodeQuery.ll
+++ b/llvm/test/VISC/unitTests/twoNodeQuery.ll
@@ -1,6 +1,6 @@
 ; RUN: opt -load LLVMBuildDFG.so -load LLVMDFG2LLVM_X86.so -load LLVMClearDFG.so -dfg2llvm-x86 -clearDFG -o %t.ll -S < %s
 ; RUN: llvm-link %t.ll ~/current-src/projects/visc-rt/visc-rt.ll -S -o %t.linked.ll
-; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -o %t.bin
+; RUN: clang++ -O3 %t.linked.ll -lpthread -lOpenCL -lrt -o %t.bin
 ; RUN: %t.bin 5
 ; ModuleID = '/home/psrivas2/current-test/unitTests/twoNodeQuery.ll'
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -10,6 +10,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @.str = private unnamed_addr constant [4 x i8] c"%d\0A\00", align 1
 
+; Function Attrs: nounwind
+declare void @llvm.visc.init() #1
+
+; Function Attrs: nounwind
+declare void @llvm.visc.cleanup() #1
+
 ; Function Attrs: nounwind
 declare i8* @llvm.visc.createNode(i8*) #0
 
@@ -40,6 +46,7 @@ declare i32 @llvm.visc.getNumDims(i8*) #0
 ; Function Attrs: nounwind uwtable
 define i32 @main(i32 %argc, i8** nocapture %argv) #1 {
 entry:
+  call void @llvm.visc.init()
   %in.addr = alloca %struct.arg
   %arrayidx = getelementptr inbounds i8** %argv, i64 1
   %0 = load i8** %arrayidx, align 8, !tbaa !0
@@ -55,6 +62,7 @@ entry:
   %outputstruct = load %rtype* %2
   %output = extractvalue %rtype %outputstruct, 0
   %call2 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i32 %output) #0
+  call void @llvm.visc.cleanup()
   ret i32 0
 }
 
-- 
GitLab