diff --git a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
index a8c807704a89853d54cc6b7c01efbc98deef78f2..a21bba52177a803018df501ccd5d7476f8155c15 100644
--- a/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
+++ b/hpvm/lib/Transforms/BuildDFG/BuildDFG.cpp
@@ -32,20 +32,20 @@ using namespace llvm;
 namespace builddfg {
 
 bool BuildDFG::runOnModule(Module &M) {
-  DEBUG(errs() << "\nBUILDDFG PASS\n");
-  DEBUG(errs() << "-------- Searching for launch sites ----------\n");
+  //DEBUG(errs() << "\nBUILDDFG PASS\n");
+  //DEBUG(errs() << "-------- Searching for launch sites ----------\n");
 
   IntrinsicInst *II;
 
   // Iterate over all functions in the module
   for (auto &Func : M) {
     Function *F = &Func;
-    DEBUG(errs() << "Function: " << F->getName() << "\n");
+    //DEBUG(errs() << "Function: " << F->getName() << "\n");
 
     for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
       Instruction *I = &*i; // Grab pointer to Instruction
       if (isHPVMLaunchIntrinsic(I)) {
-        DEBUG(errs() << "------------ Found launch site --------------\n");
+       // DEBUG(errs() << "------------ Found launch site --------------\n");
         II = cast<IntrinsicInst>(I);
 
         assert(II && "Launch intrinsic not recognized.");
@@ -53,8 +53,8 @@ bool BuildDFG::runOnModule(Module &M) {
         // Intrinsic Instruction has been initialized from this point on.
         Function *F = cast<Function>(II->getOperand(0)->stripPointerCasts());
         Root = DFInternalNode::Create(II, F, hpvmUtils::getPreferredTarget(F));
-        errs() << "INTRINSIC: " << II << "\n";
-        errs() << "ROOT NODE" << Root << "\n";
+        //errs() << "INTRINSIC: " << II << "\n";
+        //errs() << "ROOT NODE" << Root << "\n";
         Roots.push_back(Root);
         BuildGraph(Root, F);
 
@@ -62,14 +62,14 @@ bool BuildDFG::runOnModule(Module &M) {
                                         e = Root->getChildGraph()->end();
              i != e; i++) {
           DFNode *N = *i;
-          DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n");
+          //DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n");
         }
         Root->getChildGraph()->sortChildren();
         for (DFGraph::children_iterator i = Root->getChildGraph()->begin(),
                                         e = Root->getChildGraph()->end();
              i != e; i++) {
           DFNode *N = *i;
-          DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n");
+          //DEBUG(errs() << "\t" << N->getFuncPointer()->getName() << "\n");
         }
         viewDFGraph(Root->getChildGraph());
       }
@@ -176,9 +176,9 @@ bool BuildDFG::isTypeCongruent(Type *L, Type *R) {
 
 // Handles all the createNodeXX hpvm intrinsics.
 void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) {
-  errs() << "************ HANDLE CREATE NODE *********\n";
-  II->print(errs());
-  errs() << "\n";
+  //errs() << "************ HANDLE CREATE NODE *********\n";
+  //II->print(errs());
+  //errs() << "\n";
   bool isInternalNode = false;
 
   Function *F = cast<Function>((II->getOperand(0))->stripPointerCasts());
@@ -211,7 +211,7 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) {
     // dataflow graph
     DFInternalNode *childDFNode = DFInternalNode::Create(
         II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits);
-    errs() << "INTERNAL NODE: " << childDFNode << "\n";
+    //errs() << "INTERNAL NODE: " << childDFNode << "\n";
     N->addChildToDFGraph(childDFNode);
     HandleToDFNodeMap[II] = childDFNode;
     BuildGraph(childDFNode, F);
@@ -219,26 +219,26 @@ void BuildDFG::handleCreateNode(DFInternalNode *N, IntrinsicInst *II) {
     // Create Leaf DFnode and add it to the map.
     DFLeafNode *childDFNode = DFLeafNode::Create(
         II, F, hpvmUtils::getPreferredTarget(F), N, numOfDim, dimLimits);
-    errs() << "LEAF NODE: " << childDFNode << "\n";
+    //errs() << "LEAF NODE: " << childDFNode << "\n";
     N->addChildToDFGraph(childDFNode);
     HandleToDFNodeMap[II] = childDFNode;
   }
 }
 
 void BuildDFG::handleCreateEdge(DFInternalNode *N, IntrinsicInst *II) {
-  errs() << "************ HANDLE CREATE EDGE *********\n";
-  II->print(errs());
-  errs() << "\n";
+  //errs() << "************ HANDLE CREATE EDGE *********\n";
+  //II->print(errs());
+  //errs() << "\n";
   // The DFNode structures must be in the map before the edge is processed
   HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
   assert(DFI != HandleToDFNodeMap.end());
   DFI = HandleToDFNodeMap.find(II->getOperand(1));
   assert(DFI != HandleToDFNodeMap.end());
  
-  errs() << "NODE TO MAP OPERAND 0: " << II->getOperand(0) << "\n";
-  errs() << "NODE TO MAP OPERAND 1: " << II->getOperand(1) << "\n";
-  errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n";
-  errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(1)] << "\n";
+  //errs() << "NODE TO MAP OPERAND 0: " << II->getOperand(0) << "\n";
+  //errs() << "NODE TO MAP OPERAND 1: " << II->getOperand(1) << "\n";
+  //errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n";
+  //errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(1)] << "\n";
   DFNode *SrcDF = HandleToDFNodeMap[II->getOperand(0)];
   DFNode *DestDF = HandleToDFNodeMap[II->getOperand(1)];
 
@@ -272,23 +272,23 @@ void BuildDFG::handleCreateEdge(DFInternalNode *N, IntrinsicInst *II) {
                                      DestPosition, DestTy, isStreaming);
 
   HandleToDFEdgeMap[II] = newDFEdge;
-   errs() << "NEW EDGE: " << newDFEdge << "\n";
+   //errs() << "NEW EDGE: " << newDFEdge << "\n";
 
   // Add Edge to the dataflow graph associated with the parent node
   N->addEdgeToDFGraph(newDFEdge);
 }
 
 void BuildDFG::handleBindInput(DFInternalNode *N, IntrinsicInst *II) {
-  errs() << "************ HANDLE BIND INPUT *********\n";
-  II->print(errs());
-  errs() << "\n";
+  //errs() << "************ HANDLE BIND INPUT *********\n";
+  //II->print(errs());
+  //errs() << "\n";
   // The DFNode structures must be in the map before the edge is processed
   HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
   assert(DFI != HandleToDFNodeMap.end());
   
-   errs() << "NODE TP MAP: " << II->getOperand(0) << "\n";
-  errs() << "SRC NODE: " << N->getChildGraph()->getEntry() << "\n";
-  errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n";
+   //errs() << "NODE TP MAP: " << II->getOperand(0) << "\n";
+  //errs() << "SRC NODE: " << N->getChildGraph()->getEntry() << "\n";
+  //errs() << "DEST NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n";
   DFNode *SrcDF = N->getChildGraph()->getEntry();
   DFNode *DestDF = HandleToDFNodeMap[II->getOperand(0)];
 
@@ -319,23 +319,23 @@ void BuildDFG::handleBindInput(DFInternalNode *N, IntrinsicInst *II) {
                                      DestPosition, DestTy, isStreaming);
 
   HandleToDFEdgeMap[II] = newDFEdge;
-   errs() << "NEW EDGE: " << newDFEdge << "\n";
+  // errs() << "NEW EDGE: " << newDFEdge << "\n";
 
   // Add Edge to the dataflow graph associated with the parent node
   N->addEdgeToDFGraph(newDFEdge);
 }
 
 void BuildDFG::handleBindOutput(DFInternalNode *N, IntrinsicInst *II) {
-  errs() << "************ HANDLE BIND OUTPUT *********\n";
-  II->print(errs());
-  errs() << "\n";
+  //errs() << "************ HANDLE BIND OUTPUT *********\n";
+  //II->print(errs());
+  //errs() << "\n";
   // The DFNode structures must be in the map before the edge is processed
   HandleToDFNode::iterator DFI = HandleToDFNodeMap.find(II->getOperand(0));
   assert(DFI != HandleToDFNodeMap.end());
   
-  errs() << "NODE TP MAP: " << II->getOperand(0) << "\n"; 
-  errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n";
-  errs() << "DEST NODE: " << N->getChildGraph()->getExit() << "\n";
+  //errs() << "NODE TP MAP: " << II->getOperand(0) << "\n"; 
+  //errs() << "SRC NODE: " << HandleToDFNodeMap[II->getOperand(0)] << "\n";
+  //errs() << "DEST NODE: " << N->getChildGraph()->getExit() << "\n";
   DFNode *SrcDF = HandleToDFNodeMap[II->getOperand(0)];
   DFNode *DestDF = N->getChildGraph()->getExit();
 
@@ -366,14 +366,14 @@ void BuildDFG::handleBindOutput(DFInternalNode *N, IntrinsicInst *II) {
                                      DestPosition, DestTy, isStreaming);
 
   HandleToDFEdgeMap[II] = newDFEdge;
-  errs() << "NEW EDGE: " << newDFEdge << "\n";
+  //errs() << "NEW EDGE: " << newDFEdge << "\n";
 
   // Add Edge to the dataflow graph associated with the parent node
   N->addEdgeToDFGraph(newDFEdge);
 }
 
 void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) {
-  DEBUG(errs() << "FUNCTION: " << F->getName() << "\n");
+  //DEBUG(errs() << "FUNCTION: " << F->getName() << "\n");
   // TODO: Place checks for valid hpvm functions. For example one of the
   // check can be that any function that contains hpvm dataflow graph
   // construction intrinsics should not have other llvm IR statements.
@@ -382,10 +382,10 @@ void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) {
   // intrinsics.
   for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
     Instruction *I = &*i; // Grab pointer to Instruction
-    DEBUG(errs() << *I << "\n");
+   // DEBUG(errs() << *I << "\n");
     if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
-      DEBUG(errs() << "IntrinsicID = " << II->getIntrinsicID() << ": "
-                   << II->getCalledFunction()->getName() << "\n");
+     // DEBUG(errs() << "IntrinsicID = " << II->getIntrinsicID() << ": "
+       //            << II->getCalledFunction()->getName() << "\n");
       switch (II->getIntrinsicID()) {
       case Intrinsic::hpvm_createNode:
       case Intrinsic::hpvm_createNode1D:
@@ -406,15 +406,15 @@ void BuildDFG::BuildGraph(DFInternalNode *N, Function *F) {
       // TODO: Reconsider launch within a dataflow graph (recursion?)
       case Intrinsic::hpvm_wait:
       case Intrinsic::hpvm_launch:
-        DEBUG(errs()
-              << "Error: Launch/wait intrinsic used within a dataflow graph\n\t"
-              << *II << "\n");
+       // DEBUG(errs()
+         //     << "Error: Launch/wait intrinsic used within a dataflow graph\n\t"
+           //   << *II << "\n");
         break;
 
       default:
-        DEBUG(
-            errs() << "Error: Invalid HPVM Intrinsic inside Internal node!\n\t"
-                   << *II << "\n");
+        //DEBUG(
+          //  errs() << "Error: Invalid HPVM Intrinsic inside Internal node!\n\t"
+            //       << *II << "\n");
         break;
       }
       continue;
diff --git a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
index de9c025c0e7e996b6abfaa8748adf6688d04d10d..349614f01948db52911c6760e1e99117c06e1bf5 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CPU/DFG2LLVM_CPU.cpp
@@ -13,7 +13,9 @@
 //===----------------------------------------------------------------------===//
 
 #define DEBUG_TYPE "DFG2LLVM_CPU"
+
 #include "SupportHPVM/DFG2LLVM.h"
+
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/InstIterator.h"
@@ -1426,14 +1428,56 @@ void CGT_CPU::codeGen(DFLeafNode *N) {
                  << " : skipping it\n");
 
     switch (N->getTag()) {
-    case hpvm::GPU_TARGET:
-      // A leaf node should not have an cpu function for GPU
-      // by design of DFG2LLVM_OpenCL backend
-      assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) &&
+     case hpvm::GPU_TARGET:
+     {
+       // A leaf node should not have an cpu function for GPU
+       // by design of DFG2LLVM_OpenCL backend
+       assert(!(N->hasCPUGenFuncForTarget(hpvm::GPU_TARGET)) &&
              "Leaf node not expected to have GPU GenFunc");
-      break;
-    default:
-      break;
+       break;
+     }
+     case hpvm::CUDNN_TARGET:
+     { 
+       errs() << "CUDNN hint found. Store CUDNN function as CPU function.\n";
+       // Make sure there is a generated CPU function for cudnn
+       assert(N->getGenFuncForTarget(hpvm::CUDNN_TARGET) && "");
+       assert(N->hasCPUGenFuncForTarget(hpvm::CUDNN_TARGET) && "");
+       // Store the CUDNN x86 function as the CPU generated function
+       Function *Ftmp = N->getGenFuncForTarget(N->getTag());
+       // after adding the required number of arguments
+       if (!N->getParent()->isChildGraphStreaming()) {
+         Ftmp = addIdxDimArgs(Ftmp);
+        }
+
+        N->removeGenFuncForTarget(hpvm::CUDNN_TARGET);
+        N->setTag(hpvm::None);
+        N->addGenFunc(Ftmp, hpvm::CPU_TARGET, true);
+        N->setTag(hpvm::CPU_TARGET);
+        break; 
+     }
+     case hpvm::PROMISE_TARGET: 
+     {
+       errs() << "Promise hint found. Store PROMISE function as CPU function.\n";
+       // Make sure there is a generated x86 function for promise
+       assert(N->getGenFuncForTarget(hpvm::PROMISE_TARGET) && "");
+       assert(N->hasCPUGenFuncForTarget(hpvm::PROMISE_TARGET) && "");
+       // Store the PROMISE x86 function as the CPU generated function
+       Function *Ftmp = N->getGenFuncForTarget(N->getTag());
+       // after adding the required number of arguments
+       if (!N->getParent()->isChildGraphStreaming()) {
+         Ftmp = addIdxDimArgs(Ftmp);
+       }
+
+       N->setTag(hpvm::None);
+       N->removeGenFuncForTarget(hpvm::PROMISE_TARGET);
+       N->addGenFunc(Ftmp, hpvm::CPU_TARGET, true);
+       N->setTag(hpvm::CPU_TARGET);
+       break;
+     }
+     default:
+     {
+       break;
+     }
     }
 
     return;
diff --git a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index dce9f25319f6fd75d72c16cd847e98fe44b5a793..8b2570fdad9f43be73fa6682d4954413375a5041 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -143,7 +143,7 @@ void CGT_CUDNN::initRuntimeAPI() {
 
   // FIXME: set correct path
   Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
+  Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
   runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
   if(runtimeModule == nullptr)
     DEBUG(errs() << Err.getMessage());
diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
index aa310291a13acafdbad38a4269a848470c892bc2..294f9ac5742d561e79b8b81b3d88c22d3b156bcd 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
@@ -40,7 +40,7 @@ namespace {
 
 cl::opt<std::string> QuantizationInputsFilename(
   "quantization-levels-filename",
-  cl::desc("<CPU quantization levels input file (path)>"),
+  cl::desc("<PROMISE quantization levels input file (path)>"),
   cl::value_desc("filename"),
   cl::Required);
 
@@ -280,6 +280,7 @@ public:
   
 void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "INITIAL STATE\n";
     switch (II->getIntrinsicID()) {
       case Intrinsic::hpvm_tensor_convolution:
         {
@@ -288,6 +289,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
         Mch->addArgument(II->getOperand(1)); // conv kernel
 
         Mch->setCurrent(new ConvolutionLayer_1());
+        errs() << "TO CONVOLUTION LAYER 1\n";
         }
         break;
       case Intrinsic::hpvm_tensor_mul:
@@ -297,6 +299,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
         Mch->addArgument(II->getOperand(1)); // 2nd gemm input
 
         Mch->setCurrent(new FullyConnectedLayer_1());
+        errs() << "TO FULLY CONNECTED LAYER 1\n";
         }
         break;
 
@@ -321,6 +324,7 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
 
 	 Mch->addIntrinsicToRemove(II);
 	 Mch->setCurrent(new InitialState());
+         errs() << "TO INIT STATE\n";
         }
         break;
 	
@@ -328,24 +332,30 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
         {
         Mch->addIntrinsicInst(II);
         Mch->setCurrent(new SingleTensorOperation());
+        errs() << "TO SINGLE OP\n";
         }
         break;
     }
     delete this;
-  } // else {} // No HPVM intrinsic received. Remain at initial 
+  } // else {} // No HPVM intrinsic received. Remain at initial
+  errs() << "NO CHANGE\n";
 }
 
 void SingleTensorOperation::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "SINGLE TENSOR OP\n";
     Mch->setCurrent(new NoPattern());
+    errs() << "TO NO PATTERN\n";
     delete this;
   }
+  errs() << "NO CHANGE\n";
 }
 
 void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "FULLY CONNECTED LAYER 1\n";
     switch (II->getIntrinsicID()) {
       case Intrinsic::hpvm_tensor_add:
         {
@@ -357,14 +367,17 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
         Mch->addArgument(II->getOperand(1));     // bias
 
         Mch->setCurrent(new FullyConnectedLayer_2());
+         errs() << "TO FULLY CONNECTED LAYER 2\n";
         }
         break;
       default:
         Mch->setCurrent(new NoPattern());
+        errs() << "TO NO PATTERN\n";
         break;
     }
   } else {
     Mch->setCurrent(new NoPattern());
+    errs() << "TO NO PATTERN\n";
   }
   delete this;
 }
@@ -372,6 +385,7 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
 void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "FULLY CONNECTED LAYER 2\n";
     switch (II->getIntrinsicID()) {
       case Intrinsic::hpvm_tensor_tanh:
         {
@@ -382,6 +396,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new FullyConnectedLayer_3());
+        errs() << "TO FULLY CONNECTED LAYER 3\n";
         }
         break;
       case Intrinsic::hpvm_tensor_relu:
@@ -393,6 +408,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new FullyConnectedLayer_3());
+        errs() << "TO FULLY CONNECTED LAYER 3\n";
         }
         break;
       case Intrinsic::hpvm_tensor_clipped_relu:
@@ -404,10 +420,12 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new FullyConnectedLayer_3());
+        errs() << "TO FULLY CONNECTED LAYER 3\n";
         }
         break;
       default: // No activation, but HPVM intrinsic
         Mch->setCurrent(new NoPattern());
+        errs() << "TO NO PATTERN\n";
         break;
     }
   } else { // End of instruction stream
@@ -416,6 +434,7 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
                      Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new FullyConnectedLayer());
+    errs() << "TO FULLY CONNECTED LAYER\n";
   }
   delete this;
 }
@@ -423,9 +442,12 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
 void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch,
                                        IntrinsicInst *II) {
   if (!II) { // End of instruction stream
+    errs() << "FULLY CONNECTED LAYER 3\n";
     Mch->setCurrent(new FullyConnectedLayer());
+    errs() << "TO FULLY CONNECTED LAYER\n";
   } else {
     Mch->setCurrent(new NoPattern());
+    errs() << "TO NO PATTERN\n";
   }
   delete this;
 }
@@ -433,14 +455,18 @@ void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch,
 void FullyConnectedLayer::transition(CodeGenStateMachine *Mch,
                                      IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "FULLY CONNECTED LAYER\n";
     Mch->setCurrent(new NoPattern());
+     errs() << "TO NO PATTERN\n";
     delete this;
   }
+   errs() << "NO CHANGE\n";
 }
 
 void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "CONVOLUTION LAYER 1\n";
     switch (II->getIntrinsicID()) {
       case Intrinsic::hpvm_tensor_add:
         {
@@ -457,10 +483,12 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
         Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
 
         Mch->setCurrent(new ConvolutionLayer_2());
+         errs() << "TO CONVOLUTION LAYER 2\n";
         }
         break;
       default:
         Mch->setCurrent(new NoPattern());
+        errs() << "TO NO PATTERN\n";
         break;
     }
   } else {
@@ -497,6 +525,7 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
                      Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new ConvolutionLayer());
+   errs() << "TO CONVOLUTION LAYER\n";
   }
   delete this;
 }
@@ -504,6 +533,7 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
 void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "CONVOLUTION LAYER 2\n";
     switch (II->getIntrinsicID()) {
       case Intrinsic::hpvm_tensor_tanh:
         {
@@ -513,6 +543,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_3());
+        errs() << "TO CONVOLUTION LAYER 3\n";
         }
         break;
       case Intrinsic::hpvm_tensor_relu:
@@ -523,6 +554,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_3());
+        errs() << "TO CONVOLUTION LAYER 3\n";
         }
         break;
       case Intrinsic::hpvm_tensor_clipped_relu:
@@ -533,6 +565,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_3());
+        errs() << "TO CONVOLUTION LAYER 3\n";
         }
         break;
       case Intrinsic::hpvm_tensor_pool_max:
@@ -551,6 +584,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_4());
+        errs() << "TO CONVOLUTION LAYER 4\n";
         }
         break;
       case Intrinsic::hpvm_tensor_pool_min:
@@ -569,6 +603,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_4());
+        errs() << "TO CONVOLUTION LAYER 4\n";
         }
         break;
       case Intrinsic::hpvm_tensor_pool_mean:
@@ -587,10 +622,12 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_4());
+        errs() << "TO CONVOLUTION LAYER 4\n";
         }
         break;
       default: // No activation, No pooling, but HPVM intrinsic
         Mch->setCurrent(new NoPattern());
+        errs() << "TO NO PATTERN\n";
         break;
     }
   } else { // End of instruction stream
@@ -607,6 +644,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
                      Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new ConvolutionLayer());
+    errs() << "TO CONVOLUTION LAYER\n";
   }
   delete this;
 }
@@ -614,6 +652,7 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
 void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "CONVOLUTION LAYER 3\n";
     switch (II->getIntrinsicID()) {
       case Intrinsic::hpvm_tensor_pool_max:
         {
@@ -643,6 +682,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
         }
 
         Mch->setCurrent(new ConvolutionLayer_4());
+        errs() << "TO CONVOLUTION LAYER 4\n";
         }
         break;
       case Intrinsic::hpvm_tensor_pool_min:
@@ -674,6 +714,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
         }
 
         Mch->setCurrent(new ConvolutionLayer_4());
+        errs() << "TO CONVOLUTION LAYER 4\n";
         }
         break;
       case Intrinsic::hpvm_tensor_pool_mean:
@@ -704,10 +745,12 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
         }
 
         Mch->setCurrent(new ConvolutionLayer_4());
+        errs() << "TO CONVOLUTION LAYER 4\n";
         }
         break;
       default: // No pooling, but HPVM intrinsic
         Mch->setCurrent(new NoPattern());
+        errs() << "TO NO PATTERN\n";
         break;
     }
   } else { // End of instruction stream
@@ -736,6 +779,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
       }
 
      Mch->setCurrent(new ConvolutionLayer());
+     errs() << "TO CONVOLUTION LAYER\n";
   }
   delete this;
 }
@@ -743,9 +787,12 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
 void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch,
                                     IntrinsicInst *II) {
   if (!II) { // End of instruction stream
+    errs() << "CONVOLUTION LAYER 4\n";
     Mch->setCurrent(new ConvolutionLayer());
+    errs() << "TO CONVOLUTION LAYER\n";
   } else {
     Mch->setCurrent(new NoPattern());
+    errs() << "TO NO PATTERN\n";
   }
   delete this;
 }
@@ -753,9 +800,12 @@ void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch,
 void ConvolutionLayer::transition(CodeGenStateMachine *Mch,
                                   IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
+    errs() << "CONVOLUTION LAYER\n";
     Mch->setCurrent(new NoPattern());
+    errs() << "TO NO PATTERN\n";
     delete this;
   }
+  errs() << "NO CHANGE\n";
 }
 
 void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {}
@@ -772,6 +822,7 @@ void CodeGenStateMachine::transition(IntrinsicInst *II) {
 void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRef,
                                   InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
 
+  errs() << "TRANSITIONED TO: " << std::to_string(current->getStateID()) << "\n";
   assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
             (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)     ||
             (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) &&
@@ -1174,7 +1225,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
 
       
       default:
-        llvm_unreachable("Unknown VISC Intrinsic!");
+        llvm_unreachable("Unknown HPVM Intrinsic!");
         break;
     }
 
@@ -1280,7 +1331,7 @@ void CGT_WrapperAPI::initRuntimeAPI() {
 
   // FIXME: set correct path
   Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
+  Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
   runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
   if(runtimeModule == nullptr)
     DEBUG(errs() << Err.getMessage());
@@ -1363,6 +1414,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
 
   // Increment the node ID, for current node.
   ++nodeID;
+  errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
@@ -1370,7 +1422,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
   // Look up if we have visited this function before. If we have, then just
   // get the cloned function pointer from DFNode. Otherwise, create the cloned
   // function and add it to the DFNode GenFunc.
-  Function *F_wrapper_api = N->getGenFuncForTarget(hpvm::CPU_TARGET);
+  Function *F_wrapper_api = N->getGenFuncForTarget(hpvm::PROMISE_TARGET);
 
   assert((F_wrapper_api == NULL) &&
          "Error: Visiting a node for which code already generated");
@@ -1385,7 +1437,7 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
   F_wrapper_api->removeFromParent();
   M.getFunctionList().push_back(F_wrapper_api);
 
-  N->addGenFunc(F_wrapper_api, hpvm::CPU_TARGET, true);
+  N->addGenFunc(F_wrapper_api, hpvm::PROMISE_TARGET, true);
 
   /* Removing HPVM in/out/inout function attributes */
   for(Function::arg_iterator ai = F_wrapper_api->arg_begin(), ae = F_wrapper_api->arg_end();
@@ -1429,10 +1481,11 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
   for (inst_iterator i = inst_begin(F_wrapper_api), e = inst_end(F_wrapper_api);
        i != e; ++i) {
     Instruction *I = &(*i);
+    errs() << "PRINT INST: " << *I << "\n";
     CGM.transition(dyn_cast<IntrinsicInst>(I));
   }
-
-  errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
+  errs() << "CLONED FUNCTION: " << *F_wrapper_api << "\n";
+ // errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
   //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP);
   CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP);
 
diff --git a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
index 2ddc77ff4bcbfa1bd03ab5fc8fba0d35202980f2..e0032eaaf4cb073a99912a5eef08f1433f942234 100644
--- a/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
+++ b/hpvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
@@ -823,18 +823,19 @@ void FindFusionTargetsTraversal::codeGen(DFInternalNode *N) {
 void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
   DEBUG(errs() << "Inside leaf node: "
                << N->getFuncPointer()->getName() << "\n");
-
+   errs() << "FUSE TARGETS AT LEAF NODE\n";
   // Skip fusion check if it is a dummy node
   if(N->isDummyNode()) {
     DEBUG(errs() << "Skipping dummy node\n");
     return;
   }
-
-
-  if(!preferredTargetIncludes(N, hpvm::CPU_TARGET)) {
+  errs() << "THIS IS NOT A DUMMY NODE\n";
+  errs() << "INTRINSIC: " << *isValidHPVMTensorNode(N) << "\n";
+  if(!preferredTargetIncludes(N, hpvm::PROMISE_TARGET)) {
     // Only fuse if we plan to target PROMISE/Layers API
     // The CUDNN backend would be able to generate calls for the fused node,
     // but not the other way around
+    errs() << "NO PROMISE HINT. SKIPPING NODE.\n";
     DEBUG(errs() << "No PROMISE hint. Skipping node: "
                  << N->getFuncPointer()->getName() << "\n");
     return;
@@ -857,23 +858,29 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
     */
     
     case Intrinsic::hpvm_tensor_convolution:
-      { // Found beginning of pattern conv-bias-activation-pooling.
+      { errs() << "INSTRUCTION: " << *II << "\n";
+
+        // Found beginning of pattern conv-bias-activation-pooling.
         // Look for the rest
         CurrentNodeSequence.push_back(N->getInstruction());
 
         // Look for bias
         DFNode *SN = findNextNodeInSequence(N);
         if (!SN) {
+          errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n";
           return; // Did not find a node sequence starting at N. Simpy return.
         }
         if (getPreferredTarget(SN) != StartNodePreferredTarget) {
+          errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n";
           return; // Node in sequence has different hint. Simpy return.
         }
         IntrinsicInst *SII = isValidHPVMTensorNode(SN);
         if (SII->getIntrinsicID() != Intrinsic::hpvm_tensor_add) {
+           errs() << "SUCCESSOR IS NOT A BIAS OPERATION\n";
           // Successor is not the bias operation, thus does not fit the pattern.
           return;
         }
+        errs() << "SUCCESSOR IS A BIAS OPERATION\n";
         // Otherwise, push this node to the current sequence
         CurrentNodeSequence.push_back(SN->getInstruction());
 
@@ -882,12 +889,15 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
         // Continue with next node, looking for activation (relu, clipped relu, tanh)
         SN = findNextNodeInSequence(SN);
         if (!SN) {
+           errs() << "DID NOT FIND POOLING AND ACTIVATION NODE SEQUENCE\n";
           // Did not find a node sequence starting at N.Use current sequence.
           break;
         }
         if (getPreferredTarget(SN) != StartNodePreferredTarget) {
+          errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n";
           break; // Node in sequence has different hint. Use current sequence.
         }
+        errs() << "SUCCESSOR IS A ACTIVATION OR POOLING  OPERATION\n";
         SII = isValidHPVMTensorNode(SN);
 
         if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) ||
@@ -895,13 +905,15 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
             (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh)) {
           // Successor is activation. Push this node to the current sequence.
           CurrentNodeSequence.push_back(SN->getInstruction());
-
+	  errs() << "SUCCESSOR IS AN ACTIVATION OPERATION\n";
           // Will continue, looking for pooling in the next node
           SN = findNextNodeInSequence(SN);
           if (!SN) {
+            errs() << "DID NOT FIND POOLING NODE SEQUENCE\n";
             break; // No node in sequence. Use currently found sequence.
           }
           if (getPreferredTarget(SN) != StartNodePreferredTarget) {
+            errs() << "NODE IN SEQUENCE HAS DIFFERENT HINT\n";
             break; // Node in sequence has different hint. Use current sequence.
           }
           SII = isValidHPVMTensorNode(SN);
@@ -910,6 +922,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
         if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) ||
             (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) ||
             (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean)) {
+            errs() << "SUCCESSOR IS A POOLING OPERATION\n";
           // Successor is a pool operation. Use currently found sequence.
           CurrentNodeSequence.push_back(SN->getInstruction());      
         }
@@ -921,16 +934,20 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
         // Look for bias
         DFNode *SN = findNextNodeInSequence(N);
         if (!SN) {
+          errs() << "DID NOT FIND ADD IN NODE SEQUENCE\n";
           return; // Did not find a node sequence starting at N. Simpy return.
         }
         if (getPreferredTarget(SN) != StartNodePreferredTarget) {
+          errs() << "HINT DO NOT MATCH IN NODE SEQUENCE\n";
           return; // Node in sequence has different hint. Simpy return.
         }
         IntrinsicInst *SII = isValidHPVMTensorNode(SN);
         if (SII->getIntrinsicID() != Intrinsic::hpvm_tensor_add) {
+           errs() << "SUCCESSOR IS NOT IS BIAS OPERATION\n";
           // Successor is not the bias operation, thus does not fit the pattern.
           return;
         }
+        errs() << "SUCCESSOR IS BIAS OPERATION\n";
         // Otherwise, push this node to the current sequence
         CurrentNodeSequence.push_back(SN->getInstruction());
         // This is a possible fuse target, gemm-add.
@@ -946,6 +963,7 @@ void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
             if ((SII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) ||
                 (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) ||
                 (SII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh)) {
+               errs() << "SUCCESSOR IS ACTIVATION OPERATION\n";
               // We found activation in sequence. Push in vector as well.
               CurrentNodeSequence.push_back(SN->getInstruction());
             }
diff --git a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp
index 00063d6dc85c2de2ec609fce29024da037d84f12..cac468a704990746806684d5bad0b3054ccfcc9d 100644
--- a/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp
+++ b/hpvm/lib/Transforms/GenHPVM/GenHPVM.cpp
@@ -381,7 +381,7 @@ bool GenHPVM::runOnModule(Module &M) {
         assert(isa<ConstantInt>(CI->getArgOperand(0)) &&
                "Argument to hint must be constant integer!");
         ConstantInt *hint = cast<ConstantInt>(CI->getArgOperand(0));
-
+        errs() << "HINT INSTRUCTION: " << *I << "\n";
         hpvm::Target t = (hpvm::Target)hint->getZExtValue();
         addHint(CI->getParent()->getParent(), t);
         DEBUG(errs() << "Found hpvm hint call: " << *CI << "\n");