diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
index ac5fa450c0956fc056caf732418cda9e52e3655c..ecec258dfe6ef45377bdf2890a5bcd1a31fedf6e 100644
--- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
+++ b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
@@ -73,6 +73,7 @@ private:
 
   std::vector<Value*> Args;
   std::vector<IntrinsicInst*> IIs;
+  std::vector<IntrinsicInst*> IIs_remove; // Intrinsics already handled inline; erased after codegen
   AbstractState *current;
 
 public:
@@ -88,6 +89,10 @@ public:
     return M;
   }
 
+  Module *getRtModule() {
+    return RtM;
+  }
+
   void addArgument(Value *Arg) {
     Args.push_back(Arg);
   }
@@ -96,6 +101,10 @@ public:
     IIs.push_back(II);
   }
 
+  void addIntrinsicToRemove(IntrinsicInst *II) {
+    IIs_remove.push_back(II);
+  }
+
   IntrinsicInst *getIntrinsicInstAt(unsigned idx) {
     return IIs[idx];
   }
@@ -288,6 +298,31 @@ void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
         Mch->setCurrent(new FullyConnectedLayer_1());
         }
         break;
+
+      case Intrinsic::visc_node_id:
+        {
+        DEBUG(errs() << "\t: Handling __visc_node_id \n");
+        // Get the uint32 node ID operand of the intrinsic
+        Value *Op = II->getOperand(0);
+
+        // Argument list for the runtime call
+        std::vector<Value*> Args;
+        Args.push_back(Op);
+
+        Module *M = Mch->getModule();
+        Module *RtM = Mch->getRtModule();
+
+        // Insert a call to the runtime routine that records the node ID
+        Constant* visc_node_id_call =
+          M->getOrInsertFunction(StringRef("tensor_set_node_id"),
+                  RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType());
+        CallInst::Create(visc_node_id_call, Args, "", II);
+
+        // The intrinsic is fully handled here; mark it for later removal
+        Mch->addIntrinsicToRemove(II);
+        Mch->setCurrent(new InitialState());
+        }
+        break;
       default: // Other HPVM intrinsic
         {
         Mch->addIntrinsicInst(II);
@@ -438,14 +473,15 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
     Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
     Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
     Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+
+    //    Mch->addArgument(ConstantInt::get(
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    //    Mch->addArgument(ConstantInt::get(
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    //    Mch->addArgument(ConstantInt::get(
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    //    Mch->addArgument(ConstantInt::get(
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
 
     // No pooling
     // 0 for unused pool arguments:
@@ -470,9 +506,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
     switch (II->getIntrinsicID()) {
       case Intrinsic::visc_tensor_tanh:
         {
-        // Type of activation : TanH
-//        Mch->addArgument(ConstantInt::get(
-//                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+        // Type of activation : TanH
+        //        Mch->addArgument(ConstantInt::get(
+        //                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_3());
@@ -480,9 +516,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         break;
       case Intrinsic::visc_tensor_relu:
         {
-        // Type of activation : ReLU
-//        Mch->addArgument(ConstantInt::get(
-//                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+        // Type of activation : ReLU
+        //        Mch->addArgument(ConstantInt::get(
+        //                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_3());
@@ -490,9 +526,9 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
         break;
       case Intrinsic::visc_tensor_clipped_relu:
         {
-        // Type of activation : Clipped ReLU
-//        Mch->addArgument(ConstantInt::get(
-//                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+        // Type of activation : Clipped ReLU
+        //        Mch->addArgument(ConstantInt::get(
+        //                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
         Mch->addIntrinsicInst(II);
 
         Mch->setCurrent(new ConvolutionLayer_3());
@@ -613,6 +649,7 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
         // pool min FIXME: 2: supported?
         Mch->addArgument(ConstantInt::get(
                          Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+
         // pool_size_v, pool_size_h, pool pad_v,
         // pool_pad_h, pool_stride_v, pool_stride_h
         for (int i = 1; i < 7; i++) {
@@ -737,10 +774,11 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
   assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
             (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)     ||
             (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) &&
-          "Unsupported instruction sequence for the Wrapper API.\n" );
+            "Unsupported instruction sequence for the Wrapper API.\n" );
 
   if ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
       (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)) {
+
     // Layer Operation.
     DEBUG(errs() << "Layer Instruction Sequence. Validating ...\n");
     // We have a valid instruction sequence.
@@ -765,6 +803,7 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
           Constant* wrapper_ConvLayer2 =
             M->getOrInsertFunction(StringRef("wrapper_ConvLayer2"),
                    RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType());
+
           DEBUG(errs() << *wrapper_ConvLayer2);
   
           // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0
@@ -839,12 +878,14 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
     CI->insertBefore(IIlast);
     IIlast->replaceAllUsesWith(CI);
 
-  } else { // SINGLE_TENSOR_OPERATION
+  }
+  else { // SINGLE_TENSOR_OPERATION
     assert((IIs.size() == 1) &&
             "Unexpected size of intrinsics vector in code gen state machine.\n");
     assert(Args.empty() && "Unexpected arguments found in coge gen state machine.\n");
     IntrinsicInst *TensorII = IIs[0];
-errs() << "TensorII: " << *TensorII << "\n";
+
+    errs() << "TensorII: " << *TensorII << "\n";
 
     switch (TensorII->getIntrinsicID()) {
       case Intrinsic::visc_tensor_group_convolution:
@@ -893,8 +934,9 @@ errs() << "TensorII: " << *TensorII << "\n";
 
       case Intrinsic::visc_tensor_batchnorm:
       { /* llvm.hpvm.tensor.batchnorm */
+
         // Tensor batchnorm is not in place.
-    // FIXME: Add Check for InPlace Analysis 
+        // FIXME: Add Check for InPlace Analysis
         DEBUG(errs() << F->getName() << "\t: Handling tensor batch normalization \n");
 
         // Argument list for the runtime call
@@ -933,18 +975,18 @@ errs() << "TensorII: " << *TensorII << "\n";
 
       case Intrinsic::visc_tensor_add:
       { /* llvm.hpvm.tensor.add */
-        DEBUG(errs() << F->getName() << "\t: Handling tensor add\n");
-        // Tensor add(a,b) is in place for argument a.
-//        Value *Op = TensorII->getOperand(0);
+        DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n");
 
+        // Tensor add(a,b) is in place for argument a.
+        //        Value *Op = TensorII->getOperand(0);
         // Test the intrinsic operand for in place operation.
-//        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
+        //        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
 
         // Code generation will not continue if this is false, because the target
         // may provide an in place operation(safe choice)
         // FIXME: remove this comment - must check for in-place
-//        assert(inplace &&
-//               "Operand not valid for in place operation. Code gen aborted.\n");
+        //        assert(inplace &&
+        //               "Operand not valid for in place operation. Code gen aborted.\n");
 
 
         // Argument list for the runtime call
@@ -1047,13 +1089,11 @@ errs() << "TensorII: " << *TensorII << "\n";
         Value *Op = TensorII->getOperand(0);
 
         // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
+        //-- bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
         // Code generation will not continue if this is false, because the target
         // may provide an in place operation(safe choice)
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
+        //-- assert(inplace &&
+        //--        "Operand not valid for in place operation. Code gen aborted.\n");
 
         // Create string for node name, as first argument for wrapper API call
         Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
@@ -1110,15 +1150,6 @@ errs() << "TensorII: " << *TensorII << "\n";
         // Tensor softmax(a) is in place for argument a.
         Value *Op = TensorII->getOperand(0);
 
-        // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
-        // Code generation will not continue if this is false, because the target
-        // may provide an in place operation(safe choice)
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
-
         // Create string for node name, as first argument for wrapper API call
         Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
                                                             strRef, true);
@@ -1146,62 +1177,8 @@ errs() << "TensorII: " << *TensorII << "\n";
         TensorII->replaceAllUsesWith(TensorII->getOperand(0));
       }
       break;
-/*
-      case Intrinsic::visc_image_fft_transform:
-      { // llvm.hpvm.image.fft.transform - Or another image intrinsic
-        // All will be treated as not in place
-        DEBUG(errs() << F->getName() << "\t: Handling fft transform \n");
-
-        // Create argument list for the runtime call - stored in Args
 
-        // All interfaces will have a string as first argument, which will be
-        // used to identify the dataflow node at runtime
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        // Here, use you will access the appropriate arruments of the intrinsic
-        // and push_back, in order to create the argument list of runtime call
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-
-        Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
-        Args.push_back(conv_mode);
-
-        Args.push_back(TensorII->getOperand(7));
-
-        // Done with argument list.
-
-        // Create wrapper API runtime function call
-        // Appropriately set the name of the function of the runtime that you
-        // want to call
-        // Note: the Constant * is what we need to pass to the callInst.
-        // This name does not have to match, but does so for similarity.
-        Constant* wrapper_tensorGroupConvolution;
-          M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"),
-            RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution,
-                                        Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.xxx with the runtime call
-        TensorII->replaceAllUsesWith(CI);
-      }
-      break;
-
-*/
+
       default:
         llvm_unreachable("Unknown VISC Intrinsic!");
         break;
@@ -1219,6 +1196,13 @@ errs() << "TensorII: " << *TensorII << "\n";
     (*ri)->eraseFromParent();
   }
 
+  // Erase intrinsics that were already lowered to runtime calls (e.g. visc_node_id)
+  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs_remove.rbegin(),
+       re = IIs_remove.rend(); ri != re; ++ri) {
+    DEBUG(errs() << "Erasing: " << **ri << "\n");
+    (*ri)->eraseFromParent();
+  }
+
 }
 
 // DFG2LLVM_WrapperAPI - The first implementation.
@@ -1380,20 +1366,13 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
     return;
   }
 
-//  For wrapper API, we generate code for every leaf node.
-//  No need to check for hints from frontend
-//  // Generate code only if it has the right hint
-//  if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) {
-//    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-//    return;
-//  }
 
   // Increment the node ID, for current node.
   ++nodeID;
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
-errs() << "Node Function: " << *F << "\n";
+  errs() << "Node Function: " << *F << "\n";
   // Look up if we have visited this function before. If we have, then just
   // get the cloned function pointer from DFNode. Otherwise, create the cloned
   // function and add it to the DFNode GenFunc.
@@ -1461,15 +1442,12 @@ errs() << "Node Function: " << *F << "\n";
   //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP);
   CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP);
 
-//errs() << "-----------------------------------\n";
-//errs() << *F_wrapper_api << "\n";
-
   return;
 }
 
 bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_WrapperAPI PASS\n";
 
+  errs() << "\nDFG2LLVM_WrapperAPI PASS\n";
   // Get the BuildDFG Analysis Results:
   // - Dataflow graph
   BuildDFG &DFG = getAnalysis<BuildDFG>();
@@ -1477,9 +1455,8 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
   // Get the In Place Analysis Results
   InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
     (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
-  // Print results
-//  printInPlaceDFGParameter(IPP);
 
+
   std::vector<DFInternalNode*> Roots = DFG.getRoots();
  
   // Visitor for Code Generation Graph Traversal
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
index eb72b3dd7425dd59ad2212741b78d5954d35e64c..541efe4e1dae7ec0b62fd041396cb34cd6f9e519 100644
--- a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
+++ b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
@@ -92,8 +92,7 @@ static IntrinsicInst *isValidHPVMTensorNode(DFNode *N) {
     if(dyn_cast<IntrinsicInst>(&*I)){
       II = dyn_cast<IntrinsicInst>(&*I);
       if ((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")){
-
-	errs()<<"**** WATCH *** " << *II << "\n\n\n";
+        errs() << "** Tensor Intrinsic = " << *II << "\n";
       }
       
     }