From 41411aad55677796c566b12990be7608bec6d2d0 Mon Sep 17 00:00:00 2001
From: Yifan Zhao <yifanz16@illinois.edu>
Date: Wed, 13 Jan 2021 05:55:07 -0600
Subject: [PATCH] Use config file to get path to tensor_runtime.ll (started in
 750ab0620)

---
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp         |  344 ++--
 .../DFG2LLVM_WrapperAPI.cpp                   | 1671 ++++++++---------
 .../ReplaceIntrinsics/ReplaceIntrinsics.cpp   |  244 ++-
 3 files changed, 1075 insertions(+), 1184 deletions(-)

diff --git a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
index 8b2570fdad..bd26a92fd3 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -27,6 +27,7 @@
 
 #include "SupportHPVM/DFG2LLVM.h"
 #include "InPlaceDFG/InPlaceDFGAnalysis.h"
+#include "Config.h"
 
 #include <sstream>
 
@@ -44,10 +45,9 @@ namespace {
 struct DFG2LLVM_CUDNN : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
   DFG2LLVM_CUDNN() : DFG2LLVM(ID) {}
-private:
 
+private:
 public:
-
   void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequired<BuildDFG>();
     AU.addRequired<InPlaceDFGAnalysisWrapper>();
@@ -62,7 +62,7 @@ public:
 class CGT_CUDNN : public CodeGenTraversal {
 
 private:
-  //Member variables
+  // Member variables
   InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;
 
   // VISC Runtime API and Tensor runtime API
@@ -73,32 +73,28 @@ private:
   // Functions
   bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N);
 
-
-
   // Virtual Functions
   void init();
   void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
+  void codeGen(DFInternalNode *N);
+  void codeGen(DFLeafNode *N);
 
 public:
-
   // Constructor
-  CGT_CUDNN(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
-  : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
+  CGT_CUDNN(Module &_M, BuildDFG &_DFG,
+            InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
+      : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
     initRuntimeAPI();
   }
-
 };
 
-bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
-                                                  Function *Fgen,
+bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op, Function *Fgen,
                                                   DFNode *N) {
 
   if (Argument *Arg = dyn_cast<Argument>(Op)) {
     DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
     assert((Arg->getParent() == Fgen) &&
-          "Extra Parameter in body of Function\n");
+           "Extra Parameter in body of Function\n");
     // Candidae parameter is a function argument
     // In this case, consult the result of in place analysis
     // Find position in arg list
@@ -112,11 +108,10 @@ bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
       DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n");
       return false;
     }
-  }
-  else {
+  } else {
     // If it is not an argument, then it needs to be the result of
     // another intrinsic. These are new objects that are allocated,
-    // and consumed by next intrinsic. 
+    // and consumed by next intrinsic.
     DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n");
     if (dyn_cast<IntrinsicInst>(Op)) {
       DEBUG(errs() << *Arg << "\t: local, suitable for in place\n");
@@ -128,24 +123,15 @@ bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
   }
 }
 
-
-void CGT_CUDNN::init() {
-}
+void CGT_CUDNN::init() {}
 
 // Initialize the VISC runtime API. This makes it easier to insert these calls
 void CGT_CUDNN::initRuntimeAPI() {
 
   // Load Runtime API Module
   SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
+  runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext());
+  if (runtimeModule == nullptr)
     DEBUG(errs() << Err.getMessage());
   else
     DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
@@ -161,59 +147,60 @@ void CGT_CUDNN::initRuntimeAPI() {
   // Find hpvm.init and visc.cleanup calls, and add placeholder methods
   // for initialization and cleanup of the hpvm tensor runtime
 
-  Function* VI = M.getFunction("llvm.hpvm.init");
+  Function *VI = M.getFunction("llvm.hpvm.init");
   assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n");
   InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initTensorRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
+  CallInst::Create(
+      llvm_hpvm_initTensorRt,
+      ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
+      "", InitCall);
 
-  Function* VC = M.getFunction("llvm.hpvm.cleanup");
+  Function *VC = M.getFunction("llvm.hpvm.cleanup");
   assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n");
   CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
-
+  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value *>(), "",
+                   CleanupCall);
 }
 
-void CGT_CUDNN::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
+void CGT_CUDNN::codeGen(DFInternalNode *N) {
+  errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n";
+  errs() << "Skipping internal node\n";
 }
 
-  
-void CGT_CUDNN::codeGen(DFLeafNode* N) {
+void CGT_CUDNN::codeGen(DFLeafNode *N) {
 
   // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
+  if (N->isDummyNode()) {
     DEBUG(errs() << "Skipping dummy node\n");
     return;
   }
 
   // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
+  if (N->isAllocationNode()) {
     assert(false && "Allocation Node not expected in ApproxHPVM");
     return;
   }
 
   // Generate code only if it has the right hint
   if (!checkPreferredTarget(N, hpvm::CUDNN_TARGET)) {
-    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
+    errs() << "Skipping node: " << N->getFuncPointer()->getName() << "\n";
     return;
   }
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
-  errs()<<"function name = "<< F->getName()<<"\n";
+  errs() << "function name = " << F->getName() << "\n";
 
   /* Removing HPVM in/out/inout function attributes */
-  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ai++){
+  for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
+       ai++) {
     Argument *Arg = &*ai;
-    if(Arg->hasAttribute(Attribute::In))
+    if (Arg->hasAttribute(Attribute::In))
       Arg->removeAttr(Attribute::In);
-    if(Arg->hasAttribute(Attribute::Out))
+    if (Arg->hasAttribute(Attribute::Out))
       Arg->removeAttr(Attribute::Out);
-    if(Arg->hasAttribute(Attribute::InOut))
-      Arg->removeAttr(Attribute::InOut);    
+    if (Arg->hasAttribute(Attribute::InOut))
+      Arg->removeAttr(Attribute::InOut);
   }
 
   // Look up if we have visited this function before. If we have, then just
@@ -223,14 +210,14 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
 
   assert((F_cudnn == NULL) &&
          "Error: Visiting a node for which code already generated");
-  
+
   // Clone the function
   ValueToValueMapTy VMap;
   std::string FName(F->getName().data());
   F_cudnn = CloneFunction(F, VMap);
   F_cudnn->setName(FName + "_cudnn");
-  errs()<<"Cloned function name2 = "<<F_cudnn->getName()<<"\n";
-  F_cudnn->removeFromParent();  
+  errs() << "Cloned function name2 = " << F_cudnn->getName() << "\n";
+  F_cudnn->removeFromParent();
   M.getFunctionList().push_back(F_cudnn);
 
   N->addGenFunc(F_cudnn, hpvm::CUDNN_TARGET, true);
@@ -239,165 +226,161 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
   DEBUG(errs() << "Adding nounwind to generated function\n");
   F_cudnn->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
 
-  // Add llvm_hpvm_requestTensor calls for every pointer argument of the function
-  // (they are all expected to be tensors), at the beginning of the function.
-  // This is the first instruction of the function, insert them before this
-  Instruction* FI = &*(F_cudnn->getEntryBlock().begin());
+  // Add llvm_hpvm_requestTensor calls for every pointer argument of the
+  // function (they are all expected to be tensors), at the beginning of the
+  // function. This is the first instruction of the function, insert them before
+  // this
+  Instruction *FI = &*(F_cudnn->getEntryBlock().begin());
 
   // In this backend, the target device is GPU, represented by i32 1.
   ConstantInt *TargetDeviceID =
-    ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+      ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
 
   for (Function::arg_iterator ai = F_cudnn->arg_begin(),
-       ae = F_cudnn->arg_end(); ai != ae; ++ai) {
-    Argument* Arg = &*ai;
+                              ae = F_cudnn->arg_end();
+       ai != ae; ++ai) {
+    Argument *Arg = &*ai;
     if (Arg->getType()->isPointerTy()) {
       Value *Args[] = {Arg, TargetDeviceID};
-      CallInst::Create(hpvm_request_tensor,
-                       ArrayRef<Value*>(Args, 2),
-                       "", FI);
+      CallInst::Create(hpvm_request_tensor, ArrayRef<Value *>(Args, 2), "", FI);
     }
   }
 
   std::vector<IntrinsicInst *> IItoRemove;
 
-  for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) {
+  for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e;
+       ++i) {
     Instruction *I = &(*i);
 
     if (BuildDFG::isHPVMIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      //assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")
+      IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+      // assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")
       //  && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
 
-      //if (!(II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")){
-      //continue; // skip non-tensor ops 
+      // if
+      // (!(II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")){
+      // continue; // skip non-tensor ops
       //}
-      
+
       /********************* Handle VISC Tensor intrinsics ********************/
       switch (II->getIntrinsicID()) {
 
-      case Intrinsic::hpvm_tensor_convolution:
-      { /* llvm.hpvm.tensor.mul */
+      case Intrinsic::hpvm_tensor_convolution: { /* llvm.hpvm.tensor.mul */
         // Tensor mul is not in place.
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor convolution \n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
-	Args.push_back(II->getOperand(2));
+        Args.push_back(II->getOperand(2));
         Args.push_back(II->getOperand(3));
         Args.push_back(II->getOperand(4));
         Args.push_back(II->getOperand(5));
 
-	Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
-	Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+        Constant *conv_mode =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+        Constant *conv_precision =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
 
         Args.push_back(conv_mode);
         Args.push_back(conv_precision);
-	
+
         // Create cudnn runtime function call
         FunctionCallee tensorConvolution;
         DECLARE(tensorConvolution);
-	
-        CallInst* CI = CallInst::Create(tensorConvolution,
-                                        Args, "", II);
+
+        CallInst *CI = CallInst::Create(tensorConvolution, Args, "", II);
         // We can replace the call to hpvm.tensor.mul with the runtime call
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      case Intrinsic::hpvm_tensor_group_convolution:
-      { /* llvm.hpvm.tensor.mul */
+      case Intrinsic::hpvm_tensor_group_convolution: { /* llvm.hpvm.tensor.mul
+                                                        */
         // Tensor mul is not in place.
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor convolution \n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
-	Args.push_back(II->getOperand(2));
+        Args.push_back(II->getOperand(2));
         Args.push_back(II->getOperand(3));
         Args.push_back(II->getOperand(4));
         Args.push_back(II->getOperand(5));
 
-	Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+        Constant *conv_mode =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
 
         Args.push_back(conv_mode);
         Args.push_back(II->getOperand(7));
-	
+
         // Create cudnn runtime function call
         FunctionCallee tensorConvolution;
         DECLARE(tensorConvolution);
-	
-        CallInst* CI = CallInst::Create(tensorConvolution,
-                                        Args, "", II);
+
+        CallInst *CI = CallInst::Create(tensorConvolution, Args, "", II);
         // We can replace the call to hpvm.tensor.mul with the runtime call
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      case Intrinsic::hpvm_tensor_batchnorm:
-      { /* llvm.hpvm.tensor.batchnorm */
+      case Intrinsic::hpvm_tensor_batchnorm: { /* llvm.hpvm.tensor.batchnorm */
         // Tensor batchnorm is in place.
-	// FIXME: Add Check for InPlace Analysis 
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor batch normalization \n");
+        // FIXME: Add Check for InPlace Analysis
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor batch normalization \n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
-	Args.push_back(II->getOperand(2));
+        Args.push_back(II->getOperand(2));
         Args.push_back(II->getOperand(3));
         Args.push_back(II->getOperand(4));
         Args.push_back(II->getOperand(5));
-	
+
         // Create cudnn runtime function call
         FunctionCallee tensorBatchNorm;
         DECLARE(tensorBatchNorm);
-	
-        CallInst* CI = CallInst::Create(tensorBatchNorm,
-                                        Args, "", II);
-        // We can replace the call to hpvm.tensor.batchnorm with the TensorRT call
+
+        CallInst *CI = CallInst::Create(tensorBatchNorm, Args, "", II);
+        // We can replace the call to hpvm.tensor.batchnorm with the TensorRT
+        // call
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      
-      case Intrinsic::hpvm_tensor_mul:
-      { /* llvm.hpvm.tensor.mul */
+      case Intrinsic::hpvm_tensor_mul: { /* llvm.hpvm.tensor.mul */
         // Tensor mul is not in place.
         DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
 
         // Create cudnn runtime function call
         FunctionCallee tensorGemmGPU;
         DECLARE(tensorGemmGPU);
-	
-        CallInst* CI = CallInst::Create(tensorGemmGPU,
-                                        Args, "", II);
+
+        CallInst *CI = CallInst::Create(tensorGemmGPU, Args, "", II);
         // We can replace the call to hpvm.tensor.mul with the runtime call
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::hpvm_tensor_add:
-      { /* llvm.hpvm.tensor.add */
+      } break;
+      case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */
         DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n");
         // Tensor add(a,b) is in place for argument a.
         Value *Op = II->getOperand(0);
@@ -407,12 +390,13 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
         // Code generation cannot continue if this is false, because the target
         // only provides an in place operation
 
-	// FIXME: remove this comment - must check for in-place
-        //assert(inplace &&
-        //       "Operand not valid for in place operation. Code gen aborted.\n");
+        // FIXME: remove this comment - must check for in-place
+        // assert(inplace &&
+        //       "Operand not valid for in place operation. Code gen
+        //       aborted.\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
 
@@ -426,54 +410,55 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
       case Intrinsic::hpvm_tensor_pool_max:
-      case Intrinsic::hpvm_tensor_pool_mean:
-      { /* llvm.hpvm.tensor.relu */
+      case Intrinsic::hpvm_tensor_pool_mean: { /* llvm.hpvm.tensor.relu */
         DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n");
 
         // Argument list - tensorPooling(input, poolFunction, window_height,
-	//                               window_width, vertical_pad, horizontal_pad,
-	//                               vertical_stride, horizontal_stride);
-        std::vector<Value*> Args;
+        //                               window_width, vertical_pad,
+        //                               horizontal_pad, vertical_stride,
+        //                               horizontal_stride);
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
 
-	int pool_type = 0;
-	if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max){
+        int pool_type = 0;
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) {
           pool_type = 0;
-	}
-        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean){
+        }
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) {
           pool_type = 1;
-	}	
-	
-	Constant* constPoolType = ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type);
-        Args.push_back(constPoolType); // ID for max pool. Min/Avg have different IDs (non-zero)	
-	Args.push_back(II->getOperand(1));
+        }
+
+        Constant *constPoolType =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type);
+        Args.push_back(constPoolType); // ID for max pool. Min/Avg have
+                                       // different IDs (non-zero)
+        Args.push_back(II->getOperand(1));
         Args.push_back(II->getOperand(2));
-	Args.push_back(II->getOperand(3));
+        Args.push_back(II->getOperand(3));
         Args.push_back(II->getOperand(4));
-	Args.push_back(II->getOperand(5));
-	Args.push_back(II->getOperand(6));
+        Args.push_back(II->getOperand(5));
+        Args.push_back(II->getOperand(6));
 
         // Create cudnn runtime function call
         FunctionCallee tensorPooling;
         DECLARE(tensorPooling);
-        CallInst* CI = CallInst::Create(tensorPooling, Args, "", II);
+        CallInst *CI = CallInst::Create(tensorPooling, Args, "", II);
 
-	// Replacing intrinsic result uses with the result of the tensor runtime operation
+        // Replacing intrinsic result uses with the result of the tensor runtime
+        // operation
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
-      
+      } break;
+
       case Intrinsic::hpvm_tensor_relu:
       case Intrinsic::hpvm_tensor_clipped_relu:
-      case Intrinsic::hpvm_tensor_tanh:
-      { /* llvm.hpvm.tensor.relu */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation functions \n");
+      case Intrinsic::hpvm_tensor_tanh: { /* llvm.hpvm.tensor.relu */
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor activation functions \n");
         // Tensor relu(a) is in place for argument a.
         Value *Op = II->getOperand(0);
 
@@ -485,41 +470,38 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
                "Operand not valid for in place operation. Code gen aborted.\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
 
-	if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu){
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) {
           // Create cudnn runtime function call
           FunctionCallee tensorRelu;
           DECLARE(tensorRelu);
           CallInst::Create(tensorRelu, Args, "", II);
-	}
-	else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu){
+        } else if (II->getIntrinsicID() ==
+                   Intrinsic::hpvm_tensor_clipped_relu) {
           // Create cudnn runtime function call
           //-- FunctionCallee tensorClippedRelu;
-	  FunctionCallee tensorRelu2;
+          FunctionCallee tensorRelu2;
           DECLARE(tensorRelu2);
           CallInst::Create(tensorRelu2, Args, "", II);
-	}
-	else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh){
+        } else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) {
           // Create cudnn runtime function call
           FunctionCallee tensorTanh;
-	  errs()<<"tensorTanh Call = \n\n";
+          errs() << "tensorTanh Call = \n\n";
           DECLARE(tensorTanh);
-	  //errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
+          // errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
           CallInst::Create(tensorTanh, Args, "", II);
-	}
-     
+        }
+
         // We can replace the call to hpvm.tensor.relu with the 1st argument
         // that, due to in place operation, now contains the result
         II->replaceAllUsesWith(II->getOperand(0));
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::hpvm_tensor_softmax:
-      { /* llvm.hpvm.tensor.softmax */
+      } break;
+      case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */
         DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor softmax\n");
         // Tensor relu(a) is in place for argument a.
         Value *Op = II->getOperand(0);
@@ -532,7 +514,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
                "Operand not valid for in place operation. Code gen aborted.\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
 
         // Create cudnn runtime function call
@@ -545,17 +527,16 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      case Intrinsic::hpvm_node_id:
-      { /* llvm.hpvm.node.id */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling Node ID Intrinsic \n");
+      case Intrinsic::hpvm_node_id: { /* llvm.hpvm.node.id */
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling Node ID Intrinsic \n");
         // Get uint32 argument
         Value *Op = II->getOperand(0);
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
 
         // Create hpvm-tensor-rt function call
@@ -565,10 +546,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      
       default:
         llvm_unreachable("Unknown VISC Intrinsic!");
         break;
@@ -582,7 +561,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
   // Traverse the vector backwards, otherwise definitions are deleted while
   // their subsequent uses are still around.
   for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri) {
+                                                      re = IItoRemove.rend();
+       ri != re; ++ri) {
     DEBUG(errs() << "Erasing: " << **ri << "\n");
     errs() << "Erasing: " << **ri << "\n";
     (*ri)->eraseFromParent();
@@ -600,33 +580,31 @@ bool DFG2LLVM_CUDNN::runOnModule(Module &M) {
 
   // Get the In Place Analysis Results
   InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
-    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
+      (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
   // Print results
   printInPlaceDFGParameter(IPP);
 
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
- 
+  std::vector<DFInternalNode *> Roots = DFG.getRoots();
+
   // Visitor for Code Generation Graph Traversal
   CGT_CUDNN *CGTVisitor = new CGT_CUDNN(M, DFG, IPP);
 
   // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
+  for (auto rootNode : Roots) {
     // Initiate code generation for root DFNode
     CGTVisitor->visit(rootNode);
   }
 
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
+  // TODO: Edit module epilogue to remove the VISC intrinsic declarations
   delete CGTVisitor;
 
   return true;
 }
 
-
 /******************************************************************************
  *                              Helper functions                              *
  ******************************************************************************/
 
-
 } // End of namespace
 
 char DFG2LLVM_CUDNN::ID = 0;
@@ -635,5 +613,3 @@ static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn",
                                       false /* does not modify the CFG */,
                                       true /* transformation,   *
                                             * not just analysis */);
-
-
diff --git a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
index 294f9ac574..d9dcc7c876 100644
--- a/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
+++ b/hpvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
@@ -39,20 +39,18 @@ using namespace inplacedfg;
 namespace {
 
 cl::opt<std::string> QuantizationInputsFilename(
-  "quantization-levels-filename",
-  cl::desc("<PROMISE quantization levels input file (path)>"),
-  cl::value_desc("filename"),
-  cl::Required);
+    "quantization-levels-filename",
+    cl::desc("<PROMISE quantization levels input file (path)>"),
+    cl::value_desc("filename"), cl::Required);
 
 cl::opt<std::string> ConfigurationInputsFilename(
-  "configuration-inputs-filename",
-  cl::desc("<Autotuner configurations input file (path)>"),
-  cl::value_desc("filename"),
-  cl::Required);
+    "configuration-inputs-filename",
+    cl::desc("<Autotuner configurations input file (path)>"),
+    cl::value_desc("filename"), cl::Required);
 
 // Helper function declarations
-bool isValidOperandForInPlaceOperation(Value *, Function *, DFNode *,
-                                       InPlaceDFGAnalysis::InPlaceDFGParameter &);
+bool isValidOperandForInPlaceOperation(
+    Value *, Function *, DFNode *, InPlaceDFGAnalysis::InPlaceDFGParameter &);
 
 // Helper class declarations
 
@@ -72,53 +70,37 @@ private:
   Module *M;
   Module *RtM;
 
-  std::vector<Value*> Args;
-  std::vector<IntrinsicInst*> IIs;
-  std::vector<IntrinsicInst*> IIs_remove; // Intrinsics to remove
+  std::vector<Value *> Args;
+  std::vector<IntrinsicInst *> IIs;
+  std::vector<IntrinsicInst *> IIs_remove; // Intrinsics to remove
   AbstractState *current;
 
 public:
   CodeGenStateMachine(Module *, Module *);
 
-  void setCurrent(AbstractState *s) {
-    current = s;
-  }
+  void setCurrent(AbstractState *s) { current = s; }
 
   void transition(IntrinsicInst *II);
 
-  Module *getModule() {
-    return M;
-  }
+  Module *getModule() { return M; }
 
-  Module *getRtModule() {
-    return RtM;
-  }
+  Module *getRtModule() { return RtM; }
 
-  void addArgument(Value *Arg) {
-    Args.push_back(Arg);
-  }
+  void addArgument(Value *Arg) { Args.push_back(Arg); }
 
-  void addIntrinsicInst(IntrinsicInst *II) {
-    IIs.push_back(II);
-  }
+  void addIntrinsicInst(IntrinsicInst *II) { IIs.push_back(II); }
 
-  void addIntrinsicToRemove(IntrinsicInst *II) {
-    IIs_remove.push_back(II);
-  }
+  void addIntrinsicToRemove(IntrinsicInst *II) { IIs_remove.push_back(II); }
 
-  IntrinsicInst *getIntrinsicInstAt(unsigned idx) {
-    return IIs[idx];
-  }
+  IntrinsicInst *getIntrinsicInstAt(unsigned idx) { return IIs[idx]; }
 
-  void codeGen(DFNode *, Function * , const StringRef &,
+  void codeGen(DFNode *, Function *, const StringRef &,
                InPlaceDFGAnalysis::InPlaceDFGParameter &);
-
 };
 
 class AbstractState {
 public:
-  enum ID
-  {
+  enum ID {
     INITIAL_STATE,
     FULLY_CONNECTED_LAYER_1,
     FULLY_CONNECTED_LAYER_2,
@@ -137,9 +119,7 @@ protected:
   enum ID StateID;
 
 public:
-  enum ID getStateID() {
-    return StateID;
-  }
+  enum ID getStateID() { return StateID; }
 
   virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0;
   virtual ~AbstractState() {}
@@ -277,68 +257,60 @@ public:
   void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
 };
 
-  
 void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
     errs() << "INITIAL STATE\n";
     switch (II->getIntrinsicID()) {
-      case Intrinsic::hpvm_tensor_convolution:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // conv input
-        Mch->addArgument(II->getOperand(1)); // conv kernel
-
-        Mch->setCurrent(new ConvolutionLayer_1());
-        errs() << "TO CONVOLUTION LAYER 1\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_mul:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // 1st gemm input
-        Mch->addArgument(II->getOperand(1)); // 2nd gemm input
-
-        Mch->setCurrent(new FullyConnectedLayer_1());
-        errs() << "TO FULLY CONNECTED LAYER 1\n";
-        }
-        break;
-
-      case Intrinsic::hpvm_node_id:
-        {
-
-	 DEBUG(errs() << "\t: Handling __hpvm_node_id \n");
-         // Get uint32 node ID
-         Value *Op = II->getOperand(0);
-
-	 std::vector<Value*> Args;
-         Args.push_back(Op); 
-
-	 Module *M = Mch->getModule();
-	 Module *RtM = Mch->getRtModule();
-	 
-         FunctionCallee hpvm_node_id_call =
-          M->getOrInsertFunction(StringRef("tensor_set_node_id"),
-                  RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType());
-
-	 CallInst::Create(hpvm_node_id_call, Args, "", II);
-
-	 Mch->addIntrinsicToRemove(II);
-	 Mch->setCurrent(new InitialState());
-         errs() << "TO INIT STATE\n";
-        }
-        break;
-	
-      default: // Other HPVM intrinsic
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->setCurrent(new SingleTensorOperation());
-        errs() << "TO SINGLE OP\n";
-        }
-        break;
+    case Intrinsic::hpvm_tensor_convolution: {
+      Mch->addIntrinsicInst(II);
+      Mch->addArgument(II->getOperand(0)); // conv input
+      Mch->addArgument(II->getOperand(1)); // conv kernel
+
+      Mch->setCurrent(new ConvolutionLayer_1());
+      errs() << "TO CONVOLUTION LAYER 1\n";
+    } break;
+    case Intrinsic::hpvm_tensor_mul: {
+      Mch->addIntrinsicInst(II);
+      Mch->addArgument(II->getOperand(0)); // 1st gemm input
+      Mch->addArgument(II->getOperand(1)); // 2nd gemm input
+
+      Mch->setCurrent(new FullyConnectedLayer_1());
+      errs() << "TO FULLY CONNECTED LAYER 1\n";
+    } break;
+
+    case Intrinsic::hpvm_node_id: {
+
+      DEBUG(errs() << "\t: Handling __hpvm_node_id \n");
+      // Get uint32 node ID
+      Value *Op = II->getOperand(0);
+
+      std::vector<Value *> Args;
+      Args.push_back(Op);
+
+      Module *M = Mch->getModule();
+      Module *RtM = Mch->getRtModule();
+
+      FunctionCallee hpvm_node_id_call = M->getOrInsertFunction(
+          StringRef("tensor_set_node_id"),
+          RtM->getFunction(StringRef("tensor_set_node_id"))->getFunctionType());
+
+      CallInst::Create(hpvm_node_id_call, Args, "", II);
+
+      Mch->addIntrinsicToRemove(II);
+      Mch->setCurrent(new InitialState());
+      errs() << "TO INIT STATE\n";
+    } break;
+
+    default: // Other HPVM intrinsic
+    {
+      Mch->addIntrinsicInst(II);
+      Mch->setCurrent(new SingleTensorOperation());
+      errs() << "TO SINGLE OP\n";
+    } break;
     }
     delete this;
   } // else {} // No HPVM intrinsic received. Remain at initial
-  errs() << "TO NO CHANGE\n"; 
+  errs() << "TO NO CHANGE\n";
 }
 
 void SingleTensorOperation::transition(CodeGenStateMachine *Mch,
@@ -357,23 +329,21 @@ void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
   if (II) { // Not end of instruction stream
     errs() << "FULLY CONNECTED LAYER 1\n";
     switch (II->getIntrinsicID()) {
-      case Intrinsic::hpvm_tensor_add:
-        {
-        IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0);
-        assert((MulII == II->getOperand(0)) &&
-               "Output of mul must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-
-        Mch->setCurrent(new FullyConnectedLayer_2());
-         errs() << "TO FULLY CONNECTED LAYER 2\n";
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        errs() << "TO NO PATERN\n";
-        break;
+    case Intrinsic::hpvm_tensor_add: {
+      IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0);
+      assert((MulII == II->getOperand(0)) &&
+             "Output of mul must be used as 1st operand of add");
+      Mch->addIntrinsicInst(II);
+
+      Mch->addArgument(II->getOperand(1)); // bias
+
+      Mch->setCurrent(new FullyConnectedLayer_2());
+      errs() << "TO FULLY CONNECTED LAYER 2\n";
+    } break;
+    default:
+      Mch->setCurrent(new NoPattern());
+      errs() << "TO NO PATTERN\n";
+      break;
     }
   } else {
     Mch->setCurrent(new NoPattern());
@@ -387,51 +357,45 @@ void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
   if (II) { // Not end of instruction stream
     errs() << "FULLY CONNECTED LAYER 2\n";
     switch (II->getIntrinsicID()) {
-      case Intrinsic::hpvm_tensor_tanh:
-        {
-        // Type of activation : TanH
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    case Intrinsic::hpvm_tensor_tanh: {
+      // Type of activation : TanH
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 0));
 
-        Mch->addIntrinsicInst(II);
+      Mch->addIntrinsicInst(II);
 
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        errs() << "TO FULLY CONNECTED LAYER 3\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_relu:
-        {
-        // Type of activation : ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+      Mch->setCurrent(new FullyConnectedLayer_3());
+      errs() << "TO FULLY CONNECTED LAYER 3\n";
+    } break;
+    case Intrinsic::hpvm_tensor_relu: {
+      // Type of activation : ReLU
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 1));
 
-        Mch->addIntrinsicInst(II);
+      Mch->addIntrinsicInst(II);
 
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        errs() << "TO FULLY CONNECTED LAYER 3\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_clipped_relu:
-        {
-        // Type of activation : Clipped ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        errs() << "TO FULLY CONNECTED LAYER 3\n";
-        }
-        break;
-      default: // No activation, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        errs() << "TO NO PATTERN\n";
-        break;
+      Mch->setCurrent(new FullyConnectedLayer_3());
+      errs() << "TO FULLY CONNECTED LAYER 3\n";
+    } break;
+    case Intrinsic::hpvm_tensor_clipped_relu: {
+      // Type of activation : Clipped ReLU
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+
+      Mch->addIntrinsicInst(II);
+
+      Mch->setCurrent(new FullyConnectedLayer_3());
+      errs() << "TO FULLY CONNECTED LAYER 3\n";
+    } break;
+    default: // No activation, but HPVM intrinsic
+      Mch->setCurrent(new NoPattern());
+      errs() << "TO NO PATTERN\n";
+      break;
     }
   } else { // End of instruction stream
     // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
+    Mch->addArgument(
+        ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new FullyConnectedLayer());
     errs() << "TO FULLY CONNECTED LAYER\n";
@@ -457,10 +421,10 @@ void FullyConnectedLayer::transition(CodeGenStateMachine *Mch,
   if (II) { // Not end of instruction stream
     errs() << "FULLY CONNECTED LAYER\n";
     Mch->setCurrent(new NoPattern());
-     errs() << "TO NO PATTERN\n";
+    errs() << "TO NO PATTERN\n";
     delete this;
   }
-   errs() << "TO NO CHANGE\n";
+  errs() << "TO NO CHANGE\n";
 }
 
 void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
@@ -468,33 +432,31 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
   if (II) { // Not end of instruction stream
     errs() << "CONVOLUTION LAYER 1\n";
     switch (II->getIntrinsicID()) {
-      case Intrinsic::hpvm_tensor_add:
-        {
-        IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
-        assert((ConvII == II->getOperand(0)) &&
-               "Output of conv must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-
-        Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-
-        Mch->setCurrent(new ConvolutionLayer_2());
-         errs() << "TO CONVOLUTION LAYER 2\n";
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        errs() << "TO NO PATTERN\n";
-        break;
+    case Intrinsic::hpvm_tensor_add: {
+      IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
+      assert((ConvII == II->getOperand(0)) &&
+             "Output of conv must be used as 1st operand of add");
+      Mch->addIntrinsicInst(II);
+
+      Mch->addArgument(II->getOperand(1)); // bias
+
+      Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
+      Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
+      Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
+      Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
+
+      Mch->setCurrent(new ConvolutionLayer_2());
+      errs() << "TO CONVOLUTION LAYER 2\n";
+    } break;
+    default:
+      Mch->setCurrent(new NoPattern());
+      errs() << "TO NO PATTERN\n";
+      break;
     }
   } else {
     // No addition
     Mch->addArgument(ConstantPointerNull::get(
-                     Type::getInt8PtrTy(Mch->getModule()->getContext())));
+        Type::getInt8PtrTy(Mch->getModule()->getContext())));
 
     // Zero for all convolution numeric arguments FIXME???
     IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
@@ -504,28 +466,32 @@ void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
     Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
 
     //    Mch->addArgument(ConstantInt::get(
-    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()),
+    //                     0));
     //    Mch->addArgument(ConstantInt::get(
-    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()),
+    //                     0));
     //    Mch->addArgument(ConstantInt::get(
-    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()),
+    //                     0));
     //    Mch->addArgument(ConstantInt::get(
-    //                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    //                     Type::getInt32Ty(Mch->getModule()->getContext()),
+    //                     0));
 
     // No pooling
     // 0 for unused pool arguments:
     // pool_id, pool_size_v, pool_size_h, pool pad_v,
     // pool_pad_h, pool_stride_v, pool_stride_h
     for (int i = 0; i < 7; i++) {
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 0));
     }
     // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
+    Mch->addArgument(
+        ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new ConvolutionLayer());
-   errs() << "TO CONVOLUTION LAYER\n";
+    errs() << "TO CONVOLUTION LAYER\n";
   }
   delete this;
 }
@@ -535,100 +501,91 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
   if (II) { // Not end of instruction stream
     errs() << "CONVOLUTION LAYER 2\n";
     switch (II->getIntrinsicID()) {
-      case Intrinsic::hpvm_tensor_tanh:
-        {
-	  // Type of activation : TanH
-	  //        Mch->addArgument(ConstantInt::get(
-	  //                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        errs() << "TO CONVOLUTION LAYER 3\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_relu:
-        {
-	  // Type of activation : ReLU
-	  //        Mch->addArgument(ConstantInt::get(
-	  //                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        errs() << "TO CONVOLUTION LAYER 3\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_clipped_relu:
-        {
-	  // Type of activation : Clipped ReLU
-	  //        Mch->addArgument(ConstantInt::get(
-	  //                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        errs() << "TO CONVOLUTION LAYER 3\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_pool_max:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // pool_size_v, pool_size_h, pool pad_v,
-        // pool_pad_h, pool_stride_v, pool_stride_h
-        for (int i = 1; i < 7; i++) {
-            Mch->addArgument(II->getOperand(i));
-        }
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        errs() << "TO CONVOLUTION LAYER 4\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_pool_min:
-        {
-        // pool min FIXME: 2: supported?
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        // pool_size_v, pool_size_h, pool pad_v,
-        // pool_pad_h, pool_stride_v, pool_stride_h
-        for (int i = 1; i < 7; i++) {
-            Mch->addArgument(II->getOperand(i));
-        }
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        errs() << "TO CONVOLUTION LAYER 4\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_pool_mean:
-        {
-        // pool mean
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // pool_size_v, pool_size_h, pool pad_v,
-        // pool_pad_h, pool_stride_v, pool_stride_h
-        for (int i = 1; i < 7; i++) {
-            Mch->addArgument(II->getOperand(i));
-        }
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        errs() << "TO CONVOLUTION LAYER 4\n";
-        }
-        break;
-      default: // No activation, No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        errs() << "TO NO PATTERN\n";
-        break;
+    case Intrinsic::hpvm_tensor_tanh: {
+      // Type of activation : TanH
+      //        Mch->addArgument(ConstantInt::get(
+      //                         Type::getInt32Ty(Mch->getModule()->getContext()),
+      //                         0));
+      Mch->addIntrinsicInst(II);
+
+      Mch->setCurrent(new ConvolutionLayer_3());
+      errs() << "TO CONVOLUTION LAYER 3\n";
+    } break;
+    case Intrinsic::hpvm_tensor_relu: {
+      // Type of activation : ReLU
+      //        Mch->addArgument(ConstantInt::get(
+      //                         Type::getInt32Ty(Mch->getModule()->getContext()),
+      //                         1));
+      Mch->addIntrinsicInst(II);
+
+      Mch->setCurrent(new ConvolutionLayer_3());
+      errs() << "TO CONVOLUTION LAYER 3\n";
+    } break;
+    case Intrinsic::hpvm_tensor_clipped_relu: {
+      // Type of activation : Clipped ReLU
+      //        Mch->addArgument(ConstantInt::get(
+      //                         Type::getInt32Ty(Mch->getModule()->getContext()),
+      //                         2));
+      Mch->addIntrinsicInst(II);
+
+      Mch->setCurrent(new ConvolutionLayer_3());
+      errs() << "TO CONVOLUTION LAYER 3\n";
+    } break;
+    case Intrinsic::hpvm_tensor_pool_max: {
+      // pool max
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      // pool_size_v, pool_size_h, pool pad_v,
+      // pool_pad_h, pool_stride_v, pool_stride_h
+      for (int i = 1; i < 7; i++) {
+        Mch->addArgument(II->getOperand(i));
+      }
+      // No activation
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), -1));
+      Mch->addIntrinsicInst(II);
+
+      Mch->setCurrent(new ConvolutionLayer_4());
+      errs() << "TO CONVOLUTION LAYER 4\n";
+    } break;
+    case Intrinsic::hpvm_tensor_pool_min: {
+      // pool min FIXME: 2: supported?
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+      // pool_size_v, pool_size_h, pool pad_v,
+      // pool_pad_h, pool_stride_v, pool_stride_h
+      for (int i = 1; i < 7; i++) {
+        Mch->addArgument(II->getOperand(i));
+      }
+      // No activation
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), -1));
+      Mch->addIntrinsicInst(II);
+
+      Mch->setCurrent(new ConvolutionLayer_4());
+      errs() << "TO CONVOLUTION LAYER 4\n";
+    } break;
+    case Intrinsic::hpvm_tensor_pool_mean: {
+      // pool mean
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+      // pool_size_v, pool_size_h, pool pad_v,
+      // pool_pad_h, pool_stride_v, pool_stride_h
+      for (int i = 1; i < 7; i++) {
+        Mch->addArgument(II->getOperand(i));
+      }
+      // No activation
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), -1));
+      Mch->addIntrinsicInst(II);
+
+      Mch->setCurrent(new ConvolutionLayer_4());
+      errs() << "TO CONVOLUTION LAYER 4\n";
+    } break;
+    default: // No activation, No pooling, but HPVM intrinsic
+      Mch->setCurrent(new NoPattern());
+      errs() << "TO NO PATTERN\n";
+      break;
     }
   } else { // End of instruction stream
     // No pooling
@@ -636,12 +593,12 @@ void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
     // pool_id, pool_size_v, pool_size_h, pool pad_v,
     // pool_pad_h, pool_stride_v, pool_stride_h
     for (int i = 0; i < 7; i++) {
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 0));
     }
     // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
+    Mch->addArgument(
+        ConstantInt::get(Type::getInt32Ty(Mch->getModule()->getContext()), -1));
 
     Mch->setCurrent(new ConvolutionLayer());
     errs() << "TO CONVOLUTION LAYER\n";
@@ -654,104 +611,98 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
   if (II) { // Not end of instruction stream
     errs() << "CONVOLUTION LAYER 3\n";
     switch (II->getIntrinsicID()) {
-      case Intrinsic::hpvm_tensor_pool_max:
-        {
-        // pool max
+    case Intrinsic::hpvm_tensor_pool_max: {
+      // pool max
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      // pool_size_v, pool_size_h, pool pad_v,
+      // pool_pad_h, pool_stride_v, pool_stride_h
+      for (int i = 1; i < 7; i++) {
+        Mch->addArgument(II->getOperand(i));
+      }
+      Mch->addIntrinsicInst(II);
+
+      // Revisit last intrinsic, to add argument for activation operation
+      IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
+      // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
+      Intrinsic::ID ActIID = ActII->getIntrinsicID();
+      if (ActIID == Intrinsic::hpvm_tensor_tanh) {
+        Mch->addArgument(ConstantInt::get(
+            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
+        Mch->addArgument(ConstantInt::get(
+            Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+      } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu
+        Mch->addArgument(ConstantInt::get(
+            Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+      }
+
+      Mch->setCurrent(new ConvolutionLayer_4());
+      errs() << "TO CONVOLUTION LAYER 4\n";
+    } break;
+    case Intrinsic::hpvm_tensor_pool_min: {
+      // pool min FIXME: 2: supported?
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+
+      // pool_size_v, pool_size_h, pool pad_v,
+      // pool_pad_h, pool_stride_v, pool_stride_h
+      for (int i = 1; i < 7; i++) {
+        Mch->addArgument(II->getOperand(i));
+      }
+      Mch->addIntrinsicInst(II);
+
+      // Revisit last intrinsic, to add argument for activation operation
+      IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
+      // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
+      Intrinsic::ID ActIID = ActII->getIntrinsicID();
+      if (ActIID == Intrinsic::hpvm_tensor_tanh) {
         Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // pool_size_v, pool_size_h, pool pad_v,
-        // pool_pad_h, pool_stride_v, pool_stride_h
-        for (int i = 1; i < 7; i++) {
-            Mch->addArgument(II->getOperand(i));
-        }
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::hpvm_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        errs() << "TO CONVOLUTION LAYER 4\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_pool_min:
-        {
-        // pool min FIXME: 2: supported?
+            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
+        Mch->addArgument(ConstantInt::get(
+            Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+      } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu
+        Mch->addArgument(ConstantInt::get(
+            Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+      }
+
+      Mch->setCurrent(new ConvolutionLayer_4());
+      errs() << "TO CONVOLUTION LAYER 4\n";
+    } break;
+    case Intrinsic::hpvm_tensor_pool_mean: {
+      // pool mean
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+      // pool_size_v, pool_size_h, pool pad_v,
+      // pool_pad_h, pool_stride_v, pool_stride_h
+      for (int i = 1; i < 7; i++) {
+        Mch->addArgument(II->getOperand(i));
+      }
+      Mch->addIntrinsicInst(II);
+
+      // Revisit last intrinsic, to add argument for activation operation
+      IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
+      // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
+      Intrinsic::ID ActIID = ActII->getIntrinsicID();
+      if (ActIID == Intrinsic::hpvm_tensor_tanh) {
+        Mch->addArgument(ConstantInt::get(
+            Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
         Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-	
-        // pool_size_v, pool_size_h, pool pad_v,
-        // pool_pad_h, pool_stride_v, pool_stride_h
-        for (int i = 1; i < 7; i++) {
-            Mch->addArgument(II->getOperand(i));
-        }
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::hpvm_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        errs() << "TO CONVOLUTION LAYER 4\n";
-        }
-        break;
-      case Intrinsic::hpvm_tensor_pool_mean:
-        {
-        // pool max
+            Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+      } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu
         Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // pool_size_v, pool_size_h, pool pad_v,
-        // pool_pad_h, pool_stride_v, pool_stride_h
-        for (int i = 1; i < 7; i++) {
-            Mch->addArgument(II->getOperand(i));
-        }
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::hpvm_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        errs() << "TO CONVOLUTION LAYER 4\n";
-        }
-        break;
-      default: // No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        errs() << "TO NO PATTERN\n";
-        break;
+            Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+      }
+
+      Mch->setCurrent(new ConvolutionLayer_4());
+      errs() << "TO CONVOLUTION LAYER 4\n";
+    } break;
+    default: // No pooling, but HPVM intrinsic
+      Mch->setCurrent(new NoPattern());
+      errs() << "TO NO PATTERN\n";
+      break;
     }
   } else { // End of instruction stream
     // No pooling
@@ -759,8 +710,8 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
     // pool_id, pool_size_v, pool_size_h, pool pad_v,
     // pool_pad_h, pool_stride_v, pool_stride_h
     for (int i = 0; i < 7; i++) {
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 0));
     }
 
     // Revisit last intrinsic, to add argument for activation operation
@@ -769,17 +720,17 @@ void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
     Intrinsic::ID ActIID = ActII->getIntrinsicID();
     if (ActIID == Intrinsic::hpvm_tensor_tanh) {
       Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-      } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-      } else { //ActIID == Intrinsic::hpvm_tensor_clipped_relu
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-      }
+          Type::getInt32Ty(Mch->getModule()->getContext()), 0));
+    } else if (ActIID == Intrinsic::hpvm_tensor_relu) {
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 1));
+    } else { // ActIID == Intrinsic::hpvm_tensor_clipped_relu
+      Mch->addArgument(ConstantInt::get(
+          Type::getInt32Ty(Mch->getModule()->getContext()), 2));
+    }
 
-     Mch->setCurrent(new ConvolutionLayer());
-     errs() << "TO CONVOLUTION LAYER\n";
+    Mch->setCurrent(new ConvolutionLayer());
+    errs() << "TO CONVOLUTION LAYER\n";
   }
   delete this;
 }
@@ -797,8 +748,7 @@ void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch,
   delete this;
 }
 
-void ConvolutionLayer::transition(CodeGenStateMachine *Mch,
-                                  IntrinsicInst *II) {
+void ConvolutionLayer::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
   if (II) { // Not end of instruction stream
     errs() << "CONVOLUTION LAYER\n";
     Mch->setCurrent(new NoPattern());
@@ -810,8 +760,8 @@ void ConvolutionLayer::transition(CodeGenStateMachine *Mch,
 
 void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {}
 
-CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM) :
-  M(_M), RtM(_RtM) {
+CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM)
+    : M(_M), RtM(_RtM) {
   current = new InitialState();
 }
 
@@ -819,14 +769,17 @@ void CodeGenStateMachine::transition(IntrinsicInst *II) {
   current->transition(this, II);
 }
 
-void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRef,
-                                  InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
+void CodeGenStateMachine::codeGen(
+    DFNode *N, Function *F, const StringRef &strRef,
+    InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
 
-  errs() << "TRANSITIONTED TO: " << std::to_string(current->getStateID()) << "\n";
-  assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
-            (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)     ||
-            (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) &&
-            "Unsupported instruction sequence for the Wrapper API.\n" );
+  errs() << "TRANSITIONED TO: " << std::to_string(current->getStateID())
+         << "\n";
+  assert(
+      ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
+       (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER) ||
+       (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION)) &&
+      "Unsupported instruction sequence for the Wrapper API.\n");
 
   if ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
       (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)) {
@@ -836,90 +789,90 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
     // We have a valid instruction sequence.
     // Make sure that the instruction sequence can be traslated:
     // each instruction's result must be used only by the next one in sequence.
-  
-    for (unsigned p = 0; p < IIs.size()-1; p++) {
+
+    for (unsigned p = 0; p < IIs.size() - 1; p++) {
       IntrinsicInst *II = IIs[p];
       assert((II->hasOneUse()) &&
-            "Instruction sequence does not fit pattern: not single use\n");
-  
+             "Instruction sequence does not fit pattern: not single use\n");
+
       Value::user_iterator ui = II->user_begin(); // The only use
-      assert((*ui == IIs[p+1]) &&
-             "Instruction sequence does not fit pattern: not used by next instruction\n");
+      assert((*ui == IIs[p + 1]) && "Instruction sequence does not fit "
+                                    "pattern: not used by next instruction\n");
     }
 
     // Create corresponding wrapper API call
     CallInst *CI;
     switch (current->getStateID()) {
-      case AbstractState::ID::CONVOLUTION_LAYER:
-        {
-          FunctionCallee wrapper_ConvLayer2 =
-            M->getOrInsertFunction(StringRef("wrapper_ConvLayer2"),
-                   RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType());
-	  
-  
-          // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-
-
-          // Create string for node name, as first argument for wrapper API call
-          Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                              strRef, true);
-          GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                                 true, GlobalValue::ExternalLinkage, ConstArray, "");
-
-          // Create GEP expression to access it
-          Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-          Constant* GEPIndices[] = { Int_0, Int_0 };
-          Constant* GEPConst =
-            ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                           GV, GEPIndices);
-
-          std::vector<Value*> UpdatedArgs;
-          UpdatedArgs.push_back(GEPConst);
-          for (unsigned i = 0; i < Args.size(); i++) {
-            UpdatedArgs.push_back(Args[i]);
-          }
-          // Create wrapper API function call
-          CI = CallInst::Create(wrapper_ConvLayer2, UpdatedArgs, "");
-        }
-        break;
-      case AbstractState::ID::FULLY_CONNECTED_LAYER:
-        {
-          FunctionCallee wrapper_FCLayer =
-            M->getOrInsertFunction(StringRef("wrapper_FCLayer"),
-                RtM->getFunction(StringRef("wrapper_FCLayer"))->getFunctionType());
-  
-          // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-
-          // Create string for node name, as first argument for wrapper API call
-          Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                              strRef, true);
-          GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                                 true, GlobalValue::ExternalLinkage, ConstArray, "");
-
-          // Create GEP expression to access it
-          Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-          Constant* GEPIndices[] = { Int_0, Int_0 };
-          Constant* GEPConst =
-            ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                           GV, GEPIndices);
-
-          std::vector<Value*> UpdatedArgs;
-          UpdatedArgs.push_back(GEPConst);
-          for (unsigned i = 0; i < Args.size(); i++) {
-            UpdatedArgs.push_back(Args[i]);
-          }
-
-          // Create wrapper API function call
-          CI = CallInst::Create(wrapper_FCLayer, UpdatedArgs, "");
-        }
-        break;
-      default:
-        llvm_unreachable("Unexpected CodeGenStateMachine State\n");
-        break;
+    case AbstractState::ID::CONVOLUTION_LAYER: {
+      FunctionCallee wrapper_ConvLayer2 = M->getOrInsertFunction(
+          StringRef("wrapper_ConvLayer2"),
+          RtM->getFunction(StringRef("wrapper_ConvLayer2"))->getFunctionType());
+
+      // FIXME: get last (float) arguments from clipped relu intrinsic. For now,
+      // 0
+      Args.push_back(
+          ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0));
+      Args.push_back(
+          ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0));
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      std::vector<Value *> UpdatedArgs;
+      UpdatedArgs.push_back(GEPConst);
+      for (unsigned i = 0; i < Args.size(); i++) {
+        UpdatedArgs.push_back(Args[i]);
+      }
+      // Create wrapper API function call
+      CI = CallInst::Create(wrapper_ConvLayer2, UpdatedArgs, "");
+    } break;
+    case AbstractState::ID::FULLY_CONNECTED_LAYER: {
+      FunctionCallee wrapper_FCLayer = M->getOrInsertFunction(
+          StringRef("wrapper_FCLayer"),
+          RtM->getFunction(StringRef("wrapper_FCLayer"))->getFunctionType());
+
+      // FIXME: get last (float) arguments from clipped relu intrinsic. For now,
+      // 0
+      Args.push_back(
+          ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0));
+      Args.push_back(
+          ConstantFP::get(Type::getFloatTy(M->getContext()), (double)0));
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      std::vector<Value *> UpdatedArgs;
+      UpdatedArgs.push_back(GEPConst);
+      for (unsigned i = 0; i < Args.size(); i++) {
+        UpdatedArgs.push_back(Args[i]);
+      }
+
+      // Create wrapper API function call
+      CI = CallInst::Create(wrapper_FCLayer, UpdatedArgs, "");
+    } break;
+    default:
+      llvm_unreachable("Unexpected CodeGenStateMachine State\n");
+      break;
     }
 
     // Insert new call and replace all uses of pattern result with
@@ -928,326 +881,328 @@ void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRe
     CI->insertBefore(IIlast);
     IIlast->replaceAllUsesWith(CI);
 
-  }
-  else { // SINGLE_TENSOR_OPERATION
+  } else { // SINGLE_TENSOR_OPERATION
     assert((IIs.size() == 1) &&
-            "Unexpected size of intrinsics vector in code gen state machine.\n");
-    assert(Args.empty() && "Unexpected arguments found in coge gen state machine.\n");
+           "Unexpected size of intrinsics vector in code gen state machine.\n");
+    assert(Args.empty() &&
+           "Unexpected arguments found in code gen state machine.\n");
     IntrinsicInst *TensorII = IIs[0];
 
     errs() << "TensorII: " << *TensorII << "\n";
 
     switch (TensorII->getIntrinsicID()) {
-      case Intrinsic::hpvm_tensor_group_convolution:
-      { /* llvm.hpvm.tensor.group.conv */
-        // Tensor group conv is not in place.
-        DEBUG(errs() << F->getName() << "\t: Handling tensor group convolution \n");
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-
-        Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
-        Args.push_back(conv_mode);
-
-        Args.push_back(TensorII->getOperand(7));
-    
-        // Create wrapper API runtime function call
-        FunctionCallee wrapper_tensorGroupConvolution =
-          M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"),
-            RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution,
-                                        Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.mul with the runtime call
-        TensorII->replaceAllUsesWith(CI);
+    case Intrinsic::hpvm_tensor_group_convolution: { /* llvm.hpvm.tensor.group.conv
+                                                      */
+      // Tensor group conv is not in place.
+      DEBUG(errs() << F->getName()
+                   << "\t: Handling tensor group convolution \n");
+
+      // Argument list for the runtime call
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      Args.push_back(GEPConst);
+
+      Args.push_back(TensorII->getOperand(0));
+      Args.push_back(TensorII->getOperand(1));
+      Args.push_back(TensorII->getOperand(2));
+      Args.push_back(TensorII->getOperand(3));
+      Args.push_back(TensorII->getOperand(4));
+      Args.push_back(TensorII->getOperand(5));
+
+      Constant *conv_mode =
+          ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
+      Args.push_back(conv_mode);
+
+      Args.push_back(TensorII->getOperand(7));
+
+      // Create wrapper API runtime function call
+      FunctionCallee wrapper_tensorGroupConvolution = M->getOrInsertFunction(
+          StringRef("wrapper_tensorGroupConvolution"),
+          RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))
+              ->getFunctionType());
+      CallInst *CI =
+          CallInst::Create(wrapper_tensorGroupConvolution, Args, "", TensorII);
+      // We can replace the call to hpvm.tensor.mul with the runtime call
+      TensorII->replaceAllUsesWith(CI);
+    } break;
+
+    case Intrinsic::hpvm_tensor_batchnorm: { /* llvm.hpvm.tensor.batchnorm */
+
+      // Tensor batchnorm is not in place.
+      // FIXME: Add Check for InPlace Analysis
+      DEBUG(errs() << F->getName()
+                   << "\t: Handling tensor batch normalization \n");
+
+      // Argument list for the runtime call
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      Args.push_back(GEPConst);
+
+      Args.push_back(TensorII->getOperand(0));
+      Args.push_back(TensorII->getOperand(1));
+      Args.push_back(TensorII->getOperand(2));
+      Args.push_back(TensorII->getOperand(3));
+      Args.push_back(TensorII->getOperand(4));
+      Args.push_back(TensorII->getOperand(5));
+
+      // Create wrapper API runtime function call
+      FunctionCallee wrapper_tensorBatchNorm = M->getOrInsertFunction(
+          StringRef("wrapper_tensorBatchNorm"),
+          RtM->getFunction(StringRef("wrapper_tensorBatchNorm"))
+              ->getFunctionType());
+      CallInst *CI =
+          CallInst::Create(wrapper_tensorBatchNorm, Args, "", TensorII);
+      // We can replace the call to hpvm.tensor.batchnorm with the wrapper API
+      // call
+      TensorII->replaceAllUsesWith(CI);
+    } break;
+
+    case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */
+      DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n");
+
+      // Tensor add(a,b) is in place for argument a.
+      //        Value *Op = TensorII->getOperand(0);
+      // Test the intrinsic operand for in place operation.
+      //        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
+
+      // Code generation will not continue if this is false, because the target
+      // may provide an in place operation(safe choice)
+      // FIXME: remove this comment - must check for in-place
+      //        assert(inplace &&
+      //               "Operand not valid for in place operation. Code gen
+      //               aborted.\n");
+
+      // Argument list for the runtime call
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      Args.push_back(GEPConst);
+
+      Args.push_back(TensorII->getOperand(0));
+      Args.push_back(TensorII->getOperand(1));
+
+      // Create wrapper API runtime function call
+      FunctionCallee wrapper_tensorAdd = M->getOrInsertFunction(
+          StringRef("wrapper_tensorAdd"),
+          RtM->getFunction(StringRef("wrapper_tensorAdd"))->getFunctionType());
+      CallInst::Create(wrapper_tensorAdd, Args, "", TensorII);
+      // We can replace the call to hpvm.tensor.add with the 1st argument
+      // that, due to in place operation, now contains the result
+      TensorII->replaceAllUsesWith(TensorII->getOperand(0));
+    } break;
+
+    case Intrinsic::hpvm_tensor_pool_max:
+    case Intrinsic::hpvm_tensor_pool_mean:
+    case Intrinsic::hpvm_tensor_pool_min: {
+      DEBUG(errs() << F->getName()
+                   << "\t: Handling tensor pooling functions\n");
+
+      // Argument list for tensor pooling:
+      // input, poolFunction, window_height, window_width,
+      // vertical_pad, horizontal_pad, vertical_stride, horizontal_stride
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      Args.push_back(GEPConst);
+
+      Args.push_back(TensorII->getOperand(0));
+
+      int pool_type = 0;
+      if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) {
+        pool_type = 0;
       }
-      break;
-
-      case Intrinsic::hpvm_tensor_batchnorm:
-      { /* llvm.hpvm.tensor.batchnorm */
-
-        // Tensor batchnorm is not in place.
-	// FIXME: Add Check for InPlace Analysis 
-        DEBUG(errs() << F->getName() << "\t: Handling tensor batch normalization \n");
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-
-        // Create wrapper API runtime function call
-        FunctionCallee wrapper_tensorBatchNorm =
-          M->getOrInsertFunction(StringRef("wrapper_tensorBatchNorm"),
-            RtM->getFunction(StringRef("wrapper_tensorBatchNorm"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorBatchNorm,
-                                        Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.batchnorm with the wrapper API call
-        TensorII->replaceAllUsesWith(CI);
+      if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) {
+        pool_type = 1;
       }
-      break;
-
-      case Intrinsic::hpvm_tensor_add:
-      { /* llvm.hpvm.tensor.add */
-        DEBUG(errs() << F->getName() << "\t: Handling tensorAdd\n");
-
-	// Tensor add(a,b) is in place for argument a.
-	//        Value *Op = TensorII->getOperand(0);
-        // Test the intrinsic operand for in place operation.
-	//        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
-
-        // Code generation will not continue if this is false, because the target
-        // may provide an in place operation(safe choice)
-        // FIXME: remove this comment - must check for in-place
-	//        assert(inplace &&
-	//               "Operand not valid for in place operation. Code gen aborted.\n");
-
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-
-        // Create wrapper API runtime function call
-        FunctionCallee wrapper_tensorAdd =
-          M->getOrInsertFunction(StringRef("wrapper_tensorAdd"),
-            RtM->getFunction(StringRef("wrapper_tensorAdd"))->getFunctionType());
-        CallInst::Create(wrapper_tensorAdd, Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.add with the 1st argument
-        // that, due to in place operation, now contains the result
-        TensorII->replaceAllUsesWith(TensorII->getOperand(0));
+      if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) {
+        pool_type = 2;
       }
-      break;
 
-      case Intrinsic::hpvm_tensor_pool_max:
-      case Intrinsic::hpvm_tensor_pool_mean:
-      case Intrinsic::hpvm_tensor_pool_min:
-      {
-        DEBUG(errs() << F->getName() << "\t: Handling tensor pooling functions\n");
-
-        // Argument list for tensor pooling:
-        // input, poolFunction, window_height, window_width,
-        // vertical_pad, horizontal_pad, vertical_stride, horizontal_stride
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-
-        int pool_type = 0;
-        if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) {
-          pool_type = 0;
-        }
-        if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) {
-          pool_type = 1;
-        }
-        if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_min) {
-          pool_type = 2;
-        }
-
-        Constant *constPoolType =
+      Constant *constPoolType =
           ConstantInt::get(Type::getInt32Ty(M->getContext()), pool_type);
-        Args.push_back(constPoolType);
-
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-        Args.push_back(TensorII->getOperand(6));
-
+      Args.push_back(constPoolType);
+
+      Args.push_back(TensorII->getOperand(1));
+      Args.push_back(TensorII->getOperand(2));
+      Args.push_back(TensorII->getOperand(3));
+      Args.push_back(TensorII->getOperand(4));
+      Args.push_back(TensorII->getOperand(5));
+      Args.push_back(TensorII->getOperand(6));
+
+      // Create wrapper API runtime function call
+      FunctionCallee wrapper_tensorPooling = M->getOrInsertFunction(
+          StringRef("wrapper_tensorPooling"),
+          RtM->getFunction(StringRef("wrapper_tensorPooling"))
+              ->getFunctionType());
+      CallInst *CI =
+          CallInst::Create(wrapper_tensorPooling, Args, "", TensorII);
+
+      // Replacing intrinsic result uses with the result of the tensor runtime
+      // operation
+      TensorII->replaceAllUsesWith(CI);
+    } break;
+
+    case Intrinsic::hpvm_tensor_relu:
+    case Intrinsic::hpvm_tensor_clipped_relu:
+    case Intrinsic::hpvm_tensor_tanh: {
+      DEBUG(errs() << F->getName()
+                   << "\t: Handling tensor activation functions\n");
+
+      // Tensor relu(a) (and others) is in place for argument a.
+      Value *Op = TensorII->getOperand(0);
+
+      // Test the intrinsic operand for in place operation.
+      //-- bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
+      // Code generation will not continue if this is false, because the target
+      // may provide an in place operation(safe choice)
+      //-- assert(inplace &&
+      //--        "Operand not valid for in place operation. Code gen
+      //-- aborted.\n");
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      Args.push_back(GEPConst);
+
+      Args.push_back(TensorII->getOperand(0));
+
+      if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) {
         // Create wrapper API runtime function call
-        FunctionCallee wrapper_tensorPooling =
-          M->getOrInsertFunction(StringRef("wrapper_tensorPooling"),
-            RtM->getFunction(StringRef("wrapper_tensorPooling"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorPooling, Args, "", TensorII);
-
-        // Replacing intrinsic result uses with the result of the tensor runtime operation
-        TensorII->replaceAllUsesWith(CI);
-      }
-      break;
-
-      case Intrinsic::hpvm_tensor_relu:
-      case Intrinsic::hpvm_tensor_clipped_relu:
-      case Intrinsic::hpvm_tensor_tanh:
-      {
-        DEBUG(errs() << F->getName() << "\t: Handling tensor activation functions\n");
-
-        // Tensor relu(a) (and others) is in place for argument a.
-        Value *Op = TensorII->getOperand(0);
-
-        // Test the intrinsic operand for in place operation.
-        //-- bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
-        // Code generation will not continue if this is false, because the target
-        // may provide an in place operation(safe choice)
-        //-- assert(inplace &&
-        //--        "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-
-        if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) {
-              // Create wrapper API runtime function call
-              FunctionCallee wrapper_tensorRelu =
-                M->getOrInsertFunction(StringRef("wrapper_tensorRelu"),
-                  RtM->getFunction(StringRef("wrapper_tensorRelu"))->getFunctionType());
-              CallInst::Create(wrapper_tensorRelu, Args, "", TensorII);
-        }
-        else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu) {
-              // Create wrapper API runtime function call
-              FunctionCallee wrapper_tensorClippedRelu =
-                M->getOrInsertFunction(StringRef("wrapper_tensorClippedRelu"),
-                  RtM->getFunction(StringRef("wrapper_tensorClippedRelu"))->getFunctionType());
-              CallInst::Create(wrapper_tensorClippedRelu, Args, "", TensorII);
-        }
-        else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) {
-              // Create wrapper API runtime function call
-              FunctionCallee wrapper_tensorTanh =
-                M->getOrInsertFunction(StringRef("wrapper_tensorTanh"),
-                  RtM->getFunction(StringRef("wrapper_tensorTanh"))->getFunctionType());
-              CallInst::Create(wrapper_tensorTanh, Args, "", TensorII);
-        }
-     
-        // We can replace the call to hpvm.tensor.{relu,clipped relu, tanh}
-        //  with the 1st argument that, due to in place operation,
-        // now contains the result
-        TensorII->replaceAllUsesWith(TensorII->getOperand(0));
-      }
-      break;
-
-      case Intrinsic::hpvm_tensor_softmax:
-      { /* llvm.hpvm.tensor.softmax */
-
-        DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n");
-        // Tensor softmax(a) is in place for argument a.
-        Value *Op = TensorII->getOperand(0);
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-
+        FunctionCallee wrapper_tensorRelu = M->getOrInsertFunction(
+            StringRef("wrapper_tensorRelu"),
+            RtM->getFunction(StringRef("wrapper_tensorRelu"))
+                ->getFunctionType());
+        CallInst::Create(wrapper_tensorRelu, Args, "", TensorII);
+      } else if (TensorII->getIntrinsicID() ==
+                 Intrinsic::hpvm_tensor_clipped_relu) {
+        // Create wrapper API runtime function call
+        FunctionCallee wrapper_tensorClippedRelu = M->getOrInsertFunction(
+            StringRef("wrapper_tensorClippedRelu"),
+            RtM->getFunction(StringRef("wrapper_tensorClippedRelu"))
+                ->getFunctionType());
+        CallInst::Create(wrapper_tensorClippedRelu, Args, "", TensorII);
+      } else if (TensorII->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) {
         // Create wrapper API runtime function call
-        FunctionCallee wrapper_tensorSoftmax =
-          M->getOrInsertFunction(StringRef("wrapper_tensorSoftmax"),
-                 RtM->getFunction(StringRef("wrapper_tensorSoftmax"))->getFunctionType());
-        CallInst::Create(wrapper_tensorSoftmax, Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.softmax with the 1st argument
-        // that, due to in place operation, now contains the result
-        TensorII->replaceAllUsesWith(TensorII->getOperand(0));
+        FunctionCallee wrapper_tensorTanh = M->getOrInsertFunction(
+            StringRef("wrapper_tensorTanh"),
+            RtM->getFunction(StringRef("wrapper_tensorTanh"))
+                ->getFunctionType());
+        CallInst::Create(wrapper_tensorTanh, Args, "", TensorII);
       }
-      break;
 
-      
-      default:
-        llvm_unreachable("Unknown HPVM Intrinsic!");
-        break;
+      // We can replace the call to hpvm.tensor.{relu,clipped relu, tanh}
+      //  with the 1st argument that, due to in place operation,
+      // now contains the result
+      TensorII->replaceAllUsesWith(TensorII->getOperand(0));
+    } break;
+
+    case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */
+
+      DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n");
+      // Tensor softmax(a) is in place for argument a.
+      Value *Op = TensorII->getOperand(0);
+
+      // Create string for node name, as first argument for wrapper API call
+      Constant *ConstArray =
+          ConstantDataArray::getString(M->getContext(), strRef, true);
+      GlobalVariable *GV =
+          new GlobalVariable(*M, ConstArray->getType(), true,
+                             GlobalValue::ExternalLinkage, ConstArray, "");
+      // Create GEP expression to access it
+      Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
+      Constant *GEPIndices[] = {Int_0, Int_0};
+      Constant *GEPConst = ConstantExpr::getGetElementPtr(
+          GV->getType()->getPointerElementType(), GV, GEPIndices);
+
+      Args.push_back(GEPConst);
+
+      Args.push_back(TensorII->getOperand(0));
+
+      // Create wrapper API runtime function call
+      FunctionCallee wrapper_tensorSoftmax = M->getOrInsertFunction(
+          StringRef("wrapper_tensorSoftmax"),
+          RtM->getFunction(StringRef("wrapper_tensorSoftmax"))
+              ->getFunctionType());
+      CallInst::Create(wrapper_tensorSoftmax, Args, "", TensorII);
+      // We can replace the call to hpvm.tensor.softmax with the 1st argument
+      // that, due to in place operation, now contains the result
+      TensorII->replaceAllUsesWith(TensorII->getOperand(0));
+    } break;
+
+    default:
+      llvm_unreachable("Unknown HPVM Intrinsic!");
+      break;
     }
 
   } // No other case exists, since assertion passed
 
-
   // Remove the instructions we translated to the simulator call.
   // Traverse the vector backwards, otherwise definitions are deleted while
   // their subsequent uses are still around.
   for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(),
-       re = IIs.rend(); ri != re; ++ri) {
+                                                      re = IIs.rend();
+       ri != re; ++ri) {
     DEBUG(errs() << "Erasing: " << **ri << "\n");
     (*ri)->eraseFromParent();
   }
 
-
   for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs_remove.rbegin(),
-       re = IIs_remove.rend(); ri != re; ++ri) {
+                                                      re = IIs_remove.rend();
+       ri != re; ++ri) {
     DEBUG(errs() << "Erasing: " << **ri << "\n");
     (*ri)->eraseFromParent();
   }
-
 }
 
 // DFG2LLVM_WrapperAPI - The first implementation.
@@ -1256,11 +1211,8 @@ struct DFG2LLVM_WrapperAPI : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
   DFG2LLVM_WrapperAPI() : DFG2LLVM(ID) {}
 
-  
 private:
-
 public:
-
   void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequired<BuildDFG>();
     AU.addRequired<InPlaceDFGAnalysisWrapper>();
@@ -1275,7 +1227,7 @@ public:
 class CGT_WrapperAPI : public CodeGenTraversal {
 
 private:
-  //Member variables
+  // Member variables
   unsigned nodeID; // Used as a node identifier
 
   std::string QuantizationInputsFilenameStr;
@@ -1296,26 +1248,23 @@ private:
   // Virtual Functions
   void init();
   void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
+  void codeGen(DFInternalNode *N);
+  void codeGen(DFLeafNode *N);
 
 public:
-
   // Constructor
   CGT_WrapperAPI(Module &_M, BuildDFG &_DFG,
-    InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP,
-    std::string &_QuantizationInputsFilenameStr,
-    std::string &_ConfigurationInputsFilenameStr)
-  : CodeGenTraversal(_M, _DFG), IPP(&_IPP),
-    QuantizationInputsFilenameStr(_QuantizationInputsFilenameStr),
-    ConfigurationInputsFilenameStr(_ConfigurationInputsFilenameStr) {
+                 InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP,
+                 std::string &_QuantizationInputsFilenameStr,
+                 std::string &_ConfigurationInputsFilenameStr)
+      : CodeGenTraversal(_M, _DFG), IPP(&_IPP),
+        QuantizationInputsFilenameStr(_QuantizationInputsFilenameStr),
+        ConfigurationInputsFilenameStr(_ConfigurationInputsFilenameStr) {
     nodeID = 0;
     initRuntimeAPI();
   }
-
 };
 
-
 void CGT_WrapperAPI::init() {
   // FIXME: what to do here? If anything?
 }
@@ -1325,15 +1274,8 @@ void CGT_WrapperAPI::initRuntimeAPI() {
 
   // Load Runtime API Module
   SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
+  runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext());
+  if (runtimeModule == nullptr)
     DEBUG(errs() << Err.getMessage());
   else
     DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
@@ -1352,69 +1294,71 @@ void CGT_WrapperAPI::initRuntimeAPI() {
   // Find hpvm.init and visc.cleanup calls, and add placeholder methods
   // for initialization and cleanup of the hpvm tensor runtime
 
-  Function* VI = M.getFunction("llvm.hpvm.init");
+  Function *VI = M.getFunction("llvm.hpvm.init");
   assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n");
   InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initApproxhpvmRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
+  CallInst::Create(
+      llvm_hpvm_initApproxhpvmRt,
+      ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
+      "", InitCall);
 
   StringRef QRangesStrRef = StringRef(QuantizationInputsFilenameStr);
   // Create string for node name, as first argument for wrapper API call
-  Constant *ConstArray1 = ConstantDataArray::getString(M.getContext(),
-                                                       QRangesStrRef, true);
-  GlobalVariable *GV1 = new GlobalVariable(M,ConstArray1->getType(),
-                        true, GlobalValue::ExternalLinkage, ConstArray1, "");
+  Constant *ConstArray1 =
+      ConstantDataArray::getString(M.getContext(), QRangesStrRef, true);
+  GlobalVariable *GV1 =
+      new GlobalVariable(M, ConstArray1->getType(), true,
+                         GlobalValue::ExternalLinkage, ConstArray1, "");
   // Create GEP expression to access it
-  Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
-  Constant* GEPIndices[] = { Int_0, Int_0 };
-  Constant* QRangesGEPConst =
-    ConstantExpr::getGetElementPtr(GV1->getType()->getPointerElementType(),
-                                   GV1, GEPIndices);
+  Constant *Int_0 = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+  Constant *GEPIndices[] = {Int_0, Int_0};
+  Constant *QRangesGEPConst = ConstantExpr::getGetElementPtr(
+      GV1->getType()->getPointerElementType(), GV1, GEPIndices);
 
   StringRef ConfsStrRef = StringRef(ConfigurationInputsFilenameStr);
   // Create string for node name, as first argument for wrapper API call
-  Constant *ConstArray2 = ConstantDataArray::getString(M.getContext(),
-                                                       ConfsStrRef, true);
-  GlobalVariable *GV2 = new GlobalVariable(M,ConstArray2->getType(),
-                        true, GlobalValue::ExternalLinkage, ConstArray2, "");
-  Constant* ConfsGEPConst =
-    ConstantExpr::getGetElementPtr(GV2->getType()->getPointerElementType(),
-                                   GV2, GEPIndices);
-  ArrayRef<Value*> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst};
-  CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "", InitCall);
-
-  Function* VC = M.getFunction("llvm.hpvm.cleanup");
+  Constant *ConstArray2 =
+      ConstantDataArray::getString(M.getContext(), ConfsStrRef, true);
+  GlobalVariable *GV2 =
+      new GlobalVariable(M, ConstArray2->getType(), true,
+                         GlobalValue::ExternalLinkage, ConstArray2, "");
+  Constant *ConfsGEPConst = ConstantExpr::getGetElementPtr(
+      GV2->getType()->getPointerElementType(), GV2, GEPIndices);
+  ArrayRef<Value *> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst};
+  CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "",
+                   InitCall);
+
+  Function *VC = M.getFunction("llvm.hpvm.cleanup");
   assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n");
   CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupApproxhpvmRt, ArrayRef<Value*>(), "", CleanupCall);
-  CallInst::Create(llvm_hpvm_clearRuntimeController, ArrayRef<Value*>(), "", CleanupCall);
-
+  CallInst::Create(llvm_hpvm_cleanupApproxhpvmRt, ArrayRef<Value *>(), "",
+                   CleanupCall);
+  CallInst::Create(llvm_hpvm_clearRuntimeController, ArrayRef<Value *>(), "",
+                   CleanupCall);
 }
 
-void CGT_WrapperAPI::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
+void CGT_WrapperAPI::codeGen(DFInternalNode *N) {
+  errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n";
+  errs() << "Skipping internal node\n";
 }
 
-void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
+void CGT_WrapperAPI::codeGen(DFLeafNode *N) {
 
   // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
+  if (N->isDummyNode()) {
     DEBUG(errs() << "Skipping dummy node\n");
     return;
   }
 
   // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
+  if (N->isAllocationNode()) {
     assert(false && "Allocation Node not expected in ApproxHPVM");
     return;
   }
 
-
   // Increment the node ID, for current node.
   ++nodeID;
-  errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
+  errs() << "Node ID string: " << StringRef(std::to_string(nodeID)) << "\n";
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
@@ -1429,50 +1373,51 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
 
   // Clone the function
   ValueToValueMapTy VMap;
-  std::string FName(F->getName().data());//Twine FName = F->getName();
+  std::string FName(F->getName().data()); // Twine FName = F->getName();
 
-  
   F_wrapper_api = CloneFunction(F, VMap);
-  F_wrapper_api->setName(FName+"_wrapper_api");
+  F_wrapper_api->setName(FName + "_wrapper_api");
   F_wrapper_api->removeFromParent();
   M.getFunctionList().push_back(F_wrapper_api);
 
   N->addGenFunc(F_wrapper_api, hpvm::PROMISE_TARGET, true);
 
   /* Removing HPVM in/out/inout function attributes */
-  for(Function::arg_iterator ai = F_wrapper_api->arg_begin(), ae = F_wrapper_api->arg_end();
-      ai != ae; ai++){
+  for (Function::arg_iterator ai = F_wrapper_api->arg_begin(),
+                              ae = F_wrapper_api->arg_end();
+       ai != ae; ai++) {
     Argument *Arg = &*ai;
-    if(Arg->hasAttribute(Attribute::In))
+    if (Arg->hasAttribute(Attribute::In))
       Arg->removeAttr(Attribute::In);
-    if(Arg->hasAttribute(Attribute::Out))
+    if (Arg->hasAttribute(Attribute::Out))
       Arg->removeAttr(Attribute::Out);
-    if(Arg->hasAttribute(Attribute::InOut))
-      Arg->removeAttr(Attribute::InOut);    
+    if (Arg->hasAttribute(Attribute::InOut))
+      Arg->removeAttr(Attribute::InOut);
   }
 
   // Adding nounwind to generated function : FIXME: needed?
   DEBUG(errs() << "Adding nounwind to generated function\n");
-  F_wrapper_api->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);
+  F_wrapper_api->addAttribute(AttributeList::FunctionIndex,
+                              Attribute::NoUnwind);
 
-  // Add llvm_hpvm_requestTensor calls for every pointer argument of the function
-  // (they are all expected to be tensors), at the beginning of the function.
-  // This is the first instruction of the function, insert them before this
-  Instruction* FI = &*(F_wrapper_api->getEntryBlock().begin());
+  // Add llvm_hpvm_requestTensor calls for every pointer argument of the
+  // function (they are all expected to be tensors), at the beginning of the
+  // function. This is the first instruction of the function, insert them before
+  // this
+  Instruction *FI = &*(F_wrapper_api->getEntryBlock().begin());
 
   // FIXME: verify that we want 1 as a target device
   // In this backend, the target device is GPU, represented by i32 1.
   ConstantInt *TargetDeviceID =
-    ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+      ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
 
   for (Function::arg_iterator ai = F_wrapper_api->arg_begin(),
-       ae = F_wrapper_api->arg_end(); ai != ae; ++ai) {
-    Argument* Arg = &*ai;
+                              ae = F_wrapper_api->arg_end();
+       ai != ae; ++ai) {
+    Argument *Arg = &*ai;
     if (Arg->getType()->isPointerTy()) {
       Value *Args[] = {Arg, TargetDeviceID};
-      CallInst::Create(hpvm_request_tensor,
-                       ArrayRef<Value*>(Args, 2),
-                       "", FI);
+      CallInst::Create(hpvm_request_tensor, ArrayRef<Value *>(Args, 2), "", FI);
     }
   }
 
@@ -1485,8 +1430,8 @@ void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
     CGM.transition(dyn_cast<IntrinsicInst>(I));
   }
   errs() << "CLONED FUNCTION: " << *F_wrapper_api << "\n";
- // errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
-  //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP);
+  // errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
+  // CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP);
   CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP);
 
   return;
@@ -1501,30 +1446,26 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
 
   // Get the In Place Analysis Results
   InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
-    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
+      (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
+
+  std::vector<DFInternalNode *> Roots = DFG.getRoots();
 
-  
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
- 
   // Visitor for Code Generation Graph Traversal
-  CGT_WrapperAPI *CGTVisitor = new CGT_WrapperAPI(M, DFG, IPP,
-                                            QuantizationInputsFilename,
-                                            ConfigurationInputsFilename);
+  CGT_WrapperAPI *CGTVisitor = new CGT_WrapperAPI(
+      M, DFG, IPP, QuantizationInputsFilename, ConfigurationInputsFilename);
 
   // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
+  for (auto rootNode : Roots) {
     // Initiate code generation for root DFNode
     CGTVisitor->visit(rootNode);
   }
 
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
+  // TODO: Edit module epilogue to remove the VISC intrinsic declarations
   delete CGTVisitor;
 
-  
   return true;
 }
 
-
 /******************************************************************************
  *                              Helper functions                              *
  ******************************************************************************/
@@ -1532,13 +1473,14 @@ bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
 /* Method needs to be called as part of an analysis pre-step, before code      *
  * generation is run on a node function, so that the HPVM intrinsics are still *
  * in place. */
-bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N,
-                                       InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
+bool isValidOperandForInPlaceOperation(
+    Value *Op, Function *Fgen, DFNode *N,
+    InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
 
   if (Argument *Arg = dyn_cast<Argument>(Op)) {
     DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
     assert((Arg->getParent() == Fgen) &&
-          "Extra Parameter in body of Function\n");
+           "Extra Parameter in body of Function\n");
     // Candidate parameter is a function argument
     // In this case, consult the result of in place analysis
     // Find position in arg list
@@ -1552,11 +1494,10 @@ bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N,
       DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n");
       return false;
     }
-  }
-  else {
+  } else {
     // If it is not an argument, then it needs to be the result of
     // another intrinsic. These are new objects that are allocated,
-    // and consumed by next intrinsic. 
+    // and consumed by next intrinsic.
     DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n");
     if (dyn_cast<IntrinsicInst>(Op)) {
       DEBUG(errs() << *Arg << "\t: local, suitable for in place\n");
@@ -1576,5 +1517,3 @@ static RegisterPass<DFG2LLVM_WrapperAPI> X("dfg2llvm-wrapperapi",
                                            false /* does not modify the CFG */,
                                            true  /* transformation,   *
                                                  * not just analysis */);
-
-
diff --git a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp b/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp
index 6944d0d0e2..45ad0ece23 100644
--- a/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp
+++ b/hpvm/lib/Transforms/ReplaceIntrinsics/ReplaceIntrinsics.cpp
@@ -46,10 +46,9 @@ namespace {
 struct DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
   DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls() : DFG2LLVM(ID) {}
-private:
 
+private:
 public:
-
   void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequired<BuildDFG>();
     AU.addRequired<InPlaceDFGAnalysisWrapper>();
@@ -64,7 +63,7 @@ public:
 class CGT_ReplaceApproxHPVMIntrinsicsWithFCalls : public CodeGenTraversal {
 
 private:
-  //Member variables
+  // Member variables
   InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;
 
   // VISC Runtime API and Tensor runtime API
@@ -74,7 +73,7 @@ private:
    relevant code also, but I leave in in for now until verified. */
   FunctionCallee llvm_hpvm_initTensorRt;
   FunctionCallee llvm_hpvm_cleanupTensorRt;
-//  Constant* hpvm_request_tensor; DONE: request tensor will not be used
+  //  Constant* hpvm_request_tensor; DONE: request tensor will not be used
 
   // Functions
   bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N);
@@ -82,27 +81,25 @@ private:
   // Virtual Functions
   void init();
   void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
+  void codeGen(DFInternalNode *N);
+  void codeGen(DFLeafNode *N);
 
 public:
-
   // Constructor
-  CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
-  : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
+  CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(
+      Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
+      : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
     initRuntimeAPI();
   }
-
 };
 
-bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::isValidOperandForInPlaceOperation(Value *Op,
-                                                  Function *Fgen,
-                                                  DFNode *N) {
+bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::
+    isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N) {
   // We only expect the if branch to be taken
   if (Argument *Arg = dyn_cast<Argument>(Op)) {
     DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
     assert((Arg->getParent() == Fgen) &&
-          "Extra Parameter in body of Function\n");
+           "Extra Parameter in body of Function\n");
     // Candidae parameter is a function argument
     // In this case, consult the result of in place analysis
     // Find position in arg list
@@ -116,8 +113,7 @@ bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::isValidOperandForInPlaceOperatio
       DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n");
       return false;
     }
-  }
-  else {
+  } else {
     // If it is not an argument, then it needs to be the result of
     // another intrinsic. These are new objects that are allocated,
     // and consumed by next intrinsic. Alternatively, the intrinsic
@@ -133,32 +129,22 @@ bool CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::isValidOperandForInPlaceOperatio
         return true;
       else
         return false;
-    }
-    else {
+    } else {
       DEBUG(errs() << *Arg << "\t: local, not suitable for in place\n");
       return false;
     }
   }
 }
 
-
-void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::init() {
-}
+void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::init() {}
 
 // Initialize the VISC runtime API. This makes it easier to insert these calls
 void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::initRuntimeAPI() {
 
   // Load Runtime API Module
   SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_cpu_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
+  runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext());
+  if (runtimeModule == nullptr)
     DEBUG(errs() << Err.getMessage());
   else
     DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
@@ -169,125 +155,123 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::initRuntimeAPI() {
   // - request a tensor
   DECLARE(llvm_hpvm_initTensorRt);
   DECLARE(llvm_hpvm_cleanupTensorRt);
-//  DECLARE(hpvm_request_tensor);
+  //  DECLARE(hpvm_request_tensor);
 
   // Find hpvm.init and visc.cleanup calls, and add placeholder methods
   // for initialization and cleanup of the hpvm tensor runtime
 
-  Function* VI = M.getFunction("llvm.hpvm.init");
+  Function *VI = M.getFunction("llvm.hpvm.init");
   assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n");
   InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initTensorRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
+  CallInst::Create(
+      llvm_hpvm_initTensorRt,
+      ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
+      "", InitCall);
 
-  Function* VC = M.getFunction("llvm.hpvm.cleanup");
+  Function *VC = M.getFunction("llvm.hpvm.cleanup");
   assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n");
   CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
-
+  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value *>(), "",
+                   CleanupCall);
 }
 
-void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
+void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFInternalNode *N) {
+  errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n";
+  errs() << "Skipping internal node\n";
 }
 
-  
-void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) {
+void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode *N) {
 
   // Skip if it is a dummy node
-  if(N->isDummyNode()) {
+  if (N->isDummyNode()) {
     DEBUG(errs() << "Skipping dummy node\n");
     return;
   }
 
   // Abort if it is an allocation node
-  if(N->isAllocationNode()) {
+  if (N->isAllocationNode()) {
     assert(false && "Allocation Node not expected in ApproxHPVM");
     return;
   }
 
   // Search for intrinsic only if it has the right hint
   if (!checkPreferredTarget(N, hpvm::CPU_TARGET)) {
-    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
+    errs() << "Skipping node: " << N->getFuncPointer()->getName() << "\n";
     return;
   }
 
   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
-  errs()<<"function name = "<< F->getName()<<"\n";
+  errs() << "function name = " << F->getName() << "\n";
 
   std::vector<IntrinsicInst *> IItoRemove;
 
   for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
     Instruction *I = &(*i);
     if (BuildDFG::isHPVMIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")
-        && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
+      IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+      assert(
+          (II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor") &&
+          "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
       /********************* Handle VISC Tensor intrinsics ********************/
-      // We replace them with calls to functions with implementations at the LLVM level
+      // We replace them with calls to functions with implementations at the
+      // LLVM level
       switch (II->getIntrinsicID()) {
 
-      case Intrinsic::hpvm_tensor_convolution:
-      { /* llvm.hpvm.tensor.convolution */
+      case Intrinsic::hpvm_tensor_convolution: { /* llvm.hpvm.tensor.convolution
+                                                  */
         DEBUG(errs() << F->getName() << "\t: Handling tensor convolution \n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
-	Args.push_back(II->getOperand(2));
+        Args.push_back(II->getOperand(2));
         Args.push_back(II->getOperand(3));
         Args.push_back(II->getOperand(4));
         Args.push_back(II->getOperand(5));
 
-	Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
-	Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+        Constant *conv_mode =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+        Constant *conv_precision =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
 
         Args.push_back(conv_mode);
         Args.push_back(conv_precision);
-	
+
         // Create function call
         FunctionCallee tensorConvolutionCPU;
         DECLARE(tensorConvolutionCPU);
-	
-        CallInst* CI = CallInst::Create(tensorConvolutionCPU,
-                                        Args, "", II);
+
+        CallInst *CI = CallInst::Create(tensorConvolutionCPU, Args, "", II);
         // We can replace the call to hpvm.tensor.mul with the LLVM call
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      case Intrinsic::hpvm_tensor_mul:
-      { /* llvm.hpvm.tensor.mul */
+      case Intrinsic::hpvm_tensor_mul: { /* llvm.hpvm.tensor.mul */
         DEBUG(errs() << F->getName() << "\t: Handling tensor mul\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
 
         // Create function call
         FunctionCallee tensorGemmCPU;
         DECLARE(tensorGemmCPU);
-	
-        CallInst* CI = CallInst::Create(tensorGemmCPU,
-                                        Args, "", II);
+
+        CallInst *CI = CallInst::Create(tensorGemmCPU, Args, "", II);
         // We can replace the call to hpvm.tensor.mul with the LLVM call
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      case Intrinsic::hpvm_tensor_add:
-      { /* llvm.hpvm.tensor.add */
+      case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */
         DEBUG(errs() << F->getName() << "\t: Handling tensor add\n");
         // Tensor add(a,b) is in place for argument a.
         Value *Op = II->getOperand(0);
@@ -297,12 +281,13 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) {
         // Code generation cannot continue if this is false, because the target
         // only provides an in place operation
 
-	// FIXME: remove this comment - must check for in-place
-        //assert(inplace &&
-        //       "Operand not valid for in place operation. Code gen aborted.\n");
+        // FIXME: remove this comment - must check for in-place
+        // assert(inplace &&
+        //       "Operand not valid for in place operation. Code gen
+        //       aborted.\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
         Args.push_back(II->getOperand(1));
 
@@ -316,12 +301,10 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) {
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
       case Intrinsic::hpvm_tensor_pool_max:
-      case Intrinsic::hpvm_tensor_pool_mean:
-      { /* llvm.hpvm.tensor.relu */
+      case Intrinsic::hpvm_tensor_pool_mean: { /* llvm.hpvm.tensor.relu */
         DEBUG(errs() << F->getName() << "\t: Handling tensor_pool_max\n");
         // Tensor relu(a) is in place for argument a.
         Value *Op = II->getOperand(0);
@@ -333,45 +316,48 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) {
         assert(inplace &&
                "Operand not valid for in place operation. Code gen aborted.\n");
 
-        // Argument list - tensorPooling(input, poolFunction, window_height, window_width, vertical_pad, horizontal_pad,
-	//                               vertical_stride, horizontal_stride);
-        std::vector<Value*> Args;
+        // Argument list - tensorPooling(input, poolFunction, window_height,
+        // window_width, vertical_pad, horizontal_pad,
+        //                               vertical_stride, horizontal_stride);
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
 
-	int pool_type = 0;
-	if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max){
+        int pool_type = 0;
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) {
           pool_type = 0;
-	}
-        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean){
+        }
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) {
           pool_type = 1;
-	}	
-	
-	Constant* constPoolType = ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type);
-        Args.push_back(constPoolType); // ID for max pool. Min/Avg have different IDs (non-zero)	
-	Args.push_back(II->getOperand(1));
+        }
+
+        Constant *constPoolType =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type);
+        Args.push_back(constPoolType); // ID for max pool. Min/Avg have
+                                       // different IDs (non-zero)
+        Args.push_back(II->getOperand(1));
         Args.push_back(II->getOperand(2));
-	Args.push_back(II->getOperand(3));
+        Args.push_back(II->getOperand(3));
         Args.push_back(II->getOperand(4));
-	Args.push_back(II->getOperand(5));
-	Args.push_back(II->getOperand(6));
+        Args.push_back(II->getOperand(5));
+        Args.push_back(II->getOperand(6));
 
         // Create function call
         FunctionCallee tensorPoolingCPU;
         DECLARE(tensorPoolingCPU);
-        CallInst* CI = CallInst::Create(tensorPoolingCPU, Args, "", II);
+        CallInst *CI = CallInst::Create(tensorPoolingCPU, Args, "", II);
 
-	// Replacing intrinsic result uses with the result of the LLVM call
+        // Replacing intrinsic result uses with the result of the LLVM call
         II->replaceAllUsesWith(CI);
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }break;
+      } break;
 
       case Intrinsic::hpvm_tensor_relu:
       case Intrinsic::hpvm_tensor_clipped_relu:
-      case Intrinsic::hpvm_tensor_tanh:
-      { /* llvm.hpvm.tensor.relu */
-        DEBUG(errs() << F->getName() << "\t: Handling tensor activation functions \n");
+      case Intrinsic::hpvm_tensor_tanh: { /* llvm.hpvm.tensor.relu */
+        DEBUG(errs() << F->getName()
+                     << "\t: Handling tensor activation functions \n");
         // Tensor relu(a) is in place for argument a.
         Value *Op = II->getOperand(0);
 
@@ -383,42 +369,39 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) {
                "Operand not valid for in place operation. Code gen aborted.\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
 
-	if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu){
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) {
           // Create function call
           FunctionCallee tensorReluCPU;
           DECLARE(tensorReluCPU);
           CallInst::Create(tensorReluCPU, Args, "", II);
-	}
-	else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu){
+        } else if (II->getIntrinsicID() ==
+                   Intrinsic::hpvm_tensor_clipped_relu) {
           // Create function call
           //-- FunctionCallee tensorClippedRelu;
-	  FunctionCallee tensorRelu2CPU;
+          FunctionCallee tensorRelu2CPU;
           DECLARE(tensorRelu2CPU);
           CallInst::Create(tensorRelu2CPU, Args, "", II);
-	}
-	else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh){
+        } else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) {
           // Create function call
           FunctionCallee tensorTanhCPU;
-	  errs()<<"tensorTanh Call = \n\n";
+          errs() << "tensorTanh Call = \n\n";
           DECLARE(tensorTanhCPU);
-	  //errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
+          // errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
           CallInst::Create(tensorTanhCPU, Args, "", II);
-	}
-     
+        }
+
         // We can replace the call to hpvm.tensor.relu with the 1st argument
         // that, due to in place operation, now contains the result
         II->replaceAllUsesWith(II->getOperand(0));
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
-      case Intrinsic::hpvm_tensor_softmax:
-      { /* llvm.hpvm.tensor.softmax */
+      case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */
         DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n");
         // Tensor relu(a) is in place for argument a.
         Value *Op = II->getOperand(0);
@@ -431,7 +414,7 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) {
                "Operand not valid for in place operation. Code gen aborted.\n");
 
         // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
         Args.push_back(II->getOperand(0));
 
         // Create function call
@@ -444,24 +427,21 @@ void CGT_ReplaceApproxHPVMIntrinsicsWithFCalls::codeGen(DFLeafNode* N) {
 
         // Mark to remove at the end
         IItoRemove.push_back(II);
-      }
-      break;
+      } break;
 
       default:
         llvm_unreachable("Unknown VISC Intrinsic!");
         break;
-
       }
-
     }
-
   }
 
   // We need to do this explicitly: DCE pass may not remove them.
   // Traverse the vector backwards, otherwise definitions are deleted while
   // their subsequent uses are still around.
   for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri) {
+                                                      re = IItoRemove.rend();
+       ri != re; ++ri) {
     DEBUG(errs() << "Erasing: " << **ri << "\n");
     errs() << "Erasing: " << **ri << "\n";
     (*ri)->eraseFromParent();
@@ -479,34 +459,32 @@ bool DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls::runOnModule(Module &M) {
 
   // Get the In Place Analysis Results
   InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
-    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
+      (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
   // Print results
   printInPlaceDFGParameter(IPP);
 
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
- 
+  std::vector<DFInternalNode *> Roots = DFG.getRoots();
+
   // Visitor for Code Generation Graph Traversal
   CGT_ReplaceApproxHPVMIntrinsicsWithFCalls *CGTVisitor =
-    new CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(M, DFG, IPP);
+      new CGT_ReplaceApproxHPVMIntrinsicsWithFCalls(M, DFG, IPP);
 
   // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
+  for (auto rootNode : Roots) {
     // Initiate code generation for root DFNode
     CGTVisitor->visit(rootNode);
   }
 
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
+  // TODO: Edit module epilogue to remove the VISC intrinsic declarations
   delete CGTVisitor;
 
   return true;
 }
 
-
 /******************************************************************************
  *                              Helper functions                              *
  ******************************************************************************/
 
-
 } // End of namespace
 
 char DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls::ID = 0;
@@ -515,5 +493,3 @@ static RegisterPass<DFG2LLVM_ReplaceApproxHPVMIntrinsicsWithFCalls> X("replace-i
                                       false /* does not modify the CFG */,
                                       true /* transformation,   *
                                             * not just analysis */);
-
-
-- 
GitLab