Commit 41411aad authored by Yifan Zhao

Use config file to get path to tensor_runtime.ll (started in 750ab062)

parent 750ab062
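
For context: this commit drops the LLVM_SRC_ROOT environment-variable lookup in initRuntimeAPI() and instead takes the path to tensor_runtime.ll from a TENSOR_RT_LL constant defined in the new Config.h include. A minimal sketch of what such a generated header might look like (hypothetical; the actual header is produced by the build system and is not part of this diff):

// Config.h -- hypothetical sketch; a build step such as CMake's
// configure_file would substitute the real install path at configure time.
#ifndef HPVM_CONFIG_H
#define HPVM_CONFIG_H

// Absolute path to the prebuilt tensor-runtime IR consumed by the backends.
#define TENSOR_RT_LL "/path/to/hpvm-tensor-rt/lib/tensor_runtime.ll"

#endif // HPVM_CONFIG_H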
@@ -27,6 +27,7 @@
 #include "SupportHPVM/DFG2LLVM.h"
 #include "InPlaceDFG/InPlaceDFGAnalysis.h"
+#include "Config.h"
 #include <sstream>
@@ -44,10 +45,9 @@ namespace {
 struct DFG2LLVM_CUDNN : public DFG2LLVM {
   static char ID; // Pass identification, replacement for typeid
   DFG2LLVM_CUDNN() : DFG2LLVM(ID) {}

-private:
+private:
 public:
   void getAnalysisUsage(AnalysisUsage &AU) const {
     AU.addRequired<BuildDFG>();
     AU.addRequired<InPlaceDFGAnalysisWrapper>();
@@ -62,7 +62,7 @@ public:
 class CGT_CUDNN : public CodeGenTraversal {
 private:
-  //Member variables
+  // Member variables
   InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;

   // VISC Runtime API and Tensor runtime API
@@ -73,32 +73,28 @@ private:
   // Functions
   bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N);

   // Virtual Functions
   void init();
   void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
+  void codeGen(DFInternalNode *N);
+  void codeGen(DFLeafNode *N);

 public:
   // Constructor
-  CGT_CUDNN(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
-    : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
+  CGT_CUDNN(Module &_M, BuildDFG &_DFG,
+            InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
+      : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
     initRuntimeAPI();
   }
 };

-bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
-                                                  Function *Fgen,
+bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op, Function *Fgen,
                                                   DFNode *N) {
   if (Argument *Arg = dyn_cast<Argument>(Op)) {
     DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
     assert((Arg->getParent() == Fgen) &&
-          "Extra Parameter in body of Function\n");
+           "Extra Parameter in body of Function\n");
     // Candidate parameter is a function argument
     // In this case, consult the result of in place analysis
     // Find position in arg list
@@ -112,11 +108,10 @@ bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
       DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n");
       return false;
     }
-  }
-  else {
+  } else {
     // If it is not an argument, then it needs to be the result of
     // another intrinsic. These are new objects that are allocated,
     // and consumed by next intrinsic.
     DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n");
     if (dyn_cast<IntrinsicInst>(Op)) {
       DEBUG(errs() << *Arg << "\t: local, suitable for in place\n");
@@ -128,24 +123,15 @@ bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
   }
 }

-void CGT_CUDNN::init() {
-}
+void CGT_CUDNN::init() {}

 // Initialize the VISC runtime API. This makes it easier to insert these calls
 void CGT_CUDNN::initRuntimeAPI() {
   // Load Runtime API Module
   SMDiagnostic Err;

-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/tools/hpvm/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
+  runtimeModule = parseIRFile(TENSOR_RT_LL, Err, M.getContext());
+  if (runtimeModule == nullptr)
     DEBUG(errs() << Err.getMessage());
   else
     DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
@@ -161,59 +147,60 @@ void CGT_CUDNN::initRuntimeAPI() {
   // Find hpvm.init and visc.cleanup calls, and add placeholder methods
   // for initialization and cleanup of the hpvm tensor runtime

-  Function* VI = M.getFunction("llvm.hpvm.init");
+  Function *VI = M.getFunction("llvm.hpvm.init");
   assert(VI->getNumUses() == 1 && "__hpvm__init should only be used once\n");
   InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initTensorRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
+  CallInst::Create(
+      llvm_hpvm_initTensorRt,
+      ArrayRef<Value *>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
+      "", InitCall);

-  Function* VC = M.getFunction("llvm.hpvm.cleanup");
+  Function *VC = M.getFunction("llvm.hpvm.cleanup");
   assert(VC->getNumUses() == 1 && "__hpvm__clear should only be used once\n");
   CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
+  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value *>(), "",
+                   CleanupCall);
 }

-void CGT_CUDNN::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
+void CGT_CUDNN::codeGen(DFInternalNode *N) {
+  errs() << "Inside node: " << N->getFuncPointer()->getName() << "\n";
+  errs() << "Skipping internal node\n";
 }

-void CGT_CUDNN::codeGen(DFLeafNode* N) {
+void CGT_CUDNN::codeGen(DFLeafNode *N) {

   // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
+  if (N->isDummyNode()) {
     DEBUG(errs() << "Skipping dummy node\n");
     return;
   }

   // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
+  if (N->isAllocationNode()) {
     assert(false && "Allocation Node not expected in ApproxHPVM");
     return;
   }

   // Generate code only if it has the right hint
   if (!checkPreferredTarget(N, hpvm::CUDNN_TARGET)) {
-    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
+    errs() << "Skipping node: " << N->getFuncPointer()->getName() << "\n";
     return;
   }

   // Get the function associated with the dataflow node
   Function *F = N->getFuncPointer();
-  errs()<<"function name = "<< F->getName()<<"\n";
+  errs() << "function name = " << F->getName() << "\n";

   /* Removing HPVM in/out/inout function attributes */
-  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ai++){
+  for (Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
+       ai++) {
     Argument *Arg = &*ai;
-    if(Arg->hasAttribute(Attribute::In))
+    if (Arg->hasAttribute(Attribute::In))
       Arg->removeAttr(Attribute::In);
-    if(Arg->hasAttribute(Attribute::Out))
+    if (Arg->hasAttribute(Attribute::Out))
       Arg->removeAttr(Attribute::Out);
-    if(Arg->hasAttribute(Attribute::InOut))
-      Arg->removeAttr(Attribute::InOut);
+    if (Arg->hasAttribute(Attribute::InOut))
+      Arg->removeAttr(Attribute::InOut);
   }

   // Look up if we have visited this function before. If we have, then just
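
The intrinsic-lowering hunks below repeatedly use DECLARE(X) before each CallInst::Create. The macro's definition is outside this diff; judging from its use, it binds X to the like-named function from the loaded runtime module, roughly along these lines (an assumption, not the file's actual definition):

// Hypothetical reconstruction of the DECLARE helper: bind the FunctionCallee
// X to a declaration of the identically named function from tensor_runtime.ll,
// inserting the declaration into the module under compilation if needed.
#define DECLARE(X)                                                            \
  X = M.getOrInsertFunction(                                                  \
      #X, runtimeModule->getFunction(#X)->getFunctionType())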
@@ -223,14 +210,14 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
   assert((F_cudnn == NULL) &&
          "Error: Visiting a node for which code already generated");

   // Clone the function
   ValueToValueMapTy VMap;
   std::string FName(F->getName().data());
   F_cudnn = CloneFunction(F, VMap);
   F_cudnn->setName(FName + "_cudnn");
-  errs()<<"Cloned function name2 = "<<F_cudnn->getName()<<"\n";
-  F_cudnn->removeFromParent();
+  errs() << "Cloned function name2 = " << F_cudnn->getName() << "\n";
+  F_cudnn->removeFromParent();
   M.getFunctionList().push_back(F_cudnn);

   N->addGenFunc(F_cudnn, hpvm::CUDNN_TARGET, true);
@@ -239,165 +226,161 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
   DEBUG(errs() << "Adding nounwind to generated function\n");
   F_cudnn->addAttribute(AttributeList::FunctionIndex, Attribute::NoUnwind);

-  // Add llvm_hpvm_requestTensor calls for every pointer argument of the function
-  // (they are all expected to be tensors), at the beginning of the function.
-  // This is the first instruction of the function, insert them before this
-  Instruction* FI = &*(F_cudnn->getEntryBlock().begin());
+  // Add llvm_hpvm_requestTensor calls for every pointer argument of the
+  // function (they are all expected to be tensors), at the beginning of the
+  // function. This is the first instruction of the function, insert them before
+  // this
+  Instruction *FI = &*(F_cudnn->getEntryBlock().begin());

   // In this backend, the target device is GPU, represented by i32 1.
   ConstantInt *TargetDeviceID =
-    ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+      ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);

   for (Function::arg_iterator ai = F_cudnn->arg_begin(),
-       ae = F_cudnn->arg_end(); ai != ae; ++ai) {
-    Argument* Arg = &*ai;
+                              ae = F_cudnn->arg_end();
+       ai != ae; ++ai) {
+    Argument *Arg = &*ai;
     if (Arg->getType()->isPointerTy()) {
       Value *Args[] = {Arg, TargetDeviceID};
-      CallInst::Create(hpvm_request_tensor,
-                       ArrayRef<Value*>(Args, 2),
-                       "", FI);
+      CallInst::Create(hpvm_request_tensor, ArrayRef<Value *>(Args, 2), "", FI);
     }
   }

   std::vector<IntrinsicInst *> IItoRemove;

-  for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) {
+  for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e;
+       ++i) {
     Instruction *I = &(*i);

     if (BuildDFG::isHPVMIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      //assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")
+      IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
+      // assert((II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")
       //  && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");

-      //if (!(II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")){
-      //continue; // skip non-tensor ops
+      // if
+      // (!(II->getCalledFunction()->getName()).startswith("llvm.hpvm.tensor")){
+      // continue; // skip non-tensor ops
       //}

      /********************* Handle VISC Tensor intrinsics ********************/
      switch (II->getIntrinsicID()) {

-      case Intrinsic::hpvm_tensor_convolution:
-      { /* llvm.hpvm.tensor.mul */
+      case Intrinsic::hpvm_tensor_convolution: { /* llvm.hpvm.tensor.mul */
        // Tensor mul is not in place.
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor convolution \n");

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));
        Args.push_back(II->getOperand(1));
        Args.push_back(II->getOperand(2));
        Args.push_back(II->getOperand(3));
        Args.push_back(II->getOperand(4));
        Args.push_back(II->getOperand(5));

-        Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
-        Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
+        Constant *conv_mode =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+        Constant *conv_precision =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
        Args.push_back(conv_mode);
        Args.push_back(conv_precision);

        // Create cudnn runtime function call
        FunctionCallee tensorConvolution;
        DECLARE(tensorConvolution);
-        CallInst* CI = CallInst::Create(tensorConvolution,
-                                        Args, "", II);
+        CallInst *CI = CallInst::Create(tensorConvolution, Args, "", II);

        // We can replace the call to hpvm.tensor.mul with the runtime call
        II->replaceAllUsesWith(CI);

        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
+      } break;

-      case Intrinsic::hpvm_tensor_group_convolution:
-      { /* llvm.hpvm.tensor.mul */
+      case Intrinsic::hpvm_tensor_group_convolution: { /* llvm.hpvm.tensor.mul
+                                                        */
        // Tensor mul is not in place.
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor convolution \n");

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));
        Args.push_back(II->getOperand(1));
        Args.push_back(II->getOperand(2));
        Args.push_back(II->getOperand(3));
        Args.push_back(II->getOperand(4));
        Args.push_back(II->getOperand(5));

-        Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
+        Constant *conv_mode =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
        Args.push_back(conv_mode);
        Args.push_back(II->getOperand(7));

        // Create cudnn runtime function call
        FunctionCallee tensorConvolution;
        DECLARE(tensorConvolution);
-        CallInst* CI = CallInst::Create(tensorConvolution,
-                                        Args, "", II);
+        CallInst *CI = CallInst::Create(tensorConvolution, Args, "", II);

        // We can replace the call to hpvm.tensor.mul with the runtime call
        II->replaceAllUsesWith(CI);

        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
+      } break;

-      case Intrinsic::hpvm_tensor_batchnorm:
-      { /* llvm.hpvm.tensor.batchnorm */
+      case Intrinsic::hpvm_tensor_batchnorm: { /* llvm.hpvm.tensor.batchnorm */
        // Tensor batchnorm is in place.
        // FIXME: Add Check for InPlace Analysis
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor batch normalization \n");
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor batch normalization \n");

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));
        Args.push_back(II->getOperand(1));
        Args.push_back(II->getOperand(2));
        Args.push_back(II->getOperand(3));
        Args.push_back(II->getOperand(4));
        Args.push_back(II->getOperand(5));

        // Create cudnn runtime function call
        FunctionCallee tensorBatchNorm;
        DECLARE(tensorBatchNorm);
-        CallInst* CI = CallInst::Create(tensorBatchNorm,
-                                        Args, "", II);
-        // We can replace the call to hpvm.tensor.batchnorm with the TensorRT call
+        CallInst *CI = CallInst::Create(tensorBatchNorm, Args, "", II);
+        // We can replace the call to hpvm.tensor.batchnorm with the TensorRT
+        // call
        II->replaceAllUsesWith(CI);

        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
+      } break;

-      case Intrinsic::hpvm_tensor_mul:
-      { /* llvm.hpvm.tensor.mul */
+      case Intrinsic::hpvm_tensor_mul: { /* llvm.hpvm.tensor.mul */
        // Tensor mul is not in place.
        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n");

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));
        Args.push_back(II->getOperand(1));

        // Create cudnn runtime function call
        FunctionCallee tensorGemmGPU;
        DECLARE(tensorGemmGPU);
-        CallInst* CI = CallInst::Create(tensorGemmGPU,
-                                        Args, "", II);
+        CallInst *CI = CallInst::Create(tensorGemmGPU, Args, "", II);

        // We can replace the call to hpvm.tensor.mul with the runtime call
        II->replaceAllUsesWith(CI);

        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::hpvm_tensor_add:
-      { /* llvm.hpvm.tensor.add */
+      } break;
+      case Intrinsic::hpvm_tensor_add: { /* llvm.hpvm.tensor.add */
        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n");
        // Tensor add(a,b) is in place for argument a.
        Value *Op = II->getOperand(0);
@@ -407,12 +390,13 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
        // Code generation cannot continue if this is false, because the target
        // only provides an in place operation

        // FIXME: remove this comment - must check for in-place
-        //assert(inplace &&
-        //       "Operand not valid for in place operation. Code gen aborted.\n");
+        // assert(inplace &&
+        //        "Operand not valid for in place operation. Code gen
+        //        aborted.\n");

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));
        Args.push_back(II->getOperand(1));
@@ -426,54 +410,55 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
+      } break;

      case Intrinsic::hpvm_tensor_pool_max:
-      case Intrinsic::hpvm_tensor_pool_mean:
-      { /* llvm.hpvm.tensor.relu */
+      case Intrinsic::hpvm_tensor_pool_mean: { /* llvm.hpvm.tensor.relu */
        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor_pool_max\n");

        // Argument list - tensorPooling(input, poolFunction, window_height,
-        //                               window_width, vertical_pad, horizontal_pad,
-        //                               vertical_stride, horizontal_stride);
-        std::vector<Value*> Args;
+        //                               window_width, vertical_pad,
+        //                               horizontal_pad, vertical_stride,
+        //                               horizontal_stride);
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));

-        int pool_type = 0;
-        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max){
+        int pool_type = 0;
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_max) {
          pool_type = 0;
-        }
-        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean){
+        }
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_pool_mean) {
          pool_type = 1;
-        }
-        Constant* constPoolType = ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type);
-        Args.push_back(constPoolType); // ID for max pool. Min/Avg have different IDs (non-zero)
-        Args.push_back(II->getOperand(1));
+        }
+        Constant *constPoolType =
+            ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type);
+        Args.push_back(constPoolType); // ID for max pool. Min/Avg have
+                                       // different IDs (non-zero)
+        Args.push_back(II->getOperand(1));
        Args.push_back(II->getOperand(2));
        Args.push_back(II->getOperand(3));
        Args.push_back(II->getOperand(4));
        Args.push_back(II->getOperand(5));
        Args.push_back(II->getOperand(6));

        // Create cudnn runtime function call
        FunctionCallee tensorPooling;
        DECLARE(tensorPooling);
-        CallInst* CI = CallInst::Create(tensorPooling, Args, "", II);
+        CallInst *CI = CallInst::Create(tensorPooling, Args, "", II);

-        // Replacing intrinsic result uses with the result of the tensor runtime operation
+        // Replacing intrinsic result uses with the result of the tensor runtime
+        // operation
        II->replaceAllUsesWith(CI);

        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
+      } break;

      case Intrinsic::hpvm_tensor_relu:
      case Intrinsic::hpvm_tensor_clipped_relu:
-      case Intrinsic::hpvm_tensor_tanh:
-      { /* llvm.hpvm.tensor.relu */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation functions \n");
+      case Intrinsic::hpvm_tensor_tanh: { /* llvm.hpvm.tensor.relu */
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling tensor activation functions \n");

        // Tensor relu(a) is in place for argument a.
        Value *Op = II->getOperand(0);
@@ -485,41 +470,38 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
               "Operand not valid for in place operation. Code gen aborted.\n");

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));

-        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu){
+        if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_relu) {
          // Create cudnn runtime function call
          FunctionCallee tensorRelu;
          DECLARE(tensorRelu);
          CallInst::Create(tensorRelu, Args, "", II);
-        }
-        else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_clipped_relu){
+        } else if (II->getIntrinsicID() ==
+                   Intrinsic::hpvm_tensor_clipped_relu) {
          // Create cudnn runtime function call
          //-- FunctionCallee tensorClippedRelu;
          FunctionCallee tensorRelu2;
          DECLARE(tensorRelu2);
          CallInst::Create(tensorRelu2, Args, "", II);
-        }
-        else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh){
+        } else if (II->getIntrinsicID() == Intrinsic::hpvm_tensor_tanh) {
          // Create cudnn runtime function call
          FunctionCallee tensorTanh;
-          errs()<<"tensorTanh Call = \n\n";
+          errs() << "tensorTanh Call = \n\n";
          DECLARE(tensorTanh);
-          //errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
+          // errs()<<"tensorTanh Call = "<<*tensorTanh<<"\l";
          CallInst::Create(tensorTanh, Args, "", II);
        }

        // We can replace the call to hpvm.tensor.relu with the 1st argument
        // that, due to in place operation, now contains the result
        II->replaceAllUsesWith(II->getOperand(0));

        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::hpvm_tensor_softmax:
-      { /* llvm.hpvm.tensor.softmax */
+      } break;
+      case Intrinsic::hpvm_tensor_softmax: { /* llvm.hpvm.tensor.softmax */
        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor softmax\n");
        // Tensor relu(a) is in place for argument a.
        Value *Op = II->getOperand(0);
@@ -532,7 +514,7 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
               "Operand not valid for in place operation. Code gen aborted.\n");

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));

        // Create cudnn runtime function call
@@ -545,17 +527,16 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
+      } break;

-      case Intrinsic::hpvm_node_id:
-      { /* llvm.hpvm.node.id */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling Node ID Intrinsic \n");
+      case Intrinsic::hpvm_node_id: { /* llvm.hpvm.node.id */
+        DEBUG(errs() << F_cudnn->getName()
+                     << "\t: Handling Node ID Intrinsic \n");
        // Get uint32 argument
        Value *Op = II->getOperand(0);

        // Argument list for the runtime call
-        std::vector<Value*> Args;
+        std::vector<Value *> Args;
        Args.push_back(II->getOperand(0));

        // Create hpvm-tensor-rt function call
@@ -565,10 +546,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
        // Mark to remove at the end
        IItoRemove.push_back(II);
-      }
-      break;
-
+      } break;
      default:
        llvm_unreachable("Unknown VISC Intrinsic!");
        break;
@@ -582,7 +561,8 @@ void CGT_CUDNN::codeGen(DFLeafNode* N) {
  // Traverse the vector backwards, otherwise definitions are deleted while
  // their subsequent uses are still around.
  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri) {
+                                                      re = IItoRemove.rend();
+       ri != re; ++ri) {
    DEBUG(errs() << "Erasing: " << **ri << "\n");
    errs() << "Erasing: " << **ri << "\n";
    (*ri)->eraseFromParent();
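
Every case in the switch above follows the same lowering pattern, and the reverse-order erase is needed because an intrinsic's result may still feed a later, not-yet-erased intrinsic. A condensed sketch of the pattern (helper name hypothetical, not a function in this file):

// Condensed sketch of the per-intrinsic lowering used by each case above.
static void lowerToRuntimeCall(IntrinsicInst *II, FunctionCallee RuntimeFn,
                               ArrayRef<Value *> Args,
                               std::vector<IntrinsicInst *> &IItoRemove) {
  // Insert the runtime call immediately before the intrinsic it replaces.
  CallInst *CI = CallInst::Create(RuntimeFn, Args, "", II);
  // Reroute uses of the intrinsic's result to the runtime call's result
  // (in-place ops like relu instead forward operand 0, as above).
  II->replaceAllUsesWith(CI);
  // Defer erasure: the function's instruction list is still being iterated,
  // hence the collect-then-erase-in-reverse scheme.
  IItoRemove.push_back(II);
}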
@@ -600,33 +580,31 @@ bool DFG2LLVM_CUDNN::runOnModule(Module &M) {
  // Get the In Place Analysis Results
  InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
-    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
+      (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
  // Print results
  printInPlaceDFGParameter(IPP);

-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
+  std::vector<DFInternalNode *> Roots = DFG.getRoots();

  // Visitor for Code Generation Graph Traversal
  CGT_CUDNN *CGTVisitor = new CGT_CUDNN(M, DFG, IPP);

  // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
+  for (auto rootNode : Roots) {
    // Initiate code generation for root DFNode
    CGTVisitor->visit(rootNode);
  }

-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
+  // TODO: Edit module epilogue to remove the VISC intrinsic declarations
  delete CGTVisitor;

  return true;
}

/******************************************************************************
 *                              Helper functions                              *
 ******************************************************************************/

} // End of namespace

char DFG2LLVM_CUDNN::ID = 0;
@@ -635,5 +613,3 @@ static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn",
                                       false /* does not modify the CFG */,
                                       true /* transformation,   *
                                             * not just analysis */);
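
Usage note: registered through RegisterPass this way, the backend would typically be driven via opt, e.g. opt -load <dfg2llvm-cudnn plugin>.so -dfg2llvm-cudnn in.ll -S -o out.ll; the plugin library name here is a guess and depends on how the build packages the pass.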