From b9838d8c90b99c7ab7d8b10dea8bc174a6e14f35 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Fri, 6 Dec 2019 00:17:03 -0600
Subject: [PATCH] Removing ApproxHPVM passes from ./lib/Transforms

---
 .../ApproxScheduler/ApproxScheduler.cpp       |  275 ---
 .../Transforms/ApproxScheduler/CMakeLists.txt |   12 -
 .../Transforms/ApproxScheduler/LLVMBuild.txt  |   21 -
 .../Transforms/DFG2LLVM_CUDNN/CMakeLists.txt  |   12 -
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp         |  609 -------
 .../DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports     |    0
 .../Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt   |   21 -
 .../DFG2LLVM_PROMISE/CMakeLists.txt           |   12 -
 .../DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp     | 1283 --------------
 .../DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports |    0
 .../Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt |   21 -
 .../DFG2LLVM_WrapperAPI/CMakeLists.txt        |   12 -
 .../DFG2LLVM_WrapperAPI.cpp                   | 1530 -----------------
 .../DFG2LLVM_WrapperAPI.exports               |    0
 .../DFG2LLVM_WrapperAPI/LLVMBuild.txt         |   21 -
 .../FuseHPVMTensorNodes/CMakeLists.txt        |   12 -
 .../FuseHPVMTensorNodes.cpp                   |  971 -----------
 .../FuseHPVMTensorNodes.exports               |    0
 .../FuseHPVMTensorNodes/LLVMBuild.txt         |   21 -
 .../InsertApproxInfo/CMakeLists.txt           |   12 -
 .../InsertApproxInfo/InsertApproxInfo.cpp     |  498 ------
 .../Transforms/InsertApproxInfo/LLVMBuild.txt |   21 -
 22 files changed, 5364 deletions(-)
 delete mode 100644 llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp
 delete mode 100644 llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports
 delete mode 100644 llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports
 delete mode 100644 llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt
 delete mode 100644 llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt
 delete mode 100644 llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp
 delete mode 100644 llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt

diff --git a/llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp b/llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp
deleted file mode 100644
index 7537b517bc..0000000000
--- a/llvm/lib/Transforms/ApproxScheduler/ApproxScheduler.cpp
+++ /dev/null
@@ -1,275 +0,0 @@
-//===------------------------ ApproxScheduler.cpp ------------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "ApproxScheduler"
-
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/IR/InstrTypes.h"
-#include <unordered_map>
-#include <dirent.h>
-#include <stdio.h>
-#include <sstream>
-#include <fstream>
-
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-using namespace inplacedfg;
-
-
-namespace {
-
-static cl::opt<std::string> category_input("category", cl::desc(" Hardware-agnostic ranking category {log, linear, quad} "));
-static cl::opt<int> rank_input("rank", cl::desc(" Hardware-agnostic rank given by autotuner "));
-
-
-struct ApproxMetrics{
-  std::string op_name;
-  std::string category;
-  unsigned int rank; // rank given by autotuner
-  double approx_level;
-  // Relative L-norm metrics
-  double relative_l1;
-  double relative_l2;
-  double relative_linf;
-  // Mean L-norm metrics
-  double mean_l1;
-  double mean_l2;
-  double mean_linf;
-};    
-
-  
-  
-  
-struct ApproxSchedulerWrapperPass : public ModulePass {
-  static char ID; // Pass identification, replacement for typeid
-  ApproxSchedulerWrapperPass() : ModulePass(ID) {}
-    
-public:
-  // Functions
-  bool runOnModule(Module &M);
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-};
-
-
-// Visitor for Code generation traversal (tree traversal for now)
-class ApproxScheduler : public CodeGenTraversal {
-
-private:
-
-  int rank; // Rank to use for scheduling - ranks added in operand bundles
-  std::string category; // category = {log, linear, quad}
-  
-  // Virtual Functions
-  void init() {}
-  void initRuntimeAPI() {}
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-  bool rankMatches(OperandBundleUse opBundle, std::string category, int rank);
-  ApproxMetrics* getApproxInfo(Instruction* I);
-  ApproxMetrics* loadApproxMetrics(OperandBundleUse opBundle);
-  
-  // Tracks the id of the tensor op processed
-  unsigned int currentID;
-
-public:
-  // Constructor
-  ApproxScheduler(Module &_M, BuildDFG &_DFG, std::string category, int rank);  
-  void run();
-
-};
-
-  
-
-void ApproxSchedulerWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<BuildDFG>();
-  AU.addPreserved<BuildDFG>();
-}
-
-    
-bool ApproxSchedulerWrapperPass::runOnModule(Module &M) {
-
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  std::string category = category_input.getValue();
-  int rank = rank_input.getValue();
-  
-  ApproxScheduler scheduler(M, DFG, category, rank);
-  scheduler.run();
-
-  return true;
-}
-
-
-  
-ApproxScheduler::ApproxScheduler(Module &_M, BuildDFG &_DFG, std::string category, int rank) :
-    CodeGenTraversal(_M, _DFG){
-
-  this->category = category;
-  this->rank = rank;
-}
-
-  
-void ApproxScheduler::run() {
-
-  errs() << "\n NOTE: Approximation-based scheduling transform \n";
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
-
-  // Iterate over all the DFGs
-  for (auto rootNode: Roots) {
-   this->visit(rootNode);
-  }
-
-  return;
-}
-
-
-/*** Analysis of internal node ***/
-void ApproxScheduler::codeGen(DFInternalNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-}
-
-
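-// Assumed operand-bundle layout, inferred from rankMatches() and
-// loadApproxMetrics() below (not documented in the original sources):
-// alternating (name, value) slots, e.g.
-//   "category", <string>, "rank", <i32>, "rel_l1", <float>, "rel_l2", <float>, ...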
-ApproxMetrics* ApproxScheduler::loadApproxMetrics(OperandBundleUse opBundle){
-
-    ApproxMetrics* approx_metrics = new ApproxMetrics;
-    for(unsigned int j = 0; j < opBundle.Inputs.size(); j = j + 2){
-
-      GlobalVariable* gv = dyn_cast<GlobalVariable>(opBundle.Inputs[j].get());
-      ConstantDataArray* constString = dyn_cast<ConstantDataArray>(gv->getInitializer());
-      std::string metric = std::string(constString->getAsCString().data());
-
-      if(metric == "rel_l1"){
-        double norm_value = dyn_cast<ConstantFP>(opBundle.Inputs[j+1].get())->getValueAPF().convertToDouble();
-        approx_metrics->relative_l1 = norm_value;
-        errs()<<"***relative_l1 = "<<approx_metrics->relative_l1<<"\n";
-      }
-
-      if(metric == "rel_l2"){
-        double norm_value = dyn_cast<ConstantFP>(opBundle.Inputs[j+1].get())->getValueAPF().convertToDouble();
-        approx_metrics->relative_l2 = norm_value;
-      }
-    }
-
-    return approx_metrics;
-}
-
-
-bool ApproxScheduler::rankMatches(OperandBundleUse opBundle, std::string category_in, int rank_in){
-
-  // Extracting value of the 'category' attribute
-  GlobalVariable* gv = dyn_cast<GlobalVariable>(opBundle.Inputs[1].get());
-  ConstantDataArray* constString = dyn_cast<ConstantDataArray>(gv->getInitializer());
-  std::string category = std::string(constString->getAsCString().data());
-  errs()<<"*category = "<<category<<"\n";
-
-  int rank = dyn_cast<ConstantInt>(opBundle.Inputs[3].get())->getZExtValue();
-  errs()<<"-rank = "<<rank<<"\n";
-
-  return (category == category_in && rank == rank_in);
-}
-  
-  
-ApproxMetrics* ApproxScheduler::getApproxInfo(Instruction* I){
-
-  CallSite CS(I);
-  if(CS.hasOperandBundles()){
-    errs()<<"CallSite has OperandBundles \n";
-
-    for(unsigned int i = 0; i < CS.getNumOperandBundles(); i++){
-      OperandBundleUse bundleUse = CS.getOperandBundleAt(i);
-      errs()<<"bundleUse -> getTagName() = "<<bundleUse.getTagName()<<"\n";
-
-      if(rankMatches(bundleUse, category, rank)){
-        return loadApproxMetrics(bundleUse);
-      }
-    
-      /*for(unsigned int j = 0; j < bundleUse.Inputs.size(); j++){
-	Value* bundleVal = bundleUse.Inputs[j].get();
-	errs()<<"Val = "<<*bundleVal<<"\n";
-      }
-      */
-	  
-    }	
-  }
-  else{
-    errs()<<"DOES NOT have OperandBundles \n";
-  }
-
-  assert("No Bundle Matched the provided rank and Category! \n");
-
-}
-  
-  
-/*** Analysis of leaf node ***/
-void ApproxScheduler::codeGen(DFLeafNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-
-  Function *F = N->getFuncPointer();
-  Module* M = F->getParent();
-
-  std::vector<ApproxMetrics*> metrics_list;
-  /**** Reading all tensor operations in the DFG Leaf Node ****/
-  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-    Instruction *I = &(*i);
-    errs()<<*I<<"\n";
-
-    if (BuildDFG::isViscIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      // FIXME: The assumption of only tensor intrinsics is restrictive
-      assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
-             && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
-      // NOTE: Get tensorOp name - the scheduling decisions are made per-operation type 
-      std::string intrinsic_id = std::string(II->getCalledFunction()->getName().data());
-      ApproxMetrics* approx_metrics = getApproxInfo(I);
-      metrics_list.push_back(approx_metrics);
-    }
-
-  }
-  
-}
-
-char ApproxSchedulerWrapperPass::ID = 0;
-static RegisterPass<ApproxSchedulerWrapperPass> X("approx-scheduler",
-  "Select target compute unit based on aprroximation metrics",
-  false /* does not modify the CFG */,
-  false /* not transformation, just analysis */);
-
-
-  
-} // End of namespace
-
diff --git a/llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt b/llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt
deleted file mode 100644
index 267ad1d859..0000000000
--- a/llvm/lib/Transforms/ApproxScheduler/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( ApproxScheduler
-  ApproxScheduler.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt b/llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt
deleted file mode 100644
index ccd8479c2e..0000000000
--- a/llvm/lib/Transforms/ApproxScheduler/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/ApproxScheduler/LLVMBuild.txt -----------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = ApproxScheduler
-parent = Transforms
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt
deleted file mode 100644
index dc98faafec..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMDFG2LLVM_CUDNN
-  DFG2LLVM_CUDNN.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
deleted file mode 100644
index abc4e9ef89..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
+++ /dev/null
@@ -1,609 +0,0 @@
-//=== DFG2LLVM_CUDNN.cpp ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#define ENABLE_ASSERTS
-
-#define DEBUG_TYPE "DFG2LLVM_CUDNN"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm-c/Core.h"
-#include "llvm/SupportVISC/VISCTimer.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include <sstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-
-using namespace inplacedfg;
-
-namespace {
-// Helper class declarations
-
-// DFG2LLVM_CUDNN - The first implementation.
-
-struct DFG2LLVM_CUDNN : public DFG2LLVM {
-  static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_CUDNN() : DFG2LLVM(ID) {}
-private:
-
-public:
-
-  void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<BuildDFG>();
-    AU.addRequired<InPlaceDFGAnalysisWrapper>();
-    AU.addPreserved<BuildDFG>();
-    AU.addPreserved<InPlaceDFGAnalysisWrapper>();
-  }
-
-  bool runOnModule(Module &M);
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class CGT_CUDNN : public CodeGenTraversal {
-
-private:
-  //Member variables
-  InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;
-
-  // VISC Runtime API and Tensor runtime API
-  Constant* llvm_hpvm_initTensorRt;
-  Constant* llvm_hpvm_cleanupTensorRt;
-  Constant* hpvm_request_tensor;
-
-  // Functions
-  bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N);
-
-
-
-  // Virtual Functions
-  void init();
-  void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-
-public:
-
-  // Constructor
-  CGT_CUDNN(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP)
-  : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
-    initRuntimeAPI();
-  }
-
-};
-
-bool CGT_CUDNN::isValidOperandForInPlaceOperation(Value *Op,
-                                                  Function *Fgen,
-                                                  DFNode *N) {
-
-  if (Argument *Arg = dyn_cast<Argument>(Op)) {
-    DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
-    assert((Arg->getParent() == Fgen) &&
-          "Extra Parameter in body of Function\n");
-    // Candidate parameter is a function argument
-    // In this case, consult the result of in place analysis
-    // Find position in arg list
-    unsigned pos = Arg->getArgNo();
-    // If this parameter cannot be used for in place operation
-    // code gen cannot continue
-    if (IPP->at(N)[pos]) {
-      DEBUG(errs() << *Arg << "\t: argument, suitable for in place\n");
-      return true;
-    } else {
-      DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n");
-      return false;
-    }
-  }
-  else {
-    // If it is not an argument, then it needs to be the result of
-    // another intrinsic. These are new objects that are allocated
-    // and consumed by the next intrinsic.
-    DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n");
-    if (dyn_cast<IntrinsicInst>(Op)) {
-      DEBUG(errs() << *Op << "\t: local, suitable for in place\n");
-      return true;
-    } else {
-      DEBUG(errs() << *Op << "\t: local, not suitable for in place\n");
-      return false;
-    }
-  }
-}
-
-
-void CGT_CUDNN::init() {
-}
-
-// Initialize the VISC runtime API. This makes it easier to insert these calls
-void CGT_CUDNN::initRuntimeAPI() {
-
-  // Load Runtime API Module
-  SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
-    DEBUG(errs() << Err.getMessage());
-  else
-    DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
-
-  // Get or insert Global declarations for
-  // - initialization
-  // - cleanup
-  // - request a tensor
-  DECLARE(llvm_hpvm_initTensorRt);
-  DECLARE(llvm_hpvm_cleanupTensorRt);
-  DECLARE(hpvm_request_tensor);
-
-  // Find visc.init and visc.cleanup calls, and add placeholder methods
-  // for initialization and cleanup of the hpvm tensor runtime
-
-  Function* VI = M.getFunction("llvm.visc.init");
-  assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n");
-  InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initTensorRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
-
-  Function* VC = M.getFunction("llvm.visc.cleanup");
-  assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once\n");
-  CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
-
-}
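-// Illustrative sketch (not from the original sources) of the IR this pass
-// emits around the existing visc marker calls; exact types depend on the
-// declarations pulled in by DECLARE():
-//   call void @llvm_hpvm_initTensorRt(i32 0)   ; inserted before visc.init
-//   ...
-//   call void @llvm_hpvm_cleanupTensorRt()     ; inserted before visc.cleanup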
-
-void CGT_CUDNN::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
-}
-
-  
-void CGT_CUDNN::codeGen(DFLeafNode* N) {
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-
-  // Generate code only if it has the right hint
-  if (!checkPreferredTarget(N, visc::CUDNN_TARGET)) {
-    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-    return;
-  }
-
-  // Get the function associated with the dataflow node
-  Function *F = N->getFuncPointer();
-  errs()<<"function name = "<< F->getName()<<"\n";
-
-  /* Removing HPVM in/out/inout function attributes */
-  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae; ai++){
-    Argument *Arg = &*ai;
-    if(Arg->hasAttribute(Attribute::In))
-      Arg->removeAttr(Attribute::In);
-    if(Arg->hasAttribute(Attribute::Out))
-      Arg->removeAttr(Attribute::Out);
-    if(Arg->hasAttribute(Attribute::InOut))
-      Arg->removeAttr(Attribute::InOut);    
-  }
-
-  // Look up if we have visited this function before. If we have, then just
-  // get the cloned function pointer from DFNode. Otherwise, create the cloned
-  // function and add it to the DFNode GenFunc.
-  Function *F_cudnn = N->getGenFuncForTarget(visc::CUDNN_TARGET);
-
-  assert((F_cudnn == NULL) &&
-         "Error: Visiting a node for which code already generated");
-  
-  // Clone the function
-  ValueToValueMapTy VMap;
-  std::string FName(F->getName().data());
-  F_cudnn = CloneFunction(F, VMap);
-  F_cudnn->setName(FName + "_cudnn");
-  errs()<<"Cloned function name2 = "<<F_cudnn->getName()<<"\n";
-  F_cudnn->removeFromParent();  
-  M.getFunctionList().push_back(F_cudnn);
-
-  N->addGenFunc(F_cudnn, visc::CUDNN_TARGET, true);
-
-  // Adding nounwind to generated function : FIXME: needed?
-  DEBUG(errs() << "Adding nounwind to generated function\n");
-  F_cudnn->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
-
-  // Add llvm_visc_requestTensor calls for every pointer argument of the function
-  // (they are all expected to be tensors), at the beginning of the function.
-  // This is the first instruction of the function, insert them before this
-  Instruction* FI = &*(F_cudnn->getEntryBlock().begin());
-
-  // In this backend, the target device is GPU, represented by i32 1.
-  ConstantInt *TargetDeviceID =
-    ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
-
-  for (Function::arg_iterator ai = F_cudnn->arg_begin(),
-       ae = F_cudnn->arg_end(); ai != ae; ++ai) {
-    Argument* Arg = &*ai;
-    if (Arg->getType()->isPointerTy()) {
-      Value *Args[] = {Arg, TargetDeviceID};
-      CallInst::Create(hpvm_request_tensor,
-                       ArrayRef<Value*>(Args, 2),
-                       "", FI);
-    }
-  }
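-  // Illustrative result (not from the original sources): each pointer
-  // argument of the generated function now begins with a call like
-  //   call void @hpvm_request_tensor(i8* %t, i32 1)  ; 1 = GPU device id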
-
-  std::vector<IntrinsicInst *> IItoRemove;
-
-  for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) {
-    Instruction *I = &(*i);
-
-    if (BuildDFG::isViscIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
-        && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
-      /********************* Handle VISC Tensor intrinsics ********************/
-      switch (II->getIntrinsicID()) {
-
-      case Intrinsic::visc_tensor_convolution:
-      { /* llvm.visc.tensor.convolution */
-        // Tensor convolution is not in place.
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor convolution \n");
-
-        // Argument list for the runtime call
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-        Args.push_back(II->getOperand(1));
-        Args.push_back(II->getOperand(2));
-        Args.push_back(II->getOperand(3));
-        Args.push_back(II->getOperand(4));
-        Args.push_back(II->getOperand(5));
-
-        Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
-        Constant* conv_precision = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
-
-        Args.push_back(conv_mode);
-        Args.push_back(conv_precision);
-
-        // Create cudnn runtime function call
-        Constant* tensorConvolution;
-        DECLARE(tensorConvolution);
-
-        CallInst* CI = CallInst::Create(tensorConvolution,
-                                        Args, "", II);
-        // We can replace the call to visc.tensor.convolution with the runtime call
-        II->replaceAllUsesWith(CI);
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-
-      case Intrinsic::visc_tensor_group_convolution:
-      { /* llvm.visc.tensor.group.convolution */
-        // Tensor group convolution is not in place.
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor group convolution \n");
-
-        // Argument list for the runtime call
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-        Args.push_back(II->getOperand(1));
-        Args.push_back(II->getOperand(2));
-        Args.push_back(II->getOperand(3));
-        Args.push_back(II->getOperand(4));
-        Args.push_back(II->getOperand(5));
-
-        Constant* conv_mode = ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
-
-        Args.push_back(conv_mode);
-        Args.push_back(II->getOperand(7));
-
-        // Create cudnn runtime function call
-        Constant* tensorConvolution;
-        DECLARE(tensorConvolution);
-
-        CallInst* CI = CallInst::Create(tensorConvolution,
-                                        Args, "", II);
-        // We can replace the call to visc.tensor.group.convolution with the runtime call
-        II->replaceAllUsesWith(CI);
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-
-      case Intrinsic::visc_tensor_batchnorm:
-      { /* llvm.hpvm.tensor.batchnorm */
-        // Tensor batchnorm is in place.
-        // FIXME: Add check for InPlace analysis
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor batch normalization \n");
-
-        // Argument list for the runtime call
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-        Args.push_back(II->getOperand(1));
-        Args.push_back(II->getOperand(2));
-        Args.push_back(II->getOperand(3));
-        Args.push_back(II->getOperand(4));
-        Args.push_back(II->getOperand(5));
-
-        // Create cudnn runtime function call
-        Constant* tensorBatchNorm;
-        DECLARE(tensorBatchNorm);
-
-        CallInst* CI = CallInst::Create(tensorBatchNorm,
-                                        Args, "", II);
-        // We can replace the call to hpvm.tensor.batchnorm with the TensorRT call
-        II->replaceAllUsesWith(CI);
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-
-      
-      case Intrinsic::visc_tensor_mul:
-      { /* llvm.hpvm.tensor.mul */
-        // Tensor mul is not in place.
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n");
-
-        // Argument list for the runtime call
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-        Args.push_back(II->getOperand(1));
-
-        // Create cudnn runtime function call
-        Constant* tensorGemmGPU;
-        DECLARE(tensorGemmGPU);
-	
-        CallInst* CI = CallInst::Create(tensorGemmGPU,
-                                        Args, "", II);
-        // We can replace the call to hpvm.tensor.mul with the runtime call
-        II->replaceAllUsesWith(CI);
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::visc_tensor_add:
-      { /* llvm.hpvm.tensor.add */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n");
-        // Tensor add(a,b) is in place for argument a.
-        Value *Op = II->getOperand(0);
-
-        // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N);
-        // Code generation cannot continue if this is false, because the target
-        // only provides an in place operation
-
-	// FIXME: remove this comment - must check for in-place
-        //assert(inplace &&
-        //       "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-        Args.push_back(II->getOperand(1));
-
-        // Create cudnn runtime function call
-        Constant* tensorAdd;
-        DECLARE(tensorAdd);
-        CallInst::Create(tensorAdd, Args, "", II);
-        // We can replace the call to hpvm.tensor.add with the 1st argument
-        // that, due to in place operation, now contains the result
-        II->replaceAllUsesWith(II->getOperand(0));
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::visc_tensor_pool_max:
-      case Intrinsic::visc_tensor_pool_mean:
-      { /* llvm.visc.tensor.pool.max / llvm.visc.tensor.pool.mean */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor pooling \n");
-
-        // Argument list - tensorPooling(input, poolFunction, window_height, window_width,
-        //                               vertical_pad, horizontal_pad, vertical_stride, horizontal_stride);
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-
-        int pool_type = 0;
-        if (II->getIntrinsicID() == Intrinsic::visc_tensor_pool_max){
-          pool_type = 0;
-        }
-        if (II->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean){
-          pool_type = 1;
-        }
-
-        Constant* constPoolType = ConstantInt::get(Type::getInt32Ty(M.getContext()), pool_type);
-        Args.push_back(constPoolType); // ID for max pool; mean/min pools have different (non-zero) IDs
-        Args.push_back(II->getOperand(1));
-        Args.push_back(II->getOperand(2));
-        Args.push_back(II->getOperand(3));
-        Args.push_back(II->getOperand(4));
-        Args.push_back(II->getOperand(5));
-        Args.push_back(II->getOperand(6));
-
-        // Create cudnn runtime function call
-        Constant* tensorPooling;
-        DECLARE(tensorPooling);
-        CallInst* CI = CallInst::Create(tensorPooling, Args, "", II);
-
-        // Replace intrinsic result uses with the result of the tensor runtime operation
-        II->replaceAllUsesWith(CI);
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-      
-      case Intrinsic::visc_tensor_relu:
-      case Intrinsic::visc_tensor_clipped_relu:
-      case Intrinsic::visc_tensor_tanh:
-      { /* llvm.visc.tensor.relu / clipped.relu / tanh */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor activation functions \n");
-        // Tensor relu(a) is in place for argument a.
-        Value *Op = II->getOperand(0);
-
-        // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N);
-        // Code generation cannot continue if this is false, because the target
-        // only provides an in place operation
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-
-        if (II->getIntrinsicID() == Intrinsic::visc_tensor_relu){
-          // Create cudnn runtime function call
-          Constant* tensorRelu;
-          DECLARE(tensorRelu);
-          CallInst::Create(tensorRelu, Args, "", II);
-        }
-        else if (II->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu){
-          // Create cudnn runtime function call
-          Constant* tensorRelu2;
-          DECLARE(tensorRelu2);
-          CallInst::Create(tensorRelu2, Args, "", II);
-        }
-        else if (II->getIntrinsicID() == Intrinsic::visc_tensor_tanh){
-          // Create cudnn runtime function call
-          Constant* tensorTanh;
-          DECLARE(tensorTanh);
-          CallInst::Create(tensorTanh, Args, "", II);
-        }
-
-        // We can replace the call to the intrinsic with the 1st argument
-        // that, due to in place operation, now contains the result
-        II->replaceAllUsesWith(II->getOperand(0));
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::visc_tensor_softmax:
-      { /* llvm.visc.tensor.softmax */
-        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor softmax\n");
-        // Tensor relu(a) is in place for argument a.
-        Value *Op = II->getOperand(0);
-
-        // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F_cudnn, N);
-        // Code generation cannot continue if this is false, because the target
-        // only provides an in place operation
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
-        std::vector<Value*> Args;
-        Args.push_back(II->getOperand(0));
-
-        // Create cudnn runtime function call
-        Constant* tensorSoftmax;
-        DECLARE(tensorSoftmax);
-        CallInst::Create(tensorSoftmax, Args, "", II);
-        // We can replace the call to hpvm.tensor.softmax with the 1st argument
-        // that, due to in place operation, now contains the result
-        II->replaceAllUsesWith(II->getOperand(0));
-
-        // Mark to remove at the end
-        IItoRemove.push_back(II);
-      }
-      break;
-      default:
-        llvm_unreachable("Unknown VISC Intrinsic!");
-        break;
-      }
-    }
-  }
-
-
-  // We need to do this explicitly: DCE pass may not remove them.
-  // Traverse the vector backwards, otherwise definitions are deleted while
-  // their subsequent uses are still around.
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri) {
-    DEBUG(errs() << "Erasing: " << **ri << "\n");
-    errs() << "Erasing: " << **ri << "\n";
-    (*ri)->eraseFromParent();
-  }
-
-  return;
-}
-
-bool DFG2LLVM_CUDNN::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_CUDNN PASS\n";
-
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  // Get the In Place Analysis Results
-  InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
-    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
-  // Print results
-  printInPlaceDFGParameter(IPP);
-
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
- 
-  // Visitor for Code Generation Graph Traversal
-  CGT_CUDNN *CGTVisitor = new CGT_CUDNN(M, DFG, IPP);
-
-  // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
-    // Initiate code generation for root DFNode
-    CGTVisitor->visit(rootNode);
-  }
-
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
-  delete CGTVisitor;
-
-  return true;
-}
-
-
-/******************************************************************************
- *                              Helper functions                              *
- ******************************************************************************/
-
-
-} // End of namespace
-
-char DFG2LLVM_CUDNN::ID = 0;
-static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn",
-                                      "Dataflow Graph to LLVM for CUDNN Pass",
-                                      false /* does not modify the CFG */,
-                                      true /* transformation,   *
-                                            * not just analysis */);
-
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt
deleted file mode 100644
index 1579b2fc47..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt ------------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = DFG2LLVM_CUDNN
-parent = Transforms
diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt
deleted file mode 100644
index 5b5d2677d0..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_PROMISE/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMDFG2LLVM_PROMISE
-  DFG2LLVM_PROMISE.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp b/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
deleted file mode 100644
index 184f92910a..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
+++ /dev/null
@@ -1,1283 +0,0 @@
-//=== DFG2LLVM_PROMISE.cpp ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#define ENABLE_ASSERTS
-
-#define DEBUG_TYPE "DFG2LLVM_PROMISE"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm-c/Core.h"
-#include "llvm/SupportVISC/VISCTimer.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include <sstream>
-#include <fstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-
-namespace {
-
-cl::opt<std::string> QuantizationInputsFilename(
-  "quantization-levels-filename",
-  cl::desc("<PROMISE quantization levels input file (path)>"),
-  cl::value_desc("filename"),
-  cl::Required);
-
-// Helper class declarations
-
-// State machine definition for pattern identification
-
-/* An assumption is made for the PROMISE simulator:                           *
- * a leaf node will contain consecutive operations that will map to a         *
- * single PROMISE simulator call.                                             *
- *                                                                            *
- * To relax that assumption, the states that correspond to valid patterns     *
- * (FullyConnectedLayer_(2,3,x), ConvolutionLayer_(2,3,4,x))                  *
- * can invoke codeGen when detecting the beginning of a new pattern, then     *
- * clear the collected IIs and Args, then go to initial and invoke its        *
- * transition.                                                                */
-
-class AbstractState;
-
-class CodeGenStateMachine {
-private:
-  Module *M;
-  Module *RtM;
-
-  std::ifstream &qin; // Quantization levels input stream reference
-  std::vector<Value*> Args;
-  std::vector<IntrinsicInst*> IIs;
-  AbstractState *current;
-
-public:
-  CodeGenStateMachine(Module *, Module *, std::ifstream &);
-
-  void setCurrent(AbstractState *s) {
-    current = s;
-  }
-
-  void transition(IntrinsicInst *II);
-
-  Module *getModule() {
-    return M;
-  }
-
-  void getNextQuantizationLevel(float &ql) {
-    qin >> ql;
-  }
-
-  void addArgument(Value *Arg) {
-    Args.push_back(Arg);
-  }
-
-  void addIntrinsicInst(IntrinsicInst *II) {
-    IIs.push_back(II);
-  }
-
-  IntrinsicInst *getIntrinsicInstAt(unsigned idx) {
-    return IIs[idx];
-  }
-
-  void codeGen();
-
-};
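-
-/* Minimal usage sketch (illustrative; leafNodeIntrinsics is a hypothetical   *
- * container of a leaf node's tensor intrinsics in program order):            *
- *                                                                            *
- *   CodeGenStateMachine CGM(&M, RtM, qin);                                   *
- *   for (IntrinsicInst *II : leafNodeIntrinsics)                             *
- *     CGM.transition(II);                                                    *
- *   CGM.transition(nullptr); // end of stream selects the final state        *
- *   CGM.codeGen();                                                           *
- */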
-
-class AbstractState {
-public:
-  enum ID
-  {
-    INITIAL_STATE,
-    FULLY_CONNECTED_LAYER_1,
-    FULLY_CONNECTED_LAYER_2,
-    FULLY_CONNECTED_LAYER_3,
-    FULLY_CONNECTED_LAYER,
-    CONVOLUTION_LAYER_1,
-    CONVOLUTION_LAYER_2,
-    CONVOLUTION_LAYER_3,
-    CONVOLUTION_LAYER_4,
-    CONVOLUTION_LAYER,
-    NO_PATTERN,
-  };
-
-protected:
-  enum ID StateID;
-
-public:
-  enum ID getStateID() {
-    return StateID;
-  }
-
-  virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0;
-  virtual ~AbstractState() {}
-};
-
-class InitialState : public AbstractState {
-public:
-  InitialState() {
-    StateID = ID::INITIAL_STATE;
-    DEBUG(errs() << "new InitialState\n");
-  }
-  ~InitialState() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_1 : public AbstractState {
-public:
-  FullyConnectedLayer_1() {
-    StateID = ID::FULLY_CONNECTED_LAYER_1;
-    DEBUG(errs() << "new FullyConnectedLayer_1\n");
-  }
-  ~FullyConnectedLayer_1() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_2 : public AbstractState {
-public:
-  FullyConnectedLayer_2() {
-    StateID = ID::FULLY_CONNECTED_LAYER_2;
-    DEBUG(errs() << "new FullyConnectedLayer_2\n");
-  }
-  ~FullyConnectedLayer_2() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_3 : public AbstractState {
-public:
-  FullyConnectedLayer_3() {
-    StateID = ID::FULLY_CONNECTED_LAYER_3;
-    DEBUG(errs() << "new FullyConnectedLayer_3\n");
-  }
-  ~FullyConnectedLayer_3() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer : public AbstractState {
-public:
-  FullyConnectedLayer() {
-    StateID = ID::FULLY_CONNECTED_LAYER;
-    DEBUG(errs() << "new FullyConnectedLayer\n");
-  }
-  ~FullyConnectedLayer() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_1 : public AbstractState {
-public:
-  ConvolutionLayer_1() {
-    StateID = ID::CONVOLUTION_LAYER_1;
-    DEBUG(errs() << "new ConvolutionLayer_1\n");
-  }
-  ~ConvolutionLayer_1() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_2 : public AbstractState {
-public:
-  ConvolutionLayer_2() {
-    StateID = ID::CONVOLUTION_LAYER_2;
-    DEBUG(errs() << "new ConvolutionLayer_2\n");
-  }
-  ~ConvolutionLayer_2() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_3 : public AbstractState {
-public:
-  ConvolutionLayer_3() {
-    StateID = ID::CONVOLUTION_LAYER_3;
-    DEBUG(errs() << "new ConvolutionLayer_3\n");
-  }
-  ~ConvolutionLayer_3() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_4 : public AbstractState {
-public:
-  ConvolutionLayer_4() {
-    StateID = ID::CONVOLUTION_LAYER_4;
-    DEBUG(errs() << "new ConvolutionLayer_4\n");
-  }
-  ~ConvolutionLayer_4() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer : public AbstractState {
-public:
-  ConvolutionLayer() {
-    StateID = ID::CONVOLUTION_LAYER;
-    DEBUG(errs() << "new ConvolutionLayer\n");
-  }
-  ~ConvolutionLayer() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class NoPattern : public AbstractState {
-public:
-  NoPattern() {
-    StateID = ID::NO_PATTERN;
-    DEBUG(errs() << "new NoPattern\n");
-  }
-  ~NoPattern() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_convolution:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // conv input
-
-        // Read quantization levels for input
-        float i_min, i_max;
-        Mch->getNextQuantizationLevel(i_min);
-        Mch->getNextQuantizationLevel(i_max);
-        errs() << "i_min: " << i_min << "\n";
-        errs() << "i_max: " << i_max << "\n";
-
-        // Create associated arguments for the quantization levels
-        Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_min);
-//      errs() << "IminC : "
-//             << dyn_cast<ConstantFP>(IminC)->getValueAPF().convertToFloat()
-//             << "\n";
-        Mch->addArgument(IminC);
-        Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_max);
-        Mch->addArgument(ImaxC);
-
-        Mch->addArgument(II->getOperand(1)); // conv kernel
-
-        // Read quantization levels for filter
-        float w_min, w_max;
-        Mch->getNextQuantizationLevel(w_min);
-        Mch->getNextQuantizationLevel(w_max);
-        errs() << "w_min: " << w_min << "\n";
-        errs() << "w_max: " << w_max << "\n";
-        Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_min);
-        Mch->addArgument(WminC);
-        Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_max);
-        Mch->addArgument(WmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_1());
-        }
-        break;
-      case Intrinsic::visc_tensor_mul:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // 1st gemm input
-
-        // Read quantization levels for input
-        float i_min, i_max;
-        Mch->getNextQuantizationLevel(i_min);
-        Mch->getNextQuantizationLevel(i_max);
-        errs() << "i_min: " << i_min << "\n";
-        errs() << "i_max: " << i_max << "\n";
-
-        Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_min);
-        Mch->addArgument(IminC);
-        Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_max);
-        Mch->addArgument(ImaxC);
-
-        Mch->addArgument(II->getOperand(1)); // 2nd gemm input
-
-        // Read quantization levels for weight
-        float w_min, w_max;
-        Mch->getNextQuantizationLevel(w_min);
-        Mch->getNextQuantizationLevel(w_max);
-        errs() << "w_min: " << w_min << "\n";
-        errs() << "w_max: " << w_max << "\n";
-
-        Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_min);
-        Mch->addArgument(WminC);
-        Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_max);
-        Mch->addArgument(WmaxC);
-
-        Mch->setCurrent(new FullyConnectedLayer_1());
-        }
-        break;
-      default: // Other HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-    delete this;
-  } // else: no HPVM intrinsic received; remain in the initial state
-}
-
-void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_add:
-        {
-        IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0);
-        assert((MulII == II->getOperand(0)) &&
-               "Output of mul must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-
-        // Read quantization levels for input
-        float b_min, b_max;
-        Mch->getNextQuantizationLevel(b_min);
-        Mch->getNextQuantizationLevel(b_max);
-        errs() << "b_min: " << b_min << "\n";
-        errs() << "b_max: " << b_max << "\n";
-
-        Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_min);
-        Mch->addArgument(BminC);
-        Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_max);
-        Mch->addArgument(BmaxC);
-
-        Mch->setCurrent(new FullyConnectedLayer_2());
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_tanh:
-        {
-        // Type of activation : TanH
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_relu:
-        {
-        // Type of activation : ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_clipped_relu:
-        {
-        // Type of activation : Clipped ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      default: // No activation, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    // Read quantization levels for output
-    float out_min, out_max;
-    Mch->getNextQuantizationLevel(out_min);
-    Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-    Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_min);
-    Mch->addArgument(OutminC);
-    Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_max);
-    Mch->addArgument(OutmaxC);
-
-    Mch->setCurrent(new FullyConnectedLayer());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (!II) { // End of instruction stream
-    Mch->setCurrent(new FullyConnectedLayer());
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer::transition(CodeGenStateMachine *Mch,
-                                     IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    Mch->setCurrent(new NoPattern());
-    delete this;
-  }
-}
-
-void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_add:
-        {
-        IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
-        assert((ConvII == II->getOperand(0)) &&
-               "Output of conv must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-        // Read quantization levels for bias
-        float b_min, b_max;
-        Mch->getNextQuantizationLevel(b_min);
-        Mch->getNextQuantizationLevel(b_max);
-        errs() << "b_min: " << b_min << "\n";
-        errs() << "b_max: " << b_max << "\n";
-
-        Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_min);
-        Mch->addArgument(BminC);
-        Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_max);
-        Mch->addArgument(BmaxC);
-
-        Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-
-        Mch->setCurrent(new ConvolutionLayer_2());
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else {
-    // No addition
-    Mch->addArgument(ConstantPointerNull::get(
-                     Type::getInt8PtrTy(Mch->getModule()->getContext())));
-    // Still need to add the quantization constants - and remove them from file
-    float b_min, b_max;
-    Mch->getNextQuantizationLevel(b_min);
-    Mch->getNextQuantizationLevel(b_max);
-        errs() << "b_min: " << b_min << "\n";
-        errs() << "b_max: " << b_max << "\n";
-    Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                      (double) b_min);
-    Mch->addArgument(BminC);
-    Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                      (double) b_max);
-    Mch->addArgument(BmaxC);
-
-    // Pass the convolution numeric arguments through unchanged FIXME???
-    IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
-    Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
-    Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
-    Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
-    Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    // Read quantization levels for output
-    float out_min, out_max;
-    Mch->getNextQuantizationLevel(out_min);
-    Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-    Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_min);
-    Mch->addArgument(OutminC);
-    Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_max);
-    Mch->addArgument(OutmaxC);
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_tanh:
-        {
-        // Type of activation: TanH. The activation-type argument (0) is added
-        // later (after the pooling arguments) by ConvolutionLayer_3, or at the
-        // end of the instruction stream.
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_relu:
-        {
-        // Type of activation: ReLU. Argument (1) is added later, as above.
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_clipped_relu:
-        {
-        // Type of activation: Clipped ReLU. Argument (2) is added later, as above.
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_max:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_min:
-        {
-        // pool min; FIXME: is pooling mode 2 (min) supported by the runtime?
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_mean:
-        {
-        // pool mean
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      default: // No activation, No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    // Read quantization levels for output
-    float out_min, out_max;
-    Mch->getNextQuantizationLevel(out_min);
-    Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-    Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_min);
-    Mch->addArgument(OutminC);
-    Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_max);
-    Mch->addArgument(OutmaxC);
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_pool_max:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_min:
-        {
-        // pool min; FIXME: is pooling mode 2 (min) supported by the runtime?
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_mean:
-        {
-        // pool mean
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      default: // No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-    // Revisit last intrinsic, to add argument for activation operation
-    IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-    // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-    Intrinsic::ID ActIID = ActII->getIntrinsicID();
-    if (ActIID == Intrinsic::visc_tensor_tanh) {
-      Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    } else if (ActIID == Intrinsic::visc_tensor_relu) {
-      Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-    } else { // ActIID == Intrinsic::visc_tensor_clipped_relu
-      Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-    }
-
-    // Read quantization levels for output
-    float out_min, out_max;
-    Mch->getNextQuantizationLevel(out_min);
-    Mch->getNextQuantizationLevel(out_max);
-    errs() << "out_min: " << out_min << "\n";
-    errs() << "out_max: " << out_max << "\n";
-
-    Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_min);
-    Mch->addArgument(OutminC);
-    Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_max);
-    Mch->addArgument(OutmaxC);
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (!II) { // End of instruction stream
-    Mch->setCurrent(new ConvolutionLayer());
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void ConvolutionLayer::transition(CodeGenStateMachine *Mch,
-                                  IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    Mch->setCurrent(new NoPattern());
-    delete this;
-  }
-}
-
-void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {}
-
-CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM, std::ifstream &_qin) :
-  M(_M), RtM(_RtM), qin(_qin) {
-  current = new InitialState();
-}
-
-void CodeGenStateMachine::transition(IntrinsicInst *II) {
-  current->transition(this, II);
-}
-
-void CodeGenStateMachine::codeGen() {
-
-  if ((current->getStateID() != AbstractState::ID::FULLY_CONNECTED_LAYER) &&
-      (current->getStateID() != AbstractState::ID::CONVOLUTION_LAYER)) {
-    // Not a valid instruction sequence.
-    assert(false && "Unsupported instruction sequence for the PROMISE simulator\n");
-  }
-
-  // We have a valid instruction sequence.
-  // Make sure that the instruction sequence can be translated:
-  // each instruction's result must be used only by the next one in sequence.
-  // (p + 1 < size() avoids unsigned underflow if IIs were ever empty.)
-  for (unsigned p = 0; p + 1 < IIs.size(); p++) {
-    IntrinsicInst *II = IIs[p];
-    assert((II->hasOneUse()) &&
-          "Instruction sequence does not fit expected pattern: not single use\n");
-
-    Value::user_iterator ui = II->user_begin(); // The only use
-    assert((*ui == IIs[p+1]) &&
-           "Instruction sequence does not fit expected pattern: not used by next instruction\n");
-  }
-
-  // Create corresponding PROMISE simulator call
-  CallInst *CI;
-  switch (current->getStateID()) {
-    case AbstractState::ID::CONVOLUTION_LAYER:
-      {
-        Constant* ConvLayer_PROMISE =
-          M->getOrInsertFunction(StringRef("ConvLayer_PROMISE"),
-                 RtM->getFunction(StringRef("ConvLayer_PROMISE"))->getFunctionType());
-        DEBUG(errs() << *ConvLayer_PROMISE);
-
-        // FIXME: get last argument from some intrinsic. For now, 7
-        Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7));
-        // Create PROMISE simulator function call
-        CI = CallInst::Create(ConvLayer_PROMISE, Args, "");
-      }
-      break;
-    case AbstractState::ID::FULLY_CONNECTED_LAYER:
-      {
-        Constant* FCLayer_PROMISE =
-          M->getOrInsertFunction(StringRef("FCLayer_PROMISE"),
-              RtM->getFunction(StringRef("FCLayer_PROMISE"))->getFunctionType());
-        DEBUG(errs() << *FCLayer_PROMISE);
-
-        // FIXME: get last argument from some intrinsic. For now, 7
-        Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7));
-        // Create PROMISE simulator function call
-        CI = CallInst::Create(FCLayer_PROMISE, Args, "");
-      }
-      break;
-    default:
-      llvm_unreachable("Unexpected CodeGenStateMachine State\n");
-      break;
-  }
-
-  // Insert new call and replace all uses of pattern result with
-  // the PROMISE simulator call
-  IntrinsicInst *IIlast = *(IIs.rbegin());
-  CI->insertBefore(IIlast);
-  IIlast->replaceAllUsesWith(CI);
-
-  // Remove the instructions we translated to the simulator call.
-  // Traverse the vector backwards, otherwise definitions are deleted while
-  // their subsequent uses are still around.
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(),
-       re = IIs.rend(); ri != re; ++ri) {
-    DEBUG(errs() << "Erasing: " << **ri << "\n");
-    (*ri)->eraseFromParent();
-  }
-errs() << "****** GenF:\n" << *(CI->getParent()->getParent());
-
-}
-
-// DFG2LLVM_PROMISE - The first implementation.
-
-struct DFG2LLVM_PROMISE : public DFG2LLVM {
-  static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_PROMISE() : DFG2LLVM(ID) {}
-private:
-
-public:
-
-  void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<BuildDFG>();
-    AU.addPreserved<BuildDFG>();
-  }
-
-  bool runOnModule(Module &M);
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class CGT_PROMISE : public CodeGenTraversal {
-
-private:
-  //Member variables
-  std::ifstream qin;
-
-  // VISC Runtime API and Tensor runtime API
-  Constant* llvm_hpvm_initTensorRt;
-  Constant* llvm_hpvm_cleanupTensorRt;
-  Constant* hpvm_request_tensor;
-
-  // Functions
-
-  // Virtual Functions
-  void init();
-  void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-
-public:
-
-  // Constructor
-  CGT_PROMISE(Module &_M, BuildDFG &_DFG, std::string &_str) : CodeGenTraversal(_M, _DFG) {
-    qin.open(_str.c_str());
-    assert(qin && "Failed to open quantization levels input file\n"); 
-    initRuntimeAPI();
-  }
-
-  ~CGT_PROMISE() {
-    qin.close();
-  }
-
-};
-
-void CGT_PROMISE::init() {
-  // FIXME: what to do here? If anything?
-}
-
-// Initialize the VISC runtime API. This makes it easier to insert these calls
-void CGT_PROMISE::initRuntimeAPI() {
-
-  // Load Runtime API Module
-  SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
-    DEBUG(errs() << Err.getMessage());
-  else
-    DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
-
-  // Get or insert Global declarations for
-  // - initialization
-  // - cleanup
-  // - request a tensor
-  DECLARE(llvm_hpvm_initTensorRt);
-  DECLARE(llvm_hpvm_cleanupTensorRt);
-  DECLARE(hpvm_request_tensor);
-
-  // Find visc.init and visc.cleanup calls, and add placeholder methods
-  // for initialization and cleanup of the hpvm tensor runtime
-
-  Function* VI = M.getFunction("llvm.visc.init");
-  assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n");
-  InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initTensorRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
-
-  Function* VC = M.getFunction("llvm.visc.cleanup");
-  assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once\n");
-  CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
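-
-  // Illustrative result (symbol names assumed to be exported by
-  // tensor_runtime.ll; exact signatures come from that module):
-  //   call void @llvm_hpvm_initTensorRt(i32 0)   ; inserted before llvm.visc.init
-  //   call void @llvm_hpvm_cleanupTensorRt()     ; inserted before llvm.visc.cleanup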
-
-}
-
-void CGT_PROMISE::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
-}
-
-void CGT_PROMISE::codeGen(DFLeafNode* N) {
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-
-  // Generate code only if it has the right hint
-  if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) {
-    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-    return;
-  }
-
-  // Get the function associated with the dataflow node
-  Function *F = N->getFuncPointer();
-errs() << "Node Function: " << *F << "\n";
-  // Look up if we have visited this function before. If we have, then just
-  // get the cloned function pointer from DFNode. Otherwise, create the cloned
-  // function and add it to the DFNode GenFunc.
-  Function *F_promise = N->getGenFuncForTarget(visc::PROMISE_TARGET);
-
-  assert((F_promise == NULL) &&
-         "Error: Visiting a node for which code has already been generated");
-
-  // Clone the function
-  ValueToValueMapTy VMap;
-  std::string FName = F->getName().str();
-  F_promise = CloneFunction(F, VMap);
-  F_promise->setName(FName + "_promise");
-  F_promise->removeFromParent();
-  M.getFunctionList().push_back(F_promise);
-
-  N->addGenFunc(F_promise, visc::PROMISE_TARGET, true);
-
-  /* Removing HPVM in/out/inout function attributes */
-  for(Function::arg_iterator ai = F_promise->arg_begin(), ae = F_promise->arg_end();
-      ai != ae; ai++){
-    Argument *Arg = &*ai;
-    if(Arg->hasAttribute(Attribute::In))
-      Arg->removeAttr(Attribute::In);
-    if(Arg->hasAttribute(Attribute::Out))
-      Arg->removeAttr(Attribute::Out);
-    if(Arg->hasAttribute(Attribute::InOut))
-      Arg->removeAttr(Attribute::InOut);    
-  }
-
-  // FIXME: is nounwind actually needed on the generated function?
-  DEBUG(errs() << "Adding nounwind to generated function\n");
-  F_promise->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
-
-  // Add llvm_visc_requestTensor calls for every pointer argument of the function
-  // (they are all expected to be tensors), at the beginning of the function.
-  // This is the first instruction of the function, insert them before this
-  Instruction* FI = &*(F_promise->getEntryBlock().begin());
-
-  // FIXME: verify that we want 0 as a target device
-  // In this backend, the target device is CPU, represented by i32 0.
-  ConstantInt *TargetDeviceID =
-    ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
-
-  for (Function::arg_iterator ai = F_promise->arg_begin(),
-       ae = F_promise->arg_end(); ai != ae; ++ai) {
-    Argument* Arg = &*ai;
-    if (Arg->getType()->isPointerTy()) {
-      Value *Args[] = {Arg, TargetDeviceID};
-      CallInst::Create(hpvm_request_tensor,
-                       ArrayRef<Value*>(Args, 2),
-                       "", FI);
-    }
-  }
-
-  CodeGenStateMachine CGM(&M, runtimeModule.get(), qin);
-
-  /* An assumption is made for the PROMISE simulator:                         *
-   * a leaf node will contain consecutive operations that will map to a       *
-   * single PROMISE simulator call.                                           */
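-
-  /* For example (illustrative only; operand lists abbreviated), a fully     *
-   * connected layer would appear in the leaf function as:                   *
-   *   %m = call i8* @llvm.visc.tensor.mul(i8* %in, i8* %w)                  *
-   *   %a = call i8* @llvm.visc.tensor.add(i8* %m, i8* %b)                   *
-   *   %t = call i8* @llvm.visc.tensor.tanh(i8* %a)                          *
-   * which the state machine collapses into one FCLayer_PROMISE call.        */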
-
-  for (inst_iterator i = inst_begin(F_promise), e = inst_end(F_promise);
-       i != e; ++i) {
-    Instruction *I = &(*i);
-    CGM.transition(dyn_cast<IntrinsicInst>(I));
-  }
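-  // Note: dyn_cast yields nullptr for non-intrinsic instructions (e.g. the
-  // function's return), which the state machine treats as end-of-stream and
-  // uses to finalize the matched pattern.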
-
-  CGM.codeGen();
-
-//errs() << "-----------------------------------\n";
-//errs() << *F_promise << "\n";
-
-  return;
-}
-
-bool DFG2LLVM_PROMISE::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_PROMISE PASS\n";
-
-  errs() << "Quantization levels file: " << QuantizationInputsFilename << "\n";
-
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
- 
-  // Visitor for Code Generation Graph Traversal
-  CGT_PROMISE *CGTVisitor = new CGT_PROMISE(M, DFG, QuantizationInputsFilename);
-
-  // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
-    // Initiate code generation for root DFNode
-    CGTVisitor->visit(rootNode);
-  }
-
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
-  delete CGTVisitor;
-
-  return true;
-}
-
-
-/******************************************************************************
- *                              Helper functions                              *
- ******************************************************************************/
-
-} // End of namespace
-
-char DFG2LLVM_PROMISE::ID = 0;
-static RegisterPass<DFG2LLVM_PROMISE> X("dfg2llvm-promise",
-                                      "Dataflow Graph to LLVM for PROMISE Pass",
-                                      false /* does not modify the CFG */,
-                                      true /* transformation,   *
-                                            * not just analysis */);
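-
-// Illustrative invocation (assumptions: the pass is built as the loadable
-// module LLVMDFG2LLVM_PROMISE.so, and the quantization flag mirrors the one
-// registered by the sibling WrapperAPI pass):
-//   opt -load LLVMDFG2LLVM_PROMISE.so -dfg2llvm-promise \
-//       -quantization-levels-filename=<levels.txt> input.ll -S -o output.ll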
-
diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports b/llvm/lib/Transforms/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt
deleted file mode 100644
index 714ad14f18..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/DFG2LLVM_PROMISE/LLVMBuild.txt ----------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = DFG2LLVM_PROMISE
-parent = Transforms
diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt
deleted file mode 100644
index 22c219d0a1..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMDFG2LLVM_WrapperAPI
-  DFG2LLVM_WrapperAPI.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
deleted file mode 100644
index c54dd9ef3b..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.cpp
+++ /dev/null
@@ -1,1530 +0,0 @@
-//===---------------------- DFG2LLVM_WrapperAPI.cpp ----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#define ENABLE_ASSERTS
-
-#define DEBUG_TYPE "DFG2LLVM_WrapperAPI"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm-c/Core.h"
-#include "llvm/SupportVISC/VISCTimer.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include <sstream>
-#include <fstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-
-using namespace inplacedfg;
-
-namespace {
-
-cl::opt<std::string> QuantizationInputsFilename(
-  "quantization-levels-filename",
-  cl::desc("<PROMISE quantization levels input file (path)>"),
-  cl::value_desc("filename"),
-  cl::Required);
-
-cl::opt<std::string> ConfigurationInputsFilename(
-  "configuration-inputs-filename",
-  cl::desc("<Autotuner configurations input file (path)>"),
-  cl::value_desc("filename"),
-  cl::Required);
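-
-// Illustrative use of these flags (hypothetical file names; the pass
-// registration itself is not shown in this hunk):
-//   opt ... -quantization-levels-filename=<levels.txt> \
-//           -configuration-inputs-filename=<configs.txt> input.ll -o output.ll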
-
-// Helper function declarations
-bool isValidOperandForInPlaceOperation(Value *, Function *, DFNode *,
-                                       InPlaceDFGAnalysis::InPlaceDFGParameter &);
-
-// Helper class declarations
-
-// State machine definition for pattern identification
-
-/* An assumption is made for the Wrapper API input:                           *
- * a leaf node will contain consecutive operations that will map to a         *
- * single convolution or fully connected layer, or a single tensor operation. *
- *                                                                            *
- * FullyConnectedLayer: Multiply, Add, [Activation]                           *
- * ConvolutionLayer: Convolution, [Add], [Activation], [Pooling]              */
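-
-/* Illustrative leaf-node body matching ConvolutionLayer (intrinsic names     *
- * inferred from the Intrinsic::visc_tensor_* IDs used below; numeric         *
- * operands elided):                                                          *
- *   %c = call i8* @llvm.visc.tensor.convolution(i8* %in, i8* %w, ...)        *
- *   %a = call i8* @llvm.visc.tensor.add(i8* %c, i8* %bias)                   *
- *   %r = call i8* @llvm.visc.tensor.relu(i8* %a)                             *
- *   %p = call i8* @llvm.visc.tensor.pool.max(i8* %r, ...)                    */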
-
-class AbstractState;
-
-class CodeGenStateMachine {
-private:
-  Module *M;
-  Module *RtM;
-
-  std::vector<Value*> Args;
-  std::vector<IntrinsicInst*> IIs;
-  AbstractState *current;
-
-public:
-  CodeGenStateMachine(Module *, Module *);
-
-  void setCurrent(AbstractState *s) {
-    current = s;
-  }
-
-  void transition(IntrinsicInst *II);
-
-  Module *getModule() {
-    return M;
-  }
-
-  void addArgument(Value *Arg) {
-    Args.push_back(Arg);
-  }
-
-  void addIntrinsicInst(IntrinsicInst *II) {
-    IIs.push_back(II);
-  }
-
-  IntrinsicInst *getIntrinsicInstAt(unsigned idx) {
-    return IIs[idx];
-  }
-
-  void codeGen(DFNode *, Function *, const StringRef &,
-               InPlaceDFGAnalysis::InPlaceDFGParameter &);
-
-};
-
-class AbstractState {
-public:
-  enum ID
-  {
-    INITIAL_STATE,
-    FULLY_CONNECTED_LAYER_1,
-    FULLY_CONNECTED_LAYER_2,
-    FULLY_CONNECTED_LAYER_3,
-    FULLY_CONNECTED_LAYER,
-    CONVOLUTION_LAYER_1,
-    CONVOLUTION_LAYER_2,
-    CONVOLUTION_LAYER_3,
-    CONVOLUTION_LAYER_4,
-    CONVOLUTION_LAYER,
-    SINGLE_TENSOR_OPERATION,
-    NO_PATTERN,
-  };
-
-protected:
-  enum ID StateID;
-
-public:
-  enum ID getStateID() {
-    return StateID;
-  }
-
-  virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0;
-  virtual ~AbstractState() {}
-};
-
-class InitialState : public AbstractState {
-public:
-  InitialState() {
-    StateID = ID::INITIAL_STATE;
-    DEBUG(errs() << "new InitialState\n");
-  }
-  ~InitialState() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_1 : public AbstractState {
-public:
-  FullyConnectedLayer_1() {
-    StateID = ID::FULLY_CONNECTED_LAYER_1;
-    DEBUG(errs() << "new FullyConnectedLayer_1\n");
-  }
-  ~FullyConnectedLayer_1() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_2 : public AbstractState {
-public:
-  FullyConnectedLayer_2() {
-    StateID = ID::FULLY_CONNECTED_LAYER_2;
-    DEBUG(errs() << "new FullyConnectedLayer_2\n");
-  }
-  ~FullyConnectedLayer_2() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_3 : public AbstractState {
-public:
-  FullyConnectedLayer_3() {
-    StateID = ID::FULLY_CONNECTED_LAYER_3;
-    DEBUG(errs() << "new FullyConnectedLayer_3\n");
-  }
-  ~FullyConnectedLayer_3() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer : public AbstractState {
-public:
-  FullyConnectedLayer() {
-    StateID = ID::FULLY_CONNECTED_LAYER;
-    DEBUG(errs() << "new FullyConnectedLayer\n");
-  }
-  ~FullyConnectedLayer() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_1 : public AbstractState {
-public:
-  ConvolutionLayer_1() {
-    StateID = ID::CONVOLUTION_LAYER_1;
-    DEBUG(errs() << "new ConvolutionLayer_1\n");
-  }
-  ~ConvolutionLayer_1() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_2 : public AbstractState {
-public:
-  ConvolutionLayer_2() {
-    StateID = ID::CONVOLUTION_LAYER_2;
-    DEBUG(errs() << "new ConvolutionLayer_2\n");
-  }
-  ~ConvolutionLayer_2() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_3 : public AbstractState {
-public:
-  ConvolutionLayer_3() {
-    StateID = ID::CONVOLUTION_LAYER_3;
-    DEBUG(errs() << "new ConvolutionLayer_3\n");
-  }
-  ~ConvolutionLayer_3() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_4 : public AbstractState {
-public:
-  ConvolutionLayer_4() {
-    StateID = ID::CONVOLUTION_LAYER_4;
-    DEBUG(errs() << "new ConvolutionLayer_4\n");
-  }
-  ~ConvolutionLayer_4() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer : public AbstractState {
-public:
-  ConvolutionLayer() {
-    StateID = ID::CONVOLUTION_LAYER;
-    DEBUG(errs() << "new ConvolutionLayer\n");
-  }
-  ~ConvolutionLayer() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class SingleTensorOperation : public AbstractState {
-public:
-  SingleTensorOperation() {
-    StateID = ID::SINGLE_TENSOR_OPERATION;
-    DEBUG(errs() << "new SingleTensorOperation\n");
-  }
-  ~SingleTensorOperation() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class NoPattern : public AbstractState {
-public:
-  NoPattern() {
-    StateID = ID::NO_PATTERN;
-    DEBUG(errs() << "new NoPattern\n");
-  }
-  ~NoPattern() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_convolution:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // conv input
-        Mch->addArgument(II->getOperand(1)); // conv kernel
-
-        Mch->setCurrent(new ConvolutionLayer_1());
-        }
-        break;
-      case Intrinsic::visc_tensor_mul:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // 1st gemm input
-        Mch->addArgument(II->getOperand(1)); // 2nd gemm input
-
-        Mch->setCurrent(new FullyConnectedLayer_1());
-        }
-        break;
-      default: // Other HPVM intrinsic
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->setCurrent(new SingleTensorOperation());
-        }
-        break;
-    }
-    delete this;
-  } // else: no HPVM intrinsic received; remain in the initial state
-}
-
-void SingleTensorOperation::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    Mch->setCurrent(new NoPattern());
-    delete this;
-  }
-}
-
-void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_add:
-        {
-        IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0);
-        assert((MulII == II->getOperand(0)) &&
-               "Output of mul must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-
-        Mch->setCurrent(new FullyConnectedLayer_2());
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_tanh:
-        {
-        // Type of activation : TanH
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_relu:
-        {
-        // Type of activation : ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_clipped_relu:
-        {
-        // Type of activation : Clipped ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      default: // No activation, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    Mch->setCurrent(new FullyConnectedLayer());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (!II) { // End of instruction stream
-    Mch->setCurrent(new FullyConnectedLayer());
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer::transition(CodeGenStateMachine *Mch,
-                                     IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    Mch->setCurrent(new NoPattern());
-    delete this;
-  }
-}
-
-void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_add:
-        {
-        IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
-        assert((ConvII == II->getOperand(0)) &&
-               "Output of conv must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-
-        Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-
-        Mch->setCurrent(new ConvolutionLayer_2());
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else {
-    // No addition
-    Mch->addArgument(ConstantPointerNull::get(
-                     Type::getInt8PtrTy(Mch->getModule()->getContext())));
-
-    // FIXME: should these be zeros when there is no addition? For now, reuse
-    // the convolution's numeric arguments.
-    IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
-    Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
-    Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
-    Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
-    Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_tanh:
-        {
-        // Type of activation: TanH. The activation-type argument (0) is added
-        // later (after the pooling arguments) by ConvolutionLayer_3, or at the
-        // end of the instruction stream.
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_relu:
-        {
-        // Type of activation: ReLU. Argument (1) is added later, as above.
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_clipped_relu:
-        {
-        // Type of activation: Clipped ReLU. Argument (2) is added later, as above.
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_max:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_min:
-        {
-        // pool min; FIXME: is pooling mode 2 (min) supported by the runtime?
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_mean:
-        {
-        // pool mean
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      default: // No activation, No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_pool_max:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_min:
-        {
-        // pool min; FIXME: is pooling mode 2 (min) supported by the runtime?
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_mean:
-        {
-        // pool mean
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      default: // No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-    // Revisit last intrinsic, to add argument for activation operation
-    IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-    // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-    Intrinsic::ID ActIID = ActII->getIntrinsicID();
-    if (ActIID == Intrinsic::visc_tensor_tanh) {
-      Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    } else if (ActIID == Intrinsic::visc_tensor_relu) {
-      Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-    } else { // ActIID == Intrinsic::visc_tensor_clipped_relu
-      Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-    }
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (!II) { // End of instruction stream
-    Mch->setCurrent(new ConvolutionLayer());
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void ConvolutionLayer::transition(CodeGenStateMachine *Mch,
-                                  IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    Mch->setCurrent(new NoPattern());
-    delete this;
-  }
-}
-
-void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {}
-
-CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM) :
-  M(_M), RtM(_RtM) {
-  current = new InitialState();
-}
-
-void CodeGenStateMachine::transition(IntrinsicInst *II) {
-  current->transition(this, II);
-}
-
-void CodeGenStateMachine::codeGen(DFNode *N, Function *F, const StringRef &strRef,
-                                  InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
-
-  assert( ( (current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
-            (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)     ||
-            (current->getStateID() == AbstractState::ID::SINGLE_TENSOR_OPERATION) ) &&
-          "Unsupported instruction sequence for the Wrapper API.\n" );
-
-  if ((current->getStateID() == AbstractState::ID::FULLY_CONNECTED_LAYER) ||
-      (current->getStateID() == AbstractState::ID::CONVOLUTION_LAYER)) {
-    // Layer Operation.
-    DEBUG(errs() << "Layer Instruction Sequence. Validating ...\n");
-    // We have a valid instruction sequence.
-    // Make sure that the instruction sequence can be translated:
-    // each instruction's result must be used only by the next one in sequence.
-    // (p + 1 < size() avoids unsigned underflow if IIs were ever empty.)
-    for (unsigned p = 0; p + 1 < IIs.size(); p++) {
-      IntrinsicInst *II = IIs[p];
-      assert((II->hasOneUse()) &&
-            "Instruction sequence does not fit pattern: not single use\n");
-  
-      Value::user_iterator ui = II->user_begin(); // The only use
-      assert((*ui == IIs[p+1]) &&
-             "Instruction sequence does not fit pattern: not used by next instruction\n");
-    }
-
-    // Create corresponding wrapper API call
-    CallInst *CI;
-    switch (current->getStateID()) {
-      case AbstractState::ID::CONVOLUTION_LAYER:
-        {
-          Constant* wrapper_ConvLayer =
-            M->getOrInsertFunction(StringRef("wrapper_ConvLayer"),
-                   RtM->getFunction(StringRef("wrapper_ConvLayer"))->getFunctionType());
-          DEBUG(errs() << *wrapper_ConvLayer);
-  
-          // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-
-          // Create string for node name, as first argument for wrapper API call
-          Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                              strRef, true);
-          GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                                 true, GlobalValue::ExternalLinkage, ConstArray, "");
-
-          // Create GEP expression to access it
-          Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-          Constant* GEPIndices[] = { Int_0, Int_0 };
-          Constant* GEPConst =
-            ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                           GV, GEPIndices);
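-
-          // Roughly the constants built above (illustrative; N is the string
-          // length and @0 an unnamed global):
-          //   @0 = constant [N x i8] c"<node name>\00"
-          //   i8* getelementptr inbounds ([N x i8], [N x i8]* @0, i32 0, i32 0)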
-
-          std::vector<Value*> UpdatedArgs;
-          UpdatedArgs.push_back(GEPConst);
-          for (unsigned i = 0; i < Args.size(); i++) {
-            UpdatedArgs.push_back(Args[i]);
-          }
-          // Create wrapper API function call
-          CI = CallInst::Create(wrapper_ConvLayer, UpdatedArgs, "");
-        }
-        break;
-      case AbstractState::ID::FULLY_CONNECTED_LAYER:
-        {
-          Constant* wrapper_FCLayer =
-            M->getOrInsertFunction(StringRef("wrapper_FCLayer"),
-                RtM->getFunction(StringRef("wrapper_FCLayer"))->getFunctionType());
-          DEBUG(errs() << *wrapper_FCLayer);
-  
-          // FIXME: get last (float) arguments from clipped relu intrinsic. For now, 0
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-          Args.push_back(ConstantFP::get(Type::getFloatTy(M->getContext()), (double) 0));
-
-          // Create string for node name, as first argument for wrapper API call
-          Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                              strRef, true);
-          GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                                 true, GlobalValue::ExternalLinkage, ConstArray, "");
-
-          // Create GEP expression to access it
-          Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-          Constant* GEPIndices[] = { Int_0, Int_0 };
-          Constant* GEPConst =
-            ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                           GV, GEPIndices);
-
-          std::vector<Value*> UpdatedArgs;
-          UpdatedArgs.push_back(GEPConst);
-          for (unsigned i = 0; i < Args.size(); i++) {
-            UpdatedArgs.push_back(Args[i]);
-          }
-
-          // Create wrapper API function call
-          CI = CallInst::Create(wrapper_FCLayer, UpdatedArgs, "");
-        }
-        break;
-      default:
-        llvm_unreachable("Unexpected CodeGenStateMachine State\n");
-        break;
-    }
-
-    // Insert new call and replace all uses of pattern result with
-    // the wrapper API call
-    IntrinsicInst *IIlast = *(IIs.rbegin());
-    CI->insertBefore(IIlast);
-    IIlast->replaceAllUsesWith(CI);
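-    // The matched intrinsics themselves are erased after this if/else, once
-    // all their uses have been redirected.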
-
-  } else { // SINGLE_TENSOR_OPERATION
-    assert((IIs.size() == 1) &&
-            "Unexpected size of intrinsics vector in code gen state machine.\n");
-    assert(Args.empty() && "Unexpected arguments found in code gen state machine.\n");
-    IntrinsicInst *TensorII = IIs[0];
-errs() << "TensorII: " << *TensorII << "\n";
-
-    switch (TensorII->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_group_convolution:
-      { /* llvm.hpvm.tensor.group.conv */
-        // Tensor group conv is not in place.
-        DEBUG(errs() << F->getName() << "\t: Handling tensor group convolution \n");
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-
-        Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
-        Args.push_back(conv_mode);
-
-        Args.push_back(TensorII->getOperand(7));
-    
-        // Create wrapper API runtime function call
-        Constant* wrapper_tensorGroupConvolution =
-          M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"),
-            RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution,
-                                        Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.group.conv with the runtime call
-        TensorII->replaceAllUsesWith(CI);
-      }
-      break;
-
-      case Intrinsic::visc_tensor_batchnorm:
-      { /* llvm.hpvm.tensor.batchnorm */
-        // Tensor batchnorm is not in place.
-        // FIXME: Add check for InPlace Analysis
-        DEBUG(errs() << F->getName() << "\t: Handling tensor batch normalization \n");
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-
-        // Create wrapper API runtime function call
-        Constant* wrapper_tensorBatchNorm =
-          M->getOrInsertFunction(StringRef("wrapper_tensorBatchNorm"),
-            RtM->getFunction(StringRef("wrapper_tensorBatchNorm"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorBatchNorm,
-                                        Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.batchnorm with the wrapper API call
-        TensorII->replaceAllUsesWith(CI);
-      }
-      break;
-
-      case Intrinsic::visc_tensor_add:
-      { /* llvm.hpvm.tensor.add */
-        DEBUG(errs() << F->getName() << "\t: Handling tensor add\n");
-        // Tensor add(a,b) is in place for argument a.
-//        Value *Op = TensorII->getOperand(0);
-
-        // Test the intrinsic operand for in place operation.
-//        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
-
-        // Code generation must not continue if this is false, because the
-        // target may perform the operation in place (asserting is the safe choice)
-        // FIXME: remove this comment - must check for in-place
-//        assert(inplace &&
-//               "Operand not valid for in place operation. Code gen aborted.\n");
-
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-
-        // Create wrapper API runtime function call
-        Constant* wrapper_tensorAdd =
-          M->getOrInsertFunction(StringRef("wrapper_tensorAdd"),
-            RtM->getFunction(StringRef("wrapper_tensorAdd"))->getFunctionType());
-        CallInst::Create(wrapper_tensorAdd, Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.add with the 1st argument
-        // that, due to in place operation, now contains the result
-        TensorII->replaceAllUsesWith(TensorII->getOperand(0));
-      }
-      break;
-
-      case Intrinsic::visc_tensor_pool_max:
-      case Intrinsic::visc_tensor_pool_mean:
-      case Intrinsic::visc_tensor_pool_min:
-      {
-        DEBUG(errs() << F->getName() << "\t: Handling tensor pooling functions\n");
-
-        // Argument list for tensor pooling:
-        // input, poolFunction, window_height, window_width,
-        // vertical_pad, horizontal_pad, vertical_stride, horizontal_stride
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-
-        int pool_type = 0;
-        if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_max) {
-          pool_type = 0;
-        } else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean) {
-          pool_type = 1;
-        } else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_pool_min) {
-          pool_type = 2;
-        }
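-        // Pool type encoding expected by the runtime: 0 = max, 1 = mean,
-        // 2 = min.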
-
-        Constant *constPoolType =
-          ConstantInt::get(Type::getInt32Ty(M->getContext()), pool_type);
-        Args.push_back(constPoolType);
-
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-        Args.push_back(TensorII->getOperand(6));
-
-        // Create wrapper API runtime function call
-        Constant* wrapper_tensorPooling =
-          M->getOrInsertFunction(StringRef("wrapper_tensorPooling"),
-            RtM->getFunction(StringRef("wrapper_tensorPooling"))->getFunctionType());
-        DEBUG(errs() << *wrapper_tensorPooling);
-        CallInst* CI = CallInst::Create(wrapper_tensorPooling, Args, "", TensorII);
-
-        // Replacing intrinsic result uses with the result of the tensor runtime operation
-        TensorII->replaceAllUsesWith(CI);
-      }
-      break;
-
-      case Intrinsic::visc_tensor_relu:
-      case Intrinsic::visc_tensor_clipped_relu:
-      case Intrinsic::visc_tensor_tanh:
-      {
-        DEBUG(errs() << F->getName() << "\t: Handling tensor activation functions\n");
-
-        // Tensor relu(a) (and others) is in place for argument a.
-        Value *Op = TensorII->getOperand(0);
-
-        // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
-        // Code generation will not continue if this is false, because the target
-        // may perform an in place operation (asserting is the safe choice)
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-
-        if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_relu) {
-          // Create wrapper API runtime function call
-          Constant* wrapper_tensorRelu =
-            M->getOrInsertFunction(StringRef("wrapper_tensorRelu"),
-              RtM->getFunction(StringRef("wrapper_tensorRelu"))->getFunctionType());
-          DEBUG(errs() << *wrapper_tensorRelu);
-          CallInst::Create(wrapper_tensorRelu, Args, "", TensorII);
-        }
-        else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) {
-          // Create wrapper API runtime function call
-          Constant* wrapper_tensorClippedRelu =
-            M->getOrInsertFunction(StringRef("wrapper_tensorClippedRelu"),
-              RtM->getFunction(StringRef("wrapper_tensorClippedRelu"))->getFunctionType());
-          DEBUG(errs() << *wrapper_tensorClippedRelu);
-          CallInst::Create(wrapper_tensorClippedRelu, Args, "", TensorII);
-        }
-        else if (TensorII->getIntrinsicID() == Intrinsic::visc_tensor_tanh) {
-          // Create wrapper API runtime function call
-          Constant* wrapper_tensorTanh =
-            M->getOrInsertFunction(StringRef("wrapper_tensorTanh"),
-              RtM->getFunction(StringRef("wrapper_tensorTanh"))->getFunctionType());
-          DEBUG(errs() << *wrapper_tensorTanh);
-          CallInst::Create(wrapper_tensorTanh, Args, "", TensorII);
-        }
-
-        // We can replace the call to hpvm.tensor.{relu, clipped relu, tanh}
-        // with the 1st argument that, due to in place operation,
-        // now contains the result
-        TensorII->replaceAllUsesWith(TensorII->getOperand(0));
-      }
-      break;
-
-      case Intrinsic::visc_tensor_softmax:
-      { /* llvm.visc.tensor.softmax */
-
-        DEBUG(errs() << F->getName() << "\t: Handling tensor softmax\n");
-        // Tensor softmax(a) is in place for argument a.
-        Value *Op = TensorII->getOperand(0);
-
-        // Test the intrinsic operand for in place operation.
-        bool inplace = isValidOperandForInPlaceOperation(Op, F, N, IPP);
-        // Code generation will not continue if this is false, because the target
-        // may perform an in place operation (asserting is the safe choice)
-        assert(inplace &&
-               "Operand not valid for in place operation. Code gen aborted.\n");
-
-        // Argument list for the runtime call
-
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        Args.push_back(TensorII->getOperand(0));
-
-        // Create wrapper API runtime function call
-        Constant* wrapper_tensorSoftmax =
-          M->getOrInsertFunction(StringRef("wrapper_tensorSoftmax"),
-                 RtM->getFunction(StringRef("wrapper_tensorSoftmax"))->getFunctionType());
-        DEBUG(errs() << *wrapper_tensorSoftmax);
-        CallInst::Create(wrapper_tensorSoftmax, Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.softmax with the 1st argument
-        // that, due to in place operation, now contains the result
-        TensorII->replaceAllUsesWith(TensorII->getOperand(0));
-      }
-      break;
-/*
-      case Intrinsic::visc_image_fft_transform:
-      { // llvm.hpvm.image.fft.transform - Or another image intrinsic
-        // All will be treated as not in place
-        DEBUG(errs() << F->getName() << "\t: Handling fft transform \n");
-
-        // Create argument list for the runtime call - stored in Args
-
-        // All interfaces will have a string as first argument, which will be
-        // used to identify the dataflow node at runtime
-        // Create string for node name, as first argument for wrapper API call
-        Constant *ConstArray = ConstantDataArray::getString(M->getContext(),
-                                                            strRef, true);
-        GlobalVariable *GV = new GlobalVariable(*M,ConstArray->getType(),
-                               true, GlobalValue::ExternalLinkage, ConstArray, "");
-        // Create GEP expression to access it
-        Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M->getContext()), 0);
-        Constant* GEPIndices[] = { Int_0, Int_0 };
-        Constant* GEPConst =
-          ConstantExpr::getGetElementPtr(GV->getType()->getPointerElementType(),
-                                         GV, GEPIndices);
-
-        Args.push_back(GEPConst);
-
-        // Here, you will access the appropriate arguments of the intrinsic
-        // and push_back, in order to create the argument list of the runtime call
-        Args.push_back(TensorII->getOperand(0));
-        Args.push_back(TensorII->getOperand(1));
-        Args.push_back(TensorII->getOperand(2));
-        Args.push_back(TensorII->getOperand(3));
-        Args.push_back(TensorII->getOperand(4));
-        Args.push_back(TensorII->getOperand(5));
-
-        Constant *conv_mode = ConstantInt::get(Type::getInt32Ty(M->getContext()), 1);
-        Args.push_back(conv_mode);
-
-        Args.push_back(TensorII->getOperand(7));
-
-        // Done with argument list.
-
-        // Create wrapper API runtime function call
-        // Appropriately set the name of the function of the runtime that you
-        // want to call
-        // Note: the Constant * is what we need to pass to the callInst.
-        // This name does not have to match, but does so for similarity.
-        Constant* wrapper_tensorGroupConvolution =
-          M->getOrInsertFunction(StringRef("wrapper_tensorGroupConvolution"),
-            RtM->getFunction(StringRef("wrapper_tensorGroupConvolution"))->getFunctionType());
-        CallInst* CI = CallInst::Create(wrapper_tensorGroupConvolution,
-                                        Args, "", TensorII);
-        // We can replace the call to hpvm.tensor.xxx with the runtime call
-        TensorII->replaceAllUsesWith(CI);
-      }
-      break;
-
-*/
-      default:
-        llvm_unreachable("Unknown VISC Intrinsic!");
-        break;
-    }
-
-  } // No other case exists, since assertion passed
-
-
-  // Remove the instructions we translated to wrapper API calls.
-  // Traverse the vector backwards, otherwise definitions are deleted while
-  // their subsequent uses are still around.
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(),
-       re = IIs.rend(); ri != re; ++ri) {
-    DEBUG(errs() << "Erasing: " << **ri << "\n");
-    (*ri)->eraseFromParent();
-  }
-
-}
-
-// DFG2LLVM_WrapperAPI - Pass lowering ApproxHPVM tensor nodes to wrapper API
-// runtime calls.
-
-struct DFG2LLVM_WrapperAPI : public DFG2LLVM {
-  static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_WrapperAPI() : DFG2LLVM(ID) {}
-private:
-
-public:
-
-  void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<BuildDFG>();
-    AU.addRequired<InPlaceDFGAnalysisWrapper>();
-    AU.addPreserved<BuildDFG>();
-    AU.addPreserved<InPlaceDFGAnalysisWrapper>();
-  }
-
-  bool runOnModule(Module &M);
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class CGT_WrapperAPI : public CodeGenTraversal {
-
-private:
-  //Member variables
-  unsigned nodeID; // Used as a node identifier
-
-  std::string QuantizationInputsFilenameStr;
-  std::string ConfigurationInputsFilenameStr;
-
-  InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;
-
-  // VISC Runtime API and Tensor runtime API
-  Constant* llvm_hpvm_initApproxhpvmRt;
-  Constant* llvm_hpvm_cleanupApproxhpvmRt;
-  Constant* hpvm_request_tensor;
-
-  Constant* llvm_hpvm_initializeRuntimeController;
-  Constant* llvm_hpvm_clearRuntimeController;
-
-  // Functions
-
-  // Virtual Functions
-  void init();
-  void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-
-public:
-
-  // Constructor
-  CGT_WrapperAPI(Module &_M, BuildDFG &_DFG,
-    InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP,
-    std::string &_QuantizationInputsFilenameStr,
-    std::string &_ConfigurationInputsFilenameStr)
-  : CodeGenTraversal(_M, _DFG),
-    QuantizationInputsFilenameStr(_QuantizationInputsFilenameStr),
-    ConfigurationInputsFilenameStr(_ConfigurationInputsFilenameStr),
-    IPP(&_IPP) {
-    nodeID = 0;
-    initRuntimeAPI();
-  }
-
-};
-
-
-void CGT_WrapperAPI::init() {
-  // FIXME: what to do here? If anything?
-}
-
-// Initialize the VISC runtime API. This makes it easier to insert these calls
-void CGT_WrapperAPI::initRuntimeAPI() {
-
-  // Load Runtime API Module
-  SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
-    DEBUG(errs() << Err.getMessage());
-  else
-    DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
-
-  // Get or insert Global declarations for
-  // - initialization
-  // - cleanup
-  // - request a tensor
-  DECLARE(llvm_hpvm_initApproxhpvmRt);
-  DECLARE(llvm_hpvm_cleanupApproxhpvmRt);
-  DECLARE(hpvm_request_tensor);
-
-  DECLARE(llvm_hpvm_initializeRuntimeController);
-  DECLARE(llvm_hpvm_clearRuntimeController);
-
-  // Find visc.init and visc.cleanup calls, and add placeholder methods
-  // for initialization and cleanup of the hpvm tensor runtime
-
-  Function* VI = M.getFunction("llvm.visc.init");
-  assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n");
-  InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initApproxhpvmRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
-
-  StringRef QRangesStrRef = StringRef(QuantizationInputsFilenameStr);
-  // Create string for node name, as first argument for wrapper API call
-  Constant *ConstArray1 = ConstantDataArray::getString(M.getContext(),
-                                                       QRangesStrRef, true);
-  GlobalVariable *GV1 = new GlobalVariable(M,ConstArray1->getType(),
-                        true, GlobalValue::ExternalLinkage, ConstArray1, "");
-  // Create GEP expression to access it
-  Constant* Int_0 = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
-  Constant* GEPIndices[] = { Int_0, Int_0 };
-  Constant* QRangesGEPConst =
-    ConstantExpr::getGetElementPtr(GV1->getType()->getPointerElementType(),
-                                   GV1, GEPIndices);
-
-  StringRef ConfsStrRef = StringRef(ConfigurationInputsFilenameStr);
-  // Create string for node name, as first argument for wrapper API call
-  Constant *ConstArray2 = ConstantDataArray::getString(M.getContext(),
-                                                       ConfsStrRef, true);
-  GlobalVariable *GV2 = new GlobalVariable(M,ConstArray2->getType(),
-                        true, GlobalValue::ExternalLinkage, ConstArray2, "");
-  Constant* ConfsGEPConst =
-    ConstantExpr::getGetElementPtr(GV2->getType()->getPointerElementType(),
-                                   GV2, GEPIndices);
-  ArrayRef<Value*> RTCInitArgs = {ConfsGEPConst, QRangesGEPConst};
-  CallInst::Create(llvm_hpvm_initializeRuntimeController, RTCInitArgs, "", InitCall);
-
-  Function* VC = M.getFunction("llvm.visc.cleanup");
-  assert(VC->getNumUses() == 1 && "__visc__cleanup should only be used once\n");
-  CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupApproxhpvmRt, ArrayRef<Value*>(), "", CleanupCall);
-  CallInst::Create(llvm_hpvm_clearRuntimeController, ArrayRef<Value*>(), "", CleanupCall);
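-  // The module prologue now initializes the approx runtime and the runtime
-  // controller (passing the configuration and quantization-ranges file
-  // names), and the epilogue tears both down again.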
-
-}
-
-void CGT_WrapperAPI::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
-}
-
-void CGT_WrapperAPI::codeGen(DFLeafNode* N) {
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-
-//  For wrapper API, we generate code for every leaf node.
-//  No need to check for hints from frontend
-//  // Generate code only if it has the right hint
-//  if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) {
-//    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-//    return;
-//  }
-
-  // Increment the node ID, for current node.
-  ++nodeID;
-
-  // Get the function associated with the dataflow node
-  Function *F = N->getFuncPointer();
-errs() << "Node Function: " << *F << "\n";
-  // Look up if we have visited this function before. If we have, then just
-  // get the cloned function pointer from DFNode. Otherwise, create the cloned
-  // function and add it to the DFNode GenFunc.
-  Function *F_wrapper_api = N->getGenFuncForTarget(visc::PROMISE_TARGET);
-
-  assert((F_wrapper_api == NULL) &&
-         "Error: Visiting a node for which code already generated");
-
-  // Clone the function
-  ValueToValueMapTy VMap;
-  std::string FName(F->getName().data());
-  F_wrapper_api = CloneFunction(F, VMap);
-  F_wrapper_api->setName(FName+"_wrapper_api");
-  F_wrapper_api->removeFromParent();
-  M.getFunctionList().push_back(F_wrapper_api);
-
-  N->addGenFunc(F_wrapper_api, visc::PROMISE_TARGET, true);
-
-  /* Removing HPVM in/out/inout function attributes */
-  for(Function::arg_iterator ai = F_wrapper_api->arg_begin(), ae = F_wrapper_api->arg_end();
-      ai != ae; ai++){
-    Argument *Arg = &*ai;
-    if(Arg->hasAttribute(Attribute::In))
-      Arg->removeAttr(Attribute::In);
-    if(Arg->hasAttribute(Attribute::Out))
-      Arg->removeAttr(Attribute::Out);
-    if(Arg->hasAttribute(Attribute::InOut))
-      Arg->removeAttr(Attribute::InOut);    
-  }
-
-  // Adding nounwind to generated function : FIXME: needed?
-  DEBUG(errs() << "Adding nounwind to generated function\n");
-  F_wrapper_api->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
-
-  // Add llvm_visc_requestTensor calls for every pointer argument of the function
-  // (they are all expected to be tensors), at the beginning of the function.
-  // This is the first instruction of the function, insert them before this
-  Instruction* FI = &*(F_wrapper_api->getEntryBlock().begin());
-
-  // FIXME: verify that we want 1 as a target device
-  // In this backend, the target device is GPU, represented by i32 1.
-  ConstantInt *TargetDeviceID =
-    ConstantInt::get(Type::getInt32Ty(M.getContext()), 1);
-
-  for (Function::arg_iterator ai = F_wrapper_api->arg_begin(),
-       ae = F_wrapper_api->arg_end(); ai != ae; ++ai) {
-    Argument* Arg = &*ai;
-    if (Arg->getType()->isPointerTy()) {
-      Value *Args[] = {Arg, TargetDeviceID};
-      CallInst::Create(hpvm_request_tensor,
-                       ArrayRef<Value*>(Args, 2),
-                       "", FI);
-    }
-  }
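-  // Every pointer argument (assumed to be a tensor) is now requested on the
-  // target device via hpvm_request_tensor(arg, 1) before any tensor op runs.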
-
-  CodeGenStateMachine CGM(&M, runtimeModule.get());
-
-  for (inst_iterator i = inst_begin(F_wrapper_api), e = inst_end(F_wrapper_api);
-       i != e; ++i) {
-    Instruction *I = &(*i);
-    CGM.transition(dyn_cast<IntrinsicInst>(I));
-  }
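-  // Note: dyn_cast yields nullptr for non-intrinsic instructions, so
-  // transition() must tolerate null inputs (presumably as pattern boundaries).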
-
-  errs() << "Node ID string: "<< StringRef(std::to_string(nodeID)) << "\n";
-  //CGM.codeGen(N, F_wrapper_api, N->getFuncPointer()->getName(), *IPP);
-  CGM.codeGen(N, F_wrapper_api, StringRef(std::to_string(nodeID)), *IPP);
-
-//errs() << "-----------------------------------\n";
-//errs() << *F_wrapper_api << "\n";
-
-  return;
-}
-
-bool DFG2LLVM_WrapperAPI::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_WrapperAPI PASS\n";
-
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  // Get the In Place Analysis Results
-  InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
-    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
-  // Print results
-//  printInPlaceDFGParameter(IPP);
-
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
- 
-  // Visitor for Code Generation Graph Traversal
-  CGT_WrapperAPI *CGTVisitor = new CGT_WrapperAPI(M, DFG, IPP,
-                                            QuantizationInputsFilename,
-                                            ConfigurationInputsFilename);
-
-  // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
-    // Initiate code generation for root DFNode
-    CGTVisitor->visit(rootNode);
-  }
-
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
-  delete CGTVisitor;
-
-  return true;
-}
-
-
-/******************************************************************************
- *                              Helper functions                              *
- ******************************************************************************/
-
-/* Method needs to be called as part of an analysis pre-step, before code      *
- * generation is run on a node function, so that the HPVM intrinsics are still *
- * in place. */
-bool isValidOperandForInPlaceOperation(Value *Op, Function *Fgen, DFNode *N,
-                                       InPlaceDFGAnalysis::InPlaceDFGParameter &IPP) {
-
-  if (Argument *Arg = dyn_cast<Argument>(Op)) {
-    DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
-    assert((Arg->getParent() == Fgen) &&
-          "Extra Parameter in body of Function\n");
-    // Candidate parameter is a function argument
-    // In this case, consult the result of in place analysis
-    // Find position in arg list
-    unsigned pos = Arg->getArgNo();
-    // If this parameter cannot be used for in place operation
-    // code gen cannot continue
-    if (IPP.at(N)[pos]) {
-      DEBUG(errs() << *Arg << "\t: argument, suitable for in place\n");
-      return true;
-    } else {
-      DEBUG(errs() << *Arg << "\t: argument, not suitable for in place\n");
-      return false;
-    }
-  }
-  else {
-    // If it is not an argument, then it needs to be the result of
-    // another intrinsic. These are new objects that are allocated,
-    // and consumed by next intrinsic. 
-    DEBUG(errs() << *Op << "\t: Test for result of intrinsic operation\n");
-    if (isa<IntrinsicInst>(Op)) {
-      DEBUG(errs() << *Op << "\t: local, suitable for in place\n");
-      return true;
-    } else {
-      DEBUG(errs() << *Op << "\t: local, not suitable for in place\n");
-      return false;
-    }
-    }
-  }
-}
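-// Example: for an in-place lowering of tensor add(a, b), operand a qualifies
-// either as a function argument that the in-place analysis marked safe, or as
-// the fresh result of a preceding tensor intrinsic.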
-
-} // End of namespace
-
-char DFG2LLVM_WrapperAPI::ID = 0;
-static RegisterPass<DFG2LLVM_WrapperAPI> X("dfg2llvm-wrapperapi",
-                                           "Dataflow Graph to LLVM for WrapperAPI Pass",
-                                           false /* does not modify the CFG */,
-                                           true  /* transformation,   *
-                                                 * not just analysis */);
-
diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/DFG2LLVM_WrapperAPI.exports
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt
deleted file mode 100644
index b4ebb8019d..0000000000
--- a/llvm/lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/DFG2LLVM_WrapperAPI/LLVMBuild.txt -------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = DFG2LLVM_WrapperAPI
-parent = Transforms
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt b/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt
deleted file mode 100644
index 374f3b26f1..0000000000
--- a/llvm/lib/Transforms/FuseHPVMTensorNodes/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMFuseHPVMTensorNodes
-  FuseHPVMTensorNodes.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
deleted file mode 100644
index d9a3c588b5..0000000000
--- a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.cpp
+++ /dev/null
@@ -1,971 +0,0 @@
-//===                        FuseHPVMTensorNodes.cpp                       ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "FuseTensorNodes"
-
-#include "llvm/IR/ValueMap.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-
-#include "llvm/FuseHPVMTensorNodes/FuseHPVMTensorNodes.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/SupportVISC/VISCUtils.h"
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-using namespace viscUtils;
-
-namespace tensorfuse {
-/***                                Classes                                 ***/
-
-/***                            Helper Functions                            ***/
-
-/* Return the constant integer represented by value V */
-static unsigned getNumericValue(Value* V) {
-  assert(isa<ConstantInt>(V)
-         && "Value indicating the number of arguments should be a constant integer");
-  return cast<ConstantInt>(V)->getZExtValue();
-}
-
-/* Query the kind of edge described by a createEdge intrinsic IIe             *
- * with respect to node handle IIn                                            */
-static bool isIncomingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) {
-  Value* Dst = IIe->getArgOperand(1);
-  IntrinsicInst* ArgII = dyn_cast<IntrinsicInst>(Dst);
-  assert(ArgII && "Second argument of createEdge is not an intrinsic");
-  return (ArgII == IIn);
-}
-static bool isOutgoingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) {
-  Value* Src = IIe->getArgOperand(0);
-  IntrinsicInst* ArgII = dyn_cast<IntrinsicInst>(Src);
-  assert(ArgII && "First argument of createEdge is not an intrinsic");
-  return (ArgII == IIn);
-}
-
-/* Populates vector with all incoming edge intrinsics to node II              */
-static void getIncomingEdgeIntrinsicList(IntrinsicInst *II,
-                                        std::vector<IntrinsicInst*> &EdgeList) {
-  for(Value::user_iterator ui = II->user_begin(),
-      ue = II->user_end(); ui!=ue; ++ui) {
-    IntrinsicInst* useI = dyn_cast<IntrinsicInst>(*ui);
-    assert(useI &&
-           "HPVM graph intrinsic used in non HPVM intrinsic instruction\n");
-    if (useI->getIntrinsicID() != Intrinsic::visc_createEdge)
-      continue; // Skip all non edge intrinsics
-
-    // For edge intrinsics, test the destination operand
-    if (useI->getOperand(1) == II) { // Argument is the destination
-      EdgeList.push_back(useI);
-    }
-  }
-  return;
-}
-
-/* Returns true if argument at position argno is coming from a dataflow edge  *
- * in the vector EdgeList                                                     */
-static bool isIncomingEdgeArgument(unsigned argno,
-                                   std::vector<IntrinsicInst*> &EdgeList) {
-  for (IntrinsicInst *ii : EdgeList) {
-    if (getNumericValue(ii->getOperand(4)) == argno)
-      return true;
-  }
-  return false;
-}
-
-// Check that this is a valid HPVM Tensor Node (starts with an HPVM intrinsic)
-// Return the node intrinsic function
-static IntrinsicInst *isValidHPVMTensorNode(DFNode *N) {
-  Function *F = N->getFuncPointer();
-  IntrinsicInst *II = dyn_cast<IntrinsicInst>(&*(inst_begin(F)));
-  assert(II &&
-         "HPVM tensor intrinsic expected as first instruction of HPVM tensor node\n");
-  assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor") &&
-         "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-  return II;
-}
-
-// Returns the next node in a node sequence, or NULL if it does not exist.
-// We consider two nodes a sequence if SrcN has a single successor, DstN,
-// and DstN a single predecessor, SrcN (other than the Root node)
-static DFNode *findNextNodeInSequence(DFNode *SrcN) {
-
-  DFNode *DstN = NULL;
-
-  for (DFNode::successor_iterator si = SrcN->successors_begin(),
-       se = SrcN->successors_end(); si != se; ++si) {
-    DFNode *N = *si;
-    if (N->isDummyNode()) {
-      continue;
-    }
-    if (!DstN)
-      DstN = N;
-    if (DstN != N) {
-      errs() << "Found different destination nodes: no node sequence.\n";
-      return NULL;
-    }
-  }
-
-  // If we reach this point, DstN is the unique successor of SrcN
-
-  // Now, test that the DstN has a single predecessor except Root (dummy)
-  for (DFNode::indfedge_iterator eb = DstN->indfedge_begin(),
-       ee = DstN->indfedge_end(); eb != ee; ++eb) {
-    DFNode *SN = (*eb)->getSourceDF();
-    if ((SN != SrcN) && (!(SN->isDummyNode()))) {
-      // Does not satisfy requirement
-      return NULL;
-    }
-  }
-
-  return DstN;
-}
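-// Example: a convolution node whose only non-dummy successor is a bias-add
-// node (and whose add's only non-dummy predecessor is the convolution) forms
-// such a sequence, making the pair a fusion candidate.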
-
-/***                                Methods                                 ***/
-
-/* Create an identical bind (in or out, depending on the argument intrinsic)  *
- * with different src (true) or dst (false) port                              */
-IntrinsicInst* FuseHPVMTensorNodes::createIdenticalBindWithDifferentPort(
-                               IntrinsicInst* II, unsigned port, bool srcport) {
-  // Argument of the function to be called
-  ConstantInt* PortConstant =
-    ConstantInt::get(Type::getInt32Ty(II->getContext()), port);
-  Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(1);
-  Value* DstPort = (srcport) ? II->getArgOperand(2): PortConstant;
-
-  Value* BindArgs[] = {II->getArgOperand(0),
-                       SrcPort,
-                       DstPort,
-                       II->getArgOperand(3)
-                      };
-  Function* BindF = II->getCalledFunction();
-  CallInst* BindInst = CallInst::Create(BindF,
-                                        ArrayRef<Value*>(BindArgs, 4),
-                                        "");
-  IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst);
-
-  return newII;
-}
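-// e.g. with srcport == false, the clone differs from II only in its
-// destination port operand; with srcport == true, only in its source port.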
-
-/* Given two createNode intrinsics describing connected nodes, this function  *
- * returns the argument list type of the fused function                       */
-void FuseHPVMTensorNodes::createArgTypes(IntrinsicInst* II1,
-                                         IntrinsicInst* II2,
-                                         std::vector<Type*> &ArgTypes) {
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
-  // Arguments of the first node are simply added
-  for(auto& arg: F1->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    ArgTypes.push_back(arg.getType());
-  }
-
-  // Arguments of the second node are added only if they are not the output of
-  // the previous node
-
-  // Find all incoming edges.
-  std::vector<IntrinsicInst *> IncomingEdgeList;
-  getIncomingEdgeIntrinsicList(II2, IncomingEdgeList);
-
-  // Their source must be the first fusion node, otherwise they would not have
-  // been fusion candidates
-  for (IntrinsicInst *ii : IncomingEdgeList) {
-    assert((ii->getOperand(0) == II1) && "Unexpected source operand\n");
-  }
-
-  // Add argument type to the new function only if it is not incoming from
-  // an edge 
-  for(auto& arg: F2->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    unsigned inport = arg.getArgNo();
-    if (isIncomingEdgeArgument(inport, IncomingEdgeList))
-      continue;
-    ArgTypes.push_back(arg.getType());
-  }
-}
-
-/* Get the return type of the function for fused node II1-II2                 */
-StructType* FuseHPVMTensorNodes::createReturnType(IntrinsicInst* II1,
-                                                  IntrinsicInst* II2) {
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
-  // Based on the HPVM tensor node assumptions and the patterns we want to
-  // support, when two nodes are fused the result will always be the result
-  // of the second node.
-  StructType* F1RetTy = dyn_cast<StructType>(F1->getReturnType());
-  assert(F1RetTy && "Return Type must always be a struct");
-  StructType* F2RetTy = dyn_cast<StructType>(F2->getReturnType());
-  assert(F2RetTy && "Return Type must always be a struct");
-
-  return F2RetTy;
-}
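-// Note: F1's return type is only checked for being a struct; the fused node
-// returns F2's struct, since only the second node's outputs leave the fused
-// pattern.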
-
-/* Copy argument names, from functions of II1 and II2 to F                    */
-void FuseHPVMTensorNodes::copyArgumentNames(IntrinsicInst* II1,
-                                            IntrinsicInst* II2,
-                                            Function* F) {
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
-  Function::arg_iterator dest_it = F->arg_begin();
-
-  // Argument names of the first node are simply copied
-  for(auto& arg: F1->getArgumentList()) {
-    dest_it->setName("s_" + arg.getName());
-    dest_it++;
-  }
-
-  // For the second node, we ignore those arguments that are incoming edges
-  // (from II1)
-  // Find all incoming edges.
-  std::vector<IntrinsicInst *> IncomingEdgeList;
-  getIncomingEdgeIntrinsicList(II2, IncomingEdgeList);
-
-  // Their source must be the first fusion node, otherwise they would not have
-  // been fusion candidates
-  for (IntrinsicInst *ii : IncomingEdgeList) {
-    assert((ii->getOperand(0) == II1) && "Unexpected source operand\n");
-  }
-
-  // Copy argument name to the new function only if it is not incoming from
-  // an edge 
-  for(auto& arg: F2->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    unsigned inport = arg.getArgNo();
-    if (isIncomingEdgeArgument(inport, IncomingEdgeList))
-      continue;
-
-    dest_it->setName("d_" + arg.getName());
-    dest_it++;
-  }
-  assert((dest_it == F->arg_end()) &&
-         "Argument list of fused function not fully traversed\n");
-  return;
-}
-
-/* Copy attributes, from functions of II1 and II2 to F                        */
-void FuseHPVMTensorNodes::copyAttrList(IntrinsicInst* II1,
-                                       IntrinsicInst* II2,
-                                       Function* F) {
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
-  Function::arg_iterator f1_ai = F1->arg_begin(), f1_ae = F1->arg_end();
-  Function::arg_iterator f2_ai = F2->arg_begin(), f2_ae = F2->arg_end();
-  Function::arg_iterator f_ai = F->arg_begin(), f_ae = F->arg_end();
-
-  // For the second node, we have to ignore the arguments that are incoming
-  // edges (from II1)
-  // Find all incoming edges.
-  std::vector<IntrinsicInst *> IncomingEdgeList;
-  getIncomingEdgeIntrinsicList(II2, IncomingEdgeList);
-
-  // Their source must be the first fusion node, otherwise they would not have
-  // been fusion candidates
-  for (IntrinsicInst *ii : IncomingEdgeList) {
-    assert((ii->getOperand(0) == II1) && "Unexpected source operand\n");
-  }
-
-  // Copy attributes of F1
-  for(; f1_ai != f1_ae && f_ai != f_ae; ++f1_ai, ++f_ai) {
-    AttributeSet AS = F1->getAttributes();
-    DEBUG(errs() << "Copying attributes from "
-                 << F1->getName() << " at " << f1_ai->getArgNo() << "\n");
-    AttrBuilder AB(AS, f1_ai->getArgNo()+1);
-    AttributeSet argAS = AttributeSet::get(F1->getContext(),
-                                           f_ai->getArgNo()+1, AB);
-    F->addAttributes(f_ai->getArgNo()+1, argAS);
-  }
-
-  // Copy needed attributes of F2
-  for(; f2_ai != f2_ae && f_ai != f_ae; ++f2_ai) {
-    unsigned inport = f2_ai->getArgNo();
-    if (isIncomingEdgeArgument(inport, IncomingEdgeList))
-      continue;
-
-    AttributeSet AS = F2->getAttributes();
-    DEBUG(errs() << "Copying attributes from "
-                 << F2->getName() << " at " << f2_ai->getArgNo() << "\n");
-    AttrBuilder AB(AS, f2_ai->getArgNo()+1);
-    AttributeSet argAS = AttributeSet::get(F2->getContext(),
-                                           f_ai->getArgNo()+1, AB);
-    F->addAttributes(f_ai->getArgNo()+1, argAS);
-    ++f_ai;
-  }
-  return;
-}
-
-/* Creates and inserts an empty function of the right type for the fused node */
-Function* FuseHPVMTensorNodes::createEmptyDFNodeFunction(IntrinsicInst* II1,
-                                                         IntrinsicInst* II2,
-                                                         Module &M) {
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
-  DEBUG(errs () << "Constructing argument list\n");
- // Construct argument list
-  std::vector<Type*> ArgTypes;
-  createArgTypes(II1, II2, ArgTypes);
-
-  DEBUG(errs () << "Constructing return type\n");
-  // Construct return type
-  StructType* FRetTy = createReturnType(II1, II2);
-
-  FunctionType* FTy = FunctionType::get(FRetTy, ArgTypes, false);
-  // Create a function with the new type
-  Function* F = Function::Create(FTy, F1->getLinkage(),
-                                 F1->getName()+"_"+F2->getName(), &M);
-
-  DEBUG(errs () << "Copying argument names\n");
-  // Copy argument names from original functions
-  copyArgumentNames(II1, II2, F);
-  // Copy argument attributes from original functions
-  copyAttrList(II1, II2, F);
-
-  return F;
-}
-
-/* Inline first node function, updating required mappings                     *
- * - F1: first node function                                                  *
- * - M:  module containing the node function                                  *
- * - Ffused: fused node function                                              *
- * - VMap: maps values used in the body of F1 to those that must be used in   *
-           the body of the fused function instead                             *
- * OutVs: This maps the output struct field index to the stored value         */
-void FuseHPVMTensorNodes::inlineFirstNodeFunction(Module &M, Function *F1,
-                                                 Function *Ffused,
-                                                 ValueMap<Value*, Value*> &VMap,
-                                                 std::vector<Value*> &OutVs) {
-
-  ReturnInst *RI = cast<ReturnInst>(Ffused->getEntryBlock().getTerminator());
-
-  inst_iterator f1_i = inst_begin(F1);
-  // First, we copy the HPVM intrinsics of F1 into Ffused, applying the mapping
-  for (inst_iterator f1_e = inst_end(F1); f1_i != f1_e; ++f1_i) {
-    Instruction *I = &(*f1_i);
-    if (!(BuildDFG::isViscIntrinsic(I))) {
-      // We are done with the node computation
-      break;
-    }
-
-    IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-    assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
-      && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
-    std::vector<Value*> Args;
-    for(unsigned i = 0; i < II->getNumArgOperands(); i++) {
-      Value *V = II->getArgOperand(i);
-      if (isa<Constant>(V)) { // Constants can be reused
-        Args.push_back(V);
-      } else {
-        assert((VMap.find(V) != VMap.end()) &&
-              "Attempted to use value without existing mapping in VMap");
-        Args.push_back(VMap[V]);
-      }
-    }
-    Function *F = Intrinsic::getDeclaration(&M, II->getIntrinsicID());
-    CallInst* CI =
-      CallInst::Create(F, Args,
-                       F->getReturnType()->isVoidTy()? "" : "s_"+II->getName(), RI);
-    // Update the map with the newly created value
-    VMap[II] = CI;
-  }
-
-  // We continue with gathering information about the return values
-  for (inst_iterator f1_e = inst_end(F1); f1_i != f1_e; ++f1_i) {
-    Instruction *I = &(*f1_i);
-    InsertValueInst* IV = dyn_cast<InsertValueInst>(I);
-    if (!IV) {
-      // End of insertvalue instructions. This should be a return statement
-      assert((dyn_cast<ReturnInst>(I)) && "Unexpected Instruction\n");
-      break; // Done processing this function
-    }
-    OutVs.push_back(IV->getOperand(1));
-  }
-  return;
-}
-
-/* Inline second node function, updating required mappings                    *
- * - F2: second node function                                                 *
- * - M:  module containing the node function                                  *
- * - Ffused: fused node function                                              *
- * - VMap: maps values used in the body of F2 to those that must be used in   *
-           the body of the fused function instead                             */
-void FuseHPVMTensorNodes::inlineSecondNodeFunction(Module &M, Function *F2,
-                             Function *Ffused, ValueMap<Value*, Value*> &VMap) {
-
-  ReturnInst *RI = cast<ReturnInst>(Ffused->getEntryBlock().getTerminator());
-
-  // Copy the body of F2 into Ffused, applying the mapping
-  inst_iterator f2_i = inst_begin(F2);
-  for (inst_iterator f2_e = inst_end(F2); f2_i != f2_e; ++f2_i) {
-    Instruction *I = &(*f2_i);
-    if ((BuildDFG::isViscIntrinsic(I))) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
-        && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
-      std::vector<Value*> Args;
-      for(unsigned i = 0; i < II->getNumArgOperands(); i++) {
-        Value *V = II->getArgOperand(i);
-        if (isa<Constant>(V)) { // Constants can be reused 
-          Args.push_back(V);
-        } else {
-          assert((VMap.find(V) != VMap.end()) &&
-                "Attempted to use value without existing mapping in VMap");
-          Args.push_back(VMap[V]);
-        }
-      }
-      Function *F = Intrinsic::getDeclaration(&M, II->getIntrinsicID());
-      CallInst* CI =
-        CallInst::Create(F, Args,
-                         F->getReturnType()->isVoidTy()? "" : II->getName(),
-                         RI);
-      // Update the map with the newly created value
-      VMap[II] = CI;
-    } else if (InsertValueInst* IV = dyn_cast<InsertValueInst>(I)) {
-      Value *AggOp = IV->getAggregateOperand();
-      Value *InsOp = IV->getInsertedValueOperand();
-      assert(((VMap.find(AggOp) != VMap.end()) ||
-              (isa<Constant>(AggOp)) ) &&
-            "Attempted to use value without existing mapping in VMap");
-      assert(((VMap.find(InsOp) != VMap.end()) ||
-             (isa<Constant>(InsOp))) &&
-            "Attempted to use value without existing mapping in VMap");
-      InsertValueInst* IVI = InsertValueInst::Create(
-        (isa<Constant>(AggOp)) ? AggOp : VMap[AggOp],
-        (isa<Constant>(InsOp)) ? InsOp : VMap[InsOp],
-        IV->getIndices(),
-        IV->getName(),
-        RI);
-      // Update the map with the newly created value
-      VMap[IV] = IVI;
-    } else {
-      ReturnInst* RetI = dyn_cast<ReturnInst>(I);
-      assert(RetI && "Unexpected Instruction\n");
-      Value *RetVal = RetI->getOperand(0);
-      ReturnInst *newRI = ReturnInst::Create(Ffused->getContext(),
-                                             VMap[RetVal]);
-      ReplaceInstWithInst(RI, newRI);
-    }
-  }
-  return;
-}
-
-/* Create function of leaf node after fusion                                  *
- * - create type                                                              *
- * - create empty function of the type                                        *
- * - inline body of first function (applying and updating appropriate         *
- *   mappings)                                                                *
- * - inline body of second function (applying and updating appropriate        *
- *   mappings)                                                                */
-Function* FuseHPVMTensorNodes::createLeafDFNodeFunction(IntrinsicInst* II1,
-                                                        IntrinsicInst* II2,
-                                                        Module &M) {
-  DEBUG(errs () << "Creating function signature\n");
-
-  /* Create empty node function of the correct type */
-  Function* Ffused = createEmptyDFNodeFunction(II1, II2, M);
-
-  // Get return type, needed for building the assignments to the return struct
-  StructType* FfusedRetTy = cast<StructType>(Ffused->getReturnType());
-
-  /* Mapping information required for using the correct values in the body of *
-   * the fused node function                                                  */
-
-  // This map maps the values used in the original function bodies with
-  // the ones that need to be used in the fused function body.
-  ValueMap<Value*, Value*> FusedValueMap;
-
-  // Intermediate information saved for return values of first node function
-  // This maps the output port to the value returned through the outgoing edge
-  std::vector<Value*> OutValues;
-
-  DEBUG(errs () << "Creating function body\n");
-
-  // Add a basic block to the new, empty function
-  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", Ffused);
-  ReturnInst::Create(M.getContext(), UndefValue::get(FfusedRetTy), BB);
-
-  // Get the node functions
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
-  // Initially, update FusedValueMap: it is populated with the arguments of F1
-  Function::arg_iterator fused_arg_it = Ffused->arg_begin();
-  // Argument names of the first node are simply copied
-  for(auto& arg: F1->getArgumentList()) {
-    FusedValueMap[&arg] = &*fused_arg_it;
-    ++fused_arg_it;
-  }
-
-//  for(const auto& v: FusedValueMap) {
-//    errs() << "key = " << *(v.first) << "\t";
-//    errs() << "value = " << *(v.second) << "\n";
-//  }
-
-  // Invoke function that inlines F1 into Ffused, using and updating mappings
-  inlineFirstNodeFunction(M, F1, Ffused, FusedValueMap, OutValues);
-
-  // Compute mapping between inputs of F2 and outputs of F1
-  std::vector<IntrinsicInst *> IncomingEdgeList;
-  getIncomingEdgeIntrinsicList(II2, IncomingEdgeList);
-  std::vector<unsigned> PortMap(F2->getFunctionType()->getNumParams(), 0);
-  for (IntrinsicInst * ii : IncomingEdgeList) {
-    unsigned srcPort = getNumericValue(ii->getOperand(3));
-    unsigned dstPort = getNumericValue(ii->getOperand(4));
-    PortMap[dstPort] = srcPort;
-  }
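-  // PortMap[d] == s means input port d of F2 is fed by output field s of F1;
-  // e.g. an edge with srcPort 0 and dstPort 1 routes the first returned
-  // struct field of F1 into F2's second argument.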
-
-  // FusedValueMap is now populated with the arguments of F2 as well
-  for(auto& arg: F2->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    unsigned inport = arg.getArgNo();
-    if (isIncomingEdgeArgument(inport, IncomingEdgeList)) {
-      // Get the mappings of the return values of F1 if incoming edge argument
-      Value *V = OutValues[PortMap[inport]];
-      FusedValueMap[&arg] = (isa<Constant>(V)) ? V: FusedValueMap[V];
-    }
-    else {
-      // Get new argument otherwise
-      FusedValueMap[&arg] = &*fused_arg_it;
-      ++fused_arg_it;
-    }
-  }
-
-  // Invoke function that inlines F2 into Ffused, using and updating mappings
-  inlineSecondNodeFunction(M, F2, Ffused, FusedValueMap);
-
-  // Done with fused node function
-  return Ffused;
-}
-
-/* Updates parent of fused nodes to use the new node intrinsic                */
-void FuseHPVMTensorNodes::updateParentNodeFunction(IntrinsicInst* II1,
-                                                   IntrinsicInst* II2,
-                                                   IntrinsicInst* IInew) {
-
-  // Compute the required shifting of positions for edges/binds to the second
-  // fusion node. No shifting is required for the first fusion node.
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-  std::vector<unsigned> ShiftMap(F2->getFunctionType()->getNumParams(), 0);
-  unsigned shiftCount = F1->getFunctionType()->getNumParams();
-
-  // Find all incoming edges.
-  std::vector<IntrinsicInst *> IncomingEdgeList;
-  getIncomingEdgeIntrinsicList(II2, IncomingEdgeList);
-  // Their source must be the first fusion node, otherwise they would not have
-  // been fusion candidates
-  for (IntrinsicInst *ii : IncomingEdgeList) {
-    assert((ii->getOperand(0) == II1) && "Unexpected source operand\n");
-  }
-
-  // Compute shift map for n2: maps position in F2 arg list to Ffused arg list 
-  for(auto& arg: F2->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    unsigned inport = arg.getArgNo();
-    if (isIncomingEdgeArgument(inport, IncomingEdgeList))
-      continue;
-
-    ShiftMap[inport] = shiftCount;
-    shiftCount++;
-  }
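-  // Example (hypothetical arities): if F1 takes 4 parameters and F2's ports
-  // 0 and 1 are fed by incoming edges, the remaining ports map into the fused
-  // argument list as ShiftMap[2] = 4 and ShiftMap[3] = 5.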
-
-  std::vector<IntrinsicInst*> IItoRemove;
-
-  // First, iterate over uses of the first node's createNode intrinsic
-  for (Value::user_iterator i = II1->user_begin(), ie = II1->user_end();
-       i != ie; ++i) {
-    Instruction *VI = dyn_cast<Instruction>(*i);
-    IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI);
-    assert(II && "Use of a node handle outside of a visc intrinsic\n");
-
-    switch(II->getIntrinsicID()) {
-      case Intrinsic::visc_createEdge:
-        {
-        if (isOutgoingEdgeIntrinsic(II,II1)) {
-          assert(isIncomingEdgeIntrinsic(II,II2) &&
-                 "Outgoing edge of node 1 should only go to node 2\n");
-          IItoRemove.push_back(II);
-        }
-        }
-        break;
-      case Intrinsic::visc_bind_input:
-        {
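-        // Nothing to do: bind.in ports of the first node keep their
-        // positions in the fused function's argument list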
-        }
-        break;
-      case Intrinsic::visc_bind_output:
-        {
-          assert(false &&
-                 "Source node of node fusion not expected in bind.out\n");
-        }
-        break;
-      default:
-        llvm_unreachable("Unknown use of HPVM createNode handle\n");
-        break;
-    }
-  }
-
-  // Delete gathered instructions - they are the edges between n1-n2
-  for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(),
-       ie = IItoRemove.end(); ib != ie; ++ib) {
-    DEBUG(errs() << "Erasing: " << **ib << "\n");
-    (*ib)->eraseFromParent();
-  }
-  II1->replaceAllUsesWith(IInew);
-  II1->eraseFromParent();
-
-  IItoRemove.clear();
-
-  // Then, iterate over uses of the second node's createNode intrinsic
-  for (Value::user_iterator i = II2->user_begin(), ie = II2->user_end();
-       i != ie; ++i) {
-    Instruction *VI = dyn_cast<Instruction>(*i);
-    IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI);
-    assert(II && "Use of a node handle outside of a visc intrinsic\n");
-
-    switch(II->getIntrinsicID()) {
-      case Intrinsic::visc_createEdge:
-        {
-        assert(isOutgoingEdgeIntrinsic(II,II2) &&
-               "Node 2 is expected to have only outgoing edges at this point\n");
-        }
-        break;
-      case Intrinsic::visc_bind_input:
-        {
-        /* The index must be updated to the matching argument position of *
-         * the fused function, using ShiftMap                             */
-        unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-        IntrinsicInst *newII =
-          createIdenticalBindWithDifferentPort(II,
-                                               ShiftMap[dstPos],
-                                               false);
-        newII->insertBefore(II);
-        IItoRemove.push_back(II);
-        }
-        break;
-      case Intrinsic::visc_bind_output:
-        {
-          assert(false &&
-                 "Source node of node fusion not expected in bind.out\n");
-        }
-        break;
-      default:
-        llvm_unreachable("Unknown use of HPVM createNode handle\n");
-        break;
-    }
-  }
-
-  // Delete gathered instructions - they are the old bindings of n2
-  for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(),
-       ie = IItoRemove.end(); ib != ie; ++ib) {
-    DEBUG(errs() << "Erasing: " << **ib << "\n");
-    (*ib)->eraseFromParent();
-  }
-  II2->replaceAllUsesWith(IInew);
-  II2->eraseFromParent();
-
-  return;
-}
-
-/* Performs all operations required at the IR level for fusion of HPVM tensor *
- * nodes with intrinsic instructions II1 and II2                              *
- * - Creates fused node function                                              *
- * - Creates createNode intrinsic for it and returns it                       *
- * - Updates parent function:                                                 *
- * - - adds new intrinsic                                                     *
- * - - edges and binds consistently use the new intrinsic                     *
- * - Removes old functions                                                    */
-IntrinsicInst* FuseHPVMTensorNodes::FuseHPVMTensorNodesStep(IntrinsicInst* II1,
-                                                            IntrinsicInst* II2,
-                                                            Module &M) {
-  // Get the node functions
-  Function* F1 = cast<Function>((II1->getOperand(0))->stripPointerCasts());
-  Function* F2 = cast<Function>((II2->getOperand(0))->stripPointerCasts());
-
-  // Create fused node function
-  Function *Ffused = createLeafDFNodeFunction(II1, II2, M);
-  addHint(Ffused, getPreferredTarget(F1));
-
-  // FIX PARENT DFNode'S FUNCTION
-
-  // Generate createNode Intrinsic for fused node and insert it
-  Function* CreateNodeF = Intrinsic::getDeclaration(&M,
-                                                    Intrinsic::visc_createNode);
-  Constant* Fp = ConstantExpr::getPointerCast(Ffused,
-                                          Type::getInt8PtrTy(M.getContext()));
-  CallInst *CI = CallInst::Create(CreateNodeF,
-                                  ArrayRef<Value*>(Fp),
-                                  Ffused->getName()+".node");
-  IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI);
-  CreateNodeII->insertBefore(II1);
-
-  // By the assumptions about the fusion pattern structure, all edges that have
-  // II1 as source will have II2 as destination and vice versa.
-  // We can simply delete them.
-
-  // All createEdge intrinsics with destination argument = II1 need to use
-  // CreateNodeII instead.
-  // Similarly with bind.in
-
-  // All createEdge intrinsics with source argument = II1 need to use
-  // CreateNodeII instead
-  // Similarly with bind.out
-
-  // By the assumptions about the fusion pattern structure, the first node
-  // cannot be the argument of a bind.out
-  // The second node can be the argument of a bind.in.
-  // For the bind.in, we need to adjust the destination port.
-  updateParentNodeFunction(II1, II2, CreateNodeII);
-
-  // Remove old node functions
-  removeHint(F1, getPreferredTarget(F1));
-  removeHint(F2, getPreferredTarget(F2));
-  F1->replaceAllUsesWith(UndefValue::get(F1->getType()));
-  F1->eraseFromParent();
-  F2->replaceAllUsesWith(UndefValue::get(F2->getType()));
-  F2->eraseFromParent();
-
-  return CreateNodeII;
-}
-
-/* Fuse node sequence described by createNode intrinsics in IIs.              *
- * Contents of IIs are cleared.                                               */
-void FuseHPVMTensorNodes::FuseHPVMTensorNodeSequence(
-                                  std::vector<IntrinsicInst*> &IIs, Module &M) {
-  for (IntrinsicInst *II : IIs) {
-    assert((II->getIntrinsicID() == Intrinsic::visc_createNode) &&
-           "Expected createNode intrinsic in fuse intrinsic sequence\n");
-  }
-
-  if (IIs.size() < 2) {
-    errs() << "Warning: Attempted to fuse fewer than 2 nodes\n";
-    return;
-  }
-
-  for (unsigned i = 0; i + 1 < IIs.size(); i++) {
-    IntrinsicInst *II1 = IIs[i];
-    IntrinsicInst *II2 = IIs[i+1];
-    IIs[i+1] = FuseHPVMTensorNodesStep(II1, II2, M);
-  }
-  IIs.clear();
-  return;
-}
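-
-// For example, a three-node sequence [A, B, C] is fused pairwise: the loop
-// above first fuses A and B, stores the resulting createNode intrinsic in
-// IIs[1], and then fuses that combined node with C.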
-
-/* Run method for the FuseHPVMTensorNodes class: invokes fusion of all the   *
- * sequences in member variable FTs.                                          */
-void FuseHPVMTensorNodes::run(Module &M, FusionTargets &FTs) {
-  for (unsigned i = 0; i < FTs.size(); i++) {
-    FuseHPVMTensorNodeSequence(FTs[i], M);
-  }
-  return;
-}
-
-// Print fusion targets. The argument vector contains the createNode
-// intrinsics of the nodes to be fused.
-void FuseHPVMTensorNodes::printFusionTargets(FusionTargets &FTs) {
-  errs() << "Print Fusion Targets\n";
-  errs() << "Found " << FTs.size() << " targets\n";
-  for (FuseHPVMTensorNodes::FusionTargets::iterator ii = FTs.begin(),
-       ie = FTs.end(); ii != ie ; ++ii) {
-    errs() << "Target:\n";
-    std::vector<IntrinsicInst*> IIv = *ii;
-    for (std::vector< IntrinsicInst*>::iterator pi = IIv.begin(),
-         pe = IIv.end(); pi != pe; ++pi) {
-      errs() << "\t" << *((*pi)->getOperand(0)) << "\n";
-    }
-  }
-  return;
-}
-
-void FindFusionTargetsTraversal::codeGen(DFInternalNode *N) {
-  DEBUG(errs() << "Skipping Internal Node: "
-               << N->getFuncPointer()->getName() << "\n");
-  return;
-}
-
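-// The leaf-node visitor below recognizes two fusion patterns, each anchored
-// at a tensor intrinsic:
-//   conv -> add [-> relu | clipped relu | tanh] [-> pool]
-//   mul  -> add [-> relu | clipped relu | tanh]
-// All nodes in a sequence must share the starting node's preferred target.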
-void FindFusionTargetsTraversal::codeGen(DFLeafNode *N) {
-  DEBUG(errs() << "Inside leaf node: "
-               << N->getFuncPointer()->getName() << "\n");
-
-  // Skip fusion check if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-//  if(N->getTargetHint() != visc::PROMISE_TARGET) {
-  if(!preferredTargetIncludes(N, visc::PROMISE_TARGET)) {
-    // Only fuse if we plan to target PROMISE
-    // The CUDNN backend would be able to generate calls for the fused node,
-    // but not the other way around
-    DEBUG(errs() << "No PROMISE hint. Skipping node: "
-                 << N->getFuncPointer()->getName() << "\n");
-    return;
-  }
-
-  visc::Target StartNodePreferredTarget = getPreferredTarget(N);
-  // Make sure that this is a valid HPVM Tensor Node
-  // Find first instruction, and check that it is an HPVM tensor intrinsic
-  IntrinsicInst *II = isValidHPVMTensorNode(N);
-
-  std::vector<IntrinsicInst*> CurrentNodeSequence;
-
-  switch(II->getIntrinsicID()) {
-    case Intrinsic::visc_tensor_convolution:
-      { // Found beginning of pattern conv-bias-activation-pooling.
-        // Look for the rest
-        CurrentNodeSequence.push_back(N->getInstruction());
-
-        // Look for bias
-        DFNode *SN = findNextNodeInSequence(N);
-        if (!SN) {
-          return; // Did not find a node sequence starting at N. Simply return.
-        }
-        if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-          return; // Node in sequence has different hint. Simply return.
-        }
-        IntrinsicInst *SII = isValidHPVMTensorNode(SN);
-        if (SII->getIntrinsicID() != Intrinsic::visc_tensor_add) {
-          // Successor is not the bias operation, thus does not fit the pattern.
-          return;
-        }
-        // Otherwise, push this node to the current sequence
-        CurrentNodeSequence.push_back(SN->getInstruction());
-
-        // This is a valid sequence.
-        // We still need to fuse activation and/or pooling if we find them 
-        // Continue with next node, looking for activation (relu, clipped relu, tanh)
-        SN = findNextNodeInSequence(SN);
-        if (!SN) {
-          // No successor node found. Use the current sequence.
-          break;
-        }
-        if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-          break; // Node in sequence has different hint. Use current sequence.
-        }
-        SII = isValidHPVMTensorNode(SN);
-
-        if ((SII->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) ||
-            (SII->getIntrinsicID() == Intrinsic::visc_tensor_relu) ||
-            (SII->getIntrinsicID() == Intrinsic::visc_tensor_tanh)) {
-          // Successor is activation. Push this node to the current sequence.
-          CurrentNodeSequence.push_back(SN->getInstruction());
-
-          // Will continue, looking for pooling in the next node
-          SN = findNextNodeInSequence(SN);
-          if (!SN) {
-            break; // No node in sequence. Use currently found sequence.
-          }
-          if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-            break; // Node in sequence has different hint. Use current sequence.
-          }
-          SII = isValidHPVMTensorNode(SN);
-        } // else: no activation found; check this same node for pooling below
-
-        if ((SII->getIntrinsicID() == Intrinsic::visc_tensor_pool_max) ||
-            (SII->getIntrinsicID() == Intrinsic::visc_tensor_pool_min) ||
-            (SII->getIntrinsicID() == Intrinsic::visc_tensor_pool_mean)) {
-          // Successor is a pool operation. Push this node to the current sequence.
-          CurrentNodeSequence.push_back(SN->getInstruction());
-        }
-      }
-      break;
-    case Intrinsic::visc_tensor_mul:
-      { // Found beginning of pattern gemm-bias-activation. Look for the rest
-        CurrentNodeSequence.push_back(N->getInstruction());
-        // Look for bias
-        DFNode *SN = findNextNodeInSequence(N);
-        if (!SN) {
-          return; // Did not find a node sequence starting at N. Simply return.
-        }
-        if (getPreferredTarget(SN) != StartNodePreferredTarget) {
-          return; // Node in sequence has different hint. Simply return.
-        }
-        IntrinsicInst *SII = isValidHPVMTensorNode(SN);
-        if (SII->getIntrinsicID() != Intrinsic::visc_tensor_add) {
-          // Successor is not the bias operation, thus does not fit the pattern.
-          return;
-        }
-        // Otherwise, push this node to the current sequence
-        CurrentNodeSequence.push_back(SN->getInstruction());
-        // This is a possible fuse target, gemm-add.
-        // We need to reach the end of the function, where the found sequence
-        // is added.
-
-        // If the next operation is activation, we fuse that as well.
-        // Continue with next node, looking for activation (relu, clipped relu, tanh)
-        SN = findNextNodeInSequence(SN);
-        if (SN) {
-          if (getPreferredTarget(SN) == StartNodePreferredTarget) {
-            SII = isValidHPVMTensorNode(SN);
-            if ((SII->getIntrinsicID() == Intrinsic::visc_tensor_clipped_relu) ||
-                (SII->getIntrinsicID() == Intrinsic::visc_tensor_relu) ||
-                (SII->getIntrinsicID() == Intrinsic::visc_tensor_tanh)) {
-              // We found activation in sequence. Push in vector as well.
-              CurrentNodeSequence.push_back(SN->getInstruction());
-            }
-          }
-        }
-      }
-      break;
-    default:
-      DEBUG(errs() << "No pattern begins at this node\n");
-      break;
-  }
-
-  if (CurrentNodeSequence.size() != 0) {
-    // A sequence was found. Store the node sequence in FTs.
-    FTs.push_back(CurrentNodeSequence);
-  }
-
-  return;
-}
-
-bool FuseHPVMTensorNodesWrapper::runOnModule(Module &M) {
-  errs() << "\nFUSE HPVM TENSOR NODES PASS\n";
-
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
-  // Visitor for Fuse Target Detection Graph Traversal
-  FindFusionTargetsTraversal *FTTVisitor =
-                                         new FindFusionTargetsTraversal(M, DFG);
-
-  errs() << "Find targets\n";
-  // Iterate over all the DFGs and look for fusion targets in each of them
-  for (auto rootNode: Roots) {
-    // Initiate fusion target detection from the root DFNode
-    FTTVisitor->visit(rootNode);
-  }
-
-  FuseHPVMTensorNodes::FusionTargets &FTs = FTTVisitor->getFusionTargets();
-
-  FuseHPVMTensorNodes Fuse;
-//  Fuse.printFusionTargets(FTs);
-
-  Fuse.run(M, FTs);
-
-  delete FTTVisitor;
-
-  return true;
-}
-
-char FuseHPVMTensorNodesWrapper::ID = 0;
-static RegisterPass<FuseHPVMTensorNodesWrapper> X("hpvm-fuse",
-  "Fuse HPVM Tensor Nodes Pass",
-  false /* CFGOnly: pass looks at more than just the CFG */,
-  false /* is_analysis: this is a transformation pass */);
-
-} // End of namespace
-
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports b/llvm/lib/Transforms/FuseHPVMTensorNodes/FuseHPVMTensorNodes.exports
deleted file mode 100644
index e69de29bb2..0000000000
diff --git a/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt b/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt
deleted file mode 100644
index 55a6ee5150..0000000000
--- a/llvm/lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/FuseHPVMTensorNodes/LLVMBuild.txt -------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = FuseHPVMTensorNodes
-parent = Transforms
diff --git a/llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt b/llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt
deleted file mode 100644
index 2b6d41bd70..0000000000
--- a/llvm/lib/Transforms/InsertApproxInfo/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( InsertApproxInfo
-  InsertApproxInfo.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp b/llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp
deleted file mode 100644
index bde4ef8907..0000000000
--- a/llvm/lib/Transforms/InsertApproxInfo/InsertApproxInfo.cpp
+++ /dev/null
@@ -1,498 +0,0 @@
-//===------------------------- InsertApproxInfo.cpp ----------------------===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "InsertApproxInfo"
-
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/IR/InstrTypes.h"
-#include <unordered_map>
-#include <dirent.h>
-#include <stdio.h>
-#include <sstream>
-#include <fstream>
-
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-using namespace inplacedfg;
-
-
-namespace {
-
-static cl::opt<std::string> dir_name("results-dir", cl::desc("Name of directory with autotuner results"));
-
-
-struct ApproxMetrics{
-  std::string op_name;
-  std::string category;
-  unsigned int rank; // rank given by autotuner
-  double approx_level;
-  // Relative L-norm metrics
-  double relative_l1;
-  double relative_l2;
-  double relative_linf;
-  // Mean L-norm metrics
-  double mean_l1;
-  double mean_l2;
-  double mean_linf;
-};    
-
-  
-  
-struct InsertApproxInfoWrapperPass : public ModulePass {
-  static char ID; // Pass identification, replacement for typeid
-  InsertApproxInfoWrapperPass() : ModulePass(ID) {}
-    
-public:
-  // Functions
-  bool runOnModule(Module &M);
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-};
-
-
-// Visitor for Code generation traversal (tree traversal for now)
-class InsertApproxInfo : public CodeGenTraversal {
-
-private:
-  // Virtual Functions
-  void init() {}
-  void initRuntimeAPI() {}
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-  void loadTrainedApproxMetrics(std::string dir_path);
-  void loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category);
-  void loadMetricsFromDir(std::string dir_path, std::string category);
-  void readApproxValues(const std::string line, ApproxMetrics* approx_metrics);
-  void initIntrinsicNames();
-  void initGlobalStrings();
-
-  // private data
-  std::unordered_map<std::string, std::string> intrinsics_map;
-  std::unordered_map<std::string, std::vector<ApproxMetrics*>> operation_metrics;
-  GlobalVariable* rank_str;
-  GlobalVariable* category_str;
-  GlobalVariable* mean_l1_str;
-  GlobalVariable* mean_l2_str;
-  GlobalVariable* mean_linf_str;
-  GlobalVariable* rel_l1_str;
-  GlobalVariable* rel_l2_str;
-  GlobalVariable* rel_linf_str;
-
-
-  // Tracks the id of the tensor op processed
-  unsigned int currentID;
-
-public:
-  // Constructor
-  InsertApproxInfo(Module &_M, BuildDFG &_DFG);
-  
-  //void run(Module &M, BuildDFG &DFG);
-  void run(std::string dir_path);
-
-};
-
-
-
-void InsertApproxInfo::initIntrinsicNames(){
-
-  intrinsics_map["llvm.visc.tensor.convolution"] = "tensorConv";
-  intrinsics_map["llvm.visc.tensor.mul"] = "tensorGemm";
-  intrinsics_map["llvm.visc.tensor.add"] = "tensorAdd";
-  intrinsics_map["llvm.visc.tensor.pool.max"] = "tensorPooling";
-  intrinsics_map["llvm.visc.tensor.tanh"] = "tensorTanh";  
-}
-
-
-void InsertApproxInfo::initGlobalStrings(){
-
- /**** Creating global constant strings for each approximation metric type *******/
-
-  std::string rank_string = "rank";
-  Constant* stringConst = ConstantDataArray::getString(M.getContext(), StringRef(rank_string.c_str()), true);
-  rank_str = new GlobalVariable(M, stringConst->getType(), true,
-				GlobalValue::ExternalLinkage, stringConst, "");
-
-  std::string category_string = "category";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(category_string.c_str()), true);
-  category_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  // Mean l-norm metrics
-  std::string metric_string = "mean_l1";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  mean_l1_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "mean_l2";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  mean_l2_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "mean_linf";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  mean_linf_str = new GlobalVariable(M, stringConst->getType(), true,
-				     GlobalValue::ExternalLinkage, stringConst, "");
-
-  // Relative l-norm metrics
-  metric_string = "rel_l1";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  rel_l1_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "rel_l2";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  rel_l2_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "rel_linf";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  rel_linf_str = new GlobalVariable(M, stringConst->getType(), true,
-				     GlobalValue::ExternalLinkage, stringConst, "");
-
-}
-
-  
-InsertApproxInfo::InsertApproxInfo(Module &_M, BuildDFG &_DFG) :
-    CodeGenTraversal(_M, _DFG){
-
-  currentID = 1;
-  
-  initIntrinsicNames();
-  initGlobalStrings();   
-}
-
-
-void InsertApproxInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<BuildDFG>();
-  AU.addPreserved<BuildDFG>();
-}
-
-    
-bool InsertApproxInfoWrapperPass::runOnModule(Module &M) {
-  
-  std::string dir_path = dir_name.getValue();
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  InsertApproxInfo IApprox(M, DFG);
-  IApprox.run(dir_path);
-
-  return true; // The module is modified: tensor intrinsics gain operand bundles
-}
-
-
-void InsertApproxInfo::readApproxValues(const std::string line, ApproxMetrics* approx_metrics){
- 
-  std::istringstream in(line);
-  std::string op_name;
-
-  float approx_level;
-
-  float mean_l1;
-  float mean_l2;
-  float mean_linf;
-
-  float relative_l1;
-  float relative_l2;
-  float relative_linf;
-
-  in >> op_name;
-  in >> approx_level;
-  
-  in >> mean_l1;
-  in >> mean_l2;
-  in >> mean_linf;
-
-  in >> relative_l1;
-  in >> relative_l2;
-  in >> relative_linf;
-    
-  printf("\n *** op_name = %s \n", op_name.c_str());
-  printf("approx_level = %f \n", approx_level);
-  printf("relative_l1 = %f \n", relative_l1);
-  printf("relative_l2 = %f \n", relative_l2);
-  printf("relative_linf = %f \n", relative_linf);
-  printf("mean_l1 = %f \n", mean_l1);
-  printf("mean_l2 = %f \n", mean_l2);
-  printf("mean_linf = %f \n", mean_linf);
-
-  approx_metrics->op_name = op_name;
-  approx_metrics->approx_level = approx_level;
-  approx_metrics->mean_l1 = mean_l1;
-  approx_metrics->mean_l2 = mean_l2;
-  approx_metrics->mean_linf = mean_linf;
-  approx_metrics->relative_l1 = relative_l1;
-  approx_metrics->relative_l2 = relative_l2;
-  approx_metrics->relative_linf = relative_linf;
-   
-}
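-
-// Each metrics line is assumed to follow the field order read above, e.g.
-// (illustrative values):
-//   tensorConv1 2.0 0.01 0.02 0.10 0.001 0.002 0.015
-// i.e. op_name, approx_level, the mean l1/l2/linf norms, then the relative
-// l1/l2/linf norms.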
-
-
-unsigned int getFileRank(std::string file_path){
-
-  // Extract the numeric suffix that follows the last '_' in the file name.
-  // This avoids the fixed-size buffer and uninitialized-pointer hazards of
-  // a strcpy/strtok scan.
-  size_t pos = file_path.find_last_of('_');
-  if(pos == std::string::npos){
-    printf("Unexpected result file name %s . Aborting ... \n", file_path.c_str());
-    abort();
-  }
-
-  size_t sz;
-  int rank = std::stoi(file_path.substr(pos + 1), &sz);
-
-  return rank + 1; // NOTE: Adding 1 to start ranks with '1'
-}
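-
-// Example (hypothetical file name): "promise_conf_4" has numeric suffix 4,
-// so getFileRank returns 5.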
-
-  
-  
-void InsertApproxInfo::loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category){
-
-  std::string full_path = dir_path + "/" + file_path;
-  printf("full_path = %s \n", full_path.c_str());
-  std::ifstream infile(full_path.c_str());
-  std::string line;
-
-  unsigned int it_count = 0;
-  while(std::getline(infile, line)){
-
-    // Skip first line with confidence information
-    if(it_count > 0){
-      ApproxMetrics* approx_metrics = new ApproxMetrics;
-      readApproxValues(line, approx_metrics);
-      
-      approx_metrics->category = category;
-      unsigned int rank = getFileRank(file_path);
-      approx_metrics->rank = rank; 
-
-      std::string unique_op_name = approx_metrics->op_name + std::to_string(it_count);
-      operation_metrics[unique_op_name].push_back(approx_metrics);
-      printf("\n ** unique_op_name = %s \n", unique_op_name.c_str());     
-    }
-    
-    it_count++;
-  }
-  
-}
-
-
-  
-void InsertApproxInfo::loadMetricsFromDir(std::string dir_path, std::string category){
-
-  struct dirent* entry;
-  dir_path = dir_path + category;
-
-  DIR* dir = opendir(dir_path.c_str());
-  if(dir == NULL){
-    printf("Directory %s not found. Aborting ... \n\n", dir_path.c_str());
-    abort();
-  }
-
-  while((entry = readdir(dir)) != NULL){
-    std::string f_name = entry->d_name;
-    // Skip the special entries for the current and parent directory
-    if(f_name == "." || f_name == "..")
-      continue;
-    printf("f_name = %s \n", f_name.c_str());
-    loadMetricsFromFile(dir_path, f_name, category);
-  }
-  closedir(dir);
-}
-
-  
-  
-void InsertApproxInfo::loadTrainedApproxMetrics(std::string dir_path){
-  
-  std::string root_path = dir_path + "/high_confidence/";
-  loadMetricsFromDir(root_path, "linear");
-  loadMetricsFromDir(root_path, "log");
-  loadMetricsFromDir(root_path, "quad");
-}
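-
-// Assumed on-disk layout of the autotuner results (names are illustrative):
-//   <results-dir>/high_confidence/linear/<op>_<rank>
-//   <results-dir>/high_confidence/log/<op>_<rank>
-//   <results-dir>/high_confidence/quad/<op>_<rank>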
-
-  
-/*** Methods of InsertApproxInfo ***/
-void InsertApproxInfo::run(std::string dir_path) {
-
-  loadTrainedApproxMetrics(dir_path);
-
-  errs() << "\n NOTE: ApproxInfo INSERTION TRANSFORM \n";
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
-
-  // Iterate over all the DFGs and attach the loaded approximation metrics
-  // to the tensor operations in each of them
-  for (auto rootNode: Roots) {
-    this->visit(rootNode);
-  }
-
-  return;
-}
-
-/*** Analysis of internal node ***/
-void InsertApproxInfo::codeGen(DFInternalNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-}
-
-/*** Analysis of leaf node ***/
-void InsertApproxInfo::codeGen(DFLeafNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-  
-  Function *F = N->getFuncPointer();
-  Module* M = F->getParent();
-  std::vector<IntrinsicInst *> IItoRemove;
-  
-
-  /**** Adding operand bundles for each tensor operation in the HPVM DFG Leaf Node ****/
-  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-    Instruction *I = &(*i);
-    DEBUG(errs() << *I << "\n");
-
-
-    if (BuildDFG::isViscIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
-        && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
-      std::string intrinsic_id = II->getCalledFunction()->getName().str();
-      std::string runtime_func_name = intrinsics_map[intrinsic_id];
-      std::string unique_name = runtime_func_name + std::to_string(currentID);
-      printf("\n ---- unique_name = %s \n ", unique_name.c_str());
-      std::vector<ApproxMetrics*> approx_metrics;
-      if(operation_metrics.find(unique_name) != operation_metrics.end()){
-         approx_metrics = operation_metrics[unique_name];
-      }
-      else{
-        errs() << "Intrinsic name not found in the map - unexpected error. Aborting ... \n\n";
-        abort();
-      }
-      
-      
-      unsigned int num_configs = approx_metrics.size();
-      std::vector<OperandBundleDef> conf_bundles;
-      for(unsigned int i = 0; i < num_configs; i++){
-	std::vector<Value*> norm_vals;
-
-	norm_vals.push_back(category_str);
-	Constant* categoryConst = ConstantDataArray::getString(M->getContext(), StringRef(approx_metrics[i]->category.c_str()), true);
-        GlobalVariable* category_value = new GlobalVariable(*M, categoryConst->getType(), true,
-							    GlobalValue::ExternalLinkage, categoryConst, "");
-	norm_vals.push_back(category_value);
-
-	norm_vals.push_back(rank_str);
-	Constant* constIntVal = ConstantInt::get(Type::getInt32Ty(M->getContext()), approx_metrics[i]->rank);
-	norm_vals.push_back(constIntVal);
-
-	// Adding mean l-norm metrics
-	norm_vals.push_back(mean_l1_str);
-	Constant* constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l1);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(mean_l2_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l2);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(mean_linf_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_linf);
-	norm_vals.push_back(constFPVal);
-
-        // Relative l-norm Metrics
-	norm_vals.push_back(rel_l1_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l1);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(rel_l2_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l2);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(rel_linf_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_linf);
-	norm_vals.push_back(constFPVal);
-
-
-	std::string config_name = "config_" + std::to_string(i+1);
-	OperandBundleDef norm_bundle(config_name, norm_vals);
- 
-	conf_bundles.push_back(norm_bundle);
-      }
-
-      ArrayRef<OperandBundleDef> bundle_arr(conf_bundles);
-
-      /*** Creating new Intrinsic call with Operand Bundles attached **/
-      Function* calledFunction = II->getCalledFunction();
-      unsigned num_args = II->getNumArgOperands();
-      std::vector<Value*> args;
-      for(unsigned i = 0; i < num_args; i++){
-        Value* argValue = II->getArgOperand(i);
-	args.push_back(argValue);
-      }
-
-      CallInst* CI = CallInst::Create(calledFunction, args, bundle_arr, "", II);
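-      // The rewritten call now carries one operand bundle per configuration,
-      // e.g. (illustrative IR, made-up values):
-      //   call i8* @llvm.visc.tensor.add(i8* %t1, i8* %t2)
-      //        [ "config_1"(i8* @0, i8* @1, i8* @2, i32 2, ...) ]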
-
-      errs() << "NOTE: New CallInst = " << *CI << "\n";
-      
-      II->replaceAllUsesWith(CI);
-      // Mark to remove at the end
-      IItoRemove.push_back(II);
-      
-      // Increment counter of op processed
-      currentID++;
-    }
-  }
-
-
-  // Erase the replaced intrinsics in reverse order of collection
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri) {
-    DEBUG(errs() << "Erasing: " << **ri << "\n");
-    (*ri)->eraseFromParent();
-  }
-
-  
-}
-
-char InsertApproxInfoWrapperPass::ID = 0;
-static RegisterPass<InsertApproxInfoWrapperPass> X("insert-approxinfo",
-  "Pass to add approximation information (l-norm metrics) in the ApproxHPVM DFG",
-  false /* CFGOnly: pass looks at more than just the CFG */,
-  false /* is_analysis: this pass transforms the IR */);
-
-} // End of namespace
-
diff --git a/llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt b/llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt
deleted file mode 100644
index e9cf5afd4a..0000000000
--- a/llvm/lib/Transforms/InsertApproxInfo/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/InsertApproxInfo/LLVMBuild.txt ----------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = InsertApproxInfo
-parent = Transforms
-- 
GitLab