diff --git a/lib/DFG2LLVM_PROMISE/CMakeLists.txt b/lib/DFG2LLVM_PROMISE/CMakeLists.txt
deleted file mode 100644
index 5b5d2677d0e827d590a07898208f00f9392b62a5..0000000000000000000000000000000000000000
--- a/lib/DFG2LLVM_PROMISE/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMDFG2LLVM_PROMISE
-  DFG2LLVM_PROMISE.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp b/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
deleted file mode 100644
index 184f92910ae7e6d60574b0b207bd22a8f7076d8e..0000000000000000000000000000000000000000
--- a/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.cpp
+++ /dev/null
@@ -1,1283 +0,0 @@
-//=== DFG2LLVM_PROMISE.cpp ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-#define ENABLE_ASSERTS
-
-#define DEBUG_TYPE "DFG2LLVM_PROMISE"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm-c/Core.h"
-#include "llvm/SupportVISC/VISCTimer.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include <sstream>
-#include <fstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-
-namespace {
-
-cl::opt<std::string> QuantizationInputsFilename(
-  "quantization-levels-filename",
-  cl::desc("<PROMISE quantization levels input file (path)>"),
-  cl::value_desc("filename"),
-  cl::Required);
-
-// Helper class declarations
-
-// State machine definition for pattern identification
-
-/* An assumption is made for the PROMISE simulator:                           *
- * a leaf node will contain consequtive operations that will map to a         *
- * single PROMISE simulator call                                              *
-
- * To alleviate that, the states that correspond to valid patterns            *
- * - (FullyConnectedLayer_(2,3,x, ConvilutionLayer_(2,3,4,x))                 *
- * can invoke codeGen when detecting the beginning of a new pattern, then     *
- * clear the collected IIs and Args, then go to initial and invoke its        *
- * transition.                                                                */
-
-class AbstractState;
-
-class CodeGenStateMachine {
-private:
-  Module *M;
-  Module *RtM;
-
-  std::ifstream &qin; // Quantization levels input stream reference
-  std::vector<Value*> Args;
-  std::vector<IntrinsicInst*> IIs;
-  AbstractState *current;
-
-public:
-  CodeGenStateMachine(Module *, Module *, std::ifstream &);
-
-  void setCurrent(AbstractState *s) {
-    current = s;
-  }
-
-  void transition(IntrinsicInst *II);
-
-  Module *getModule() {
-    return M;
-  }
-
-  void getNextQuantizationLevel(float &ql) {
-    qin >> ql;
-  }
-
-  void addArgument(Value *Arg) {
-    Args.push_back(Arg);
-  }
-
-  void addIntrinsicInst(IntrinsicInst *II) {
-    IIs.push_back(II);
-  }
-
-  IntrinsicInst *getIntrinsicInstAt(unsigned idx) {
-    return IIs[idx];
-  }
-
-  void codeGen();
-
-};
-
-class AbstractState {
-public:
-  enum ID
-  {
-    INITIAL_STATE,
-    FULLY_CONNECTED_LAYER_1,
-    FULLY_CONNECTED_LAYER_2,
-    FULLY_CONNECTED_LAYER_3,
-    FULLY_CONNECTED_LAYER,
-    CONVOLUTION_LAYER_1,
-    CONVOLUTION_LAYER_2,
-    CONVOLUTION_LAYER_3,
-    CONVOLUTION_LAYER_4,
-    CONVOLUTION_LAYER,
-    NO_PATTERN,
-  };
-
-protected:
-  enum ID StateID;
-
-public:
-  enum ID getStateID() {
-    return StateID;
-  }
-
-  virtual void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) = 0;
-  virtual ~AbstractState() {}
-};
-
-class InitialState : public AbstractState {
-public:
-  InitialState() {
-    StateID = ID::INITIAL_STATE;
-    DEBUG(errs() << "new InitialState\n");
-  }
-  ~InitialState() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_1 : public AbstractState {
-public:
-  FullyConnectedLayer_1() {
-    StateID = ID::FULLY_CONNECTED_LAYER_1;
-    DEBUG(errs() << "new FullyConnectedLayer_1\n");
-  }
-  ~FullyConnectedLayer_1() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_2 : public AbstractState {
-public:
-  FullyConnectedLayer_2() {
-    StateID = ID::FULLY_CONNECTED_LAYER_2;
-    DEBUG(errs() << "new FullyConnectedLayer_2\n");
-  }
-  ~FullyConnectedLayer_2() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer_3 : public AbstractState {
-public:
-  FullyConnectedLayer_3() {
-    StateID = ID::FULLY_CONNECTED_LAYER_3;
-    DEBUG(errs() << "new FullyConnectedLayer_3\n");
-  }
-  ~FullyConnectedLayer_3() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class FullyConnectedLayer : public AbstractState {
-public:
-  FullyConnectedLayer() {
-    StateID = ID::FULLY_CONNECTED_LAYER;
-    DEBUG(errs() << "new FullyConnectedLayer\n");
-  }
-  ~FullyConnectedLayer() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_1 : public AbstractState {
-public:
-  ConvolutionLayer_1() {
-    StateID = ID::CONVOLUTION_LAYER_1;
-    DEBUG(errs() << "new ConvolutionLayer_1\n");
-  }
-  ~ConvolutionLayer_1() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_2 : public AbstractState {
-public:
-  ConvolutionLayer_2() {
-    StateID = ID::CONVOLUTION_LAYER_2;
-    DEBUG(errs() << "new ConvolutionLayer_2\n");
-  }
-  ~ConvolutionLayer_2() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_3 : public AbstractState {
-public:
-  ConvolutionLayer_3() {
-    StateID = ID::CONVOLUTION_LAYER_3;
-    DEBUG(errs() << "new ConvolutionLayer_3\n");
-  }
-  ~ConvolutionLayer_3() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer_4 : public AbstractState {
-public:
-  ConvolutionLayer_4() {
-    StateID = ID::CONVOLUTION_LAYER_4;
-    DEBUG(errs() << "new ConvolutionLayer_4\n");
-  }
-  ~ConvolutionLayer_4() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class ConvolutionLayer : public AbstractState {
-public:
-  ConvolutionLayer() {
-    StateID = ID::CONVOLUTION_LAYER;
-    DEBUG(errs() << "new ConvolutionLayer\n");
-  }
-  ~ConvolutionLayer() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-class NoPattern : public AbstractState {
-public:
-  NoPattern() {
-    StateID = ID::NO_PATTERN;
-    DEBUG(errs() << "new NoPattern\n");
-  }
-  ~NoPattern() {}
-
-  void transition(CodeGenStateMachine *Mch, IntrinsicInst *II) override;
-};
-
-void InitialState::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_convolution:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // conv input
-
-        // Read quantization levels for input
-        float i_min, i_max;
-        Mch->getNextQuantizationLevel(i_min);
-        Mch->getNextQuantizationLevel(i_max);
-        errs() << "i_min: " << i_min << "\n";
-        errs() << "i_max: " << i_max << "\n";
-
-        // Create associated arguments for the quantization levels
-        Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_min);
-//      errs() << "IminC : "
-//             << dyn_cast<ConstantFP>(IminC)->getValueAPF().convertToFloat()
-//             << "\n";
-        Mch->addArgument(IminC);
-        Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_max);
-        Mch->addArgument(ImaxC);
-
-        Mch->addArgument(II->getOperand(1)); // conv kernel
-
-        // Read quantization levels for filter
-        float w_min, w_max;
-        Mch->getNextQuantizationLevel(w_min);
-        Mch->getNextQuantizationLevel(w_max);
-        errs() << "w_min: " << w_min << "\n";
-        errs() << "w_max: " << w_max << "\n";
-        Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_min);
-        Mch->addArgument(WminC);
-        Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_max);
-        Mch->addArgument(WmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_1());
-        }
-        break;
-      case Intrinsic::visc_tensor_mul:
-        {
-        Mch->addIntrinsicInst(II);
-        Mch->addArgument(II->getOperand(0)); // 1st gemm input
-
-        // Read quantization levels for input
-        float i_min, i_max;
-        Mch->getNextQuantizationLevel(i_min);
-        Mch->getNextQuantizationLevel(i_max);
-        errs() << "i_min: " << i_min << "\n";
-        errs() << "i_max: " << i_max << "\n";
-
-        Constant *IminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_min);
-        Mch->addArgument(IminC);
-        Constant *ImaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) i_max);
-        Mch->addArgument(ImaxC);
-
-        Mch->addArgument(II->getOperand(1)); // 2nd gemm input
-
-        // Read quantization levels for weight
-        float w_min, w_max;
-        Mch->getNextQuantizationLevel(w_min);
-        Mch->getNextQuantizationLevel(w_max);
-        errs() << "w_min: " << w_min << "\n";
-        errs() << "w_max: " << w_max << "\n";
-
-        Constant *WminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_min);
-        Mch->addArgument(WminC);
-        Constant *WmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) w_max);
-        Mch->addArgument(WmaxC);
-
-        Mch->setCurrent(new FullyConnectedLayer_1());
-        }
-        break;
-      default: // Other HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-    delete this;
-  } // else {} // No HPVM intrinsic received. Remain at initial 
-}
-
-void FullyConnectedLayer_1::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_add:
-        {
-        IntrinsicInst *MulII = Mch->getIntrinsicInstAt(0);
-        assert((MulII == II->getOperand(0)) &&
-               "Output of mul must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-
-        // Read quantization levels for input
-        float b_min, b_max;
-        Mch->getNextQuantizationLevel(b_min);
-        Mch->getNextQuantizationLevel(b_max);
-        errs() << "b_min: " << b_min << "\n";
-        errs() << "b_max: " << b_max << "\n";
-
-        Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_min);
-        Mch->addArgument(BminC);
-        Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_max);
-        Mch->addArgument(BmaxC);
-
-        Mch->setCurrent(new FullyConnectedLayer_2());
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer_2::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_tanh:
-        {
-        // Type of activation : TanH
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_relu:
-        {
-        // Type of activation : ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_clipped_relu:
-        {
-        // Type of activation : Clipped ReLU
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new FullyConnectedLayer_3());
-        }
-        break;
-      default: // No activation, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    // Read quantization levels for output
-    float out_min, out_max;
-    Mch->getNextQuantizationLevel(out_min);
-    Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-    Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_min);
-    Mch->addArgument(OutminC);
-    Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_max);
-    Mch->addArgument(OutmaxC);
-
-    Mch->setCurrent(new FullyConnectedLayer());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer_3::transition(CodeGenStateMachine *Mch,
-                                       IntrinsicInst *II) {
-  if (!II) { // End of instruction stream
-    Mch->setCurrent(new FullyConnectedLayer());
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void FullyConnectedLayer::transition(CodeGenStateMachine *Mch,
-                                     IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    Mch->setCurrent(new NoPattern());
-    delete this;
-  }
-}
-
-void ConvolutionLayer_1::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_add:
-        {
-        IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
-        assert((ConvII == II->getOperand(0)) &&
-               "Output of conv must be used as 1st operand of add");
-        Mch->addIntrinsicInst(II);
-
-        Mch->addArgument(II->getOperand(1));     // bias
-        // Read quantization levels for bias
-        float b_min, b_max;
-        Mch->getNextQuantizationLevel(b_min);
-        Mch->getNextQuantizationLevel(b_max);
-        errs() << "b_min: " << b_min << "\n";
-        errs() << "b_max: " << b_max << "\n";
-
-        Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_min);
-        Mch->addArgument(BminC);
-        Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) b_max);
-        Mch->addArgument(BmaxC);
-
-        Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-
-        Mch->setCurrent(new ConvolutionLayer_2());
-        }
-        break;
-      default:
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else {
-    // No addition
-    Mch->addArgument(ConstantPointerNull::get(
-                     Type::getInt8PtrTy(Mch->getModule()->getContext())));
-    // Still need to add the quantization constants - and remove them from file
-    float b_min, b_max;
-    Mch->getNextQuantizationLevel(b_min);
-    Mch->getNextQuantizationLevel(b_max);
-        errs() << "b_min: " << b_min << "\n";
-        errs() << "b_max: " << b_max << "\n";
-    Constant *BminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                      (double) b_min);
-    Mch->addArgument(BminC);
-    Constant *BmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                      (double) b_max);
-    Mch->addArgument(BmaxC);
-
-    // Zero for all convolution numeric arguments FIXME???
-        IntrinsicInst *ConvII = Mch->getIntrinsicInstAt(0);
-        Mch->addArgument(ConvII->getOperand(2)); // 1st numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(3)); // 2nd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(4)); // 3rd numeric arg of conv
-        Mch->addArgument(ConvII->getOperand(5)); // 4th numeric arg of conv
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-//    Mch->addArgument(ConstantInt::get(
-//                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    // Read quantization levels for output
-    float out_min, out_max;
-    Mch->getNextQuantizationLevel(out_min);
-    Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-    Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_min);
-    Mch->addArgument(OutminC);
-    Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_max);
-    Mch->addArgument(OutmaxC);
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_2::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_tanh:
-        {
-        // Type of activation : TanH
-//        Mch->addArgument(ConstantInt::get(
-//                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_relu:
-        {
-        // Type of activation : ReLU
-//        Mch->addArgument(ConstantInt::get(
-//                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_clipped_relu:
-        {
-        // Type of activation : Clipped ReLU
-//        Mch->addArgument(ConstantInt::get(
-//                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        Mch->addIntrinsicInst(II);
-
-        Mch->setCurrent(new ConvolutionLayer_3());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_max:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_min:
-        {
-        // pool min FIXME: 2: supported?
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_mean:
-        {
-        // pool mean
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        // No activation
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-        Mch->addIntrinsicInst(II);
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      default: // No activation, No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // No activation
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), -1));
-
-    // Read quantization levels for output
-    float out_min, out_max;
-    Mch->getNextQuantizationLevel(out_min);
-    Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-    Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_min);
-    Mch->addArgument(OutminC);
-    Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                        (double) out_max);
-    Mch->addArgument(OutmaxC);
-
-    Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_3::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    switch (II->getIntrinsicID()) {
-      case Intrinsic::visc_tensor_pool_max:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_min:
-        {
-        // pool min FIXME: 2: supported?
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      case Intrinsic::visc_tensor_pool_mean:
-        {
-        // pool max
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        // poolSize
-        Mch->addArgument(II->getOperand(1));
-        Mch->addIntrinsicInst(II);
-
-        // Revisit last intrinsic, to add argument for activation operation
-        IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-        // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-        Intrinsic::ID ActIID = ActII->getIntrinsicID();
-        if (ActIID == Intrinsic::visc_tensor_tanh) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-        } else if (ActIID == Intrinsic::visc_tensor_relu) {
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-        } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-          Mch->addArgument(ConstantInt::get(
-                           Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-        }
-
-        // Read quantization levels for output
-        float out_min, out_max;
-        Mch->getNextQuantizationLevel(out_min);
-        Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-        Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_min);
-        Mch->addArgument(OutminC);
-        Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                            (double) out_max);
-        Mch->addArgument(OutmaxC);
-
-        Mch->setCurrent(new ConvolutionLayer_4());
-        }
-        break;
-      default: // No pooling, but HPVM intrinsic
-        Mch->setCurrent(new NoPattern());
-        break;
-    }
-  } else { // End of instruction stream
-    // No pooling
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-    // 0 for unused pool argument
-    Mch->addArgument(ConstantInt::get(
-                     Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-
-    // Revisit last intrinsic, to add argument for activation operation
-    IntrinsicInst *ActII = Mch->getIntrinsicInstAt(2);
-    // Due to previous switch, we know it is a TanH, ReLU, or Clipped ReLU
-    Intrinsic::ID ActIID = ActII->getIntrinsicID();
-    if (ActIID == Intrinsic::visc_tensor_tanh) {
-      Mch->addArgument(ConstantInt::get(
-                       Type::getInt32Ty(Mch->getModule()->getContext()), 0));
-      } else if (ActIID == Intrinsic::visc_tensor_relu) {
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 1));
-      } else { //ActIID == Intrinsic::visc_tensor_clipped_relu
-        Mch->addArgument(ConstantInt::get(
-                         Type::getInt32Ty(Mch->getModule()->getContext()), 2));
-      }
-
-      // Read quantization levels for output
-      float out_min, out_max;
-      Mch->getNextQuantizationLevel(out_min);
-      Mch->getNextQuantizationLevel(out_max);
-        errs() << "out_min: " << out_min << "\n";
-        errs() << "out_max: " << out_max << "\n";
-
-      Constant *OutminC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) out_min);
-      Mch->addArgument(OutminC);
-      Constant *OutmaxC = ConstantFP::get(Type::getFloatTy(Mch->getModule()->getContext()),
-                          (double) out_max);
-      Mch->addArgument(OutmaxC);
-
-     Mch->setCurrent(new ConvolutionLayer());
-  }
-  delete this;
-}
-
-void ConvolutionLayer_4::transition(CodeGenStateMachine *Mch,
-                                    IntrinsicInst *II) {
-  if (!II) { // End of instruction stream
-    Mch->setCurrent(new ConvolutionLayer());
-  } else {
-    Mch->setCurrent(new NoPattern());
-  }
-  delete this;
-}
-
-void ConvolutionLayer::transition(CodeGenStateMachine *Mch,
-                                  IntrinsicInst *II) {
-  if (II) { // Not end of instruction stream
-    Mch->setCurrent(new NoPattern());
-    delete this;
-  }
-}
-
-void NoPattern::transition(CodeGenStateMachine *Mch, IntrinsicInst *II) {}
-
-CodeGenStateMachine::CodeGenStateMachine(Module *_M, Module *_RtM, std::ifstream &_qin) :
-  M(_M), RtM(_RtM), qin(_qin) {
-  current = new InitialState();
-}
-
-void CodeGenStateMachine::transition(IntrinsicInst *II) {
-  current->transition(this, II);
-}
-
-void CodeGenStateMachine::codeGen() {
-
-  if ((current->getStateID() != AbstractState::ID::FULLY_CONNECTED_LAYER) &&
-      (current->getStateID() != AbstractState::ID::CONVOLUTION_LAYER)) {
-    // Not a valid instruction sequence.
-    assert(false && "Unsupported instruction sequence by PROMISE simulator\n");
-  }
-
-  // We have a valid instruction sequence.
-  // Make sure that the instruction sequence can be traslated:
-  // each instruction's result must be used only by the next one in sequence.
-  for (unsigned p = 0; p < IIs.size()-1; p++) {
-    IntrinsicInst *II = IIs[p];
-    assert((II->hasOneUse()) &&
-          "Instruction sequence does not fit expected pattern: not single use\n");
-
-    Value::user_iterator ui = II->user_begin(); // The only use
-    assert((*ui == IIs[p+1]) &&
-           "Instruction sequence does not fit expected pattern: not used by next instruction\n");
-  }
-
-  // Create corresponding PROMISE simulator call
-  CallInst *CI;
-  switch (current->getStateID()) {
-    case AbstractState::ID::CONVOLUTION_LAYER:
-      {
-        Constant* ConvLayer_PROMISE =
-          M->getOrInsertFunction(StringRef("ConvLayer_PROMISE"),
-                 RtM->getFunction(StringRef("ConvLayer_PROMISE"))->getFunctionType());
-        DEBUG(errs() << *ConvLayer_PROMISE);
-
-        // FIXME: get last argument from some intrinsic. For now, 7
-        Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7));
-        // Create PROMISE simulator function call
-        CI = CallInst::Create(ConvLayer_PROMISE, Args, "");
-      }
-      break;
-    case AbstractState::ID::FULLY_CONNECTED_LAYER:
-      {
-        Constant* FCLayer_PROMISE =
-          M->getOrInsertFunction(StringRef("FCLayer_PROMISE"),
-              RtM->getFunction(StringRef("FCLayer_PROMISE"))->getFunctionType());
-        DEBUG(errs() << *FCLayer_PROMISE);
-
-        // FIXME: get last argument from some intrinsic. For now, 7
-        Args.push_back(ConstantInt::get(Type::getInt32Ty(M->getContext()), 7));
-        // Create PROMISE simulator function call
-        CI = CallInst::Create(FCLayer_PROMISE, Args, "");
-      }
-      break;
-    default:
-      llvm_unreachable("Unexpected CodeGenStateMachine State\n");
-      break;
-  }
-
-  // Insert new call and replace all uses of pattern result with
-  // the PROMISE simulator call
-  IntrinsicInst *IIlast = *(IIs.rbegin());
-  CI->insertBefore(IIlast);
-  IIlast->replaceAllUsesWith(CI);
-
-  // Remove the instructions we translated to the simulator call.
-  // Traverse the vector backwards, otherwise definitions are deleted while
-  // their subsequent uses are still around.
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IIs.rbegin(),
-       re = IIs.rend(); ri != re; ++ri) {
-    DEBUG(errs() << "Erasing: " << **ri << "\n");
-    (*ri)->eraseFromParent();
-  }
-errs() << "****** GenF:\n" << *(CI->getParent()->getParent());
-
-}
-
-// DFG2LLVM_PROMISE - The first implementation.
-
-struct DFG2LLVM_PROMISE : public DFG2LLVM {
-  static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_PROMISE() : DFG2LLVM(ID) {}
-private:
-
-public:
-
-  void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<BuildDFG>();
-    AU.addPreserved<BuildDFG>();
-  }
-
-  bool runOnModule(Module &M);
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class CGT_PROMISE : public CodeGenTraversal {
-
-private:
-  //Member variables
-  std::ifstream qin;
-
-  // VISC Runtime API and Tensor runtime API
-  Constant* llvm_hpvm_initTensorRt;
-  Constant* llvm_hpvm_cleanupTensorRt;
-  Constant* hpvm_request_tensor;
-
-  // Functions
-
-  // Virtual Functions
-  void init();
-  void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-
-public:
-
-  // Constructor
-  CGT_PROMISE(Module &_M, BuildDFG &_DFG, std::string &_str) : CodeGenTraversal(_M, _DFG) {
-    qin.open(_str.c_str());
-    assert(qin && "Failed to open quantization levels input file\n"); 
-    initRuntimeAPI();
-  }
-
-  ~CGT_PROMISE() {
-    qin.close();
-  }
-
-};
-
-void CGT_PROMISE::init() {
-  // FIXME: what to do here? If anything?
-}
-
-// Initialize the VISC runtime API. This makes it easier to insert these calls
-void CGT_PROMISE::initRuntimeAPI() {
-
-  // Load Runtime API Module
-  SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!\n");
-
-  // FIXME: set correct path
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/projects/hpvm-tensor-rt/lib/tensor_runtime.ll";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == nullptr)
-    DEBUG(errs() << Err.getMessage());
-  else
-    DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
-
-  // Get or insert Global declarations for
-  // - initialization
-  // - cleanup
-  // - request a tensor
-  DECLARE(llvm_hpvm_initTensorRt);
-  DECLARE(llvm_hpvm_cleanupTensorRt);
-  DECLARE(hpvm_request_tensor);
-
-  // Find visc.init and visc.cleanup calls, and add placeholder methods
-  // for initialization and cleanup of the hpvm tensor runtime
-
-  Function* VI = M.getFunction("llvm.visc.init");
-  assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n");
-  InitCall = cast<Instruction>(*VI->user_begin());
-  CallInst::Create(llvm_hpvm_initTensorRt,
-                   ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)),
-                   "", InitCall);
-
-  Function* VC = M.getFunction("llvm.visc.cleanup");
-  assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n");
-  CleanupCall = cast<Instruction>(*VC->user_begin());
-  CallInst::Create(llvm_hpvm_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
-
-}
-
-void CGT_PROMISE::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  errs () << "Skipping internal node\n";
-}
-
-void CGT_PROMISE::codeGen(DFLeafNode* N) {
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-
-  // Generate code only if it has the right hint
-  if (!checkPreferredTarget(N, visc::PROMISE_TARGET)) {
-    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-    return;
-  }
-
-  // Get the function associated with the dataflow node
-  Function *F = N->getFuncPointer();
-errs() << "Node Function: " << *F << "\n";
-  // Look up if we have visited this function before. If we have, then just
-  // get the cloned function pointer from DFNode. Otherwise, create the cloned
-  // function and add it to the DFNode GenFunc.
-  Function *F_promise = N->getGenFuncForTarget(visc::PROMISE_TARGET);
-
-  assert((F_promise == NULL) &&
-         "Error: Visiting a node for which code already generated");
-
-  // Clone the function
-  ValueToValueMapTy VMap;
-  std::string FName(F->getName().data());//Twine FName = F->getName();
-  F_promise = CloneFunction(F, VMap);
-  F_promise->setName(FName+"_promise");
-  F_promise->removeFromParent();
-  M.getFunctionList().push_back(F_promise);
-
-  N->addGenFunc(F_promise, visc::PROMISE_TARGET, true);
-
-  /* Removing HPVM in/out/inout function attributes */
-  for(Function::arg_iterator ai = F_promise->arg_begin(), ae = F_promise->arg_end();
-      ai != ae; ai++){
-    Argument *Arg = &*ai;
-    if(Arg->hasAttribute(Attribute::In))
-      Arg->removeAttr(Attribute::In);
-    if(Arg->hasAttribute(Attribute::Out))
-      Arg->removeAttr(Attribute::Out);
-    if(Arg->hasAttribute(Attribute::InOut))
-      Arg->removeAttr(Attribute::InOut);    
-  }
-
-  // Adding nounwind to generated function : FIXME: needed?
-  DEBUG(errs() << "Adding nounwind to generated function\n");
-  F_promise->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
-
-  // Add llvm_visc_requestTensor calls for every pointer argument of the function
-  // (they are all expected to be tensors), at the beginning of the function.
-  // This is the first instruction of the function, insert them before this
-  Instruction* FI = &*(F_promise->getEntryBlock().begin());
-
-  // FIXME: verify that we want 0 as a target device
-  // In this backend, the target device is CPU, represented by i32 0.
-  ConstantInt *TargetDeviceID =
-    ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
-
-  for (Function::arg_iterator ai = F_promise->arg_begin(),
-       ae = F_promise->arg_end(); ai != ae; ++ai) {
-    Argument* Arg = &*ai;
-    if (Arg->getType()->isPointerTy()) {
-      Value *Args[] = {Arg, TargetDeviceID};
-      CallInst::Create(hpvm_request_tensor,
-                       ArrayRef<Value*>(Args, 2),
-                       "", FI);
-    }
-  }
-
-  CodeGenStateMachine CGM(&M, runtimeModule.get(), qin);
-
-  /* An assumption is made for the PROMISE simulator:                         *
-   * a leaf node will contain consequtive operations that will map to a       *
-   * single PROMISE simulator call                                            */
-
-  for (inst_iterator i = inst_begin(F_promise), e = inst_end(F_promise);
-       i != e; ++i) {
-    Instruction *I = &(*i);
-    CGM.transition(dyn_cast<IntrinsicInst>(I));
-  }
-
-  CGM.codeGen();
-
-//errs() << "-----------------------------------\n";
-//errs() << *F_promise << "\n";
-
-  return;
-}
-
-bool DFG2LLVM_PROMISE::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_PROMISE PASS\n";
-
-  errs() << QuantizationInputsFilename << "\n";
-  
-//  std::ifstream qin(quantizationInputsFilename_cstr);
-//  std::ifstream qin;
-//  qin.open(QuantizationInputsFilename.c_str());
-//  qin.open(QuantizationInputsFilename.c_str(), std::ifstream::in);
-
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
- 
-  // Visitor for Code Generation Graph Traversal
-  CGT_PROMISE *CGTVisitor = new CGT_PROMISE(M, DFG, QuantizationInputsFilename);
-
-  // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
-    // Initiate code generation for root DFNode
-    CGTVisitor->visit(rootNode);
-  }
-
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
-  delete CGTVisitor;
-
-  return true;
-}
-
-
-/******************************************************************************
- *                              Helper functions                              *
- ******************************************************************************/
-
-} // End of namespace
-
-char DFG2LLVM_PROMISE::ID = 0;
-static RegisterPass<DFG2LLVM_PROMISE> X("dfg2llvm-promise",
-                                      "Dataflow Graph to LLVM for PROMISE Pass",
-                                      false /* does not modify the CFG */,
-                                      true /* transformation,   *
-                                            * not just analysis */);
-
diff --git a/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports b/lib/DFG2LLVM_PROMISE/DFG2LLVM_PROMISE.exports
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/lib/DFG2LLVM_PROMISE/LLVMBuild.txt b/lib/DFG2LLVM_PROMISE/LLVMBuild.txt
deleted file mode 100644
index 714ad14f18c136998d25cda12e44e28ad191028e..0000000000000000000000000000000000000000
--- a/lib/DFG2LLVM_PROMISE/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/DFG2LLVM_NVPTX/LLVMBuild.txt ------------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = DFG2LLVM_PROMISE
-parent = Transforms
diff --git a/lib/DFG2LLVM_SPIR/CMakeLists.txt b/lib/DFG2LLVM_SPIR/CMakeLists.txt
deleted file mode 100644
index 43e2254c7930d8f36142338412ec0a1b87f789ab..0000000000000000000000000000000000000000
--- a/lib/DFG2LLVM_SPIR/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMDFG2LLVM_SPIR
-  DFG2LLVM_SPIR.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp b/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
deleted file mode 100644
index 48b1492047d5d98a9018f9cff2d94f7450cb490b..0000000000000000000000000000000000000000
--- a/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.cpp
+++ /dev/null
@@ -1,2010 +0,0 @@
-//=== DFG2LLVM_SPIR.cpp ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define ENABLE_ASSERTS
-#define TARGET_PTX 32
-#define GENERIC_ADDRSPACE 0
-#define GLOBAL_ADDRSPACE 1
-#define SHARED_ADDRSPACE 3
-
-#define DEBUG_TYPE "DFG2LLVM_SPIR"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/PassManager.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/Bitcode/BitcodeReader.h"
-#include "llvm/Bitcode/BitcodeWriter.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/SupportVISC/VISCHint.h"
-#include "llvm/SupportVISC/VISCTimer.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm-c/Core.h"
-
-#include "llvm/SupportVISC/VISCUtils.h"
-#include "llvm/IR/IRPrintingPasses.h"
-#include "llvm/IR/LegacyPassManager.h"
-#include "llvm/Support/ToolOutputFile.h"
-#include "llvm/IR/UseListOrder.h"
-
-#include <sstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-using namespace viscUtils;
-
-// VISC Command line option to use timer or not
-static cl::opt<bool>
-VISCTimer_SPIR("visc-timers-spir", cl::desc("Enable visc timers"));
-
-namespace {
-// Helper class declarations
-
-// Class to maintain the tuple of host pointer, device pointer and size
-// in bytes. Would have preferred to use tuple but support not yet available
-class OutputPtr {
-public:
-  OutputPtr(Value* _h_ptr, Value* _d_ptr, Value* _bytes)
-    : h_ptr(_h_ptr), d_ptr(_d_ptr), bytes(_bytes) {}
-
-  Value* h_ptr;
-  Value* d_ptr;
-  Value* bytes;
-};
-
-// Class to maintain important kernel info required for generating runtime
-// calls
-class Kernel {
-public:
-  Kernel(Function* _KF, DFLeafNode* _KLeafNode, std::map<unsigned, unsigned> _inArgMap =
-         std::map<unsigned, unsigned>(),
-         std::map<unsigned, std::pair<Value*, unsigned> > _sharedInArgMap =
-         std::map<unsigned, std::pair<Value*, unsigned> >(),
-         std::vector<unsigned> _outArgMap = std::vector<unsigned>(),
-         unsigned _gridDim = 0, std::vector<Value*> _globalWGSize = std::vector<Value*>(),
-         unsigned _blockDim = 0, std::vector<Value*> _localWGSize = std::vector<Value*>())
-    : KernelFunction(_KF), KernelLeafNode(_KLeafNode), inArgMap(_inArgMap),
-      sharedInArgMap(_sharedInArgMap), outArgMap(_outArgMap), gridDim(_gridDim),
-      globalWGSize(_globalWGSize), blockDim(_blockDim), localWGSize(_localWGSize) {
-
-    assert(gridDim == globalWGSize.size()
-           && "gridDim should be same as the size of vector globalWGSize");
-    assert(blockDim == localWGSize.size()
-           && "blockDim should be same as the size of vector localWGSize");
-  }
-
-  Function* KernelFunction;
-  DFLeafNode* KernelLeafNode;
-  std::map<unsigned, unsigned> inArgMap;
-  // Map for shared memory arguments
-  std::map<unsigned, std::pair<Value*, unsigned> > sharedInArgMap;
-  // Fields for (potential) allocation node
-  DFLeafNode* AllocationNode;
-  Function* AllocationFunction;
-  std::map<unsigned, unsigned> allocInArgMap;  
-
-  std::vector<unsigned> outArgMap;
-  unsigned gridDim;
-  std::vector<Value*> globalWGSize;
-  unsigned blockDim;
-  std::vector<Value*> localWGSize;
-  std::vector<int> localDimMap;
-
-  std::map<unsigned, unsigned> getInArgMap() {
-    return inArgMap;
-  }
-  void setInArgMap(std::map<unsigned, unsigned> map) {
-    inArgMap = map;
-  }
-
-  std::map<unsigned, std::pair<Value*, unsigned> > getSharedInArgMap() {
-    return sharedInArgMap;
-  }
-  void setSharedInArgMap(std::map<unsigned, std::pair<Value*, unsigned> > map) {
-    sharedInArgMap = map;
-  }
-
-  std::vector<unsigned> getOutArgMap() {
-    return outArgMap;
-  }
-  void setOutArgMap(std::vector<unsigned> map) {
-    outArgMap = map;
-  }
-
-  void setLocalWGSize(std::vector<Value*> V) {
-    localWGSize = V;
-  }
-
-  bool hasLocalWG() {
-    return blockDim != 0;
-  }
-};
-
-// Helper function declarations
-static void getExecuteNodeParams(Module &M, Value* &, Value* &, Value* &, Kernel*,
-                                 ValueToValueMapTy&, Instruction*);
-static Value* genWorkGroupPtr(Module &M, std::vector<Value*>, ValueToValueMapTy&,
-                              Instruction*, const Twine& WGName = "WGSize");
-static std::string getSPIRFilename(const Module&);
-static std::string getFilenameFromModule(const Module& M);
-static void changeDataLayout(Module &);
-static void changeTargetTriple(Module &);
-static std::string printType(Type*);
-static StringRef getMangledName(std::string);
-static StringRef getAtomicMangledName(std::string, unsigned, bool);
-static void findReturnInst(Function *, std::vector<ReturnInst *> &);
-static void findIntrinsicInst(Function *, Intrinsic::ID, std::vector<IntrinsicInst *> &);
-static StringRef getAtomicOpName(Intrinsic::ID, unsigned);
-static std::string getMathFunctionName(Intrinsic::ID);
-
-// DFG2LLVM_SPIR - The first implementation.
-struct DFG2LLVM_SPIR : public DFG2LLVM {
-  static char ID; // Pass identification, replacement for typeid
-  DFG2LLVM_SPIR() : DFG2LLVM(ID) {}
-
-private:
-
-public:
-  bool runOnModule(Module &M);
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class CGT_SPIR : public CodeGenTraversal {
-
-private:
-  //Member variables
-  std::unique_ptr<Module> KernelM;
-  DFNode* KernelLaunchNode = nullptr;
-  Kernel* kernel;
-
-  // VISC Runtime API
-  Constant* llvm_visc_ocl_launch;
-  Constant* llvm_visc_ocl_wait;
-  Constant* llvm_visc_ocl_initContext;
-  Constant* llvm_visc_ocl_clearContext;
-  Constant* llvm_visc_ocl_argument_shared;
-  Constant* llvm_visc_ocl_argument_scalar;
-  Constant* llvm_visc_ocl_argument_ptr;
-  Constant* llvm_visc_ocl_output_ptr;
-  Constant* llvm_visc_ocl_free;
-  Constant* llvm_visc_ocl_getOutput;
-  Constant* llvm_visc_ocl_executeNode;
-
-  //Functions
-  std::string getKernelsModuleName(Module &M);
-  void fixValueAddrspace(Value* V, unsigned addrspace);
-  Function* changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned i);
-  void removeAttributeAtArguments(Function* F, std::vector<unsigned> &Ags, Attribute::AttrKind attrKind);
-  void addCLMetadata(Function* F);
-  Function* transformFunctionToVoid(Function* F);
-  void removeInOutAttributes(Function* F);
-  void insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName);
-
-  // Virtual Functions
-  void init() {
-    VISCTimer = VISCTimer_SPIR;
-    TargetName = "SPIR";
-  }
-  void initRuntimeAPI();
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-
-public:
-
-  // Constructor
-  CGT_SPIR(Module &_M, BuildDFG &_DFG) : CodeGenTraversal(_M, _DFG), KernelM(CloneModule(&_M)) {
-    KernelLaunchNode = NULL;
-    init();
-    initRuntimeAPI();
-    errs() << "Old module pointer: " << &_M << "\n";
-    errs() << "New module pointer: " <<  KernelM.get() << "\n";
-    // Copying instead of creating new, in order to preserve required info (metadata)
-    // Remove functions, global variables and aliases
-    std::vector<GlobalVariable*> gvv = std::vector<GlobalVariable*>();
-    for (Module::global_iterator mi = KernelM->global_begin(),
-         me = KernelM->global_end(); (mi != me); ++mi) {
-      GlobalVariable* gv = &*mi;
-      gvv.push_back(gv);
-    }
-    for (std::vector<GlobalVariable*>::iterator vi = gvv.begin(); vi != gvv.end(); ++vi) {
-      (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType()));
-      (*vi)->eraseFromParent();
-    }
-
-    std::vector<Function*> fv = std::vector<Function*>();
-    for (Module::iterator mi = KernelM->begin(),
-         me = KernelM->end(); (mi != me); ++mi) {
-      Function* f = &*mi;
-      fv.push_back(f);
-    }
-    for (std::vector<Function*>::iterator vi = fv.begin(); vi != fv.end(); ++vi) {
-      (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType()));
-      (*vi)->eraseFromParent();
-    }
-
-    std::vector<GlobalAlias*> av = std::vector<GlobalAlias*>();
-    for (Module::alias_iterator mi = KernelM->alias_begin(),
-         me = KernelM->alias_end(); (mi != me); ++mi) {
-      GlobalAlias* a = &*mi;
-      av.push_back(a);
-    }
-    for (std::vector<GlobalAlias*>::iterator vi = av.begin(); vi != av.end(); ++vi) {
-      (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType()));
-      (*vi)->eraseFromParent();
-    }
-
-    changeDataLayout(*KernelM);
-    changeTargetTriple(*KernelM);
-
-    DEBUG(errs() << *KernelM);
-
-  }
-
-  void removeLLVMIntrinsics();
-  void writeKernelsModule();
-};
-
-// Initialize the VISC runtime API. This makes it easier to insert these calls
-void CGT_SPIR::initRuntimeAPI() {
-
-  // Load Runtime API Module
-  SMDiagnostic Err;
-
-  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
-  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!");
-
-  Twine llvmSrcRoot = LLVM_SRC_ROOT;
-  Twine runtimeAPI = llvmSrcRoot+"/../build/projects/visc-rt/visc-rt.ll";
-  errs() << "Open file: " << runtimeAPI.str() << "\n";
-  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
-  if(runtimeModule == NULL)
-    DEBUG(errs() << Err.getMessage());
-  else
-    errs() << "Successfully loaded visc-rt API module\n";
-
-  // Get or insert the global declarations for launch/wait functions
-  DECLARE(llvm_visc_ocl_launch);
-  DECLARE(llvm_visc_ocl_wait);
-  DECLARE(llvm_visc_ocl_initContext);
-  DECLARE(llvm_visc_ocl_clearContext);
-  DECLARE(llvm_visc_ocl_argument_shared);
-  DECLARE(llvm_visc_ocl_argument_scalar);
-  DECLARE(llvm_visc_ocl_argument_ptr);
-  DECLARE(llvm_visc_ocl_output_ptr);
-  DECLARE(llvm_visc_ocl_free);
-  DECLARE(llvm_visc_ocl_getOutput);
-  DECLARE(llvm_visc_ocl_executeNode);
-
-  // Get or insert timerAPI functions as well if you plan to use timers
-  initTimerAPI();
-
-  // Insert init context in main
-  DEBUG(errs() << "Gen Code to initialize SPIR Timer\n");
-  Function* VI = M.getFunction("llvm.visc.init");
-  assert(VI->getNumUses() == 1 && "__visc__init should only be used once");
-
-  InitCall = cast<Instruction>(*VI->user_begin());
-  initializeTimerSet(InitCall);
-  switchToTimer(visc_TimerID_INIT_CTX, InitCall);
-  CallInst::Create(llvm_visc_ocl_initContext,
-                  ArrayRef<Value*>(getTargetID(M, visc::SPIR_TARGET)),
-                  "", InitCall);
-  switchToTimer(visc_TimerID_NONE, InitCall);
-
-  // Insert print instruction at visc exit
-  DEBUG(errs() << "Gen Code to print SPIR Timer\n");
-  Function* VC = M.getFunction("llvm.visc.cleanup");
-  DEBUG(errs() << *VC << "\n");
-  assert(VC->getNumUses() == 1 && "__visc__clear should only be used once");
-
-  CleanupCall = cast<Instruction>(*VC->user_begin());
-  printTimerSet(CleanupCall);
-
-
-}
-
-// Generate Code to call the kernel
-// The plan is to replace the internal node with a leaf node. This method is
-// used to generate a function to associate with this leaf node. The function
-// is responsible for all the memory allocation/transfer and invoking the
-// kernel call on the device
-void CGT_SPIR::insertRuntimeCalls(DFInternalNode* N, Kernel* K, const Twine& FileName) {
-  // Check if clone already exists. If it does, it means we have visited this
-  // function before.
-//  assert(N->getGenFunc() == NULL && "Code already generated for this node");
-
-  assert(N->getGenFuncForTarget(visc::SPIR_TARGET) == NULL &&
-         "Code already generated for this node");
-
-  // Useful values
-  Value* True = ConstantInt::get(Type::getInt1Ty(M.getContext()), 1);
-  Value* False = ConstantInt::get(Type::getInt1Ty(M.getContext()), 0);
-
-  // If kernel struct has not been initialized with kernel function, then fail
-  assert(K != NULL && "No kernel found!!");
-
-  DEBUG(errs() << "Generating kernel call code\n");
-
-  Function* F = N->getFuncPointer();
-
-
-  // Create of clone of F with no instructions. Only the type is the same as F
-  // without the extra arguments.
-  Function* F_X86;
-
-  // Clone the function, if we are seeing this function for the first time. We
-  // only need a clone in terms of type.
-  ValueToValueMapTy VMap;
-
-  // Create new function with the same type
-  F_X86 = Function::Create(F->getFunctionType(), F->getLinkage(), F->getName(), &M);
-
-  // Loop over the arguments, copying the names of arguments over.
-  Function::arg_iterator dest_iterator = F_X86->arg_begin();
-  for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
-       i != e; ++i) {
-    dest_iterator->setName(i->getName()); // Copy the name over...
-    // Increment dest iterator
-    ++dest_iterator;
-  }
-
-  // Add a basic block to this empty function
-  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F_X86);
-  ReturnInst* RI = ReturnInst::Create(M.getContext(),
-                                      UndefValue::get(F_X86->getReturnType()), BB);
-
-  // FIXME: Adding Index and Dim arguments are probably not required except
-  // for consistency purpose (DFG2LLVM_X86 does assume that all leaf nodes do
-  // have those arguments)
-
-  // Add Index and Dim arguments except for the root node
-  if(!N->isRoot() && !N->getParent()->isChildGraphStreaming())
-    F_X86 = addIdxDimArgs(F_X86);
-
-  BB = &*F_X86->begin();
-  RI = cast<ReturnInst>(BB->getTerminator());
-
-  //Add the generated function info to DFNode
-//  N->setGenFunc(F_X86, visc::CPU_TARGET);
-  N->addGenFunc(F_X86, visc::SPIR_TARGET, true);
-
-  // Loop over the arguments, to create the VMap
-  dest_iterator = F_X86->arg_begin();
-  for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end();
-       i != e; ++i) {
-    // Add mapping to VMap and increment dest iterator
-    VMap[&*i] = &*dest_iterator;
-    ++dest_iterator;
-  }
-
-  /* TODO: Use this code to verufy if this is a good pattern for OCL kernel
-
-  // Sort children in topological order before code generation for kernel call
-  N->getChildGraph()->sortChildren();
-
-  // The DFNode N has the property that it has only one child (leaving Entry
-  // and Exit dummy nodes). This child is the OCL kernel. This simplifies code
-  // generation for kernel calls significantly. All the inputs to this child
-  // node would either be constants or from the parent node N.
-
-  assert(N->getChildGraph()->size() == 3
-         && "Node expected to have just one non-dummy node!");
-
-  DFNode* C;
-  for(DFGraph::children_iterator ci = N->getChildGraph()->begin(),
-      ce = N->getChildGraph()->end(); ci != ce; ++ci) {
-    C = *ci;
-    // Skip dummy node call
-    if (!C->isDummyNode())
-      break;
-  }
-
-  assert(C->isDummyNode() == false && "Internal Node only contains dummy nodes!");
-
-  Function* CF = C->getFuncPointer();
-  */
-  Function* KF = K->KernelLeafNode->getFuncPointer();
-  // Initialize context
-  //DEBUG(errs() << "Initializing context" << "\n");
-  //CallInst::Create(llvm_visc_ocl_initContext, None, "", RI);
-
-  DEBUG(errs() << "Initializing commandQ" << "\n");
-  // Initialize command queue
-  switchToTimer(visc_TimerID_SETUP, InitCall);
-  Value* fileStr = getStringPointer(FileName, InitCall, "Filename");
-  DEBUG(errs() << "Kernel Filename constant: " << *fileStr << "\n");
-  DEBUG(errs() << "Generating code for kernel - " << K->KernelFunction->getName()<< "\n");
-  Value* kernelStr = getStringPointer(K->KernelFunction->getName(), InitCall,"KernelName");
-
-  Value* LaunchInstArgs[] = {fileStr, kernelStr};
-
-  DEBUG(errs() << "Inserting launch call" << "\n");
-  CallInst* SPIR_Ctx = CallInst::Create(llvm_visc_ocl_launch,
-                                         ArrayRef<Value*>(LaunchInstArgs, 2),
-                                         "graph"+KF->getName(),
-                                         InitCall);
-  DEBUG(errs() << *SPIR_Ctx << "\n");
-  GraphIDAddr = new GlobalVariable(M,
-                                   SPIR_Ctx->getType(),
-                                   false,
-                                   GlobalValue::CommonLinkage,
-                                   Constant::getNullValue(SPIR_Ctx->getType()),
-                                   "graph"+KF->getName()+".addr");
-  DEBUG(errs() << "Store at: " << *GraphIDAddr << "\n");
-  StoreInst* SI = new StoreInst(SPIR_Ctx, GraphIDAddr, InitCall);
-  DEBUG(errs() << *SI << "\n");
-  switchToTimer(visc_TimerID_NONE, InitCall);
-  switchToTimer(visc_TimerID_SETUP, RI);
-  Value* GraphID = new LoadInst(GraphIDAddr, "graph."+KF->getName(), RI);
-
-  // Iterate over the required input edges of the node and use the visc-rt API
-  // to set inputs
-  DEBUG(errs() << "Iterate over input edges of node and insert visc api\n");
-  std::vector<OutputPtr> OutputPointers;
-  // Vector to hold the device memory object that need to be cleared before we release
-  // context
-  std::vector<Value*> DevicePointers;
-
-  std::map<unsigned, unsigned> kernelInArgMap = K->getInArgMap();
-/*
-  for(unsigned i=0; i<KF->getFunctionType()->getNumParams(); i++) {
-
-    // The kernel object gives us the mapping of arguments from kernel launch
-    // node function (F_X86) to kernel (kernel->KF)
-    Value* inputVal = getArgumentAt(F_X86, K->getInArgMap()[i]);
-
-*/
-  for(std::map<unsigned, unsigned>::iterator ib = kernelInArgMap.begin(),
-      ie = kernelInArgMap.end(); ib != ie; ++ib) {
-    unsigned i = ib->first;
-    Value* inputVal = getArgumentAt(F_X86, ib->second);
-    DEBUG(errs() << "\tArgument "<< i<< " = "  << *inputVal << "\n");
-
-    // input value has been obtained.
-    // Check if input is a scalar value or a pointer operand
-    // For scalar values such as int, float, etc. the size is simply the size of
-    // type on target machine, but for pointers, the size of data would be the
-    // next integer argument
-    if(inputVal->getType()->isPointerTy()) {
-
-      switchToTimer(visc_TimerID_COPY_PTR, RI);
-      // Pointer Input
-      // CheckAttribute
-      Value* isOutput = (hasAttribute(KF, i, Attribute::Out))? True : False;
-      Value* isInput = ((hasAttribute(KF, i, Attribute::Out))
-                        && !(hasAttribute(KF, i, Attribute::In)))? False : True;
-
-      Argument* A = getArgumentAt(KF, i);
-      if(isOutput == True) {
-        DEBUG(errs() << *A << " is an OUTPUT argument\n");
-      }
-      if(isInput == True) {
-        DEBUG(errs() << *A << " is an INPUT argument\n");
-      }
-
-
-      Value* inputValI8Ptr = CastInst::CreatePointerCast(inputVal,
-                             Type::getInt8PtrTy(M.getContext()),
-                             inputVal->getName()+".i8ptr",
-                             RI);
-
-      // Assert that the pointer argument size (next argument) is in the map
-      assert(kernelInArgMap.find(i+1) != kernelInArgMap.end());
-
-      Value* inputSize = getArgumentAt(F_X86, kernelInArgMap[i+1]);
-
-      assert(inputSize->getType() == Type::getInt64Ty(M.getContext())
-             && "Pointer type input must always be followed by size (integer type)");
-      Value* setInputArgs[] = {GraphID,
-                               inputValI8Ptr,
-                               ConstantInt::get(Type::getInt32Ty(M.getContext()),i),
-                               inputSize,
-                               isInput,
-                               isOutput
-                              };
-      Value* d_ptr = CallInst::Create(llvm_visc_ocl_argument_ptr,
-                                      ArrayRef<Value*>(setInputArgs, 6), "", RI);
-      DevicePointers.push_back(d_ptr);
-      // If this has out attribute, store the returned device pointer in
-      // memory to read device memory later
-      if(isOutput == True) OutputPointers.push_back(OutputPtr(inputValI8Ptr, d_ptr, inputSize));
-    }
-    else {
-      switchToTimer(visc_TimerID_COPY_SCALAR, RI);
-      // Scalar Input
-      // Store the scalar value on stack and then pass the pointer to its
-      // location
-      AllocaInst* inputValPtr = new AllocaInst(inputVal->getType(), inputVal->getName()+".ptr", RI);
-      StoreInst* SI = new StoreInst(inputVal, inputValPtr, RI);
-
-      Value* inputValI8Ptr = CastInst::CreatePointerCast(inputValPtr,
-                             Type::getInt8PtrTy(M.getContext()),
-                             inputVal->getName()+".i8ptr",
-                             RI);
-
-      Value* setInputArgs[] = {GraphID,
-                               inputValI8Ptr,
-                               ConstantInt::get(Type::getInt32Ty(M.getContext()),i),
-                               ConstantExpr::getSizeOf(inputVal->getType())
-                              };
-      CallInst::Create(llvm_visc_ocl_argument_scalar,
-                       ArrayRef<Value*>(setInputArgs, 4), "", RI);
-    }
-  }
-
-  DEBUG(errs() << "Setup shared memory arguments of node and insert visc api\n");
-
-  // Check to see if all the allocation sizes are constant (determined
-  // statically)
-  bool constSizes = true;
-  for (auto& e: K->getSharedInArgMap()) {
-    constSizes &= isa<Constant>(e.second.first);
-  }
-
-  // If the sizes are all constant
-  if (constSizes) {
-    for (auto& e: K->getSharedInArgMap()) {
-      unsigned argNum = e.first;
-      Value* allocSize = e.second.first;
-
-      DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = "  << *allocSize << "\n");
-
-      if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) {
-        // Shared memory ptr argument - scalar at size position
-        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
-
-        assert(isa<Constant>(allocSize) && "Constant shared memory size is expected");
-
-        Value* setInputArgs[] = {GraphID,
-                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
-                                 allocSize
-                                };
-        CallInst::Create(llvm_visc_ocl_argument_shared,
-                         ArrayRef<Value*>(setInputArgs, 3), "", RI);
-      }
-      else {
-        // Sharem memory size argument - scalar at address position
-        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
-        // Store the scalar value on stack and then pass the pointer to its
-        // location
-        AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(),
-                                        allocSize->getName()+".sharedMem.ptr", RI);
-        StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI);
-
-        Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr,
-                               Type::getInt8PtrTy(M.getContext()),
-                               allocSize->getName()+".sharedMem.i8ptr",
-                               RI);
-
-        Value* setInputArgs[] = {GraphID,
-                                 allocSizeI8Ptr,
-                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
-                                 ConstantExpr::getSizeOf(allocSize->getType())
-                                };
-        CallInst::Create(llvm_visc_ocl_argument_scalar,
-                         ArrayRef<Value*>(setInputArgs, 4), "", RI);
-      }
-    }
-  } else {
-
-    Function *F_alloc = K->AllocationFunction;
-    StructType *FAllocRetTy = dyn_cast<StructType>(F_alloc->getReturnType());
-    assert(FAllocRetTy && "Allocation node with no struct return type");
-
-    std::vector<Value *> AllocInputArgs;
-    for (unsigned i = 0; i < K->allocInArgMap.size(); i++) {
-      AllocInputArgs.push_back(getArgumentAt(F_X86, K->allocInArgMap.at(i)));
-    }
-
-    CallInst *CI = CallInst::Create(F_alloc, AllocInputArgs, "", RI);
-    std::vector<ExtractValueInst *> ExtractValueInstVec;
-    for (unsigned i = 1; i < FAllocRetTy->getNumElements(); i += 2) {
-      ExtractValueInst *EI = ExtractValueInst::Create(CI, i, "", RI);
-      ExtractValueInstVec.push_back(EI);
-    }
-
-    for (auto& e: K->getSharedInArgMap()) {
-      unsigned argNum = e.first;
-      Value* allocSize = ExtractValueInstVec[e.second.second/2];
-
-      DEBUG(errs() << "\tLocal Memory at "<< argNum << ", size = "  << *allocSize << "\n");
-
-      if (KF->getFunctionType()->getParamType(argNum)->isPointerTy()) {
-        // Shared memory ptr argument - scalar at size position
-        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
-
-        Value* setInputArgs[] = {GraphID,
-                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
-                                 allocSize
-                                };
-        CallInst::Create(llvm_visc_ocl_argument_shared,
-                         ArrayRef<Value*>(setInputArgs, 3), "", RI);
-      }
-      else {
-        // Sharem memory size argument - scalar at address position
-        switchToTimer(visc_TimerID_COPY_SCALAR, RI);
-        // Store the scalar value on stack and then pass the pointer to its
-        // location
-        AllocaInst* allocSizePtr = new AllocaInst(allocSize->getType(),
-                                        allocSize->getName()+".sharedMem.ptr", RI);
-        StoreInst* SI = new StoreInst(allocSize, allocSizePtr, RI);
-
-        Value* allocSizeI8Ptr = CastInst::CreatePointerCast(allocSizePtr,
-                               Type::getInt8PtrTy(M.getContext()),
-                               allocSize->getName()+".sharedMem.i8ptr",
-                               RI);
-
-        Value* setInputArgs[] = {GraphID,
-                                 allocSizeI8Ptr,
-                                 ConstantInt::get(Type::getInt32Ty(M.getContext()),argNum),
-                                 ConstantExpr::getSizeOf(allocSize->getType())
-                                };
-        CallInst::Create(llvm_visc_ocl_argument_scalar,
-                         ArrayRef<Value*>(setInputArgs, 4), "", RI);
-      }
-    }
-  }
-
-
-  DEBUG(errs() << "Setup output edges of node and insert visc api\n");
-
-  // Set output if struct is not an empty struct
-  StructType* OutputTy = K->KernelLeafNode->getOutputType();
-  std::vector<Value*> d_Outputs;
-  if(!OutputTy->isEmptyTy()) {
-    switchToTimer(visc_TimerID_COPY_PTR, RI);
-    // Not an empty struct
-    // Iterate over all elements of the struct and put them in
-    for(unsigned i=0; i < OutputTy->getNumElements(); i++) {
-      unsigned outputIndex = KF->getFunctionType()->getNumParams()+i;
-      Value* setOutputArgs[] = {GraphID,
-                                ConstantInt::get(Type::getInt32Ty(M.getContext()),outputIndex),
-                                ConstantExpr::getSizeOf(OutputTy->getElementType(i))};
-
-      CallInst* d_Output = CallInst::Create(llvm_visc_ocl_output_ptr,
-                                ArrayRef<Value*>(setOutputArgs, 3),
-                                "d_output."+KF->getName(),
-                                RI);
-      d_Outputs.push_back(d_Output);
-    }
-  }
-
-  // Enqueue kernel
-  // Need work dim, localworksize, globalworksize
-  // Allocate size_t[numDims] space on stack. Store the work group sizes and
-  // pass it as an argument to ExecNode
-
-  switchToTimer(visc_TimerID_MISC, RI);
-  Value *workDim, *LocalWGPtr, *GlobalWGPtr;
-  getExecuteNodeParams(M, workDim, LocalWGPtr, GlobalWGPtr, K, VMap, RI);
-  switchToTimer(visc_TimerID_KERNEL, RI);
-  Value* ExecNodeArgs[] = {GraphID,
-                           workDim,
-                           LocalWGPtr,
-                           GlobalWGPtr
-                          };
-  CallInst* Event = CallInst::Create(llvm_visc_ocl_executeNode,
-                                     ArrayRef<Value*>(ExecNodeArgs, 4),
-                                     "event."+KF->getName(),
-                                     RI);
-  DEBUG(errs() << "Execute Node Call: " << *Event << "\n");
-
-  // Wait for Kernel to Finish
-  CallInst::Create(llvm_visc_ocl_wait,
-                   ArrayRef<Value*>(GraphID),
-                   "",
-                   RI);
-
-  switchToTimer(visc_TimerID_READ_OUTPUT, RI);
-  // Read Output Struct if not empty
-  if(!OutputTy->isEmptyTy()) {
-    std::vector<Value*>h_Outputs;
-    Value* KernelOutput = UndefValue::get(OutputTy);
-    for(unsigned i=0; i < OutputTy->getNumElements(); i++) {
-      Value* GetOutputArgs[] = {GraphID,
-                              Constant::getNullValue(Type::getInt8PtrTy(M.getContext())),
-                              d_Outputs[i],
-                              ConstantExpr::getSizeOf(OutputTy->getElementType(i))
-                             };
-      CallInst* h_Output = CallInst::Create(llvm_visc_ocl_getOutput,
-                                          ArrayRef<Value*>(GetOutputArgs, 4),
-                                          "h_output."+KF->getName()+".addr",
-                                          RI);
-      // Read each device pointer listed in output struct
-      // Load the output struct
-      CastInst* BI = BitCastInst::CreatePointerCast(h_Output,
-          OutputTy->getElementType(i)->getPointerTo(), "output.ptr", RI);
-
-      Value* OutputElement = new LoadInst(BI, "output."+KF->getName(), RI);
-      KernelOutput = InsertValueInst::Create(KernelOutput, OutputElement, ArrayRef<unsigned>(i),
-                                            KF->getName()+"output", RI);
-    }
-    OutputMap[K->KernelLeafNode] = KernelOutput;
-  }
-
-  // Read all the pointer arguments which had side effects i.e., had out
-  // attribute
-  DEBUG(errs() << "Output Pointers : " << OutputPointers.size() << "\n");
-  // FIXME: Not reading output pointers anymore as we read them when data is
-  // actually requested
-  /*for(auto output: OutputPointers) {
-    DEBUG(errs() << "Read: " << *output.d_ptr << "\n");
-    DEBUG(errs() << "\tTo: " << *output.h_ptr << "\n");
-    DEBUG(errs() << "\t#bytes: " << *output.bytes << "\n");
-
-    Value* GetOutputArgs[] = {GraphID, output.h_ptr, output.d_ptr, output.bytes};
-    CallInst* CI = CallInst::Create(llvm_visc_ocl_getOutput,
-                                    ArrayRef<Value*>(GetOutputArgs, 4),
-                                    "", RI);
-  }*/
-  switchToTimer(visc_TimerID_MEM_FREE, RI);
-  // Clear Context and free device memory
-  DEBUG(errs() << "Clearing context" << "\n");
-  // Free Device Memory
-  for(auto d_ptr: DevicePointers) {
-    CallInst::Create(llvm_visc_ocl_free, ArrayRef<Value*>(d_ptr), "", RI);
-  }
-  switchToTimer(visc_TimerID_CLEAR_CTX, CleanupCall);
-  // Clear Context
-  LoadInst* LI = new LoadInst(GraphIDAddr, "", CleanupCall);
-  CallInst::Create(llvm_visc_ocl_clearContext, ArrayRef<Value*>(LI), "", CleanupCall);
-  switchToTimer(visc_TimerID_NONE, CleanupCall);
-
-  switchToTimer(visc_TimerID_MISC, RI);
-  DEBUG(errs() << "*** Generating epilogue code for the function****\n");
-  // Generate code for output bindings
-  // Get Exit node
-  DFNode* C = N->getChildGraph()->getExit();
-  // Get OutputType of this node
-  StructType* OutTy = N->getOutputType();
-  Value *retVal = UndefValue::get(F_X86->getReturnType());
-  // Find the kernel's output arg map, to use instead of the bindings
-  std::vector<unsigned> outArgMap = kernel->getOutArgMap();
-  // Find all the input edges to exit node
-  for (unsigned i=0; i < OutTy->getNumElements(); i++) {
-    DEBUG(errs() << "Output Edge " << i << "\n");
-    // Find the incoming edge at the requested input port
-    DFEdge* E = C->getInDFEdgeAt(i);
-
-    assert(E && "No Binding for output element!");
-    // Find the Source DFNode associated with the incoming edge
-    DFNode* SrcDF = E->getSourceDF();
-
-    DEBUG(errs() << "Edge source -- " <<  SrcDF->getFuncPointer()->getName() << "\n");
-
-    // If Source DFNode is a dummyNode, edge is from parent. Get the
-    // argument from argument list of this internal node
-    Value* inputVal;
-    if(SrcDF->isEntryNode()) {
-      inputVal = getArgumentAt(F_X86, i);
-      DEBUG(errs() << "Argument "<< i<< " = "  << *inputVal << "\n");
-    }
-    else {
-      // edge is from a internal node
-      // Check - code should already be generated for this source dfnode
-      // FIXME: Since the 2-level kernel code gen has aspecific structure, we
-      // can assume the SrcDF is same as Kernel Leaf node.
-      // Use outArgMap to get correct mapping
-      SrcDF = K->KernelLeafNode;
-      assert(OutputMap.count(SrcDF)
-             && "Source node call not found. Dependency violation!");
-
-      // Find Output Value associated with the Source DFNode using OutputMap
-      Value* CI = OutputMap[SrcDF];
-
-      // Extract element at source position from this call instruction
-      std::vector<unsigned> IndexList;
-      // i is the destination of DFEdge E
-      // Use the mapping instead of the bindings
-//      IndexList.push_back(E->getSourcePosition());
-      IndexList.push_back(outArgMap[i]);
-      DEBUG(errs() << "Going to generate ExtarctVal inst from "<< *CI <<"\n");
-      ExtractValueInst* EI = ExtractValueInst::Create(CI, IndexList,
-                             "",RI);
-      inputVal = EI;
-    }
-    std::vector<unsigned> IdxList;
-    IdxList.push_back(i);
-    retVal = InsertValueInst::Create(retVal, inputVal, IdxList, "", RI);
-  }
-
-  DEBUG(errs() << "Extracted all\n");
-  switchToTimer(visc_TimerID_NONE, RI);
-  retVal->setName("output");
-  ReturnInst* newRI = ReturnInst::Create(F_X86->getContext(), retVal);
-  ReplaceInstWithInst(RI, newRI);
-}
-
-
-// Right now, only targeting the one level case. In general, device functions
-// can return values so we don't need to change them
-void CGT_SPIR::codeGen(DFInternalNode* N) {
-  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
-  if(KernelLaunchNode == NULL)
-    errs () << "No kernel launch node\n";
-  else {
-    errs () << "KernelLaunchNode: " << KernelLaunchNode->getFuncPointer()->getName() << "\n";
-  }
-
-
-  if (!KernelLaunchNode) {
-    DEBUG(errs() << "No code generated (host code for kernel launch complete).\n");
-    return;
-  }
-
-  if (N == KernelLaunchNode) {
-    DEBUG(errs() << "Found kernel launch node. Generating host code.\n");
-    //TODO
-
-    // Now the remaining nodes to be visited should be ignored
-    KernelLaunchNode = NULL;
-    DEBUG(errs() << "Insert Runtime calls\n");
-    insertRuntimeCalls(N, kernel, getSPIRFilename(M));
-
-  } else {
-    DEBUG(errs() << "Found intermediate node. Getting size parameters.\n");
-    // Keep track of the arguments order.
-    std::map<unsigned, unsigned> inmap1 = N->getInArgMap();
-    std::map<unsigned, unsigned> inmap2 = kernel->getInArgMap();
-    // TODO: Structure assumed: one thread node, one allocation node (at most),
-    // TB node
-    std::map<unsigned, unsigned> inmapFinal;
-    for (std::map<unsigned, unsigned>::iterator ib = inmap2.begin(), ie = inmap2.end();
-         ib != ie; ++ib) {
-      inmapFinal[ib->first] = inmap1[ib->second];
-    }
-
-    kernel->setInArgMap(inmapFinal);
-
-    // Keep track of the output arguments order.
-    std::vector<unsigned> outmap1 = N->getOutArgMap();
-    std::vector<unsigned> outmap2 = kernel->getOutArgMap();
-
-    // TODO: Change when we have incoming edges to the dummy exit node from more
-    // than one nodes. In this case, the number of bindings is the same, but
-    // their destination position, thus the index in outmap1, is not
-    // 0 ... outmap2.size()-1
-    // The limit is the size of outmap2, because this is the number of kernel
-    // output arguments for which the mapping matters
-    // For now, it reasonable to assume that all the kernel arguments are returned,
-    // maybe plys some others from other nodes, thus outmap2.size() <= outmap1.size()
-    for (unsigned i = 0; i < outmap2.size(); i++) {
-      outmap1[i] = outmap2[outmap1[i]];
-    }
-    kernel->setOutArgMap(outmap1);
-
-    // Track the source of local dimlimits for the kernel
-    // Dimension limit can either be a constant or an argument of parent
-    // function. Since Internal node would no longer exist, we need to insert the
-    // localWGSize with values from the parent of N.
-    std::vector<Value*> localWGSizeMapped;
-    for (unsigned i = 0; i < kernel->localWGSize.size(); i++) {
-      if (isa<Constant>(kernel->localWGSize[i])) {
-        // if constant, use as it is
-        localWGSizeMapped.push_back(kernel->localWGSize[i]);
-      }
-      else if (Argument* Arg = dyn_cast<Argument>(kernel->localWGSize[i])) {
-        // if argument, find the argument location in N. Use InArgMap of N to
-        // find the source location in Parent of N. Retrieve the argument from
-        // parent to insert in the vector.
-        unsigned argNum = Arg->getArgNo();
-        // This argument will be coming from the parent node, not the allocation
-        // Node
-        assert(N->getInArgMap().find(argNum) != N->getInArgMap().end());
-
-        unsigned parentArgNum = N->getInArgMap()[argNum];
-        Argument* A = getArgumentAt(N->getParent()->getFuncPointer(), parentArgNum);
-        localWGSizeMapped.push_back(A);
-      }
-      else {
-        assert(false && "LocalWGsize using value which is neither argument nor constant!");
-      }
-    }
-    // Update localWGSize vector of kernel
-    kernel->setLocalWGSize(localWGSizeMapped);
-  }
-
-}
-
-//static bool checkPreferredTarget(DFNode* N, visc::Target T) {
-  //Function* F = N->getFuncPointer();
-  //Module* M = F->getParent();
-  //NamedMDNode* HintNode;
-  //switch (T) {
-    //case visc::GPU_TARGET:
-      //HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu");
-      //break;
-    //case visc::SPIR_TARGET:
-      //HintNode = M->getOrInsertNamedMetadata("visc_hint_spir");
-      //break;
-    //case visc::CPU_TARGET:
-      //HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu");
-      //break;
-    //default:
-      //llvm_unreachable("Target Not supported yet!");
-  //}
-  //for (unsigned i = 0; i < HintNode->getNumOperands(); i++) {
-    //MDNode* MetaNode = HintNode->getOperand(i);
-    //if(F == MetaNode->getOperand(0))
-      //return true;
-  //}
-  //return false;
-//}
-
-void CGT_SPIR::codeGen(DFLeafNode* N) {
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Skip code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    DEBUG(errs() << "Skipping allocation node\n");
-    return;
-  }
-
-  // Generate code only if it has the right hint
-//  if(!checkPreferredTarget(N, visc::SPIR_TARGET)) {
-//    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-//    return;
-//  }
-  if(!preferredTargetIncludes(N, visc::SPIR_TARGET)) {
-    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
-    return;
-  }
-
-  // Checking which node is the kernel launch
-  DFNode* PNode = N->getParent();
-  int pLevel = PNode->getLevel();
-  int pReplFactor = PNode->getNumOfDim();
-
-  // Choose parent node as kernel launch if:
-  // (1) Parent is the top level node i.e., Root of DFG
-  //                    OR
-  // (2) Parent does not have multiple instances
-  errs() << "pLevel = " << pLevel << "\n";
-  errs() << "pReplFactor = " << pReplFactor << "\n";
-
-  if (!pLevel || !pReplFactor) {
-    errs() << "*************** Kernel Gen: 1-Level Hierarchy **************\n";
-    KernelLaunchNode = PNode;
-    errs() << "Setting Kernel Launch Node\n";
-    kernel = new Kernel(NULL,
-                        N,
-                        N->getInArgMap(),
-                        N->getSharedInArgMap(),
-                        N->getOutArgMap(),
-                        N->getNumOfDim(),
-                        N->getDimLimits());
-  }
-  else {
-    // Converting a 2-level DFG to opencl kernel
-    errs() << "*************** Kernel Gen: 2-Level Hierarchy **************\n";
-    KernelLaunchNode = PNode->getParent();
-    assert((PNode->getNumOfDim() == N->getNumOfDim()) && "Dimension number must match");
-    // Contains the instructions generating the kernel configuration parameters
-    kernel = new Kernel(NULL,                 // kernel function
-                        N,                    // kernel leaf node
-                        N->getInArgMap(),     // kenel argument mapping
-                        N->getSharedInArgMap(),
-                        N->getOutArgMap(),    // kernel output mapping from the leaf to the interemediate node
-                        PNode->getNumOfDim(), // gridDim
-                        PNode->getDimLimits(),// grid size
-                        N->getNumOfDim(),     // blockDim
-                        N->getDimLimits());   // block size
-
-  }
-
-  std::vector<IntrinsicInst *> IItoRemove;
-  BuildDFG::HandleToDFNode Leaf_HandleToDFNodeMap;
-
-  // Get the function associated with the dataflow node
-  Function *F = N->getFuncPointer();
-
-  // Look up if we have visited this function before. If we have, then just
-  // get the cloned function pointer from DFNode. Otherwise, create the cloned
-  // function and add it to the DFNode GenFunc.
-  Function *F_spir = N->getGenFuncForTarget(visc::SPIR_TARGET);
-  assert(F_spir == NULL && "Error: Visiting a node for which code already generated");
-
-  // Clone the function
-  ValueToValueMapTy VMap;
-
-  Twine FName = F->getName();
-  F_spir = CloneFunction(F, VMap);
-  F_spir->setName(FName+"_spir");
-  errs() << "Old Function Name: " << F->getName() << "\n";
-  errs() << "New Function Name: " << F_spir->getName() << "\n";
-
-  F_spir->removeFromParent();
-
-  // Insert the cloned function into the kernels module
-  KernelM->getFunctionList().push_back(F_spir);
-
-  //TODO: Iterate over all the instructions of F_spir and identify the
-  //callees and clone them into this module.
-  DEBUG(errs() << *F_spir->getType());
-  DEBUG(errs() << *F_spir);
-
-  //Add generated function info to DFNode
-  //N->setGenFunc(F_spir, visc::SPIR_TARGET);
-
-  F_spir = transformFunctionToVoid(F_spir);
-
-  // Add generated function info to DFNode
-  //N->setGenFunc(F_spir, visc::SPIR_TARGET);
-
-  removeInOutAttributes(F_spir);
-
-  //Add generated function info to DFNode
-  N->addGenFunc(F_spir, visc::SPIR_TARGET, false);
-
-  DEBUG(errs() << "Removing all attributes from Kernel Function and adding nounwind\n");
-  F_spir->removeAttributes(AttributeSet::FunctionIndex, F_spir->getAttributes().getFnAttributes());
-  F_spir->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
-
-
-  //FIXME: For now, assume only one allocation node
-  kernel->AllocationNode = NULL;
-
-  for (DFNode::const_indfedge_iterator ieb = N->indfedge_begin(), iee = N->indfedge_end();
-       ieb != iee; ++ieb) {
-    DFNode *SrcDFNode = (*ieb)->getSourceDF();
-    DEBUG(errs() << "Found edge from node: " << " " << SrcDFNode->getFuncPointer()->getName() << "\n");
-    DEBUG(errs() << "Current Node: " << N->getFuncPointer()->getName() << "\n");
-    DEBUG(errs() << "isAllocationNode = "<< SrcDFNode->isAllocationNode() << "\n");
-    if (!SrcDFNode->isDummyNode()) {
-      assert(SrcDFNode->isAllocationNode());
-      kernel->AllocationNode = dyn_cast<DFLeafNode>(SrcDFNode);
-      kernel->allocInArgMap = SrcDFNode->getInArgMap();
-      break;
-    }
-  }
-
-  // Vector for shared memory arguments
-  std::vector<unsigned> SharedMemArgs;
-
-  // If no allocation node was found, SharedMemArgs is empty
-  if (kernel->AllocationNode) {
-
-    ValueToValueMapTy VMap;
-    Function *F_alloc = CloneFunction(kernel->AllocationNode->getFuncPointer(), VMap);
-    //F_alloc->removeFromParent();
-    // Insert the cloned function into the kernels module
-    //M.getFunctionList().push_back(F_alloc);
-
-    std::vector<IntrinsicInst *> ViscMallocInstVec;
-    findIntrinsicInst(F_alloc, Intrinsic::visc_malloc, ViscMallocInstVec);
-
-    for (unsigned i = 0; i < ViscMallocInstVec.size(); i++) {
-      IntrinsicInst *II = ViscMallocInstVec[i];
-      assert(II->hasOneUse() && "visc_malloc result is used more than once");
-      II->replaceAllUsesWith(ConstantPointerNull::get(Type::getInt8PtrTy(M.getContext())));
-      II->eraseFromParent();
-    }
-    kernel->AllocationFunction = F_alloc;
-
-    // This could be used to check that the allocation node has the appropriate
-    // number of fields in its return struct
-/*
-    ReturnInst *RI = ReturnInstVec[0];
-    Value *RetVal = RI->getReturnValue();
-    Type *RetTy = RetVal->getType();
-    StructType *RetStructTy = dyn_cast<StructType>(RetTy);
-    assert(RetStructTy && "Allocation node does not return a struct type");
-    unsigned numFields = RetStructTy->getNumElements();
-*/
-    std::map<unsigned, std::pair<Value*, unsigned> > sharedInMap = kernel->getSharedInArgMap();
-    AllocationNodeProperty* APN =
-      (AllocationNodeProperty*) kernel->AllocationNode->getProperty(DFNode::Allocation);
-    for (auto& AllocPair: APN->getAllocationList()) {
-      unsigned destPos = AllocPair.first->getDestPosition();
-      unsigned srcPos = AllocPair.first->getSourcePosition();
-      SharedMemArgs.push_back(destPos);
-      sharedInMap[destPos] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1);
-      sharedInMap[destPos+1] = std::pair<Value *, unsigned>(AllocPair.second, srcPos+1);
-    }
-    kernel->setSharedInArgMap(sharedInMap);
-  }
-  std::sort(SharedMemArgs.begin(), SharedMemArgs.end());
-
-  // All pointer args which are not shared memory pointers have to be moved to
-  // global address space
-  unsigned argIndex = 0;
-  std::vector<unsigned> GlobalMemArgs;
-  for(auto& Arg: F_spir->getArgumentList()) {
-    if (Arg.getType()->isPointerTy()) {
-      // If the arguement is already chosen for shared memory arguemnt list, skip.
-      // Else put it in Global memory arguement list
-      if(std::count(SharedMemArgs.begin(), SharedMemArgs.end(), argIndex) == 0) {
-        GlobalMemArgs.push_back(argIndex);
-      }
-    }
-    argIndex++;
-  }
-  std::sort(GlobalMemArgs.begin(), GlobalMemArgs.end());
-
-  /* At this point, we assume that chescks for the fact that SharedMemArgs only
-     contains pointer arguments to GLOBAL_ADDRSPACE have been performed by the
-     analysis pass */
-
-  F_spir = changeArgAddrspace(F_spir, SharedMemArgs, SHARED_ADDRSPACE);
-  removeAttributeAtArguments(F_spir, SharedMemArgs, Attribute::NoCapture);
-  F_spir = changeArgAddrspace(F_spir, GlobalMemArgs, GLOBAL_ADDRSPACE);
-
-
-  // Go through all the instructions
-  for (inst_iterator i = inst_begin(F_spir), e = inst_end(F_spir); i != e; ++i) {
-    Instruction *I = &(*i);
-    // Leaf nodes should not contain VISC graph intrinsics or launch
-    assert(!BuildDFG::isViscLaunchIntrinsic(I) && "Launch intrinsic within a dataflow graph!");
-    assert(!BuildDFG::isViscGraphIntrinsic(I) && "VISC graph intrinsic within a leaf dataflow node!");
-
-    if (BuildDFG::isViscIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      IntrinsicInst* ArgII;
-      DFNode* ArgDFNode;
-
-      /************************ Handle VISC Query intrinsics ************************/
-
-      switch (II->getIntrinsicID()) {
-      /**************************** llvm.visc.getNode() *****************************/
-      case Intrinsic::visc_getNode: {
-        DEBUG(errs() << F_spir->getName() << "\t: Handling getNode\n");
-        // add mapping <intrinsic, this node> to the node-specific map
-        Leaf_HandleToDFNodeMap[II] = N;
-        IItoRemove.push_back(II);
-      }
-      break;
-      /************************* llvm.visc.getParentNode() **************************/
-      case Intrinsic::visc_getParentNode: {
-        DEBUG(errs() << F_spir->getName() << "\t: Handling getParentNode\n");
-        // get the parent node of the arg node
-        // get argument node
-        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
-        // get the parent node of the arg node
-        // Add mapping <intrinsic, parent node> to the node-specific map
-        // the argument node must have been added to the map, orelse the
-        // code could not refer to it
-        Leaf_HandleToDFNodeMap[II] = ArgDFNode->getParent();
-
-        IItoRemove.push_back(II);
-      }
-      break;
-      /*************************** llvm.visc.getNumDims() ***************************/
-      case Intrinsic::visc_getNumDims: {
-        DEBUG(errs() << F_spir->getName() << "\t: Handling getNumDims\n");
-        // get node from map
-        // get the appropriate field
-        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
-        int numOfDim = ArgDFNode->getNumOfDim();
-        DEBUG(errs() << "\t  Got node dimension : " << numOfDim << "\n");
-        IntegerType* IntTy = Type::getInt32Ty(KernelM->getContext());
-        ConstantInt* numOfDimConstant = ConstantInt::getSigned(IntTy, (int64_t) numOfDim);
-
-        // Replace the result of the intrinsic with the computed value
-        II->replaceAllUsesWith(numOfDimConstant);
-
-        IItoRemove.push_back(II);
-      }
-      break;
-      /*********************** llvm.visc.getNodeInstanceID() ************************/
-      case Intrinsic::visc_getNodeInstanceID_x:
-      case Intrinsic::visc_getNodeInstanceID_y:
-      case Intrinsic::visc_getNodeInstanceID_z: {
-        DEBUG(errs() << F_spir->getName() << "\t: Handling getNodeInstanceID\n");
-        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
-        assert(ArgDFNode && "Arg node is NULL");
-        // A leaf node always has a parent
-        DFNode* ParentDFNode = ArgDFNode->getParent();
-        assert(ParentDFNode && "Parent node of a leaf is NULL");
-
-        // Get the number associated with the required dimension
-        // FIXME: The order is important!
-        // These three intrinsics need to be consecutive x,y,z
-        uint64_t dim = II->getIntrinsicID() -
-                       Intrinsic::visc_getNodeInstanceID_x;
-        assert((dim >= 0) && (dim < 3) && "Invalid dimension argument");
-        DEBUG(errs() << "\t  dimension = " << dim << "\n");
-
-        // Argument of the function to be called
-        ConstantInt * DimConstant =
-          ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim);
-        //ArrayRef<Value *> Args(DimConstant);
-
-        // The following is to find which function to call
-        Function * OpenCLFunction;
-        int parentLevel = N->getParent()->getLevel();
-        int parentReplFactor = N->getParent()->getNumOfDim();
-        DEBUG(errs() << "Parent Level = " << parentLevel << "\n");
-        DEBUG(errs() << "Parent Repl factor = " << parentReplFactor << "\n");
-
-        FunctionType* FT =
-            FunctionType::get(Type::getInt64Ty(KernelM->getContext()),           
-                              ArrayRef<Type*>(Type::getInt32Ty(KernelM->getContext())),
-                              false);
-
-        if ((!parentLevel || !parentReplFactor) && ArgDFNode == N) {
-          // We only have one level in the hierarchy or the parent node is not
-          // replicated. This indicates that the parent node is the kernel
-          // launch, so we need to specify a global id.
-          // We can translate this only if the argument is the current node
-          // itself
-          DEBUG(errs() << "Substitute with get_global_id()\n");
-          DEBUG(errs() << *II << "\n");
-          OpenCLFunction = cast<Function>
-                           (KernelM->getOrInsertFunction(getMangledName("get_global_id"), FT));
-        } else if (Leaf_HandleToDFNodeMap[ArgII] == N) {
-          // We are asking for this node's id with respect to its parent
-          // this is a local id call
-          OpenCLFunction = cast<Function>
-                           (KernelM->getOrInsertFunction(getMangledName("get_local_id"), FT));
-        } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) {
-          // We are asking for this node's parent's id with respect to its
-          // parent: this is a group id call
-          OpenCLFunction = cast<Function>
-                           (KernelM->getOrInsertFunction(getMangledName("get_group_id"), FT));
-        } else {
-          errs() << N->getFuncPointer()->getName() << "\n";
-          errs() << N->getParent()->getFuncPointer()->getName() << "\n";
-          errs() << *II << "\n";
-
-          assert(false && "Unable to translate getNodeInstanceID intrinsic");
-        }
-
-        // Create call instruction, insert it before the intrinsic and truncate
-        // the output to 32 bits and replace all the uses of the previous
-        // instruction with the new one
-        CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II);
-        II->replaceAllUsesWith(CI);
-
-        IItoRemove.push_back(II);
-      }
-      break;
-      /********************** llvm.visc.getNumNodeInstances() ***********************/
-      case Intrinsic::visc_getNumNodeInstances_x:
-      case Intrinsic::visc_getNumNodeInstances_y:
-      case Intrinsic::visc_getNumNodeInstances_z: {
-//TODO: think about whether this is the best way to go
-// there are hw specific registers. therefore it is good to have the intrinsic
-// but then, why do we need to keep that info in the graph?
-// (only for the kernel configuration during the call)
-
-        DEBUG(errs() << F_spir->getName() << "\t: Handling getNumNodeInstances\n");
-        ArgII = cast<IntrinsicInst>((II->getOperand(0))->stripPointerCasts());
-        ArgDFNode = Leaf_HandleToDFNodeMap[ArgII];
-        // A leaf node always has a parent
-        DFNode* ParentDFNode = ArgDFNode->getParent();
-        assert(ParentDFNode && "Parent node of a leaf is NULL");
-
-        // Get the number associated with the required dimension
-        // FIXME: The order is important!
-        // These three intrinsics need to be consecutive x,y,z
-        uint64_t dim = II->getIntrinsicID() -
-                       Intrinsic::visc_getNumNodeInstances_x;
-        assert((dim >= 0) && (dim < 3) && "Invalid dimension argument");
-        DEBUG(errs() << "\t  dimension = " << dim << "\n");
-
-        // Argument of the function to be called
-        ConstantInt * DimConstant =
-          ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), dim);
-        //ArrayRef<Value *> Args(DimConstant);
-
-        // The following is to find which function to call
-        Function * OpenCLFunction;
-        int parentLevel = ParentDFNode->getLevel();
-        int parentReplFactor = ParentDFNode->getNumOfDim();
-
-        FunctionType* FT =
-            FunctionType::get(Type::getInt64Ty(KernelM->getContext()),
-                              Type::getInt32Ty(KernelM->getContext()),
-                              false);
-        if ((N == ArgDFNode) && (!parentLevel || !parentReplFactor)) {
-          // We only have one level in the hierarchy or the parent node is not
-          // replicated. This indicates that the parent node is the kernel
-          // launch, so the instances are global_size (gridDim x blockDim)
-          OpenCLFunction = cast<Function>
-                           (KernelM->getOrInsertFunction(getMangledName("get_global_size"), FT));
-        } else if (Leaf_HandleToDFNodeMap[ArgII] == N) {
-          // We are asking for this node's instances
-          // this is a local size (block dim) call
-          OpenCLFunction = cast<Function>
-                           (KernelM->getOrInsertFunction(getMangledName("get_local_size"), FT));
-        } else if (Leaf_HandleToDFNodeMap[ArgII] == N->getParent()) {
-          // We are asking for this node's parent's instances
-          // this is a (global_size/local_size) (grid dim) call
-          OpenCLFunction = cast<Function>
-                           (KernelM->getOrInsertFunction(getMangledName("get_num_groups"), FT));
-        } else {
-          assert(false && "Unable to translate getNumNodeInstances intrinsic");
-        }
-
-        // Create call instruction, insert it before the intrinsic and truncate
-        // the output to 32 bits and replace all the uses of the previous
-        // instruction with the new one
-        CallInst* CI = CallInst::Create(OpenCLFunction, DimConstant, "", II);
-        II->replaceAllUsesWith(CI);
-
-        IItoRemove.push_back(II);
-      }
-      break;
-      case Intrinsic::visc_barrier:
-      {
-        DEBUG(errs() << F_spir->getName() << "\t: Handling barrier\n");
-        DEBUG(errs() << "Substitute with barrier()\n");
-        DEBUG(errs() << *II << "\n");
-        FunctionType* FT = FunctionType::get(Type::getVoidTy(KernelM->getContext()),
-                              std::vector<Type*>(1, Type::getInt32Ty(KernelM->getContext())),
-                              false);
-        Function* OpenCLFunction = cast<Function>
-                           (KernelM->getOrInsertFunction(getMangledName("barrier"), FT));
-        CallInst* CI = CallInst::Create(OpenCLFunction,
-                               ArrayRef<Value*>(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()), 1)),
-                               "", II);
-        II->replaceAllUsesWith(CI);
-        IItoRemove.push_back(II); 
-      }
-      break;
-      case Intrinsic::visc_atomic_cmpxchg:
-      case Intrinsic::visc_atomic_add:
-      case Intrinsic::visc_atomic_sub:
-      case Intrinsic::visc_atomic_xchg:
-      case Intrinsic::visc_atomic_min:
-      case Intrinsic::visc_atomic_umin:
-      case Intrinsic::visc_atomic_max:
-      case Intrinsic::visc_atomic_umax:
-      case Intrinsic::visc_atomic_and:
-      case Intrinsic::visc_atomic_or:
-      case Intrinsic::visc_atomic_xor:
-      case Intrinsic::visc_atomic_inc:
-      case Intrinsic::visc_atomic_dec:
-      {
-        DEBUG(errs() << *II << "\n");
-        // Only have support for i32 atomic intrinsics
-        assert(II->getType() == Type::getInt32Ty(II->getContext())
-            && "Only support i32 atomic intrinsics for now");
-        // Substitute with appropriate atomic builtin
-        assert(II->getNumArgOperands() == 2 && "Expecting 2 operands for these atomics");
-
-        Value* Ptr = II->getArgOperand(0);
-        Value* Val = II->getArgOperand(1);
-        assert(Ptr->getType()->isPointerTy()
-            && "First argument of supported atomics is expected to be a pointer");
-        PointerType* PtrTy = cast<PointerType>(Ptr->getType());
-        if(PtrTy != Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace())) {
-          Ptr = CastInst::CreatePointerCast(Ptr,
-                                            Type::getInt32PtrTy(II->getContext(),
-                                            PtrTy->getAddressSpace()), "", II);
-        }
-
-        StringRef name = getAtomicOpName(II->getIntrinsicID(), PtrTy->getAddressSpace());
-
-        Type* paramTypes[] = { Type::getInt32PtrTy(II->getContext(), PtrTy->getAddressSpace()),
-                               Type::getInt32Ty(KernelM->getContext())
-                             };
-        FunctionType* AtomicFT = FunctionType::get(II->getType(),
-                                 ArrayRef<Type*>(paramTypes, 2),
-                                 false);
-        Function* AtomicFunction = cast<Function>
-                                   (KernelM->getOrInsertFunction(name, AtomicFT));
-        Value* atomicArgs[] = { Ptr, Val };
-        CallInst* AtomicInst = CallInst::Create(AtomicFunction,
-                                               ArrayRef<Value*>(atomicArgs, 2),
-                                               "", II);
-
-        DEBUG(errs() << "Substitute with: " << *AtomicInst << "\n");
-        II->replaceAllUsesWith(AtomicInst);
-        IItoRemove.push_back(II);
-      }
-      break;
-      default:
-        assert(false && "Unknown VISC Intrinsic!");
-        break;
-      }
-
-    }
-    else if(CallInst* CI = dyn_cast<CallInst>(I)) {
-      DEBUG(errs() << "Found a call: " << *CI << "\n");
-      Function* calleeF = cast<Function>(CI->getCalledValue()->stripPointerCasts());
-      if(calleeF->isDeclaration()) {
-        // Add the declaration to kernel module
-        DEBUG(errs() << "Adding declaration to Kernel module: " << *calleeF << "\n");
-        KernelM->getOrInsertFunction(calleeF->getName(), calleeF->getFunctionType());
-        if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(CI)) {
-          // Now handle a few specific intrinsics
-          // For now, sin and cos are translated to their libclc equivalent
-          switch(II->getIntrinsicID()) {
-          case Intrinsic::sin:
-          case Intrinsic::cos:
-          case Intrinsic::sqrt:
-          case Intrinsic::floor:
-          case Intrinsic::nvvm_rsqrt_approx_f:
-          {
-            DEBUG(errs() << "Found math function: " << *II << "\n");
-            // Get the builtin function
-            // SPIR uses mangled name for builtin math functions
-            assert(II->getType()->isFloatTy()
-                   && "Only handling sin(float) and cos(float)!");
-            std::string name = getMathFunctionName(II->getIntrinsicID());
-
-            FunctionType* MathFT = FunctionType::get(II->getType(),
-                                   Type::getFloatTy(KernelM->getContext()),
-                                   false);
-            Function* MathFunction = cast<Function>
-                                     (KernelM->getOrInsertFunction(name, MathFT));
-            CallInst* CI = CallInst::Create(MathFunction, II->getArgOperand(0), II->getName(), II);
-
-            II->replaceAllUsesWith(CI);
-            IItoRemove.push_back(II);
-            break;
-          }
-          default:
-            DEBUG(errs() << "[WARNING] Found Intrinsic: " << *II << "\n" );
-          }
-        }
-
-      }
-      else {
-        // Clone the function
-        ValueToValueMapTy VMap;
-        Function* newCalleeF = CloneFunction(calleeF, VMap);
-        newCalleeF->removeFromParent(); //TODO: MARIA check
-        KernelM->getFunctionList().push_back(newCalleeF);
-      }
-      //TODO: how to handle address space qualifiers in load/store
-    }
-
-  }
-
-  // We need to do this explicitly: DCE pass will not remove them because we
-  // have assumed theworst memory behaviour for these function calls
-  // Traverse the vector backwards, otherwise definitions are deleted while
-  // their subsequent uses are still around
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri)
-    (*ri)->eraseFromParent();
-
-  addCLMetadata(F_spir);
-  kernel->KernelFunction = F_spir;
-  errs() << "Identified kernel - " << kernel->KernelFunction->getName() << "\n";
-  DEBUG(errs() << *KernelM);
-
-  return;
-}
-
-bool DFG2LLVM_SPIR::runOnModule(Module &M) {
-  errs() << "\nDFG2LLVM_SPIR PASS\n";
-
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  // - Maps from i8* hansles to DFNode and DFEdge
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  // DFInternalNode *Root = DFG.getRoot();
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
-  //    BuildDFG::HandleToDFNode &HandleToDFNodeMap = DFG.getHandleToDFNodeMap();
-  //    BuildDFG::HandleToDFEdge &HandleToDFEdgeMap = DFG.getHandleToDFEdgeMap();
-
-  // Visitor for Code Generation Graph Traversal
-  CGT_SPIR *CGTVisitor = new CGT_SPIR(M, DFG);
-
-  // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
-    // Initiate code generation for root DFNode
-    CGTVisitor->visit(rootNode);
-  }
-
-  // This is not required. Itrinsics that do not have a use are not a problem
-  //CGTVisitor->removeLLVMIntrinsics();
-  CGTVisitor->writeKernelsModule();
-
-  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
-  delete CGTVisitor;
-
-  return true;
-}
-
-std::string CGT_SPIR::getKernelsModuleName(Module &M) {
-  /*SmallString<128> currentDir;
-  llvm::sys::fs::current_path(currentDir);
-  std::string fileName = getFilenameFromModule(M);
-  Twine output = Twine(currentDir) + "/Output/" + fileName + "";
-  return output.str().append(".kernels.ll");*/
-  std::string mid = M.getModuleIdentifier();
-  return mid.append(".kernels.ll");
-}
-
-void CGT_SPIR::fixValueAddrspace(Value* V, unsigned addrspace) {
-  assert(isa<PointerType>(V->getType())
-         && "Value should be of Pointer Type!");
-  PointerType* OldTy = cast<PointerType>(V->getType());
-  PointerType* NewTy = PointerType::get(OldTy->getElementType(), addrspace);
-  V->mutateType(NewTy);
-  for(Value::user_iterator ui = V->user_begin(), ue = V->user_end(); ui != ue; ui++) {
-    // Change all uses producing pointer type in same address space to new
-    // addressspace.
-    if(PointerType* PTy = dyn_cast<PointerType>((*ui)->getType())) {
-      if(PTy->getAddressSpace() == OldTy->getAddressSpace()) {
-        fixValueAddrspace(*ui, addrspace);
-      }
-    }
-  }
-}
-
-Function* CGT_SPIR::changeArgAddrspace(Function* F, std::vector<unsigned> &Args, unsigned addrspace) {
-  unsigned idx = 0;
-  std::vector<Type*> ArgTypes;
-  for(auto& arg: F->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    unsigned argno = arg.getArgNo();
-    if ((idx < Args.size()) && (argno == Args[idx])) {
-      fixValueAddrspace(&arg, addrspace);
-      idx++;
-    }
-    ArgTypes.push_back(arg.getType());
-  }
-  FunctionType* newFT = FunctionType::get(F->getReturnType(), ArgTypes, false);
-
-  //F->mutateType(PTy);
-  Function* newF = cloneFunction(F, newFT, false);
-  replaceNodeFunctionInIR(*F->getParent(), F, newF);
-
-  DEBUG(errs() << *newF->getFunctionType() << "\n" <<*newF << "\n");
-  return newF;
-}
-
-/* Remove the specified argument from arguments at positions denoted in Args */
-void CGT_SPIR::removeAttributeAtArguments(Function* F, std::vector<unsigned> &Args, Attribute::AttrKind attrKind) {
-  DEBUG(errs() << "Removing nocapture attribute from shared memory arguments of function " << F->getName() << "\n");
-
-  unsigned cnt = 0, arg_no = 0;
-  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-      ai != ae && arg_no < Args.size(); ++ai, ++cnt) {
-
-    if (Args[arg_no] == cnt) {
-      AttributeSet AS = F->getAttributes();
-      AttrBuilder AB(AS, ai->getArgNo()+1);
-      AB.removeAttribute(attrKind);
-      AttributeSet argAS = AttributeSet::get(F->getContext(), ai->getArgNo()+1, AB);
-      F->removeAttributes(1+ai->getArgNo(), AS.getParamAttributes(ai->getArgNo() + 1));
-      F->addAttributes(1+ai->getArgNo(), argAS);
-
-      arg_no++;
-    }
-  }
-}
-
-/* Add metadata to module KernelM, for OpenCL kernels */
-void CGT_SPIR::addCLMetadata(Function *F) {
-  // TODO: There is additional metadata used by kernel files but we skip them as
-  // they are not mandatory. In future they might be useful to enable
-  // optimizations
-
-  IRBuilder<> Builder(&*F->begin());
- 
-  // Create node for "kernel_arg_type"
-  SmallVector<Metadata*,8> argTypeNames;
-  argTypeNames.push_back(MDString::get(KernelM->getContext(), "kernel_arg_type"));
-
-  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(); ai != ae;
-      ai++) {
-    argTypeNames.push_back(MDString::get(KernelM->getContext(), printType(ai->getType())));
-  }
-  // All argument type names are in the vector. Create a metadata node
-  // "kernel_arg_type"
-  MDTuple* KernelArgTypes = MDNode::get(KernelM->getContext(), argTypeNames);
-  
-  // Create kernel metadata node containg the kernel function and the
-  // "kernel_arg_type" metadata node created above
-  SmallVector<Metadata*,8> KernelMD;
-  KernelMD.push_back(ValueAsMetadata::get(F));
-  KernelMD.push_back(KernelArgTypes);
-  MDTuple *MDKernelNode = MDNode::get(KernelM->getContext(), KernelMD);
-
-  // Create metadata node opencl.kernels. It points to the kernel metadata node
-  NamedMDNode *MDN_kernels = KernelM->getOrInsertNamedMetadata("opencl.kernels");
-  MDN_kernels->addOperand(MDKernelNode);
-
-  //KernelMD.push_back(MDString::get(KernelM->getContext(), "kernel"));
-  // TODO: Replace 1 with the number of the kernel.
-  // Add when support for multiple launces is added
-  //KernelMD.push_back(ConstantInt::get(Type::getInt32Ty(KernelM->getContext()),1));
-  //MDNode *MDNvvmAnnotationsNode = MDNode::get(KernelM->getContext(), KernelMD);
-  //NamedMDNode *MDN_annotations = KernelM->getOrInsertNamedMetadata("nvvm.annotations");
-  //MDN_annotations->addOperand(MDNvvmAnnotationsNode);
-
-}
-
-/* Function to remove all remaining declarations of llvm intrinsics,
- * as they are not supported in SPIR.
- */
-void CGT_SPIR::removeLLVMIntrinsics() {
-
-  std::vector<Function*> fv = std::vector<Function*>();
-
-  for (Module::iterator mi = KernelM->begin(), me = KernelM->end(); (mi != me); ++mi) {
-    Function* F = &*mi;
-    if (F->isDeclaration() && F->getName().startswith("llvm.")) {
-      DEBUG(errs() << "Declaration: " << F->getName() << " with " << F->getNumUses() <<"uses.\n");
-      assert(F->hasNUses(0) && "LLVM intrinsic function still in use");
-      fv.push_back(F);
-    }
-  }
-
-  for (std::vector<Function*>::iterator vi = fv.begin(); vi != fv.end(); ++vi) {
-    DEBUG(errs() << "Erasing declaration: " << (*vi)->getName() <<"\n");
-    (*vi)->replaceAllUsesWith(UndefValue::get((*vi)->getType()));
-    (*vi)->eraseFromParent();
-  }
-
-}
-
-void CGT_SPIR::writeKernelsModule() {
-
-  // In addition to deleteing all otjer functions, we also want to spice it up a
-  // little bit. Do this now.
-  legacy::PassManager Passes;
-
-  std::error_code EC;
-  tool_output_file Out(getKernelsModuleName(M).c_str(), EC, sys::fs::F_None);
-  if (EC) {
-    errs() << EC.message() << "\n";
-  }
-
-  Passes.add(
-      createPrintModulePass(Out.os()));
-
-  Passes.run(*KernelM);
-
-  // Declare success.
-  Out.keep();
-}
-
-Function* CGT_SPIR::transformFunctionToVoid(Function* F) {
-
-  // FIXME: Maybe do that using the Node?
-  StructType* FRetTy = cast<StructType>(F->getReturnType());
-  assert(FRetTy && "Return Type must always be a struct");
-
-  // Keeps return statements, because we will need to replace them
-  std::vector<ReturnInst *> RItoRemove;
-  findReturnInst(F, RItoRemove);
-
-
-  // Check for { } return struct, which means that the function returns void
-  if (FRetTy->isEmptyTy()) {
-
-    DEBUG(errs() << "\tFunction output struct is void\n");
-    DEBUG(errs() << "\tNo parameters added\n");
-
-    // Replacing return statements with others returning void
-    for (std::vector<ReturnInst *>::iterator i = RItoRemove.begin(),
-         e = RItoRemove.end(); i != e; ++i) {
-      ReturnInst::Create((F->getContext()), 0, (*i));
-      (*i)->eraseFromParent();
-    }
-    DEBUG(errs() << "\tChanged return statements to return void\n");
-  }
-  else {
-    // The struct has return values, thus needs to be converted to parameter
-
-    // Iterate over all element types of return struct and add arguments to the
-    // function
-    std::vector<Argument*> Args;
-    for (unsigned i=0; i<FRetTy->getNumElements(); i++) {
-      Argument* RetArg = new Argument(FRetTy->getElementType(i)->getPointerTo(), "ret_arg", F);
-      Args.push_back(RetArg);
-      DEBUG(errs() << "\tCreated parameter: " << *RetArg << "\n");
-    }
-
-    Function::arg_iterator ai, ae;
-
-    DEBUG(errs() << "\tReplacing Return statements\n");
-    // Replace return statements with extractValue and store instructions
-    for (std::vector<ReturnInst *>::iterator rii = RItoRemove.begin(),
-         rie = RItoRemove.end(); rii != rie; ++rii) {
-      ReturnInst* RI = (*rii);
-      Value* RetVal = RI->getReturnValue();
-      for(unsigned i = 0; i < Args.size(); i++) {
-        ExtractValueInst* EI = ExtractValueInst::Create(RetVal, ArrayRef<unsigned>(i),
-                                    Args[i]->getName()+".val", RI);
-        new StoreInst(EI, Args[i], RI);
-      }
-      // assert(RetVal && "Return value should not be null at this point");
-      // StructType* RetType = cast<StructType>(RetVal->getType());
-      // assert(RetType && "Return type is not a struct");
-
-      ReturnInst::Create((F->getContext()), 0, RI);
-      RI->eraseFromParent();
-
-    }
-  }
-  DEBUG(errs() << "\tReplaced return statements\n");
-
-  // Create the argument type list with the added argument's type
-  std::vector<Type*> ArgTypes;
-  for(Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-      ai != ae; ++ai) {
-    ArgTypes.push_back(ai->getType());
-  }
-
-  // Adding new arguments to the function argument list, would not change the
-  // function type. We need to change the type of this function to reflect the
-  // added arguments
-  Type* VoidRetType = Type::getVoidTy(F->getContext());
-  FunctionType* newFT = FunctionType::get(VoidRetType, ArgTypes, F->isVarArg());
-
-  // Change the function type
-  //F->mutateType(PTy);
-  Function* newF = cloneFunction(F, newFT, false);
-  replaceNodeFunctionInIR(*F->getParent(), F, newF);
-
-  return newF;
-}
-
-// Remove the visc in/out attributes from kernel function
-void CGT_SPIR::removeInOutAttributes(Function* F) {
-  DEBUG(errs() << "Removing visc attributes from argument list of function " << F->getName() << "\n");
-  for(Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-      ai != ae; ai++) {
-
-    AttributeSet AS = F->getAttributes();
-    AttrBuilder AB(AS, ai->getArgNo()+1);
-    AB.removeAttribute(Attribute::In);
-    AB.removeAttribute(Attribute::Out);
-    AB.removeAttribute(Attribute::InOut);
-    AttributeSet argAS = AttributeSet::get(F->getContext(), ai->getArgNo()+1, AB);
-    F->removeAttributes(1+ai->getArgNo(), AS.getParamAttributes(ai->getArgNo() + 1));
-    F->addAttributes(1+ai->getArgNo(), argAS);
-
-  }
-}
-
-/******************************************************************************
- *                              Helper functions                              *
- ******************************************************************************/
-
-// Calculate execute node parameters which include, number of diemnsions for
-// dynamic instances of the kernel, local and global work group sizes.
-static void getExecuteNodeParams(Module &M, Value* &workDim, Value* &LocalWGPtr, Value*
-                                 &GlobalWGPtr, Kernel* kernel, ValueToValueMapTy& VMap, Instruction* IB) {
-
-  // Assign number of dimenstions a constant value
-  workDim = ConstantInt::get(Type::getInt32Ty(M.getContext()), kernel->gridDim);
-
-  // If local work group size if null
-  if(!kernel->hasLocalWG()) {
-    LocalWGPtr = Constant::getNullValue(Type::getInt64PtrTy(M.getContext()));
-  }
-  else {
-    for(unsigned i = 0; i < kernel->localWGSize.size(); i++) {
-      if(isa<Argument>(kernel->localWGSize[i]))
-        kernel->localWGSize[i] = VMap[kernel->localWGSize[i]];
-    }
-    LocalWGPtr = genWorkGroupPtr(M, kernel->localWGSize, VMap, IB, "LocalWGSize");
-  }
-
-  for(unsigned i = 0; i < kernel->globalWGSize.size(); i++) {
-    if(isa<Argument>(kernel->globalWGSize[i]))
-      kernel->globalWGSize[i] = VMap[kernel->globalWGSize[i]];
-  }
-
-  // For OpenCL, global work group size is the total bumber of instances in each
-  // dimension. So, multiply local and global dim limits.
-  std::vector<Value*> globalWGSizeInsts;
-  if(kernel->hasLocalWG()) {
-    for (unsigned i = 0; i < kernel->gridDim; i++) {
-      BinaryOperator* MulInst = BinaryOperator::Create(Instruction::Mul, kernel->globalWGSize[i], kernel->localWGSize[i], "", IB);
-      globalWGSizeInsts.push_back(MulInst);
-    }
-  }
-  else {
-    globalWGSizeInsts = kernel->globalWGSize;
-  }
-  GlobalWGPtr = genWorkGroupPtr(M, globalWGSizeInsts, VMap, IB, "GlobalWGSize");
-  DEBUG(errs() << "Pointer to global work group: " << *GlobalWGPtr << "\n");
-}
-
-// CodeGen for allocating space for Work Group on stack and returning a pointer
-// to its address
-static Value* genWorkGroupPtr(Module &M, std::vector<Value*> WGSize, ValueToValueMapTy& VMap, Instruction* IB, const Twine& WGName) {
-  Value* WGPtr;
-  // Get int64_t and or ease of use
-  Type* Int64Ty = Type::getInt64Ty(M.getContext());
-
-  // Work Group type is [#dim x i64]
-  Type* WGTy = ArrayType::get(Int64Ty, WGSize.size());
-  // Allocate space of Global work group data on stack and get pointer to
-  // first element.
-  AllocaInst* WG = new AllocaInst(WGTy, WGName, IB);
-  WGPtr = BitCastInst::CreatePointerCast(WG, Int64Ty->getPointerTo(), WG->getName()+".0", IB);
-  Value* nextDim = WGPtr;
-  DEBUG(errs() << *WGPtr << "\n");
-
-  // Iterate over the number of dimensions and store the global work group
-  // size in that dimension
-  for(unsigned i=0; i < WGSize.size(); i++) {
-    assert(WGSize[i]->getType()->isIntegerTy() && "Dimension not an integer type!");
-
-    if(WGSize[i]->getType() != Int64Ty) {
-      // If number of dimensions are mentioned in any other integer format,
-      // generate code to extend it to i64. We need to use the mapped value in
-      // the new generated function, hence the use of VMap
-      // FIXME: Why are we changing the kernel WGSize vector here?
-      DEBUG(errs() << "Not i64. Zero extend required.\n");
-      DEBUG(errs() << *WGSize[i] << "\n");
-      CastInst* CI = BitCastInst::CreateIntegerCast(WGSize[i], Int64Ty, true, "", IB);
-      DEBUG(errs() << "Bitcast done.\n");
-      StoreInst* SI = new StoreInst(CI, nextDim, IB);
-      DEBUG(errs() << "Zero extend done.\n");
-      DEBUG(errs() << "\tZero extended work group size: " << *SI << "\n");
-    } else {
-      // Store the value representing work group size in ith dimension on
-      // stack
-      StoreInst* SI = new StoreInst(WGSize[i], nextDim, IB);
-
-      DEBUG(errs() << "\t Work group size: " << *SI << "\n");
-    }
-    if(i+1 < WGSize.size()) {
-      // Move to next dimension
-      GetElementPtrInst* GEP = GetElementPtrInst::Create(nullptr, nextDim,
-                               ArrayRef<Value*>(ConstantInt::get(Int64Ty, 1)),
-                               WG->getName()+"."+Twine(i+1),
-                               IB);
-      DEBUG(errs() << "\tPointer to next dimension on stack: " << *GEP << "\n");
-      nextDim = GEP;
-    }
-  }
-  return WGPtr;
-
-}
-
-//Get generated SPIR binary name
-static std::string getSPIRFilename(const Module& M) {
-  std::string mid = M.getModuleIdentifier();
-  return mid.append(".kernels.bc");
-
-}
-
-// Get the name of the input file from module ID
-static std::string getFilenameFromModule(const Module& M) {
-  std::string moduleID = M.getModuleIdentifier();
-  return moduleID.substr(moduleID.find_last_of("/")+1);
-}
-
-// Changes the data layout of the Module to be compiled with SPIR backend
-// TODO: Figure out when to call it, probably after duplicating the modules
-static void changeDataLayout(Module &M) {
-  std::string spir64_layoutStr = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v24:32:32-v32:32:32-v48:64:64-v64:64:64-v96:128:128-v128:128:128-v192:256:256-v256:256:256-v512:512:512-v1024:1024:1024";
-    
-  M.setDataLayout(StringRef(spir64_layoutStr));
-  return;
-}
-
-static void changeTargetTriple(Module &M) {
-  std::string spir64_TargetTriple = "spir64-unknown-unknown";
-  M.setTargetTriple(StringRef(spir64_TargetTriple));
-}
-
-// Helper function, generate a string representation of a type
-static std::string printType(Type* ty) {
-  std::string type_str;
-  raw_string_ostream rso(type_str);
-  ty->print(rso);
-  return rso.str();
-}
-
-// Helper function to get mangled names of OpenCL built ins
-static StringRef getMangledName(std::string name) {
-  Twine mangledName = "_Z"+Twine(name.size())+name+"j";
-  return StringRef(mangledName.str());
-}
-
-
-// Helper function, populate a vector with all return statements in a function
-static void findReturnInst(Function* F, std::vector<ReturnInst *> & ReturnInstVec) {
-  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-    Instruction *I = &(*i);
-    ReturnInst* RI = dyn_cast<ReturnInst>(I);
-    if (RI) {
-      ReturnInstVec.push_back(RI);
-    }
-  }
-}
-
-// Helper function, populate a vector with all IntrinsicID intrinsics in a function
-static void findIntrinsicInst(Function* F, Intrinsic::ID IntrinsicID, std::vector<IntrinsicInst *> & IntrinsicInstVec) {
-  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-    Instruction *I = &(*i);
-    IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-    if (II && II->getIntrinsicID() == IntrinsicID) {
-      IntrinsicInstVec.push_back(II);
-    }
-  }
-}
-
-// Helper function to get mangled names of OpenCL built ins for atomics
-static StringRef getAtomicMangledName(std::string name, unsigned addrspace, bool sign) {
-  Twine mangledName = "_Z" +
-                      Twine(name.size())+name +
-                      "PU3AS"+Twine(addrspace) + "jj";
-//                      ((sign) ? "ii" : "jj");
-  return StringRef(mangledName.str());
-}
-
-// Helper funtion, returns the OpenCL function name corresponding to atomic op
-static StringRef getAtomicOpName(Intrinsic::ID ID, unsigned addrspace) {
-  switch(ID) {
-    case Intrinsic::visc_atomic_cmpxchg:
-      return getAtomicMangledName("atom_cmpxchg", addrspace, true);
-    case Intrinsic::visc_atomic_add:
-      return getAtomicMangledName("atom_add", addrspace, true);
-    case Intrinsic::visc_atomic_sub:
-      return getAtomicMangledName("atom_sub", addrspace, true);
-    case Intrinsic::visc_atomic_min:
-      return getAtomicMangledName("atom_min", addrspace, true);
-    case Intrinsic::visc_atomic_umin:
-      return getAtomicMangledName("atom_min", addrspace, false);
-    case Intrinsic::visc_atomic_max:
-      return getAtomicMangledName("atom_max", addrspace, true);
-    case Intrinsic::visc_atomic_umax:
-      return getAtomicMangledName("atom_max", addrspace, false);
-    case Intrinsic::visc_atomic_inc:
-      return getAtomicMangledName("atom_inc", addrspace, true);
-    case Intrinsic::visc_atomic_dec:
-      return getAtomicMangledName("atom_dec", addrspace, true);
-    case Intrinsic::visc_atomic_xchg:
-      return getAtomicMangledName("atom_xchg", addrspace, true);
-    case Intrinsic::visc_atomic_and:
-      return getAtomicMangledName("atom_and", addrspace, true);
-    case Intrinsic::visc_atomic_or:
-      return getAtomicMangledName("atom_or", addrspace, true);
-    case Intrinsic::visc_atomic_xor:
-      return getAtomicMangledName("atom_xor", addrspace, true);
-    default:
-      llvm_unreachable("Unsupported atomic intrinsic!");
-  };
-}
-
-static std::string getMathFunctionName(Intrinsic::ID ID) {
-  switch(ID) {
-    case Intrinsic::sin: return "_Z3sinf";
-    case Intrinsic::cos: return "_Z3cosf";
-    case Intrinsic::sqrt: return "_Z4sqrtf";
-    case Intrinsic::floor: return "_Z5floorf";
-    case Intrinsic::nvvm_rsqrt_approx_f: return "_Z5rsqrtf";
-    default:
-      llvm_unreachable("Unsupported math function!");
-  };
-}
-
-} // End of namespace
-
-char DFG2LLVM_SPIR::ID = 0;
-static RegisterPass<DFG2LLVM_SPIR> X("dfg2llvm-spir",
-                                      "Dataflow Graph to LLVM for SPIR Pass",
-                                      false /* does not modify the CFG */,
-                                      true /* transformation,   *
-                                            * not just analysis */);
-
diff --git a/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.exports b/lib/DFG2LLVM_SPIR/DFG2LLVM_SPIR.exports
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000
diff --git a/lib/DFG2LLVM_SPIR/LLVMBuild.txt b/lib/DFG2LLVM_SPIR/LLVMBuild.txt
deleted file mode 100644
index 72c4de9efdc816ca74d54d96f4f66afd467d1639..0000000000000000000000000000000000000000
--- a/lib/DFG2LLVM_SPIR/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/DFG2LLVM_SPIR/LLVMBuild.txt -------------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = DFG2LLVM_SPIR
-parent = Transforms
diff --git a/lib/InsertApproxInfo/CMakeLists.txt b/lib/InsertApproxInfo/CMakeLists.txt
deleted file mode 100644
index 2b6d41bd709ce1e57bd081ffb9542d157bad36e4..0000000000000000000000000000000000000000
--- a/lib/InsertApproxInfo/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( InsertApproxInfo
-  InsertApproxInfo.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/lib/InsertApproxInfo/InsertApproxInfo.cpp b/lib/InsertApproxInfo/InsertApproxInfo.cpp
deleted file mode 100644
index bde4ef89073fbf1df5bcafc3907979f8d15b704f..0000000000000000000000000000000000000000
--- a/lib/InsertApproxInfo/InsertApproxInfo.cpp
+++ /dev/null
@@ -1,498 +0,0 @@
-//===------------------------ InPlaceDFGAnalysis.cpp ----------------------===//
-//
-//
-//
-//                     The LLVM Compiler Infrastructure
-//
-//
-//
-// This file is distributed under the University of Illinois Open Source
-//
-// License. See LICENSE.TXT for details.
-//
-//
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "InsertApproxInfo"
-
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-#include "llvm/IR/InstrTypes.h"
-#include <unordered_map>
-#include <dirent.h>
-#include <stdio.h>
-#include <sstream>
-#include <fstream>
-
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-using namespace inplacedfg;
-
-
-namespace {
-
-static cl::opt<std::string> dir_name("results-dir", cl::desc(" Name of directory with Autotuner results "));
-
-
-struct ApproxMetrics{
-  std::string op_name;
-  std::string category;
-  unsigned int rank; // rank given by autotuner
-  double approx_level;
-  // Relative L-norm metrics
-  double relative_l1;
-  double relative_l2;
-  double relative_linf;
-  // Mean L-norm metrics
-  double mean_l1;
-  double mean_l2;
-  double mean_linf;
-};    
-
-  
-  
-struct InsertApproxInfoWrapperPass : public ModulePass {
-  static char ID; // Pass identification, replacement for typeid
-  InsertApproxInfoWrapperPass() : ModulePass(ID) {}
-    
-public:
-  // Functions
-  bool runOnModule(Module &M);
-  void getAnalysisUsage(AnalysisUsage &AU) const;
-};
-
-
-// Visitor for Code generation traversal (tree traversal for now)
-class InsertApproxInfo : public CodeGenTraversal {
-
-private:
-  // Virtual Functions
-  void init() {}
-  void initRuntimeAPI() {}
-  void codeGen(DFInternalNode* N);
-  void codeGen(DFLeafNode* N);
-  void loadTrainedApproxMetrics(std::string dir_path);
-  void loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category);
-  void loadMetricsFromDir(std::string dir_path, std::string category);
-  void readApproxValues(const std::string line, ApproxMetrics* approx_metrics);
-  void initIntrinsicNames();
-  void initGlobalStrings();
-
-  // private data
-  std::unordered_map<std::string, std::string> intrinsics_map;
-  std::unordered_map<std::string, std::vector<ApproxMetrics*>> operation_metrics;
-  GlobalVariable* rank_str;
-  GlobalVariable* category_str;
-  GlobalVariable* mean_l1_str;
-  GlobalVariable* mean_l2_str;
-  GlobalVariable* mean_linf_str;
-  GlobalVariable* rel_l1_str;
-  GlobalVariable* rel_l2_str;
-  GlobalVariable* rel_linf_str;
-
-
-  // Tracks the id of the tensor op processed
-  unsigned int currentID;
-
-public:
-  // Constructor
-  InsertApproxInfo(Module &_M, BuildDFG &_DFG);
-  
-  //void run(Module &M, BuildDFG &DFG);
-  void run(std::string dir_path);
-
-};
-
-
-
-void InsertApproxInfo::initIntrinsicNames(){
-
-  intrinsics_map["llvm.visc.tensor.convolution"] = "tensorConv";
-  intrinsics_map["llvm.visc.tensor.mul"] = "tensorGemm";
-  intrinsics_map["llvm.visc.tensor.add"] = "tensorAdd";
-  intrinsics_map["llvm.visc.tensor.pool.max"] = "tensorPooling";
-  intrinsics_map["llvm.visc.tensor.tanh"] = "tensorTanh";  
-}
-
-
-void InsertApproxInfo::initGlobalStrings(){
-
- /**** Creating global constant strings for each approximation metric type *******/
-
-  std::string rank_string = "rank";
-  Constant* stringConst = ConstantDataArray::getString(M.getContext(), StringRef(rank_string.c_str()), true);
-  rank_str = new GlobalVariable(M, stringConst->getType(), true,
-				GlobalValue::ExternalLinkage, stringConst, "");
-
-  std::string category_string = "category";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(category_string.c_str()), true);
-  category_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  // Mean l-norm metrics
-  std::string metric_string = "mean_l1";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  mean_l1_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "mean_l2";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  mean_l2_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "mean_linf";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  mean_linf_str = new GlobalVariable(M, stringConst->getType(), true,
-				     GlobalValue::ExternalLinkage, stringConst, "");
-
-  // Relative l-norm metrics
-  metric_string = "rel_l1";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  rel_l1_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "rel_l2";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  rel_l2_str = new GlobalVariable(M, stringConst->getType(), true,
-				   GlobalValue::ExternalLinkage, stringConst, "");
-
-  metric_string = "rel_linf";
-  stringConst = ConstantDataArray::getString(M.getContext(), StringRef(metric_string.c_str()), true);
-  rel_linf_str = new GlobalVariable(M, stringConst->getType(), true,
-				     GlobalValue::ExternalLinkage, stringConst, "");
-
-}
-
-  
-InsertApproxInfo::InsertApproxInfo(Module &_M, BuildDFG &_DFG) :
-    CodeGenTraversal(_M, _DFG){
-
-  currentID = 1;
-  
-  initIntrinsicNames();
-  initGlobalStrings();   
-}
-
-
-void InsertApproxInfoWrapperPass::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.addRequired<BuildDFG>();
-  AU.addPreserved<BuildDFG>();
-}
-
-    
-bool InsertApproxInfoWrapperPass::runOnModule(Module &M) {
-  
-  std::string dir_path = dir_name.getValue();
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  InsertApproxInfo IApprox(M, DFG);
-  IApprox.run(dir_path);
-
-  return false;
-}
-
-
-void InsertApproxInfo::readApproxValues(const std::string line, ApproxMetrics* approx_metrics){
- 
-  std::istringstream in(line);
-  std::string op_name;
-
-  float approx_level;
-
-  float mean_l1;
-  float mean_l2;
-  float mean_linf;
-
-  float relative_l1;
-  float relative_l2;
-  float relative_linf;
-
-  in >> op_name;
-  in >> approx_level;
-  
-  in >> mean_l1;
-  in >> mean_l2;
-  in >> mean_linf;
-
-  in >> relative_l1;
-  in >> relative_l2;
-  in >> relative_linf;
-    
-  printf("\n *** op_name = %s \n", op_name.c_str());
-  printf("approx_level = %f \n", approx_level);
-  printf("relative_l1 = %f \n", relative_l1);
-  printf("relative_l2 = %f \n", relative_l2);
-  printf("relative_linf = %f \n", relative_linf);
-  printf("mean_l1 = %f \n", mean_l1);
-  printf("mean_l2 = %f \n", mean_l2);
-  printf("mean_linf = %f \n", mean_linf);
-
-  approx_metrics->op_name = op_name;
-  approx_metrics->approx_level = approx_level;
-  approx_metrics->mean_l1 = mean_l1;
-  approx_metrics->mean_l2 = mean_l2;
-  approx_metrics->mean_linf = mean_linf;
-  approx_metrics->relative_l1 = relative_l1;
-  approx_metrics->relative_l2 = relative_l2;
-  approx_metrics->relative_linf = relative_linf;
-   
-}
-
-
-unsigned int getFileRank(std::string file_path){
-
-  char file_name[100]; // Assuming no file names greater than 100 chars
-  strcpy(file_name, file_path.c_str());
-  
-  char* pch = strtok(file_name, "_");
-  char* last_pch;
-  while(pch != NULL){
-    last_pch = pch;   
-    pch = strtok(NULL, "_");  
-  }
-
-  printf("NOTE: ****** last_pch = %s \n", last_pch);
-
-  size_t sz;
-  int rank = std::stoi(last_pch, &sz);
-  
-  return rank + 1; // NOTE: Adding 1 to start ranks with '1' 
-}
-
-  
-  
-void InsertApproxInfo::loadMetricsFromFile(std::string dir_path, std::string file_path, std::string category){
-
-  std::string full_path = dir_path + "/" + file_path;
-  printf("full_path = %s \n", full_path.c_str());
-  std::ifstream infile(full_path.c_str());
-  std::string line;
-
-  unsigned int it_count = 0;
-  while(std::getline(infile, line)){
-
-    // Skip first line with confidence information
-    if(it_count > 0){
-      std::vector<float> approx_values;
-      ApproxMetrics* approx_metrics = new ApproxMetrics;
-      readApproxValues(line, approx_metrics);
-      
-      approx_metrics->category = category;
-      unsigned int rank = getFileRank(file_path);
-      approx_metrics->rank = rank; 
-
-      std::string unique_op_name = approx_metrics->op_name + std::to_string(it_count);
-      operation_metrics[unique_op_name].push_back(approx_metrics);
-      printf("\n ** unique_op_name = %s \n", unique_op_name.c_str());     
-    }
-    
-    it_count++;
-  }
-  
-}
-
-
-  
-void InsertApproxInfo::loadMetricsFromDir(std::string dir_path, std::string category){
-
-  struct dirent* entry;
-  dir_path = dir_path + category;
-
-  DIR* dir = opendir(dir_path.c_str());
-  if(dir == NULL){
-    printf("Directory %s not found . Aborting ... \n\n ", dir_path.c_str());
-    abort();
-  }
-
-  while((entry = readdir(dir)) != NULL){
-    printf("f_name = %s \n", entry->d_name);
-    std::string f_name = entry->d_name;
-    loadMetricsFromFile(dir_path, f_name, category);
-  }
-}
-
-  
-  
-void InsertApproxInfo::loadTrainedApproxMetrics(std::string dir_path){
-  
-  std::string root_path = dir_path + "/high_confidence/";
-  loadMetricsFromDir(root_path, "linear");
-  loadMetricsFromDir(root_path, "log");
-  loadMetricsFromDir(root_path, "quad");
-}
-
-  
-/*** Methods of InPlaceDFGAnalysis ***/
-void InsertApproxInfo::run(std::string dir_path) {
-
-  loadTrainedApproxMetrics(dir_path);
-
-  errs() << "\n NOTE: ApproxInfo INSERTION TRANSFORM \n";
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
-
-  // Iterate over all the DFGs
-  // Analyse the edges for parameters that are valid to be used in place
-  for (auto rootNode: Roots) {
-    //ATVisitor->visit(rootNode);
-   this->visit(rootNode);
-  }
-
-  //delete ATVisitor;
-  return;
-}
-
-/*** Analysis of internal node ***/
-void InsertApproxInfo::codeGen(DFInternalNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-}
-
-/*** Analysis of leaf node ***/
-void InsertApproxInfo::codeGen(DFLeafNode* N) {
-  DEBUG(errs() << "Analysing Node: " << N->getFuncPointer()->getName() << "\n");
-
-  // Skip code generation if it is a dummy node
-  if(N->isDummyNode()) {
-    DEBUG(errs() << "Skipping dummy node\n");
-    return;
-  }
-
-  // Abort code generation if it is an allocation node
-  if(N->isAllocationNode()) {
-    assert(false && "Allocation Node not expected in ApproxHPVM");
-    return;
-  }
-  
-  Function *F = N->getFuncPointer();
-  Module* M = F->getParent();
-  std::vector<IntrinsicInst *> IItoRemove;
-  
-
-  /**** Adding operand bundles for each tensor operation in the HPVM DFG Leaf Node ****/
-  for (inst_iterator i = inst_begin(F), e = inst_end(F); i != e; ++i) {
-    Instruction *I = &(*i);
-    errs()<<*I<<"\n";
-
-
-    if (BuildDFG::isViscIntrinsic(I)) {
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
-      assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
-        && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
-
-      std::string intrinsic_id = std::string(II->getCalledFunction()->getName().data());
-      std::string runtime_func_name = intrinsics_map[intrinsic_id];
-      std::string unique_name = runtime_func_name + std::to_string(currentID);
-      printf("\n ---- unique_name = %s \n ", unique_name.c_str());
-      std::vector<ApproxMetrics*> approx_metrics;
-      if(operation_metrics.find(unique_name) != operation_metrics.end()){
-         approx_metrics = operation_metrics[unique_name];
-      }
-      else{
-	errs()<<"Intrinsic Name NOT found in the map - Unexpected Error. Aborting ... \n\n";
-        abort();
-      }
-      
-      
-      unsigned int num_configs = approx_metrics.size();
-      std::vector<OperandBundleDef> conf_bundles;
-      for(unsigned int i = 0; i < num_configs; i++){
-	std::vector<Value*> norm_vals;
-
-	norm_vals.push_back(category_str);
-	Constant* categoryConst = ConstantDataArray::getString(M->getContext(), StringRef(approx_metrics[i]->category.c_str()), true);
-        GlobalVariable* category_value = new GlobalVariable(*M, categoryConst->getType(), true,
-							    GlobalValue::ExternalLinkage, categoryConst, "");
-	norm_vals.push_back(category_value);
-
-	norm_vals.push_back(rank_str);
-	Constant* constIntVal = ConstantInt::get(Type::getInt32Ty(M->getContext()), approx_metrics[i]->rank);
-	norm_vals.push_back(constIntVal);
-
-	// Adding mean l-norm metrics
-	norm_vals.push_back(mean_l1_str);
-	Constant* constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l1);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(mean_l2_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_l2);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(mean_linf_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->mean_linf);
-	norm_vals.push_back(constFPVal);
-
-        // Relative l-norm Metrics
-	norm_vals.push_back(rel_l1_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l1);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(rel_l2_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_l2);
-	norm_vals.push_back(constFPVal);
-
-	norm_vals.push_back(rel_linf_str);
-	constFPVal = ConstantFP::get(Type::getDoubleTy(M->getContext()), approx_metrics[i]->relative_linf);
-	norm_vals.push_back(constFPVal);
-
-
-	std::string config_name = "config_" + std::to_string(i+1);
-	OperandBundleDef norm_bundle(config_name, norm_vals);
- 
-	conf_bundles.push_back(norm_bundle);
-      }
-
-      ArrayRef<OperandBundleDef> bundle_arr(conf_bundles);
-
-      /*** Creating new Intrinsic call with Operand Bundles attached **/
-      Function* calledFunction = II->getCalledFunction();
-      unsigned num_args = II->getNumArgOperands();
-      std::vector<Value*> args;
-      for(unsigned i = 0; i < num_args; i++){
-        Value* argValue = II->getArgOperand(i);
-	args.push_back(argValue);
-      }
-
-      CallInst* CI = CallInst::Create(calledFunction,
-                 		      args, bundle_arr, "", II);
-
-      errs()<<"NOTE: New CallInst = "<<*CI<<"\n";
-      
-      II->replaceAllUsesWith(CI);
-      // Mark to remove at the end
-      IItoRemove.push_back(II);
-      
-      // Increment counter of op processed
-      currentID++;
-    }
-  }
-
-
-  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
-       re = IItoRemove.rend(); ri != re; ++ri) {
-    DEBUG(errs() << "Erasing: " << **ri << "\n");
-    errs() << "Erasing: " << **ri << "\n";
-    (*ri)->eraseFromParent();
-  }
-
-  
-}
-
-char InsertApproxInfoWrapperPass::ID = 0;
-static RegisterPass<InsertApproxInfoWrapperPass> X("insert-approxinfo",
-  "Pass to add approximation information (l-norm metrics) in the ApproxHPVM DFG",
-  false /* does not modify the CFG */,
-  false /* not transformation, just analysis */);
-
-
-
-
-  
-} // End of namespace
-
diff --git a/lib/InsertApproxInfo/LLVMBuild.txt b/lib/InsertApproxInfo/LLVMBuild.txt
deleted file mode 100644
index e9cf5afd4a307c1bd985238929ef06d89215f3ab..0000000000000000000000000000000000000000
--- a/lib/InsertApproxInfo/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/LocalMem/LLVMBuild.txt ------------------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = InsertApproxInfo
-parent = Transforms
diff --git a/lib/MergeDFN/CMakeLists.txt b/lib/MergeDFN/CMakeLists.txt
deleted file mode 100644
index 30e7330d0ccbeb2e508dea79ac22b50239d59f51..0000000000000000000000000000000000000000
--- a/lib/MergeDFN/CMakeLists.txt
+++ /dev/null
@@ -1,12 +0,0 @@
-if(WIN32 OR CYGWIN)
-  set(LLVM_LINK_COMPONENTS Core Support)
-endif()
-
-add_llvm_loadable_module( LLVMMergeDFN
-  MergeDFN.cpp
-
-  DEPENDS
-  intrinsics_gen
-  PLUGIN_TOOL
-  opt
-  )
diff --git a/lib/MergeDFN/LLVMBuild.txt b/lib/MergeDFN/LLVMBuild.txt
deleted file mode 100644
index 099486e6c3196d4a20ac851a0a3563fde4a5d05d..0000000000000000000000000000000000000000
--- a/lib/MergeDFN/LLVMBuild.txt
+++ /dev/null
@@ -1,21 +0,0 @@
-;===- ./lib/Transforms/MergeDFN/LLVMBuild.txt ------------------*- Conf -*--===;
-;
-;                     The LLVM Compiler Infrastructure
-;
-; This file is distributed under the University of Illinois Open Source
-; License. See LICENSE.TXT for details.
-;
-;===------------------------------------------------------------------------===;
-;
-; This is an LLVMBuild description file for the components in this subdirectory.
-;
-; For more information on the LLVMBuild system, please see:
-;
-;   http://llvm.org/docs/LLVMBuild.html
-;
-;===------------------------------------------------------------------------===;
-
-[component_0]
-type = Library
-name = MergeDFN
-parent = Transforms
diff --git a/lib/MergeDFN/MergeDFN.cpp b/lib/MergeDFN/MergeDFN.cpp
deleted file mode 100644
index 35e70e35cef854c21cbcc65bb84a4bddc26727a2..0000000000000000000000000000000000000000
--- a/lib/MergeDFN/MergeDFN.cpp
+++ /dev/null
@@ -1,2338 +0,0 @@
-//=== DFG2LLVM_NVPTX.cpp ===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#define ENABLE_ASSERTS
-
-#define DEBUG_TYPE "MergeDFN"
-#include "llvm/IR/DataLayout.h"
-#include "llvm/IR/IRBuilder.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/IR/InstIterator.h"
-#include "llvm/Transforms/Utils/ValueMapper.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Cloning.h"
-#include "llvm/IRReader/IRReader.h"
-#include "llvm/Linker/Linker.h"
-#include "llvm/Support/SourceMgr.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/FileSystem.h"
-#include "llvm/IR/Attributes.h"
-#include "llvm/SupportVISC/VISCTimer.h"
-#include "llvm/SupportVISC/DFG2LLVM.h"
-
-#include <sstream>
-
-using namespace llvm;
-using namespace builddfg;
-using namespace dfg2llvm;
-
-static cl::opt<std::string> Node1Name("mc1",
-                                 cl::init(""),
-                                 cl::Hidden,
-                                 cl::desc("First node candidate for merge"));
-static cl::opt<std::string> Node2Name("mc2",
-                                 cl::init(""),
-                                 cl::Hidden,
-                                 cl::desc("Second node candidate for merge"));
-
-namespace {
-// Helper class declarations
-
-// Helper function declarations
-
-// MergeDFN
-struct MergeDFN : public ModulePass {
-  static char ID; // Pass identification, replacement for typeid
-  MergeDFN() : ModulePass(ID) {}
-
-private:
-  // Member variables
-
-  // Functions
-
-public:
-  // Functions
-  bool runOnModule(Module &M);
-
-  void getAnalysisUsage(AnalysisUsage &AU) const {
-    AU.addRequired<BuildDFG>();
-    AU.addPreserved<BuildDFG>(); //TODO: Check
-  }
-
-};
-
-// Visitor for Code generation traversal (tree traversal for now)
-class MergeTraversal : public DFNodeVisitor {
-
-private:
-  //Member variables
-  Module &M;
-  BuildDFG &DFG;
-  DFNode *n1;
-  DFNode *n2;
-  DFNode *m;
-
-  //Functions
-  void testNodeName(DFNode* N);
-
-public:
-  // Constructor
-  MergeTraversal(Module &_M, BuildDFG &_DFG) : M(_M), DFG(_DFG) { 
-    n1 = NULL;
-    n2 = NULL;
-    m = NULL;
-  }
-
-  virtual void visit(DFInternalNode* N) {
-    // Follows a bottom-up approach to find the nodes.
-    for(DFGraph::children_iterator i = N->getChildGraph()->begin(),
-        e = N->getChildGraph()->end(); i != e; ++i) {
-      DFNode* child = *i;
-      child->applyDFNodeVisitor(*this);
-    }
-
-    DEBUG(errs() << "Testing Node (I) - " << N->getFuncPointer()->getName() << "\n");
-    testNodeName(N);
-    DEBUG(errs() << "\tDONE - " << "\n");
-
-  }
-
-  virtual void visit(DFLeafNode* N) {
-    DEBUG(errs() << "Testing Node (L) - " << N->getFuncPointer()->getName() << "\n");
-    testNodeName(N);
-    DEBUG(errs() << "DONE" << "\n");
-  }
-
-  bool isValidMergeChoise();
-
-  void mergeDFN();
-
-};
-
-//===--------------------- Helper Function Declarations --------------===//
-IntrinsicInst* createIdenticalCreateNodeWithDifferentFunction(Function* F,
-                                                            IntrinsicInst* II);
-IntrinsicInst* createNewCreateNodeBasedOn(Function* F, IntrinsicInst* II,
-                                                              Function* Fargs);
-IntrinsicInst* createIdenticalCreateEdgeWithDifferentPort(IntrinsicInst* II,
-unsigned port, bool srcport);
-IntrinsicInst* createIdenticalCreateEdgeWithDifferentNode(IntrinsicInst* II,
-IntrinsicInst* IInode, bool srcnode);
-IntrinsicInst* createIdenticalBindInputWithDifferentNode(IntrinsicInst* II,
-                                                         IntrinsicInst* IInode);
-IntrinsicInst* createIdenticalBindInputWithDifferentPort(IntrinsicInst* II,
-                                                         unsigned port,
-                                                         bool srcport);
-IntrinsicInst* createIdenticalBindOutputWithDifferentNode(IntrinsicInst* II,
-                                                         IntrinsicInst* IInode);
-IntrinsicInst* createIdenticalBindOutputWithDifferentPort(IntrinsicInst* II,
-                                                          unsigned port,
-                                                          bool srcport);
-void updateUsesOfCreateNodeInParent(IntrinsicInst* II1,
-                                    IntrinsicInst* II2,
-                                    IntrinsicInst* IInew,
-                                    std::map<unsigned, unsigned> InMap,
-                                    std::map<unsigned, unsigned> OutMap,
-                                    std::vector<DFEdge*> &DFEdgestoRemove,
-                                    BuildDFG &DFG);
-bool isIncomingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn);
-bool isOutgoingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn);
-bool hasSuccessor(DFNode* N1, DFNode* N2);
-bool hasImmediateSuccesssor(DFNode* N1, DFNode* N2);
-bool checkEdgesType(DFNode* N1, DFNode* N2);
-static void createArgTypes(DFNode* N1, DFNode* N2, std::vector<Type*> &ArgTypes);
-void getChildNodeSplit(DFInternalNode* N,
-                       std::vector<DFNode*> &AllocationNodes,
-                       std::vector<DFNode*> &ComputeNodes);
-void buildInputAndOutputMaps(DFNode* N1, DFNode* N2,
-                             std::map<unsigned, unsigned> &N1InMap,
-                             std::map<unsigned, unsigned> &N1OutMap,
-                             std::map<unsigned, unsigned> &N2InMap,
-                             std::map<unsigned, unsigned> &N2OutMap);
-void buildInAndOutEdgeMaps(DFNode* N1, DFNode* N2,
-                           std::map<unsigned, unsigned> &N1InMap,
-                           std::map<unsigned, unsigned> &N1OutMap,
-                           std::map<unsigned, unsigned> &N2InMap,
-                           std::map<unsigned, unsigned> &N2OutMap);
-static StructType* createReturnType(DFNode* N1, DFNode* N2);
-static void copyAttrList(DFNode* N1, DFNode* N2, Function* F);
-static void copyArgumentNames(DFNode* N1, DFNode* N2, Function* F);
-void createShiftMap(Function* F, unsigned fromPos, unsigned num,
-                    unsigned shift, std::vector<unsigned> &ShiftMap);
-void shiftArgs(Function* F, unsigned fromPos, unsigned num,
-               unsigned shift, std::vector<unsigned> &ShiftMap);
-static Function* createEmptyDFNodeFunction(DFNode* N1, DFNode* N2, Module &M);
-static Function* createLeafDFNodeFunction(DFNode* N1, DFNode* N2, Module &M,
-                                          unsigned numOfN1AllocArgs,
-                                          unsigned posOfN1AllocArgs,
-                                          unsigned numOfN2AllocArgs);
-static Function* createInternalDFNodeFunction(DFNode* N1, DFNode* N1an,
-                                              DFNode* N1cn, DFNode* N2,
-                                              DFNode* N2an, DFNode* N2cn,
-                                              Function* Fa, Function* Fc,
-                                              Module &M,
-                                              unsigned numOfN1AllocArgs,
-                                              unsigned posOfN1AllocArgs,
-                                              unsigned numOfN2AllocArgs);
-void createNewInternalNodeIntrinsics(DFNode* N1,
-                                     DFNode* N2,
-                                     DFNode* N1a,
-                                     DFNode* N1c,
-                                     DFNode* N2a,
-                                     DFNode* N2c,
-                                     IntrinsicInst* IInewa,
-                                     IntrinsicInst* IInewc,
-                                     Function* Fa, //FIXME: Unused
-                                     Function* Fc,
-                        std::vector<IntrinsicInst*> &IntrinsicInstructionsToAdd,
-                        std::vector<IntrinsicInst*> &IntermediateInstructions);
-Argument* getFunctionArgumentAt(Function* F, unsigned i);
-void removeUnnecessaryInputEdges(DFNode* N, DFNode* N1,
-                                 unsigned numOfN1AllocArgs,
-                                 unsigned numOfN2AllocArgs);
-void deleteInternalNodeFunction(DFNode* N, BuildDFG &DFG);
-static visc::Target getPreferredTarget(Function* F);
-static void addHint(Function* F, visc::Target T);
-static void removeHint(Function* F, visc::Target T);
-std::string getTestModuleName(Module &M);
-
-
-//===--------------------- MergeDFN Outlined Functions --------------===//
-void MergeTraversal::testNodeName(DFNode* N) {
-
-  if (N->getFuncPointer()->getName() == Node1Name) {
-  //if (N->getFuncPointer()->getName() ==  "WrapperDilate_cloned") {
-  //if (N->getFuncPointer()->getName() ==  "WrapperDilate_cloned_WrapperErode_cloned") {
-  //if (N->getFuncPointer()->getName() ==  "WrapperHorizontal_cloned") {
-  //if (N->getFuncPointer()->getName() ==  "WrapperHorizontal_cloned_WrapperVertical_cloned") {
-    n1 = N;
-  }
-  else if (N->getFuncPointer()->getName() == Node2Name) {
-  //else if (N->getFuncPointer()->getName() == "WrapperErode_cloned") {
-  //else if (N->getFuncPointer()->getName() == "WrapperLincomb_cloned") {
-  //else if (N->getFuncPointer()->getName() == "WrapperVertical_cloned") {
-  //else if (N->getFuncPointer()->getName() == "WrapperSquareRoot_cloned") {
-    n2 = N;
-  }
-}
-
-//TODO: use the topological sort to find merge candidates
-bool MergeTraversal::isValidMergeChoise() {
-  if (!n1 || !n2)
-    return false;
-
-  // Check that n1 and n2 have the same
-  // - parent
-  // - hint
-  // - number and size of dimensions of dynamic instances
-  bool valid = (n1->getParent() == n2->getParent()) &&
-               (getPreferredTarget(n1->getFuncPointer()) ==
-                                    getPreferredTarget(n2->getFuncPointer())) &&
-               (n1->getNumOfDim() == n2->getNumOfDim());
-
-  std::vector<Value*> n1dim = n1->getDimLimits();
-  std::vector<Value*> n2dim = n2->getDimLimits();
-  for (unsigned i = 0; (i < n1dim.size()) && valid ; i++)
-    valid = valid && (n1dim[i] == n2dim[i]);
-
-  // n1 should not be a successor of n2
-  valid = valid && !hasSuccessor(n2, n1);
-  // n2 should not be a successor of n1, other than an immediate successor
-  valid = valid && (!hasSuccessor(n1, n2) || hasImmediateSuccesssor(n1, n2));
-
-  if (!valid)
-    return false;
-
-  // Now, check specifically for one or two level cases
-  if (dyn_cast<DFLeafNode>(n1) && dyn_cast<DFLeafNode>(n1)) {
-    // For now, only allow one to one edges between them
-    return checkEdgesType(n1, n2);
-  }
-
-  //At this point, at least one of them is internal node
-
-  DFInternalNode* n1cast = dyn_cast<DFInternalNode>(n1);
-  DFInternalNode* n2cast = dyn_cast<DFInternalNode>(n2);
-
-  // If not both of them are internal nodes, it is not a valid merging
-  if (!n1cast || !n2cast)
-    return false;
-
-  // At this point, they are both internal nodes
-  // For internal nodes, we only allow one-to-one edges
-  valid = valid && checkEdgesType(n1->getParent(), n2->getParent()); // FIXME: n1 and n2?
-
-  // We need to check that they have the appropriate internal structure
-  std::vector<DFNode*> AllocNodes1, ComputeNodes1, AllocNodes2, ComputeNodes2;
-  getChildNodeSplit(n1cast, AllocNodes1, ComputeNodes1);
-  getChildNodeSplit(n2cast, AllocNodes2, ComputeNodes2);
-
-  // There must be at most a single allocation node within each one of them
-  // There must be exactly one compute node within each one of them
-  valid = valid &&
-    (AllocNodes1.size() <= 1) &&
-    (AllocNodes2.size() <= 1) &&
-    (ComputeNodes1.size() == 1) &&
-    (ComputeNodes2.size() == 1);
-
-  // The compute nodes must be leaf nodes with the same number and size of
-  // dimensions of dynamic instances
-  DFLeafNode* n1cn = dyn_cast<DFLeafNode>(ComputeNodes1[0]);
-  DFLeafNode* n2cn = dyn_cast<DFLeafNode>(ComputeNodes2[0]);
-  if (!n1cn || !n2cn)
-    return false;
-
-  errs() << "Checking if the sizes are same for internal nodes\n";
-
-  valid = valid && (n1cn->getNumOfDim() == n2cn->getNumOfDim());
-  std::vector<Value*> n1cndim = n1cn->getDimLimits();
-  std::vector<Value*> n2cndim = n2cn->getDimLimits();
-
-  for (unsigned i = 0; (i < n1cndim.size()) && valid ; i++) {
-    // These cannot fail, these valaues have been passed as arguments
-    Argument* n1arg = cast<Argument>(n1cndim[i]);
-    Argument* n2arg = cast<Argument>(n2cndim[i]);
-    unsigned n1argPos = n1arg->getArgNo();
-    unsigned n2argPos = n2arg->getArgNo();
-    // These values are coming from bind intrinsics, thus from the parent node
-    // The position of the argument is the same as the inPort of the incoming
-    // edge of their parent, n1 and n2.
-    DFEdge* n1argEdge = n1->getInDFEdgeAt(n1argPos);
-    DFEdge* n2argEdge = n2->getInDFEdgeAt(n2argPos);
-    // Get source position and node of these edges
-    unsigned n1SrcPos = n1argEdge->getSourcePosition();
-    DFNode* n1SrcNode = n1argEdge->getSourceDF();
-    unsigned n2SrcPos = n2argEdge->getSourcePosition();
-    DFNode* n2SrcNode = n2argEdge->getSourceDF();
-    valid = valid && (n1SrcPos == n2SrcPos) && (n1SrcNode == n2SrcNode);
-  }
-
-  // We must also make sure that any edge that is incoming to the allocation
-  // node of n2 is not from n1
-  if (AllocNodes2.size() == 1) {
-    DFNode* n2an = AllocNodes2[0];
-    unsigned inPort = 0;
-    for (DFNode::const_indfedge_iterator ei = n2an->indfedge_begin(),
-         ee = n2an->indfedge_end(); (ei != ee) && valid ; ei++, inPort++)
-      if (n2an->getExtendedInDFEdgeAt(inPort)->getSourceDF() == ComputeNodes1[0])
-        return false;
-  }
-
-  return valid;
-}
-
-void MergeTraversal::mergeDFN() {
-
-  Function* Fm;
-
-  if (dyn_cast<DFLeafNode>(n1)) { // One level node merging,
-    // n1 and n2 are leaf nodes
-    // Simply create the merged leaf function (with the calls)
-    Fm = createLeafDFNodeFunction(n1, n2, M, 0, 0, 0);
-    addHint(Fm, getPreferredTarget(n1->getFuncPointer()));
-    removeHint(n1->getFuncPointer(), getPreferredTarget(n1->getFuncPointer()));
-    removeHint(n2->getFuncPointer(), getPreferredTarget(n2->getFuncPointer()));
-  } else { // Two level node merging, n1 and n2 are internal nodes
-    // Correct form of internal nodes has been verified in isValidMerge
-    // Both n1 and n2 have at most two children:
-    // a compute node and maybe an allocation node
-    std::vector<DFNode*> AllocationNodes;
-    std::vector<DFNode*> ComputeNodes;
-
-    getChildNodeSplit(cast<DFInternalNode>(n1), AllocationNodes, ComputeNodes);
-    DFLeafNode* N1ComputeNode = cast<DFLeafNode>(ComputeNodes[0]);
-    DFLeafNode* N1AllocationNode =
-      (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL;
-    AllocationNodes.clear();
-    ComputeNodes.clear();
-    getChildNodeSplit(cast<DFInternalNode>(n2), AllocationNodes, ComputeNodes);
-    DFLeafNode* N2ComputeNode = cast<DFLeafNode>(ComputeNodes[0]);
-    DFLeafNode* N2AllocationNode =
-      (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL;
-
-    Function* Falloc = NULL;
-    if (N1AllocationNode && N2AllocationNode)
-      Falloc = createLeafDFNodeFunction(N1AllocationNode,
-                                        N2AllocationNode,
-                                        M, 0, 0, 0);
-    else if (N1AllocationNode)
-      Falloc = N1AllocationNode->getFuncPointer();
-    else if (N2AllocationNode)
-      Falloc = N2AllocationNode->getFuncPointer();
-
-    unsigned numOfN1AllocArgs = 0;
-    unsigned posOfN1AllocArgs = 0;
-    unsigned numOfN2AllocArgs = 0;
-    if (N1AllocationNode) {
-      StructType* F1RetTy =
-        cast<StructType>(N1AllocationNode->getFuncPointer()->getReturnType());
-        numOfN1AllocArgs = F1RetTy->getNumElements();
-        // The position where the allocation node's arguments of n1 alloc go in
-        // the merged function's parameter list is the same as it was in n1
-        // compute function, because all the incoming edges to n1 do not change.
-        // We need this information to shift the allocation parameters to the
-        // end of the merged function's parameter list
-        posOfN1AllocArgs =
-          N1AllocationNode->getOutDFEdgeAt(0)->getDestPosition();
-    }
-    if (N2AllocationNode) {
-      StructType* F2RetTy =
-        cast<StructType>(N2AllocationNode->getFuncPointer()->getReturnType());
-        numOfN2AllocArgs = F2RetTy->getNumElements();
-    }
-
-    errs () << "Working on leaf functions ...\n";
-    Function* Fcompute = 
-      createLeafDFNodeFunction(N1ComputeNode,
-                               N2ComputeNode,
-                               M, numOfN1AllocArgs,
-                               posOfN1AllocArgs, numOfN2AllocArgs);
-    addHint(Fcompute, getPreferredTarget(N1ComputeNode->getFuncPointer()));
-    removeHint(N1ComputeNode->getFuncPointer(),
-               getPreferredTarget(N1ComputeNode->getFuncPointer()));
-    removeHint(N2ComputeNode->getFuncPointer(),
-               getPreferredTarget(N2ComputeNode->getFuncPointer()));
-
-    errs () << "Leaf functions merged ...\n";
-    Fm = createInternalDFNodeFunction(n1, N1AllocationNode, N1ComputeNode,
-                                      n2, N2AllocationNode, N2ComputeNode,
-                                      Falloc, Fcompute,
-                                      M, numOfN1AllocArgs,
-                                      posOfN1AllocArgs, numOfN2AllocArgs);
-    addHint(Fm, getPreferredTarget(n1->getFuncPointer()));
-    removeHint(n1->getFuncPointer(), getPreferredTarget(n1->getFuncPointer()));
-    removeHint(n2->getFuncPointer(), getPreferredTarget(n2->getFuncPointer()));
-  }
-  errs () << "Leaf functions merged and Internal Function merged ...\n";
-  // This is before any code generation passes -> no genfunc
-
-  // FIX PARENT DFNode'S FUNCTION
-  DFInternalNode* ParentNode = n1->getParent();
-
-  // Find createNode intrinsics for initial nodes
-  IntrinsicInst* II1 = n1->getInstruction();
-  IntrinsicInst* II2 = n2->getInstruction();
-
-  // Generate createNode Intrinsic for new node and insert it
-  IntrinsicInst* CreateNodeII =
-    createIdenticalCreateNodeWithDifferentFunction(Fm, II1);
-
-  // It needs to be inserted before either of the two.
-  // Find which one is first and add the new intrinsic before it
-  IntrinsicInst* IIfirst = NULL;
-  for (inst_iterator ib = inst_begin(ParentNode->getFuncPointer()),
-       ie = inst_end(ParentNode->getFuncPointer());
-       (ib != ie) && !IIfirst ; ++ib) {
-    Instruction* I = &*ib; // Grab pointer to Instruction
-    if ((I == II1) || (I == II2)) {
-      IIfirst = cast<IntrinsicInst>(I);
-    }
-  }
-  CreateNodeII->insertBefore(IIfirst);
-
-/* The following is an alternative to using the BuildDFG interface. It only   *
- * creates this single node, not cnotinuing with the graph contained, thus    *
- * will not build the graph of the node if it is internal node. Instead, I    *
- * use the call DFG.handleCreateNode                                          */
-
-/*
-// -------------------------------------------------------------------------- //
-// Updating the graph directly
-  // Create the new node and add it to the graph
-  DFLeafNode* mergeDFNode = DFLeafNode::Create(CreateNodeII, Fm,
-                n1->getTargetHint(),
-                ParentNode,
-                n1->getNumOfDim(),
-                n1->getDimLimits());
-  //Done Later: fix rank of mergeDFNode and successors, after edges are fixed
-  //  mergeDFNode->setRank((n1->getRank() > n2->getRank()) ?
-  //                       (n1->getRank()) : (n2->getRank()) );
-
-  ParentNode->addChildToDFGraph(mergeDFNode);
-// -------------------------------------------------------------------------- //
-*/
-
-// -------------------------------------------------------------------------- //
-// Updating the BuildDFG result
-// remove the two nodes from mapping, add the new one
-  errs () << "Updating intrinsics\n";
-  DFG.removeElementFromHandleToDFNodeMap(II1);
-  DFG.removeElementFromHandleToDFNodeMap(II2);
-//  DFG.addElementToHandleToDFNodeMap(CreateNodeII, mergeDFNode);
-  DFG.handleCreateNode(ParentNode, CreateNodeII);
-  DFNode* mergeDFNode = DFG.getHandleToDFNodeMap()[CreateNodeII];
-
-// -------------------------------------------------------------------------- //
-
-  // Need to update every use of the createNode in the parent node function
-  // -- that would be in create edge and bind
-  std::map<unsigned, unsigned> N1InMap;
-  std::map<unsigned, unsigned> N1OutMap;
-  std::map<unsigned, unsigned> N2InMap;
-  std::map<unsigned, unsigned> N2OutMap;
-  // These maps map the old location of an argument/output (to its function's
-  // parameter list/out struct) to the new, after edges removed and functions
-  // merged
-  buildInputAndOutputMaps(n1, n2, N1InMap, N1OutMap, N2InMap, N2OutMap);
-
-  // Edges from n1 to n2 need to be deleted.
-  // They are placed here for deletion at the end.
-  std::vector<DFEdge*> DFEdgestoRemove;
-
-  // Update uses of createNode - that would be createEdge and bind intrinsics -
-  // to use the new createNode intrinsic
-  updateUsesOfCreateNodeInParent(II1, II2, CreateNodeII, N1InMap, N1OutMap,
-                                 DFEdgestoRemove, DFG);
-  updateUsesOfCreateNodeInParent(II2, II1, CreateNodeII, N2InMap, N2OutMap,
-                                 DFEdgestoRemove, DFG);
-
-  // Both II1 and II2 have no uses left. It is safe to remove them.
-  errs() << "Erasing: " << *II1 << "\n";
-  II1->eraseFromParent();
-  errs() << "Erasing: " << *II2 << "\n";
-  II2->eraseFromParent();
-
-// -------------------------------------------------------------------------- //
-// Updating the graph directly
-
-  // Update
-  // - dataflow edges
-  // - successor lists
-  // - incoming and outgoing edge lists
-  // The edges are updated directly, therefore in the DFGraph DFEdgeList as well
-
-  // For n1
-  for (DFNode::indfedge_iterator indfedgeI = n1->indfedge_begin(),
-       indfedgeE = n1->indfedge_end(); indfedgeI != indfedgeE; indfedgeI++) {
-    DFEdge* E = *indfedgeI;
-    // Incoming edges are retargeted to new node in graph
-    E->setDestDF(mergeDFNode);
-    // Incoming edges are added to the incoming edge list
-    // ( no need to add them in the outgoing edge list of source nodes,
-    // they are already there )
-    mergeDFNode->addInDFEdge(E);
-    // Merge node is added to the successor list of the sources of the edges
-    E->getSourceDF()->addSuccessor(mergeDFNode);
-  }
-
-  for (DFNode::outdfedge_iterator outdfedgeI = n1->outdfedge_begin(),
-       outdfedgeE = n1->outdfedge_end(); outdfedgeI != outdfedgeE; outdfedgeI++) {
-    DFEdge* E = *outdfedgeI;
-    // Outgoing edges to n2 are deleted
-    if (E->getDestDF() == n2)  {
-      ParentNode->getChildGraph()->deleteEdge(E);
-      continue;
-    }
-
-    // Outgoing edges are retargeted to start from the new node in graph
-    E->setSourceDF(mergeDFNode);
-    // Outgoing edges' source port is updated
-    E->setSourcePosition(N1OutMap[E->getSourcePosition()]);
-    // Outgoing edges are added to the outgoing edge list
-    // ( no need to add them in the incoming edge list of destination nodes,
-    // they are already there )
-    mergeDFNode->addOutDFEdge(E);
-    // The destination node is added to the successor list of merge node
-    mergeDFNode->addSuccessor(E->getDestDF());
-  }
-
-  // For n2
-  for (DFNode::indfedge_iterator indfedgeI = n2->indfedge_begin(),
-       indfedgeE = n2->indfedge_end(); indfedgeI != indfedgeE; indfedgeI++) {
-    DFEdge* E = *indfedgeI;
-    // Incoming edges from n1 have already been removed from the graph - ignore
-    if (E->getSourceDF() == n1) {
-      DEBUG(errs() << "Edges between n1-n2 have already been removed from graph\n");
-    }
-
-    // Incoming edges are retargeted to new node in graph
-    E->setDestDF(mergeDFNode);
-    // Incoming edges' destination port is updated
-    E->setDestPosition(N2InMap[E->getDestPosition()]);
-    // Incoming edges are added to the incoming edge list
-    // ( no need to add them in the outgoing edge list of source nodes,
-    // they are already there )
-    mergeDFNode->addInDFEdge(E);
-    // Merge node is added to the successor list of the sources of the edges
-    E->getSourceDF()->addSuccessor(mergeDFNode);
-  }
-
-  for (DFNode::outdfedge_iterator outdfedgeI = n2->outdfedge_begin(),
-       outdfedgeE = n2->outdfedge_end(); outdfedgeI != outdfedgeE; outdfedgeI++) {
-    DFEdge* E = *outdfedgeI;
-    // Outgoing edges are retargeted to start from the new node in graph
-    E->setSourceDF(mergeDFNode);
-    // Outgoing edges' source port is updated
-    E->setSourcePosition(N2OutMap[E->getSourcePosition()]);
-    // Outgoing edges are added to the outgoing edge list
-    // ( no need to add them in the incoming edge list of destination nodes,
-    // they are already there )
-    mergeDFNode->addOutDFEdge(E);
-    // The destination node is added to the successor list of merge node
-    mergeDFNode->addSuccessor(E->getDestDF());
-  }
-  
-
-// -------------------------------------------------------------------------- //
-
-
-// -------------------------------------------------------------------------- //
-// Updating the graph directly
-
-  // Compute rank of mergeDFNode and update rank of successors
-  mergeDFNode->setRank((n1->getRank() > n2->getRank()) ?
-                       (n1->getRank()) : (n2->getRank()) );
-
-  // Clear their incoming and outgoing edges vectors, and the successors list
-  n1->clearGraphElements();
-  n2->clearGraphElements();
-
-  // Clear them from the parent graph
-  ParentNode->removeChildFromDFGraph(n1);
-  ParentNode->removeChildFromDFGraph(n2);
-
-  /*
-  delete n1;
-  delete n2;
-  for (unsigned i = 0 ; i < DFEdgestoRemove.size(); i++)
-    delete DFEdgestoRemove[i];
-*/
-
-// -------------------------------------------------------------------------- //
-  errs() << "Removing similar arguments\n";
-  if (dyn_cast<DFLeafNode>(n1)) {
-    removeUnnecessaryInputEdges(mergeDFNode, n1, 0, 0);
-    // Erase old functions from module
-    n1->getFuncPointer()->replaceAllUsesWith(UndefValue::get(n1->getFuncPointer()->getType()));
-    n1->getFuncPointer()->eraseFromParent();
-    n2->getFuncPointer()->replaceAllUsesWith(UndefValue::get(n2->getFuncPointer()->getType()));
-    n2->getFuncPointer()->eraseFromParent();
-
-  } else {
-    std::vector<DFNode*> AllocationNodes;
-    std::vector<DFNode*> ComputeNodes;
-
-    // Get components of n1
-    getChildNodeSplit(cast<DFInternalNode>(n1), AllocationNodes, ComputeNodes);
-    DFLeafNode* N1ComputeNode = cast<DFLeafNode>(ComputeNodes[0]);
-    DFLeafNode* N1AllocationNode =
-      (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL;
-
-    AllocationNodes.clear();
-    ComputeNodes.clear();
-
-    // Get components of n2
-    getChildNodeSplit(cast<DFInternalNode>(n2), AllocationNodes, ComputeNodes);
-    DFLeafNode* N2AllocationNode =
-      (AllocationNodes.size() == 1) ? cast<DFLeafNode>(AllocationNodes[0]): NULL;
-    DFLeafNode* N2ComputeNode = cast<DFLeafNode>(ComputeNodes[0]);
-
-    AllocationNodes.clear();
-    ComputeNodes.clear();
-
-    // Get components of mergeDFNode
-    getChildNodeSplit(cast<DFInternalNode>(mergeDFNode), AllocationNodes,
-                      ComputeNodes);
-    DFLeafNode* ComputeNode = cast<DFLeafNode>(ComputeNodes[0]);
-
-    unsigned numOfN1AllocArgs = 0;
-    unsigned numOfN2AllocArgs = 0;
-    if (N1AllocationNode) {
-      StructType* F1RetTy =
-        cast<StructType>(N1AllocationNode->getFuncPointer()->getReturnType());
-        numOfN1AllocArgs = F1RetTy->getNumElements();
-    }
-    if (N2AllocationNode) {
-      StructType* F2RetTy =
-        cast<StructType>(N2AllocationNode->getFuncPointer()->getReturnType());
-        numOfN2AllocArgs = F2RetTy->getNumElements();
-    }
-
-    errs() << "Removing unnecessary input arguments\n";
-    removeUnnecessaryInputEdges(ComputeNode, N1ComputeNode, numOfN1AllocArgs,
-                                numOfN2AllocArgs);
-
-    N1ComputeNode->getFuncPointer()->replaceAllUsesWith(UndefValue::get(N1ComputeNode->getFuncPointer()->getType()));
-    N1ComputeNode->getFuncPointer()->eraseFromParent();
-    N2ComputeNode->getFuncPointer()->replaceAllUsesWith(UndefValue::get(N2ComputeNode->getFuncPointer()->getType()));
-    N2ComputeNode->getFuncPointer()->eraseFromParent();
-  }
-
-  errs() << "Deleting internal nodes\n";
-
-  deleteInternalNodeFunction(n1, DFG);
-  deleteInternalNodeFunction(n2, DFG);
-
-  errs() << "Returning\n";
-  return;
-}
-
-bool MergeDFN::runOnModule(Module &M) {
-  errs() << "\nMergeDFN PASS\n";
-
-  // Get the BuildDFG Analysis Results:
-  // - Dataflow graph
-  // - Maps from i8* handles to DFNode and DFEdge
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-
-  // DFInternalNode *Root = DFG.getRoot();
-  std::vector<DFInternalNode*> Roots = DFG.getRoots();
-
-  // Visitor for Code Generation Graph Traversal
-  MergeTraversal *MergeLookup = new MergeTraversal(M, DFG);
-
-  // Iterate over all the DFGs and produce code for each one of them
-  for (auto rootNode: Roots) {
-    // Initiate code generation for root DFNode
-    MergeLookup->visit(rootNode);
-  }
-
-  if (MergeLookup->isValidMergeChoise()) {
-    errs() << "Valid Merge Choise. Begin merging..\n";
-    DEBUG(errs() << "Valid Merge Choise. Begin merging..\n");
-    MergeLookup->mergeDFN();
-  } else {
-    errs() << "Not Valid Merge Choise. Abort merging.\n";
-    DEBUG(errs() << "Not Valid Merge Choise. Abort merging.\n");
-  }
-
-  delete MergeLookup;
-
-  return true;
-}
-
-/******************************************************************************
- *                              Helper functions                              *
- ******************************************************************************/
-
-// Creates a new createNode intrinsic, similar to II but with different
-// associated function F instead
-IntrinsicInst* createIdenticalCreateNodeWithDifferentFunction(Function* F,
-                                                            IntrinsicInst* II) {
-  Module* M = F->getParent();
-
-  // Find which createNode intrinsic we need to create
-  Function* CreateNodeF = Intrinsic::getDeclaration(M, II->getIntrinsicID());
-  Constant* Fp = ConstantExpr::getPointerCast(F,
-                                          Type::getInt8PtrTy(II->getContext()));
-
-  ArrayRef<Value*> CreateNodeArgs;
-  switch (II->getIntrinsicID()) {
-    case Intrinsic::visc_createNode:
-    {
-      CreateNodeArgs = ArrayRef<Value*>(Fp);
-      break;
-    }
-    case Intrinsic::visc_createNode1D:
-    {
-      Value* CreateNode1DArgs[] = {Fp, II->getArgOperand(1)};
-      CreateNodeArgs = ArrayRef<Value*>(CreateNode1DArgs, 2);
-      break;
-    }
-    case Intrinsic::visc_createNode2D:
-    {
-      Value* CreateNode2DArgs[] = {Fp, II->getArgOperand(1),
-                                       II->getArgOperand(2)};
-      CreateNodeArgs = ArrayRef<Value*>(CreateNode2DArgs, 3);
-      break;
-    }
-    case Intrinsic::visc_createNode3D:
-    {
-      Value* CreateNode3DArgs[] = {Fp, II->getArgOperand(1),
-                                       II->getArgOperand(2),
-                                       II->getArgOperand(3)};
-      CreateNodeArgs = ArrayRef<Value*>(CreateNode3DArgs, 4);
-      break;
-    }
-    default :
-      assert(false && "Unknown createNode intrinsic");
-      break;
-  }
-
-  CallInst* CI = CallInst::Create(CreateNodeF,
-                                  CreateNodeArgs,
-                                  F->getName()+".node");
-  IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI);
-  return CreateNodeII;
-}
-
-// Creates a new createNode intrinsic based on II.
-// The new intrinsic has different associated function F instead. II is used to
-// determine the location (in the parameter list of function Fargs) where the
-// arguments of the new intrinsic can be found.
-IntrinsicInst* createNewCreateNodeBasedOn(Function* F, IntrinsicInst* II,
-                                                              Function* Fargs) {
-  Module* M = F->getParent();
-
-  // Find which createNode intrinsic we need to create
-  Function* CreateNodeF = Intrinsic::getDeclaration(M, II->getIntrinsicID());
-  Constant* Fp = ConstantExpr::getPointerCast(F,
-                                          Type::getInt8PtrTy(II->getContext()));
-
-  std::vector<Argument*> FArgList;
-  for (auto& arg: Fargs->getArgumentList()) {
-    FArgList.push_back(&arg);
-  }
-
-  ArrayRef<Value*> CreateNodeArgs;
-  switch (II->getIntrinsicID()) {
-    case Intrinsic::visc_createNode:
-    {
-      CreateNodeArgs = ArrayRef<Value*>(Fp);
-      break;
-    }
-    case Intrinsic::visc_createNode1D:
-    {
-      Value* CreateNode1DArgs[] = {Fp,
-                    FArgList[cast<Argument>(II->getArgOperand(1))->getArgNo()]};
-      CreateNodeArgs = ArrayRef<Value*>(CreateNode1DArgs, 2);
-      break;
-    }
-    case Intrinsic::visc_createNode2D:
-    {
-      Value* CreateNode2DArgs[] = {Fp,
-                    FArgList[cast<Argument>(II->getArgOperand(1))->getArgNo()],
-                    FArgList[cast<Argument>(II->getArgOperand(2))->getArgNo()]};
-      CreateNodeArgs = ArrayRef<Value*>(CreateNode2DArgs, 3);
-      break;
-    }
-    case Intrinsic::visc_createNode3D:
-    {
-      Value* CreateNode3DArgs[] = {Fp,
-                    FArgList[cast<Argument>(II->getArgOperand(1))->getArgNo()],
-                    FArgList[cast<Argument>(II->getArgOperand(2))->getArgNo()],
-                    FArgList[cast<Argument>(II->getArgOperand(3))->getArgNo()]};
-      CreateNodeArgs = ArrayRef<Value*>(CreateNode3DArgs, 4);
-      break;
-    }
-    default :
-      assert(false && "Unknown createNode intrinsic");
-      break;
-  }
-
-  CallInst* CI = CallInst::Create(CreateNodeF,
-                                  CreateNodeArgs,
-                                  F->getName()+".node");
-  IntrinsicInst* CreateNodeII = cast<IntrinsicInst>(CI);
-  return CreateNodeII;
-}
-
-
-// create an identical createEdge with different src (true) or dst (false) node
-IntrinsicInst* createIdenticalCreateEdgeWithDifferentNode(IntrinsicInst* II,
-IntrinsicInst* IInode, bool srcnode) {
-  // Argument of the function to be called
-  Value* SrcNode = (srcnode) ? IInode: II->getArgOperand(0);
-  Value* DstNode = (srcnode) ? II->getArgOperand(1): IInode;
-
-  Value* EdgeArgs[] = {SrcNode, DstNode,
-                       II->getArgOperand(2),
-                       II->getArgOperand(3),
-                       II->getArgOperand(4),
-                       II->getArgOperand(5)
-                      };
-
-//  Function* EdgeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createEdge);
-  Function* EdgeF = II->getCalledFunction();
-  CallInst* EdgeInst = CallInst::Create(EdgeF,
-                                        ArrayRef<Value*>(EdgeArgs, 6),
-                                        II->getName()+".repl");
-  IntrinsicInst* newII = dyn_cast<IntrinsicInst>(EdgeInst);
-  assert(newII && "Cannot cast createEdge to IntrinsicInst");
-
-  return newII;
-}
-
-// create an identical createEdge with different src (true) or dst (false) port
-IntrinsicInst* createIdenticalCreateEdgeWithDifferentPort(IntrinsicInst* II,
-unsigned port, bool srcport) {
-  // Argument of the function to be called
-  ConstantInt* PortConstant =
-    ConstantInt::get(Type::getInt32Ty(II->getContext()), port);
-  Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(3);
-  Value* DstPort = (srcport) ? II->getArgOperand(4): PortConstant;
-
-  Value* EdgeArgs[] = {II->getArgOperand(0),
-                       II->getArgOperand(1),
-                       II->getArgOperand(2),
-                       SrcPort, DstPort,
-                       II->getArgOperand(5)
-                      };
-
-//  Function* EdgeF = Intrinsic::getDeclaration(&M, Intrinsic::visc_createEdge);
-  Function* EdgeF = II->getCalledFunction();
-  CallInst* EdgeInst = CallInst::Create(EdgeF,
-                                        ArrayRef<Value*>(EdgeArgs, 6),
-                                        II->getName()+".repl");
-  IntrinsicInst* newII = dyn_cast<IntrinsicInst>(EdgeInst);
-  assert(newII && "Cannot cast createEdge to IntrinsicInst");
-
-  return newII;
-}
-
-// create an identical bindInput with different destination node
-IntrinsicInst* createIdenticalBindInputWithDifferentNode(IntrinsicInst* II,
-                                                        IntrinsicInst* IInode) {
-  Value* BindArgs[] = {IInode,
-                       II->getArgOperand(1),
-                       II->getArgOperand(2),
-                       II->getArgOperand(3)
-                      };
-//  Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_input);
-  Function* BindF = II->getCalledFunction();
-  CallInst* BindInst = CallInst::Create(BindF,
-                                        ArrayRef<Value*>(BindArgs, 4),
-                                        "");
-  IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst);
-  assert(newII && "Cannot cast bind_output to IntrinsicInst");
-
-  return newII;
-}
-
-// create an identical bindInput with different src (true) or dst (false) port
-IntrinsicInst* createIdenticalBindInputWithDifferentPort(IntrinsicInst* II,
-                                                         unsigned port,
-                                                         bool srcport) {
-  // Argument of the function to be called
-  ConstantInt* PortConstant =
-    ConstantInt::get(Type::getInt32Ty(II->getContext()), port);
-  Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(1);
-  Value* DstPort = (srcport) ? II->getArgOperand(2): PortConstant;
-
-  Value* BindArgs[] = {II->getArgOperand(0),
-                       SrcPort,
-                       DstPort,
-                       II->getArgOperand(3)
-                      };
-//  Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_input);
-  Function* BindF = II->getCalledFunction();
-  CallInst* BindInst = CallInst::Create(BindF,
-                                        ArrayRef<Value*>(BindArgs, 4),
-                                        "");
-  IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst);
-  assert(newII && "Cannot cast bind_output to IntrinsicInst");
-
-  return newII;
-}
-
-// create an identical bindOutput with different source node
-IntrinsicInst* createIdenticalBindOutputWithDifferentNode(IntrinsicInst* II,
-                                                        IntrinsicInst* IInode) {
-  Value* BindArgs[] = {IInode,
-                       II->getArgOperand(1),
-                       II->getArgOperand(2),
-                       II->getArgOperand(3)
-                      };
-//  Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_output);
-  Function* BindF = II->getCalledFunction();
-  CallInst* BindInst = CallInst::Create(BindF,
-                                        ArrayRef<Value*>(BindArgs, 4),
-                                        "");
-  IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst);
-  assert(newII && "Cannot cast bind_output to IntrinsicInst");
-
-  return newII;
-}
-
-// create an identical bindOutput with different src (true) or dst (false) port
-IntrinsicInst* createIdenticalBindOutputWithDifferentPort(IntrinsicInst* II,
-                                                          unsigned port,
-                                                          bool srcport) {
-  // Argument of the function to be called
-  ConstantInt* PortConstant =
-    ConstantInt::get(Type::getInt32Ty(II->getContext()), port);
-  Value* SrcPort = (srcport) ? PortConstant: II->getArgOperand(1);
-  Value* DstPort = (srcport) ? II->getArgOperand(2): PortConstant;
-
-  Value* BindArgs[] = {II->getArgOperand(0),
-                       SrcPort,
-                       DstPort,
-                       II->getArgOperand(3)
-                      };
-//  Function* BindF = Intrinsic::getDeclaration(&M, Intrinsic::visc_bind_output);
-  Function* BindF = II->getCalledFunction();
-  CallInst* BindInst = CallInst::Create(BindF,
-                                        ArrayRef<Value*>(BindArgs, 4),
-                                        "");
-  IntrinsicInst* newII = dyn_cast<IntrinsicInst>(BindInst);
-  assert(newII && "Cannot cast bind_output to IntrinsicInst");
-
-  return newII;
-}
-
-// Function to find each use of a createNode intrinsic for a node existing
-// before merging and properly replace it with a use of the createNode for
-// node created after node merging
-// II1 is the createNode for the node that got merged, whose uses we want to replace
-// II2 is the createNode for the other node that got merged
-// (we need this to determine if en edge should be updated or deleted)
-// InMap and Outmap maps map the old location of an argument/output to the new
-// one, after edges removed and functions merged
-// CreateEdge for edges from n1 to n2 need to be deleted and associated
-// intrinsics to be removed. They are placed in the two vectors.
-void updateUsesOfCreateNodeInParent(IntrinsicInst* II1,
-                                    IntrinsicInst* II2,
-                                    IntrinsicInst* IInew,
-                                    std::map<unsigned, unsigned> InMap,
-                                    std::map<unsigned, unsigned> OutMap,
-                                    std::vector<DFEdge*> &DFEdgestoRemove,
-                                    BuildDFG &DFG) {
-  std::vector<IntrinsicInst*> IItoRemove;
-
-  for (Value::user_iterator i = II1->user_begin(), ie = II1->user_end();
-       i != ie; ++i) {
-    Instruction *VI = dyn_cast<Instruction>(*i);
-    IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI);
-    assert(II && "Use of a node handle outside of a visc intrinsic");
-
-    switch(II->getIntrinsicID()) {
-      case Intrinsic::visc_createEdge:
-        {
-        if (isOutgoingEdgeIntrinsic(II,II1)) { // check for outgoing edges
-          if (isIncomingEdgeIntrinsic(II,II2)) {
-            // edge is between merged nodes
-            // createEdge is marked for deletion, if not already there
-            if (std::find(IItoRemove.begin(),IItoRemove.end(),II) == IItoRemove.end()) {
-              IItoRemove.push_back(II);
-              // ------------------------------------------------------------ //
-              // Updating the BuildDFG result
-              // remove handle for non-existing edge in mapping
-              DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II];
-              DFG.removeElementFromHandleToDFEdgeMap(II);
-              DFEdgestoRemove.push_back(EdgeInMapping);
-              // ------------------------------------------------------------ //
-            }
-          } else { // Edge is outgoing, but to another node in the graph
-            // We need to change Src and SrcPort
-            // create an identical createEdge with different srcport
-            unsigned srcPos = cast<ConstantInt>(II->getOperand(3))->getZExtValue();
-            IntrinsicInst* newII =
-              createIdenticalCreateEdgeWithDifferentPort(II,
-                                                         OutMap[srcPos],
-                                                         true);
-            // and insert it before the current create edge
-            newII->insertBefore(II);
-            // change of operand II1 will happen at the end with replaceAllUsesWith
-            // mark this createEdge for deletion
-            IItoRemove.push_back(II);
-            // -------------------------------------------------------------- //
-            // Updating the BuildDFG result
-            // replace handle for edge in mapping
-            DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II];
-            DFG.removeElementFromHandleToDFEdgeMap(II);
-            DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping);
-            // -------------------------------------------------------------- //
-          }
-        } else { // isIncomingEdgeIntrinsic(II,II1) : check for incoming edges
-          if (isOutgoingEdgeIntrinsic(II,II2)) {
-            // edge is between merged nodes
-            // createEdge is marked for deletion, if not already there
-            if (std::find(IItoRemove.begin(),IItoRemove.end(),II) == IItoRemove.end()) {
-              IItoRemove.push_back(II);
-              // ------------------------------------------------------------ //
-              // Updating the BuildDFG result
-              // remove handle for non-existing edge in mapping
-              DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II];
-              DFG.removeElementFromHandleToDFEdgeMap(II);
-              DFEdgestoRemove.push_back(EdgeInMapping);
-              // ------------------------------------------------------------ //
-            }
-          } else { // Edge is incoming, but from another node 
-            // We need to change Dst node and DstPort
-            // create an identical createEdge with different dstport
-            unsigned dstPos = cast<ConstantInt>(II->getOperand(4))->getZExtValue();
-            IntrinsicInst* newII =
-              createIdenticalCreateEdgeWithDifferentPort(II,
-                                                         InMap[dstPos],
-                                                         false);
-            // and insert it before the current create edge
-            newII->insertBefore(II);
-            // change of operand II1 will happen at the end with replaceAllUsesWith
-            // mark this createEdge for deletion
-            IItoRemove.push_back(II);
-            // -------------------------------------------------------------- //
-            // Updating the BuildDFG result
-            // replace handle for edge in mapping
-            DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II];
-            DFG.removeElementFromHandleToDFEdgeMap(II);
-            DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping);
-            // -------------------------------------------------------------- //
-          }
-        }
-        }
-        break;
-      case Intrinsic::visc_bind_input:
-        {
-        // incoming bind from parent node 
-        // We need to change Dst node and DstPort
-        // create an identical bindInput with different dstport
-        unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-        IntrinsicInst* newII =
-              createIdenticalBindInputWithDifferentPort(II, InMap[dstPos], false);
-        // and insert it before the current bind
-        newII->insertBefore(II);
-        // change of operand II1 will happen at the end with replaceAllUsesWith
-        // mark this bind for deletion
-        IItoRemove.push_back(II);
-        // ------------------------------------------------------------------ //
-        // Updating the BuildDFG result
-        // replace handle for edge in mapping
-        DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II];
-        DFG.removeElementFromHandleToDFEdgeMap(II);
-        DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping);
-        // ------------------------------------------------------------------ //
-        }
-        break;
-      case Intrinsic::visc_bind_output:
-        {
-        // outgoing bind to parent node 
-        // We need to change Src node and SrcPort
-        // create an identical bindOutput with different srcport
-        unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-        IntrinsicInst* newII =
-              createIdenticalBindOutputWithDifferentPort(II, OutMap[srcPos], true);
-        // and insert it before the current bind
-        newII->insertBefore(II);
-        // change of operand II1 will happen at the end with replaceAllUsesWith
-        // mark this bind for deletion
-        IItoRemove.push_back(II);
-        // ------------------------------------------------------------------ //
-        // Updating the BuildDFG result
-        // replace handle for edge in mapping
-        DFEdge* EdgeInMapping = DFG.getHandleToDFEdgeMap()[II];
-        DFG.removeElementFromHandleToDFEdgeMap(II);
-        DFG.addElementToHandleToDFEdgeMap(newII, EdgeInMapping);
-        // ------------------------------------------------------------------ //
-        }
-        break;
-      default :
-        assert(false && "Unknown use of node handle");
-        break;
-    }
-  }
-
-  // Delete gathered instructions
-  for (std::vector<IntrinsicInst *>::iterator ib = IItoRemove.begin(),
-       ie = IItoRemove.end(); ib != ie; ++ib) {
-    DEBUG(errs() << "Erasing: " << **ib << "\n");
-    (*ib)->eraseFromParent();
-  }
-
-  // Change all remaining edge-bind intrinsics containing n1 to the new node
-  II1->replaceAllUsesWith(IInew);
-
-}
-
-// Query the king of edge described by a createEdge intrinsic
-// with respect to node handle IIn
-bool isIncomingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) {
-  Value* Src = IIe->getArgOperand(1);
-  IntrinsicInst* ArgII = cast<IntrinsicInst>(Src);
-//  IntrinsicInst* ArgII = cast<IntrinsicInst>(Src->stripPointerCasts());
-  assert(ArgII && "First argument of createEdge is not an intrinsic");
-  return (ArgII == IIn);
-}
-
-bool isOutgoingEdgeIntrinsic(IntrinsicInst* IIe, IntrinsicInst* IIn) {
-  Value* Src = IIe->getArgOperand(0);
-  IntrinsicInst* ArgII = cast<IntrinsicInst>(Src);
-//  IntrinsicInst* ArgII = cast<IntrinsicInst>(Src->stripPointerCasts());
-  assert(ArgII && "First argument of createEdge is not an intrinsic");
-  return (ArgII == IIn);
-}
-
-/*
- * Return true if n2 is a successor of n1
- */
-bool hasSuccessor(DFNode* N1, DFNode* N2) {
-  for (DFNode::const_successor_iterator i = N1->successors_begin(),
-                                        e = N1->successors_end();
-       i != e; i++) {
-    DFNode* N = *i;
-    if ((N == N2) || (hasSuccessor(N,N1))) return true;
-  }
-  return false;
-}
-
-/*
- * Return true if n2 is an immediate successor of n1
- */
-bool hasImmediateSuccesssor(DFNode* N1, DFNode* N2) {
-  for (DFNode::const_successor_iterator i = N1->successors_begin(),
-                                        e = N1->successors_end();
-       i != e; i++) {
-    DFNode* N = *i;
-    if (N == N2) return true;
-  }
-  return false;
-}
-
-/*
- * Return true if all edges between n1 and n2 are one-to-one
- */
-bool checkEdgesType(DFNode* N1, DFNode* N2) {
-  for (DFNode::const_outdfedge_iterator i = N1->outdfedge_begin(),
-                                        e = N1->outdfedge_end();
-       i != e; i++) {
-    DFEdge* E = *i;
-    if ((E->getDestDF() == N2) && (E->getEdgeType())) return false;
-  }
-  return true;
-}
-
-// Construct argument list
-// Assuming that N2 cannot be an ansestor of N1
-static void createArgTypes(DFNode* N1, DFNode* N2, std::vector<Type*> &ArgTypes) {
-  Function* F1 = N1->getFuncPointer();
-  Function* F2 = N2->getFuncPointer();
-
-  for(auto& arg: F1->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    ArgTypes.push_back(arg.getType());
-  }
-
-  unsigned inport = 0;
-  for(auto& arg: F2->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    if (N2->getExtendedInDFEdgeAt(inport)->getSourceDF() != N1)
-      ArgTypes.push_back(arg.getType());
-    inport++;
-  }
-
-}
-
-// Returns the allocation nodes and the compute nodes of a parent dataflow node
-void getChildNodeSplit(DFInternalNode* N,
-                       std::vector<DFNode*> &AllocationNodes,
-                       std::vector<DFNode*> &ComputeNodes) {
-  DFGraph::const_children_iterator ci = N->getChildGraph()->begin();
-  DFGraph::const_children_iterator ce = N->getChildGraph()->end();
-
-  for ( ; ci != ce; ci++ ) {
-    DFNode* child = *ci;
-    if (child->isAllocationNode())
-      AllocationNodes.push_back(child);
-    else if (!child->isDummyNode())
-     ComputeNodes.push_back(child);
-  }
-
-}
-
-// Creates a map between the old locations of parameters and outputs in the
-// functions before merging, and the new one after merge. Those that correspond
-// to edges that no longer exist (between the merged nodes) are not in the maps.
-void buildInputAndOutputMaps(DFNode* N1, DFNode* N2,
-                             std::map<unsigned, unsigned> &N1InMap,
-                             std::map<unsigned, unsigned> &N1OutMap,
-                             std::map<unsigned, unsigned> &N2InMap,
-                             std::map<unsigned, unsigned> &N2OutMap) {
-  unsigned n1NumInputs = 0;
-  for (unsigned i = 0; i < N1->getFuncPointer()->getArgumentList().size();
-       i++, n1NumInputs++) {
-    N1InMap[i] = i;
-  }
-  for (unsigned i = 0, inpos = 0;
-       i < N2->getFuncPointer()->getArgumentList().size(); i++) {
-    if (N2->getExtendedInDFEdgeAt(i)->getSourceDF() != N1) {
-      N2InMap[i] = inpos+n1NumInputs;
-      inpos++;
-    }
-  }
-
-  unsigned n1NumOutputs = 0;
-  StructType* F1RetTy = cast<StructType>(N1->getFuncPointer()->getReturnType());
-  for (unsigned i = 0; i < F1RetTy->getNumElements(); i++) {
-    if (N1->getExtendedOutDFEdgeAt(i)->getDestDF() != N2) {
-      N1OutMap[i] = n1NumOutputs;
-      n1NumOutputs++;
-    }
-  }
-
-  StructType* F2RetTy = cast<StructType>(N2->getFuncPointer()->getReturnType());
-  for (unsigned i = 0; i < F2RetTy->getNumElements(); i++) {
-    N2OutMap[i] = i+n1NumOutputs;
-  }
-
-  return;
-}
-
-// Creates a map between the old edge ports in the
-// nodes before merging, and the new one after merge. Those that correspond
-// to edges that no longer exist (between the merged nodes) are not in the maps.
-void buildInAndOutEdgeMaps(DFNode* N1, DFNode* N2,
-                           std::map<unsigned, unsigned> &N1InMap,
-                           std::map<unsigned, unsigned> &N1OutMap,
-                           std::map<unsigned, unsigned> &N2InMap,
-                           std::map<unsigned, unsigned> &N2OutMap) {
-
-  unsigned n1NumInEdges = N1->getFuncPointer()->getArgumentList().size();
-  for (unsigned i = 0; i < n1NumInEdges; i++) {
-    N1InMap[i] = i;
-  }
-
-  unsigned n1NumOutEdges = 0;
-  StructType* F1RetTy = cast<StructType>(N1->getFuncPointer()->getReturnType());
-  for (unsigned i = 0; i < F1RetTy->getNumElements(); i++) {
-    if (N1->getExtendedOutDFEdgeAt(i)->getDestDF() != N2) {
-      N1OutMap[i] = n1NumOutEdges;
-      n1NumOutEdges++;
-    }
-  }
-
-  unsigned n2NumInEdges = N2->getFuncPointer()->getArgumentList().size();
-  for (unsigned i = 0, inpos = 0; i < n2NumInEdges; i++) {
-    if (N2->getExtendedInDFEdgeAt(i)->getSourceDF() != N1) {
-      N2InMap[i] = inpos+n1NumInEdges;
-      inpos++;
-    }
-  }
-
-  StructType* F2RetTy = cast<StructType>(N2->getFuncPointer()->getReturnType());
-  for (unsigned i = 0; i < F2RetTy->getNumElements(); i++) {
-    N2OutMap[i] = i+n1NumOutEdges;
-  }
-
-  return;
-}
-
-// Construct return type
-// Assuming that N2 cannot be an ansestor of N1
-static StructType* createReturnType(DFNode* N1, DFNode* N2) {
-  Function* F1 = N1->getFuncPointer();
-  Function* F2 = N2->getFuncPointer();
-
-  StructType* F1RetTy = dyn_cast<StructType>(F1->getReturnType());
-  assert(F1RetTy && "Return Type must always be a struct");
-  StructType* F2RetTy = dyn_cast<StructType>(F2->getReturnType());
-  assert(F2RetTy && "Return Type must always be a struct");
-
-  std::vector<Type*> ReturnTypeElements;
-  unsigned outPos1 = 0, outPos2 = 0, outPosM = 0;
-  for (StructType::element_iterator i = F1RetTy->element_begin(),
-       e = F1RetTy->element_end();
-       (i != e) && (outPos1 < F1RetTy->getNumElements()); i++, outPos1++) {
-    if (N1->getExtendedOutDFEdgeAt(outPos1)->getDestDF() == N2)
-      continue;
-    ReturnTypeElements.push_back(*i);
-    outPosM++;
-  }
-
-  for (StructType::element_iterator i = F2RetTy->element_begin(),
-       e = F2RetTy->element_end();
-       i != e && outPos2 < F2RetTy->getNumElements(); i++, outPos2++) {
-    ReturnTypeElements.push_back(*i);
-    outPosM++;
-  }
-
-  errs() << "Return elements = " << ReturnTypeElements.size() << "\n";
-  StructType* FRetTy = StructType::create(F1->getContext(),
-                                  ArrayRef<Type*>(ReturnTypeElements),
-                                 (F1->getName()+"."+F2->getName()+".ty").str(), true);
-
-  errs() << "Struct type created\n";
-  return FRetTy;
-}
-
-// Copy attributes
-// Assuming that N2 cannot be an ansestor of N1
-static void copyAttrList(DFNode* N1, DFNode* N2, Function* F) {
-  Function* F1 = N1->getFuncPointer();
-  Function* F2 = N2->getFuncPointer();
-
-  Function::arg_iterator f1_ai = F1->arg_begin(), f1_ae = F1->arg_end();
-  Function::arg_iterator f2_ai = F2->arg_begin(), f2_ae = F2->arg_end();
-  Function::arg_iterator f_ai = F->arg_begin(), f_ae = F->arg_end();
-
-  unsigned inPos1 = 0, inPos2 = 0, inPosM = 0;
-  for(; f1_ai != f1_ae && f_ai != f_ae; ++f1_ai, ++f_ai, inPos1++, inPosM++) {
-    AttributeSet AS = F1->getAttributes();
-    DEBUG(errs() << "Copying attributes from " << F1->getName() << " at " << f1_ai->getArgNo() << "\n");
-    AttrBuilder AB(AS, f1_ai->getArgNo()+1);
-    AttributeSet argAS = AttributeSet::get(F1->getContext(), f_ai->getArgNo()+1, AB);
-    F->addAttributes(f_ai->getArgNo()+1, argAS);
-  }
-  for(; f2_ai != f2_ae && f_ai != f_ae; ++f2_ai, inPos2++) {
-    if (N2->getExtendedInDFEdgeAt(inPos2)->getSourceDF() == N1)
-      continue;
-
-    AttributeSet AS = F2->getAttributes();
-    DEBUG(errs() << "Copying attributes from " << F2->getName() << " at " << f2_ai->getArgNo() << "\n");
-    AttrBuilder AB(AS, f2_ai->getArgNo()+1);
-    AttributeSet argAS = AttributeSet::get(F2->getContext(), f_ai->getArgNo()+1, AB);
-    F->addAttributes(f_ai->getArgNo()+1, argAS);
-    ++f_ai;
-    inPosM++;
-  }
-}
-
-// Copy argument names
-static void copyArgumentNames(DFNode* N1, DFNode* N2, Function* F) {
-  Function* F1 = N1->getFuncPointer();
-  Function* F2 = N2->getFuncPointer();
-
-  Function::arg_iterator dest_it = F->arg_begin();
-
-  for(auto& arg: F1->getArgumentList()) {
-    dest_it->setName("n1_" + arg.getName());
-    dest_it++;
-  }
-
-  unsigned inport = 0;
-  for(auto& arg: F2->getArgumentList()) {
-    if (N2->getExtendedInDFEdgeAt(inport)->getSourceDF() != N1) {
-      dest_it->setName("n2_" + arg.getName());
-      dest_it++;
-    }
-    inport++;
-  }
-}
-
-// Creates shift map, which maps old position to new, after shifting num
-// arguments starting from fromPos by shift positions to the right.
-void createShiftMap(Function* F, unsigned fromPos, unsigned num,
-                    unsigned shift, std::vector<unsigned> &ShiftMap) {
-
-  for (unsigned i = 0; i < F->getArgumentList().size(); i++)
-    ShiftMap.push_back(i);
-
-  for (unsigned i = fromPos; i < fromPos + num; i++)
-    ShiftMap[i] += shift;
-
-  for (unsigned i = fromPos + num; i < fromPos + num + shift; i++)
-    ShiftMap[i] -= num;
-
-}
-
-// Shifts num arguments starting from fromPos by shift positions to the right,
-// replacing with the arguments at those positions.
-// Updates shift map, which maps old position to new.
-void shiftArgs(Function* F, unsigned fromPos, unsigned num,
-               unsigned shift, std::vector<unsigned> &ShiftMap) {
-  Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end();
-  Function::arg_iterator from = ai;
-
-  unsigned cnt;
-  for (cnt = 0; from != ae && cnt < fromPos; from++, cnt++) {
-  }
-  assert((cnt == fromPos) && "Invalid start position for argument shifting");
-  
-  Function::arg_iterator af = from;
-  std::vector<Type*> ArgTypes;
-  std::vector<StringRef> ArgNames;
-  unsigned argNo = 0;
-
-  //TODO: check if this copies attributes as well
-  ValueToValueMapTy VMap;
-  Function* F_copy = CloneFunction(F, VMap);
-  F_copy->removeFromParent();
-
-  // Arguments up until before from
-  for ( ; ai != from && ai != ae; ai++, argNo++) {
-    ArgTypes.push_back(ai->getType());
-    ArgNames.push_back(ai->getName());
-  }
-
-  // Arguments to be shifted (num arguments) are skipped for now
-  for (unsigned i = 0; (i < num) && (ai != ae); i++, ai++, argNo++) {
-    ShiftMap[argNo] += shift;
-  }
-
-  // Later arguments (#shift arguments) are pushed until we fill shift positions
-  for (unsigned i = 0; (ai != ae) && (i < shift); i++, ai++, argNo++) {
-    ArgTypes.push_back(ai->getType());
-    ArgNames.push_back(ai->getName());
-    ShiftMap[argNo] -= num;
-  }
-
-  // Arguments that were to be shifted (num arguments) are now pushed
-  for (unsigned i = 0; (i < num) && (af != ae); i++, af++, argNo++) {
-    ArgTypes.push_back(af->getType());
-    ArgNames.push_back(af->getName());
-  }
-
-  // Remaining arguments are pushed
-  for (; ai != ae; ai++) {
-    ArgTypes.push_back(ai->getType());
-    ArgNames.push_back(ai->getName());
-  }
-
-  // Change function type
-  FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg());
-  PointerType* PTy = FTy->getPointerTo();
-  F->mutateType(PTy);
-
-  // Shift argument names
-  ai = F->arg_begin();
-  for (unsigned i = 0; ai != ae; ai++, i++) {
-    (*ai).setName(ArgNames[i]);
-  }
-
-  // Shift attributes by deleting them from F and copying them from F_copy
-
-  //Initialize required iterators for shift elements: from -> from_copy+shift
-  af = from;
-  Function::arg_iterator af_copy;
-  for (unsigned i = 0; i < af->getArgNo() + shift; i++, af_copy++) {
-  }
-  for (unsigned i = 0; i < num; i++, af++, af_copy++) {
-    AttributeSet ASf = F->getAttributes();
-    AttributeSet ASfc = F_copy->getAttributes();
-
-    AttrBuilder ABfc(ASfc, af_copy->getArgNo()+1);
-    AttributeSet argASfc = AttributeSet::get(F_copy->getContext(), af->getArgNo()+1, ABfc);
-    F->removeAttributes(af->getArgNo()+1,ASf.getParamAttributes(af->getArgNo()+1));
-    F->addAttributes(af->getArgNo()+1, argASfc);
-  }
-  //Initialize required iterators for num elements: to -> to_copy-num
-  af_copy = from;
-  for (unsigned i = 0; i < shift; i++, af++, af_copy++) {
-    AttributeSet ASf = F->getAttributes();
-    AttributeSet ASfc = F_copy->getAttributes();
-
-    AttrBuilder ABfc(ASfc, af_copy->getArgNo()+1);
-    AttributeSet argASfc = AttributeSet::get(F_copy->getContext(), af->getArgNo()+1, ABfc);
-    F->removeAttributes(af->getArgNo()+1,ASf.getParamAttributes(af->getArgNo()+1));
-    F->addAttributes(af->getArgNo()+1, argASfc);
-  }
-}
-
-/* 
- * Create type of merged function
- * - input arguments type
- * - struct return type
- * Get Attributes from original functions
- * Get parameter names from original functions
- * Insert an empty function of this type in the module
- */
-static Function* createEmptyDFNodeFunction(DFNode* N1, DFNode* N2, Module &M) {
-  Function* F1 = N1->getFuncPointer();
-  Function* F2 = N2->getFuncPointer();
-
-  errs () << "Constructing argument list\n";
- // Construct argument list
-  std::vector<Type*> ArgTypes;
-  createArgTypes(N1, N2, ArgTypes);
-
-  errs () << "Constructing return type\n";
-  // Construct return type
-  StructType* FRetTy = createReturnType(N1, N2);
-
-  FunctionType* FTy = FunctionType::get(FRetTy, ArgTypes, false);
-  // Create a function with the new type
-  Function* F = Function::Create(FTy, F1->getLinkage(),
-                                 F1->getName()+"_"+F2->getName(), &M);
-
-  errs () << "Copying argument names\n";
-  // Copy argument names from original functions
-  copyArgumentNames(N1, N2, F);
-  // Copy argument attributes from original functions
-  copyAttrList(N1, N2, F);
-
-   return F;
-}
-
-/* 
- * Create function of leaf node after merging
- * - create type
- * - Create the call instructions
- * - Create intermediate assignments
- * - Create assignments to output struct
- */
-static Function* createLeafDFNodeFunction(DFNode* N1, DFNode* N2, Module &M,
-  unsigned numOfN1AllocArgs, unsigned posOfN1AllocArgs,
-  unsigned numOfN2AllocArgs) {
-
-  errs () << "Creating function signature\n";
-  /*
-   * Create empty node function of the correct type
-   */
-  Function* F = createEmptyDFNodeFunction(N1, N2, M);
-
-  // Get return type, needed for building the assignmens to the return struct
-  StructType* FRetTy = cast<StructType>(F->getReturnType());
-
-  Function* F1 = N1->getFuncPointer();
-  Function* F2 = N2->getFuncPointer();
-
-  errs () << "Creating function body\n";
-  // This maps i: position in F argument list, to new position in F argument
-  // list (after shifting arguments maybe). Initially, no shift.
-  std::vector<unsigned> FArgsShiftMap(F->getArgumentList().size());
-  for (unsigned i = 0; i < FArgsShiftMap.size(); i++)
-    FArgsShiftMap[i] = i;
-
-  if (numOfN1AllocArgs) {
-    // Number of remaining f2 parameters is initial parameter number of f2
-    // minus the number of edges between n1 and n2. We can also find this by
-    // getting the number of parameters of the new function F and subtract the
-    // number of parameters of F1, since this did not change.
-    unsigned shiftOfN1AllocArgs = F->getArgumentList().size() -
-                                  F1->getArgumentList().size() -
-                                  numOfN2AllocArgs;
-    shiftArgs(F, posOfN1AllocArgs, numOfN1AllocArgs, shiftOfN1AllocArgs,
-              FArgsShiftMap);
-  }
-
-
-  // Add a basic block to the new, empty function
-  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
-  ReturnInst* RI = ReturnInst::Create(M.getContext(),
-                                      UndefValue::get(FRetTy), BB);
-
-
-  errs () << "Creating function call\n";
-  // Get Argument list of new function into a vector (for easier indexing)
-  std::vector<Value*> FArgs;
-  for (auto& arg: F->getArgumentList()) {
-    FArgs.push_back(&arg);
-  }
-
-  // Create call instruction for first node
-  std::vector<Value*> Args;
-  for (unsigned i = 0; i < F1->getArgumentList().size(); i++) {
-    Args.push_back(FArgs[FArgsShiftMap[i]]);
-  }
-  CallInst* CI1 = CallInst::Create(F1,
-                                   ArrayRef<Value*>(Args),
-                                   "merged."+F1->getName(),
-                                   RI);
-  Args.clear();
-
-  errs () << "Creating function call for second node\n";
-  // Create call instruction for second node
-  for(unsigned fargNo = 0, i = 0;
-      i < F2->getArgumentList().size(); i++) {
-    Value* Arg;
-    if (N2->getExtendedInDFEdgeAt(i)->getSourceDF() == N1) {
-      ExtractValueInst *EI =
-        ExtractValueInst::Create(CI1,
-                                 N2->getExtendedInDFEdgeAt(i)->getSourcePosition(),
-                                 "",
-                                 RI);
-      Arg = EI;
-    } else {
-      Arg = FArgs[FArgsShiftMap[F1->getArgumentList().size() + fargNo++]];
-    }
-    Args.push_back(Arg);
-  }
-
-  CallInst* CI2 = CallInst::Create(F2,
-                                   ArrayRef<Value*>(Args),
-                                   "merged."+F2->getName(),
-                                   RI);
-
-  errs () << "Creating extract element instructions\n";
-  // Create extract element instructions for elements of output struct
-  std::vector<ExtractValueInst *> ExtractValueInstVec;
-
-  // First, from node n1: exclude those that go to n2
-  StructType *F1RetTy = dyn_cast<StructType>(F1->getReturnType());
-  for (unsigned i = 0; i < F1RetTy->getNumElements(); i++) {
-    if (N1->getExtendedOutDFEdgeAt(i)->getDestDF() != N2) {
-      ExtractValueInst *EI = ExtractValueInst::Create(CI1, i, "", RI);
-      ExtractValueInstVec.push_back(EI);
-    }
-  }
-  // Then, from node n2
-  StructType *F2RetTy = dyn_cast<StructType>(F2->getReturnType());
-  for (unsigned i = 0; i < F2RetTy->getNumElements(); i++) {
-    ExtractValueInst *EI = ExtractValueInst::Create(CI2, i, "", RI);
-    ExtractValueInstVec.push_back(EI);
-  }
-
-  errs () << "Creating output struct\n";
-  // Create output struct of type FRetTy
-  assert(FRetTy->getNumElements() == ExtractValueInstVec.size() &&
-    "Size of output struct does not match expected number of EE instructions");
-  Value* retVal = UndefValue::get(F->getReturnType());
-
-  for (unsigned i = 0; i < ExtractValueInstVec.size(); i++) {
-    InsertValueInst *IVI =
-      InsertValueInst::Create(retVal, ExtractValueInstVec[i], i, "", RI);
-    retVal = IVI;
-  }
-  ReturnInst* newRI = ReturnInst::Create(M.getContext(), retVal);
-  ReplaceInstWithInst(RI, newRI);
-
-  // Inline the two calls
-  InlineFunctionInfo IFI1, IFI2;
-  InlineFunction(CI1, IFI1, nullptr, false);
-  InlineFunction(CI2, IFI2, nullptr, false);
-
-  return F;
-}
-
-static Function* createInternalDFNodeFunction(DFNode* N1, DFNode* N1an,
-  DFNode* N1cn, DFNode* N2, DFNode* N2an, DFNode* N2cn, Function* Fa,
-  Function* Fc, Module &M, unsigned numOfN1AllocArgs, unsigned posOfN1AllocArgs,
-  unsigned numOfN2AllocArgs) {
-
-  /*
-   * Create empty node function of the correct type
-   */
-  Function* F = createEmptyDFNodeFunction(N1, N2, M);
-
-  // Get return type, needed for building the assignmens to the return struct
-  StructType* FRetTy = cast<StructType>(F->getReturnType());
-
-//  Function* F1 = N1->getFuncPointer();
-//  Function* F2 = N2->getFuncPointer();
-
-  // Add a basic block to the new, empty function
-  BasicBlock *BB = BasicBlock::Create(M.getContext(), "entry", F);
-  ReturnInst* RI = ReturnInst::Create(M.getContext(),
-                                      UndefValue::get(FRetTy), BB);
-
-  // Get Argument list of new function into a vector (for easier indexing)
-  std::vector<Value*> FArgs;
-  for (auto& arg: F->getArgumentList()) {
-    FArgs.push_back(&arg);
-  }
-
-  // Get pointers to functions inthe original graph
-//  Function* F1a = (N1an) ? N1an->getFuncPointer() : NULL;
-//  Function* F2a = (N2an) ? N2an->getFuncPointer() : NULL;
-//  Function* F1c = N1cn->getFuncPointer();
-//  Function* F2c = N2cn->getFuncPointer();
-
-  // Create the required createNode intrinsics
-  IntrinsicInst* AllocII = NULL;
-  if (N1an)
-    AllocII = createIdenticalCreateNodeWithDifferentFunction(Fa,
-                                                        N1an->getInstruction());
-  else if (N2an)
-    AllocII = createIdenticalCreateNodeWithDifferentFunction(Fa,
-                                                        N2an->getInstruction());
-  if (AllocII)
-    AllocII->insertBefore(RI);
-
-  // The position in F (new node function) of the node dimensions parameters is
-  // the same as it was in n1 internal node function, because n1 is the first
-  // one to be added to the resulting merged node.
-  IntrinsicInst* ComputeII =
-    createNewCreateNodeBasedOn(Fc, N1cn->getInstruction(), F);
-  ComputeII->insertBefore(RI);
-
-  // Vector to be populated with instructions to be added to internal node
-  std::vector<IntrinsicInst*> IntrinsicInstructionsToAdd;
-  std::vector<IntrinsicInst*> IntermediateInstructions;
-
-  createNewInternalNodeIntrinsics(N1, N2, N1an, N1cn, N2an, N2cn,
-                                  AllocII, ComputeII,
-                                  Fa /* FIXME: Unused */, Fc,
-                                  IntrinsicInstructionsToAdd,
-                                  IntermediateInstructions);
-
-  // Insert generated intrinsics at new internal function
-  for (auto& Inst: IntrinsicInstructionsToAdd) {
-    Inst->insertBefore(RI);
-  }
-
-  // Insert generated intrinsics at new internal function and erase
-  for (auto& Inst: IntermediateInstructions) {
-    Inst->insertBefore(RI);
-    Inst->eraseFromParent();
-  }
-
-  return F;
-}
-
-void createNewInternalNodeIntrinsics(DFNode* N1,
-                                     DFNode* N2,
-                                     DFNode* N1a,
-                                     DFNode* N1c,
-                                     DFNode* N2a,
-                                     DFNode* N2c,
-                                     IntrinsicInst* IInewa,
-                                     IntrinsicInst* IInewc,
-                                     Function* Fa, //FIXME: Unused
-                                     Function* Fc,
-                      std::vector<IntrinsicInst*>& IntrinsicInstructionsToAdd,
-                      std::vector<IntrinsicInst*>& IntermediateInstructions) {
-  IntrinsicInst* II1a = (N1a) ? N1a->getInstruction() : NULL;
-  IntrinsicInst* II1c = N1c->getInstruction();
-  IntrinsicInst* II2a = (N2a) ? N2a->getInstruction() : NULL;
-  IntrinsicInst* II2c = N2c->getInstruction();
-
-  Function* F1a = (N1a) ? N1a->getFuncPointer() : NULL;
-  Function* F1c = N1c->getFuncPointer();
-  Function* F2a = (N2a) ? N1a->getFuncPointer() : NULL;
-
-  unsigned n1aNumOfInputs = 0;
-  unsigned n1aNumOfOutputs = 0;
-  unsigned n1aPosOfOutputs = 0;
-  if (N1a) {
-    n1aNumOfInputs = F1a->getArgumentList().size();
-    n1aNumOfOutputs = cast<StructType>(F1a->getReturnType())->getNumElements();
-    n1aPosOfOutputs = N1a->getOutDFEdgeAt(0)->getDestPosition();
-  }
-  unsigned n2aNumOfOutputs = 0;
-  if (N2a) {
-    n2aNumOfOutputs = cast<StructType>(F2a->getReturnType())->getNumElements();
-  }
-
-  unsigned shiftOfN1AllocOutputs = Fc->getArgumentList().size() -
-                                   F1c->getArgumentList().size() -
-                                   n2aNumOfOutputs;
-
-  std::map<unsigned, unsigned> N1cInMap;
-  std::map<unsigned, unsigned> N1cOutMap;
-  std::map<unsigned, unsigned> N2cInMap;
-  std::map<unsigned, unsigned> N2cOutMap;
-  // These maps map the old location of an argument/output (to its function's
-  // parameter list/out struct) to the new, after edges removed and functions
-  // merged
-
-  // This accounts for argument shifting, due to allocation node n1
-  std::vector<unsigned> FcShiftMap;
-
-  buildInputAndOutputMaps(N1c, N2c, N1cInMap, N1cOutMap, N2cInMap, N2cOutMap);
-  createShiftMap(Fc, n1aPosOfOutputs, n1aNumOfOutputs, shiftOfN1AllocOutputs,
-                 FcShiftMap);
-
-
-  std::map<unsigned, unsigned> N1InDFEdgeMap;
-  std::map<unsigned, unsigned> N1OutDFEdgeMap;
-  std::map<unsigned, unsigned> N2InDFEdgeMap;
-  std::map<unsigned, unsigned> N2OutDFEdgeMap;
-  buildInAndOutEdgeMaps(N1, N2, N1InDFEdgeMap, N1OutDFEdgeMap, N2InDFEdgeMap,
-                        N2OutDFEdgeMap);
-
-
-  // Start with the intrinsics for allocation nodes n1a and n2a
-
-  // TODO: This is only for testing, not needed for functionality
-  std::map<IntrinsicInst*, IntrinsicInst*> CreateEdgeAndBindMap;
-
-  if (N1a) { // If there is an allocation node for the first node
-    for (Value::user_iterator i = II1a->user_begin(), ie = II1a->user_end();
-         i != ie; ++i) {
-      Value *v = *i;
-      Instruction *VI = dyn_cast<Instruction>(v);
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI);
-      assert(II && "Use of a node handle outside of a visc intrinsic");
-
-      switch(II->getIntrinsicID()) {
-        case Intrinsic::visc_createEdge:
-          // This is between allocation and compute node of n1.
-          {
-            // Change source to new allocation node
-            IntrinsicInst* IItemp1 =
-              createIdenticalCreateEdgeWithDifferentNode(II, IInewa, true);
-            // Do not change source port
-            // Change destination node to new compute node
-            IntrinsicInst* IItemp2 =
-              createIdenticalCreateEdgeWithDifferentNode(IItemp1, IInewc, false);
-            // Change destination port to new port, after inmap and shift
-            unsigned dstPos = cast<ConstantInt>(II->getOperand(4))->getZExtValue();
-            IntrinsicInst* EI =
-              createIdenticalCreateEdgeWithDifferentPort(IItemp2,
-                                           FcShiftMap[N1cInMap[dstPos]], false);
-            IntrinsicInstructionsToAdd.push_back(EI);
-            IntermediateInstructions.push_back(IItemp1);
-            IntermediateInstructions.push_back(IItemp2);
-            CreateEdgeAndBindMap[II] = EI;
-          }
-          break;
-        case Intrinsic::visc_bind_input:
-         // These are the inputs from the parent node.
-          {
-           // The destination ports will not change, only the destination will
-           // be changed to point to the new allocation node
-            IntrinsicInst* BI =
-              createIdenticalBindInputWithDifferentNode(II, IInewa);
-            IntrinsicInstructionsToAdd.push_back(BI);
-            CreateEdgeAndBindMap[II] = BI;
-          }
-          break;
-        case Intrinsic::visc_bind_output:
-          assert(false && "Allocation node handle found in visc_bind_output");
-          break;
-        default:
-          assert(false && "Unknown use of node handle");
-          break;
-      }
-    }
-  }
-
-  if (N2a) { // If there is an allocation node fot the second node
-    for (Value::user_iterator i = II2a->user_begin(), ie = II2a->user_end();
-         i != ie; ++i) {
-      Value *v = *i;
-      Instruction *VI = dyn_cast<Instruction>(v);
-      IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI);
-      assert(II && "Use of a node handle outside of a visc intrinsic");
-
-      switch(II->getIntrinsicID()) {
-        case Intrinsic::visc_createEdge:
-          // This is between allocation and compute node of n2.
-          {
-            // Change source to new allocation node
-            IntrinsicInst* IItemp1 =
-              createIdenticalCreateEdgeWithDifferentNode(II, IInewa, true);
-            // Change source port to after all outputs of n1a
-            unsigned srcPos = cast<ConstantInt>(II->getOperand(3))->getZExtValue();
-            IntrinsicInst* IItemp2 =
-              createIdenticalCreateEdgeWithDifferentPort(IItemp1,
-                                                srcPos + n1aNumOfOutputs, true);
-            // Change destination node to new compute node
-            IntrinsicInst* IItemp3 =
-              createIdenticalCreateEdgeWithDifferentNode(IItemp2, IInewc, false);
-            // Change destination port to new port, after inmap and shift
-            // Use of FcShiftMap is not required here - allocation outputs of 
-            // n2a will not get shifted, but it is OK to use (1-1 at this point)
-            unsigned dstPos = cast<ConstantInt>(II->getOperand(4))->getZExtValue();
-            IntrinsicInst* EI =
-              createIdenticalCreateEdgeWithDifferentPort(IItemp3,
-                                           FcShiftMap[N2cInMap[dstPos]], false);
-            IntrinsicInstructionsToAdd.push_back(EI);
-            IntermediateInstructions.push_back(IItemp1);
-            IntermediateInstructions.push_back(IItemp2);
-            IntermediateInstructions.push_back(IItemp3);
-            CreateEdgeAndBindMap[II] = EI;
-          }
-          break;
-        case Intrinsic::visc_bind_input:
-         // These are the inputs from the parent node.
-          {
-            // Change destination node to new allocation node
-            IntrinsicInst* IItemp1 = 
-              createIdenticalBindInputWithDifferentNode(II, IInewa);
-            // Change source port to new port, after edgeinmap
-            unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-            IntrinsicInst* IItemp2 = 
-              createIdenticalBindInputWithDifferentPort(IItemp1,
-                                                       N2InDFEdgeMap[srcPos], true);
-            // Change destination port to new port, after inmap and shift
-            unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-            IntrinsicInst* BI = 
-              createIdenticalBindInputWithDifferentPort(IItemp2,
-                                                       dstPos + n1aNumOfInputs, false);
-            IntrinsicInstructionsToAdd.push_back(BI);
-            IntermediateInstructions.push_back(IItemp1);
-            IntermediateInstructions.push_back(IItemp2);
-            CreateEdgeAndBindMap[II] = BI;
-          }
-          break;
-        case Intrinsic::visc_bind_output:
-          assert(false && "Allocation node handle found in visc_bind_output");
-          break;
-        default:
-          assert(false && "Unknown use of node handle");
-          break;
-      }
-    }
-  }
-
-  // Continue with the intrinsics for compute nodes n1c and n2c
-
-  for (Value::user_iterator i = II1c->user_begin(), ie = II1c->user_end();
-       i != ie; ++i) { // Handle inputs and outputs of n1 compute node
-    Value *v = *i;
-    Instruction *VI = dyn_cast<Instruction>(v);
-    IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI);
-    assert(II && "Use of a node handle outside of a visc intrinsic");
-
-    switch(II->getIntrinsicID()) {
-      case Intrinsic::visc_createEdge:
-        // This is between allocation and compute node of n1.
-        {
-          // These edges should have been handled when dealing with the
-          // allocation nodes
-          assert(CreateEdgeAndBindMap.find(II) != CreateEdgeAndBindMap.end() &&
-            "Edge between A-C node should have been handled while processing A");
-        }
-        break;
-      case Intrinsic::visc_bind_input:
-       // These are the inputs from the parent node.
-        {
-         // The destination ports will not change, only the destination will
-         // be changed to point to the new compute node
-          IntrinsicInst* BI =
-            createIdenticalBindInputWithDifferentNode(II, IInewc);
-          IntrinsicInstructionsToAdd.push_back(BI);
-          CreateEdgeAndBindMap[II] = BI;
-        }
-        break;
-      case Intrinsic::visc_bind_output:
-       // These are the outputs to the parent node.
-        {
-          // If this goes to n2, ignore edge completely
-          unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-          if (N1c->getExtendedOutDFEdgeAt(srcPos)->getDestDF() != N2c) {
-            // this bind creates an edge that ends up to another node in the graph
-            // Change source to new compute node
-            IntrinsicInst* IItemp1 =
-              createIdenticalBindOutputWithDifferentNode(II, IInewc);
-            // Change source port to new port after outmap
-            IntrinsicInst* IItemp2 =
-              createIdenticalBindOutputWithDifferentPort(IItemp1,
-                                                         N1cOutMap[srcPos], true);
-            // Change destination port to new port after edgeoutmap
-          unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-            IntrinsicInst* BI =
-              createIdenticalBindOutputWithDifferentPort(IItemp2,
-                                                         N1OutDFEdgeMap[dstPos], false);
-            IntrinsicInstructionsToAdd.push_back(BI);
-            IntermediateInstructions.push_back(IItemp1);
-            IntermediateInstructions.push_back(IItemp2);
-            CreateEdgeAndBindMap[II] = BI;
-          }
-        }
-        break;
-      default:
-        errs() << "Unknown use: " << *II << "\n";
-        assert(false && "Unknown use of node handle");
-        break;
-    }
-  }
-
-  for (Value::user_iterator i = II2c->user_begin(), ie = II2c->user_end();
-       i != ie; ++i) { // Handle inputs and outputs of n2 compute node
-    Value *v = *i;
-    Instruction *VI = dyn_cast<Instruction>(v);
-    IntrinsicInst* II = dyn_cast<IntrinsicInst>(VI);
-    assert(II && "Use of a node handle outside of a visc intrinsic");
-
-    switch(II->getIntrinsicID()) {
-      case Intrinsic::visc_createEdge:
-        // This is between allocation and compute node of n2.
-        {
-          // These edges should have been handled when dealing with the
-          // allocation nodes
-          assert(CreateEdgeAndBindMap.find(II) != CreateEdgeAndBindMap.end() &&
-            "Edge between A-C node should have been handled while processing A");
-        }
-        break;
-      case Intrinsic::visc_bind_input:
-       // These are the inputs from the parent node.
-        {
-         // If this is incoming from n1 compute node, ignore completely
-          unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-          if (N2c->getExtendedInDFEdgeAt(dstPos)->getSourceDF() != N1c) {
-            // this bind creates an edge that comes from another node in the graph
-            // Change destination to new compute node
-            IntrinsicInst* IItemp1 =
-                createIdenticalBindInputWithDifferentNode(II, IInewc);
-            // Change source port to new port after edgeinmap
-            unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-            IntrinsicInst* IItemp2 =
-              createIdenticalBindInputWithDifferentPort(IItemp1,
-                                                        N2InDFEdgeMap[srcPos], true);
-            // Change destination port to new port after inmap and shift
-            unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-            IntrinsicInst* BI =
-              createIdenticalBindInputWithDifferentPort(IItemp2,
-                                                        FcShiftMap[N2cInMap[dstPos]], false);
-            IntrinsicInstructionsToAdd.push_back(BI);
-            IntermediateInstructions.push_back(IItemp1);
-            IntermediateInstructions.push_back(IItemp2);
-            CreateEdgeAndBindMap[II] = BI;
-          }
-        }
-        break;
-      case Intrinsic::visc_bind_output:
-       // These are the outputs to the parent node.
-        {
-          // this bind creates an edge that ends up to another node in the graph
-          // Change source to new compute node
-          IntrinsicInst* IItemp1 =
-              createIdenticalBindOutputWithDifferentNode(II, IInewc);
-          // Change source port to new port after outmap
-          unsigned srcPos = cast<ConstantInt>(II->getOperand(1))->getZExtValue();
-          IntrinsicInst* IItemp2 =
-            createIdenticalBindOutputWithDifferentPort(IItemp1,
-                                                       N2cOutMap[srcPos], true);
-          // Change destination port to new port after edgeoutmap
-          unsigned dstPos = cast<ConstantInt>(II->getOperand(2))->getZExtValue();
-          IntrinsicInst* BI =
-            createIdenticalBindOutputWithDifferentPort(IItemp2,
-                                                       N2OutDFEdgeMap[dstPos], false);
-          IntrinsicInstructionsToAdd.push_back(BI);
-          IntermediateInstructions.push_back(IItemp1);
-          IntermediateInstructions.push_back(IItemp2);
-          CreateEdgeAndBindMap[II] = BI;
-        }
-        break;
-      default:
-        assert(false && "Unknown use of node handle");
-        break;
-    }
-  }
-
-}
-
-void deleteInternalNodeFunction(DFNode* N, BuildDFG &DFG) {
-
-  if (dyn_cast<DFLeafNode>(N))
-    return;
-
-  for (inst_iterator i = inst_begin(N->getFuncPointer()),
-       e = inst_end(N->getFuncPointer()); i != e ; ++i) {
-    Instruction* I = &*i; // Grab pointer to Instruction
-    if(IntrinsicInst* II = dyn_cast<IntrinsicInst>(I)) {
-      switch(II->getIntrinsicID()) {
-        case Intrinsic::visc_createNode:
-        case Intrinsic::visc_createNode1D:
-        case Intrinsic::visc_createNode2D:
-        case Intrinsic::visc_createNode3D:
-          // ---------------------------------------------------------------- //
-          // Updating the BuildDFG result
-          // remove the node from mapping
-          DFG.removeElementFromHandleToDFNodeMap(II);
-          // ---------------------------------------------------------------- //
-          break;
-        case Intrinsic::visc_createEdge:
-        case Intrinsic::visc_bind_input:
-        case Intrinsic::visc_bind_output:
-          // ---------------------------------------------------------------- //
-          // Updating the BuildDFG result
-          // remove the edge from mapping
-          DFG.removeElementFromHandleToDFEdgeMap(II);
-          // ---------------------------------------------------------------- //
-          break;
-        default:
-          errs() << "Error: Invalid VISC Intrinsic inside Internal node!\n\t" << *II << "\n";
-          break;
-      }
-    }
-  }
-
-  // Erase Functions associated with node N
-  Function* F = N->getFuncPointer();
-
-errs() << "Removing " << F->getName() << "\n";
-  F->replaceAllUsesWith(UndefValue::get(F->getType()));
-  F->eraseFromParent();
-
-}
-
-/*
-void shiftAttrsToLeftBy(Function* F, unsigned shift, unsigned argNo) {
-  // Source attr location : i+shift (+1), dst : i (+1)
-  for (unsigned i = argno; i + shift < F->getArgumentList().size(); i++) {
-    AttributeSet AS = F->getAttributes();
-    AttrBuilder AB(AS, i+shift+1);
-    AttributeSet argAS = AttributeSet::get(F->getContext(), i+1, AB);
-    F->removeAttributes(i+1,AS.getParamAttributes(i+1));
-    F->addAttributes(i+1, argAS);
-  }
-
-}
-
-void shiftArgumentNamesToLeftBy(Function* F, unsigned shift, unsigned argNo) {
-  // Source attr location : i+shift (+1), dst : i (+1)
-
-  // Skip arguments up until argNo  
-  Function::arg_iterator ai = F->arg_begin(), ae = F->arg_end(), as = F->arg_begin();
-  for ( ; (ai != ae) && (ai->getArgNo() < argNo); ++ai, ++as) { }
-
-  // Find source of name  
-  for ( unsigned i = 0; (i < shift) && (as != ae); i++) {
-    ++as;
-  }
-
-  for ( ; (ai != ae) && (as != ae); ++ai, ++as) {
-    ai->setName(as->getName());
-  }
-
-}
-
-void removeFunctionArgument(Function* F, Argument *ArgToRemove) {
-
-  // Shift attributes one to the left
-  shiftAttrsToLeftBy(F, 1, ArgToRemove->getArgNo());
-  // Shift argument names one to the left
-  shiftArgumentNamesToLeftBy(F, 1, ArgToRemove->getArgNo());
-  // Update the type of F
-  std::vector<Type*> ArgTypes;
-  for(auto& arg: F->getArgumentList()) {
-    DEBUG(errs() << arg << "\n");
-    if (&arg != ArgToRemove)
-      ArgTypes.push_back(arg.getType());
-  }
-  FunctionType* FTy = FunctionType::get(F->getReturnType(), ArgTypes, F->isVarArg());
-  PointerType* PTy = FTy->getPointerTo();
-  F->mutateType(PTy);
-
-}
-*/
-
-Argument* getFunctionArgumentAt(Function* F, unsigned i) {
-  assert((i < F->getArgumentList().size()) && "Requesting argument in invalid position");
-  for (auto& arg: F->getArgumentList()) {
-    if (arg.getArgNo() == i)
-      return &arg;
-  }
-  return NULL;
-}
-
-// TODO
-void removeUnnecessaryInputEdges(DFNode* N, DFNode* N1,
-                                 unsigned numOfN1AllocArgs,
-                                 unsigned numOfN2AllocArgs) {
-  Function* F = N->getFuncPointer();
-  Function* F1 = N1->getFuncPointer();
-  // Compute these once - they may change while in the loop
-  unsigned f1ArgListSize = F1->getArgumentList().size();
-  unsigned fArgListSize = F->getArgumentList().size();
-  // Iterate over input parameters of F1 without allocation arguments
-  for (unsigned i = 0; i < f1ArgListSize - numOfN1AllocArgs; i++) {
-    DFEdge* N1InEdge = N->getInDFEdgeAt(i);
-    unsigned n1SrcPos = N1InEdge->getSourcePosition();
-    for (unsigned j = f1ArgListSize - numOfN1AllocArgs,
-                  pos = f1ArgListSize - numOfN1AllocArgs;
-         j < fArgListSize - numOfN2AllocArgs; j++, pos++) {
-      DFEdge* N2InEdge = N->getInDFEdgeAt(pos);
-      unsigned n2SrcPos = N2InEdge->getSourcePosition();
-      Argument* n1arg = getFunctionArgumentAt(F, i);
-      Argument* n2arg = getFunctionArgumentAt(F, j);
-      DEBUG(errs() << "Comparing " << *n1arg << " with " << *n2arg << "\n");
-      // If the edges are coming from the same position of the same source node
-      // If the arguments are not pointer arguments, or if they are pointer
-      // arguments without the out attribute (they are only used as inputs)
-      if ((N1InEdge->getSourceDF() == N2InEdge->getSourceDF()) &&
-          (n1SrcPos == n2SrcPos) &&
-          ((!(n1arg->getType()->isPointerTy()) &&
-            !(n2arg->getType()->isPointerTy())) ||
-           (!(hasAttribute(F, i, Attribute::Out)) &&
-            !(hasAttribute(F, pos, Attribute::Out))) ) ) {
-         DEBUG(errs() << "Replacing " << *n1arg << " with " << *n2arg << "\n");
-        // It is safe to remove the second argument and replace its uses with
-        // the first one
-        n2arg->replaceAllUsesWith(n1arg);
-//        removeFunctionArgument(F, n2arg); TODO
-//        removeInputEdgeAt(F, pos);        TODO
-      } else {
-        // It is not safe to remove the second argument. Update position
-//        pos++;                            TODO increase here instead of loop increment
-      }
-    }
-  }
-}
-
-// This function checks the metadata in visc code for a function's target hint
-static visc::Target getPreferredTarget(Function* F) {
-  DEBUG(errs() << "Finding preferred target for " << F->getName() << "\n");
-  Module* M = F->getParent();
-  // checking for GPU hint
-  NamedMDNode* HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu");
-  for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
-    MDNode* N = HintNode->getOperand(i);
-    Value* FHint = dyn_cast<ValueAsMetadata>(N->getOperand(0).get())->getValue();
-    if(F == FHint)
-      return visc::GPU_TARGET;
-  }
-
-  // checking for SPIR hint
-  HintNode = M->getOrInsertNamedMetadata("visc_hint_spir");
-  for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
-    MDNode* N = HintNode->getOperand(i);
-    Value* FHint = dyn_cast<ValueAsMetadata>(N->getOperand(0).get())->getValue();
-    if(F == FHint)
-      return visc::SPIR_TARGET;
-  }
-
-  // checking for CPU hint
-  HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu");
-  for(unsigned i = 0; i < HintNode->getNumOperands(); i++) {
-    MDNode* N = HintNode->getOperand(i);
-    Value* FHint = dyn_cast<ValueAsMetadata>(N->getOperand(0).get())->getValue();
-    if(F == FHint)
-      return visc::CPU_TARGET;
-  }
-  return visc::None;
-}
-
-// This function adds the hint as metadata in visc code
-static void addHint(Function* F, visc::Target T) {
-  // Get Module
-  Module* M = F->getParent();
-  DEBUG(errs() << "Set preferred target for " << F->getName() << ": " << T << "\n");
-
-  // Based on the hint, get the hint metadata
-  NamedMDNode* HintNode;
-  switch (T) {
-    case visc::GPU_TARGET:
-      HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu");
-      break;
-    case visc::SPIR_TARGET:
-      HintNode = M->getOrInsertNamedMetadata("visc_hint_spir");
-      break;
-    case visc::CPU_TARGET:
-      HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu");
-      break;
-    default:
-      llvm_unreachable("Unsupported Target Hint!");
-      break;
-  }
-
-  // Create a node for the function and add it to the hint node
-  MDNode* N = MDNode::get(M->getContext(), ArrayRef<Metadata*>(ValueAsMetadata::get(F)));
-  HintNode->addOperand(N);
-}
-
-// This function removes the hint as metadata in visc code
-static void removeHint(Function* F, visc::Target T) {
-  // Get Module
-  Module* M = F->getParent();
-  DEBUG(errs() << "Remove preferred target for " << F->getName() << ": " << T << "\n");
-
-  // Based on the hint, get the hint metadata
-  NamedMDNode* HintNode;
-  switch (T) {
-    case visc::GPU_TARGET:
-      HintNode = M->getOrInsertNamedMetadata("visc_hint_gpu");
-      break;
-    case visc::SPIR_TARGET:
-      HintNode = M->getOrInsertNamedMetadata("visc_hint_spir");
-      break;
-    case visc::CPU_TARGET:
-      HintNode = M->getOrInsertNamedMetadata("visc_hint_cpu");
-      break;
-    default:
-      llvm_unreachable("Unsupported Target Hint!");
-      break;
-  }
-
-  // Gather metadata nodes, and keep those not associated with this function
-  MDNode* N = MDNode::get(M->getContext(), ArrayRef<Metadata*>(ValueAsMetadata::get(F)));
-  std::vector<MDNode*> MDNodes;
-
-  for (unsigned i = 0; i < HintNode->getNumOperands(); i++) {
-    MDNode* MDN = HintNode->getOperand(i);
-    if (MDN == N) {
-      continue;
-    }
-    MDNodes.push_back(MDN);
-  }
-
-  HintNode->dropAllReferences();
-
-  for (unsigned i = 0; i < MDNodes.size(); i++) {
-    HintNode->addOperand(MDNodes[i]);
-  }
-
-}
-
-std::string getTestModuleName(Module &M) {
-  std::string mid = M.getModuleIdentifier();
-  return mid.append(".original.ll");
-}
-
-} // End of namespace mergedfn
-
-char MergeDFN::ID = 0;
-static RegisterPass<MergeDFN> X("mergedfn",
-                                "Dataflow node merging optimization",
-                                true /* modifies the CFG */,
-                                true /* transformation,   *
-                                      * not just analysis */);
-
diff --git a/lib/MergeDFN/MergeDFN.exports b/lib/MergeDFN/MergeDFN.exports
deleted file mode 100644
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000