diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..dc98faafeca139586e3891b4defc4e20c8bfae00
--- /dev/null
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/CMakeLists.txt
@@ -0,0 +1,12 @@
# On Windows/Cygwin, loadable modules may not have undefined symbols, so the
# LLVM components this pass uses must be linked into the plugin directly.
if(WIN32 OR CYGWIN)
  set(LLVM_LINK_COMPONENTS Core Support)
endif()

# Build the pass as a dynamically loadable module (for use with `opt -load`).
add_llvm_loadable_module( LLVMDFG2LLVM_CUDNN
  DFG2LLVM_CUDNN.cpp

  DEPENDS
  intrinsics_gen
  PLUGIN_TOOL
  opt
  )
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..3699a43a49c9beb11b011bd684cb8f038b03e00c
--- /dev/null
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.cpp
@@ -0,0 +1,338 @@
+//=== DFG2LLVM_CUDNN.cpp ===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+#define ENABLE_ASSERTS
+
+#define DEBUG_TYPE "DFG2LLVM_CUDNN"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/Transforms/Utils/ValueMapper.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/IRReader/IRReader.h"
+#include "llvm/Linker/Linker.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm-c/Core.h"
+#include "llvm/SupportVISC/VISCTimer.h"
+#include "llvm/SupportVISC/DFG2LLVM.h"
+#include "llvm/InPlaceDFG/InPlaceDFGAnalysis.h"
+#include <sstream>
+
+using namespace llvm;
+using namespace builddfg;
+using namespace dfg2llvm;
+
+using namespace inplacedfg;
+
+namespace {
+// Helper class declarations
+
+// DFG2LLVM_CUDNN - The first implementation.
+
// DFG2LLVM_CUDNN - Module pass that lowers ApproxHPVM dataflow leaf nodes
// targeted at CUDNN into calls to the HPVM tensor runtime. All of the work
// is driven from runOnModule via a CGT_CUDNN code-generation traversal.
struct DFG2LLVM_CUDNN : public DFG2LLVM {
  static char ID; // Pass identification, replacement for typeid
  DFG2LLVM_CUDNN() : DFG2LLVM(ID) {}
private:

public:
  // Pass entry point; returns true because the module is modified.
  bool runOnModule(Module &M);
};
+
+// Visitor for Code generation traversal (tree traversal for now)
// Visitor for Code generation traversal (tree traversal for now).
// Visits each DFNode of the dataflow graph and, for CUDNN-targeted leaf
// nodes, clones the node function and rewrites HPVM tensor intrinsics into
// tensor-runtime calls.
class CGT_CUDNN : public CodeGenTraversal {

private:
  // Member variables

  // Result of the in-place DFG analysis; consulted in codeGen(DFLeafNode*)
  // to decide whether an argument may legally be used for an in-place
  // tensor operation. Non-owning pointer to the caller's structure.
  InPlaceDFGAnalysis::InPlaceDFGParameter *IPP;

  // VISC Runtime API and Tensor runtime API
  Constant* llvm_visc_initTensorRt; // FIXME: sync names
  Constant* llvm_visc_cleanupTensorRt; // FIXME: add function
  Constant* llvm_visc_requestTensor; // FIXME: add function

  // Functions

  // Virtual Functions (overrides of CodeGenTraversal hooks)
  void init();
  void initRuntimeAPI();
  void codeGen(DFInternalNode* N);
  void codeGen(DFLeafNode* N);

public:

  // Constructor. Loads the tensor-runtime module and inserts the runtime
  // init/cleanup calls immediately (see initRuntimeAPI).
  CGT_CUDNN(Module &_M, BuildDFG &_DFG, InPlaceDFGAnalysis::InPlaceDFGParameter &_IPP) : CodeGenTraversal(_M, _DFG), IPP(&_IPP) {
    initRuntimeAPI();
  }

};
+
// Per-traversal initialization hook required by CodeGenTraversal.
// Intentionally empty for this backend: all setup happens in
// initRuntimeAPI(), called from the constructor.
void CGT_CUDNN::init() {
  // FIXME: what to do here? If anything?
}
+
+// Initialize the VISC runtime API. This makes it easier to insert these calls
+void CGT_CUDNN::initRuntimeAPI() {
+
+  // Load Runtime API Module
+  SMDiagnostic Err;
+
+  char* LLVM_SRC_ROOT = getenv("LLVM_SRC_ROOT");
+  assert(LLVM_SRC_ROOT != NULL && "Define LLVM_SRC_ROOT environment variable!");
+
+  // FIXME: set correct path
+  Twine llvmSrcRoot = LLVM_SRC_ROOT;
+  Twine runtimeAPI = llvmSrcRoot+"/../build/projects/hpvm-tensor-rt/hpvm-tensor-rt.ll";
+
+  runtimeModule = parseIRFile(runtimeAPI.str(), Err, M.getContext());
+  if(runtimeModule == nullptr)
+    DEBUG(errs() << Err.getMessage());
+  else
+    DEBUG(errs() << "Successfully loaded hpvm-tensor-rt API module\n");
+
+
+  // Get or insert Global declarations for
+  // - initialization
+  // - cleanup
+  // - request a tensor
+  DECLARE(llvm_visc_initTensorRt);
+  DECLARE(llvm_visc_cleanupTensorRt);
+  DECLARE(llvm_visc_requestTensor);
+
+  // Find visc.init and visc.cleanup calls, and add placeholder methods
+  // for initialization and cleanup of the hpvm tensor runtime
+
+  Function* VI = M.getFunction("llvm.visc.init");
+  assert(VI->getNumUses() == 1 && "__visc__init should only be used once\n");
+  InitCall = cast<Instruction>(*VI->user_begin());
+  CallInst::Create(llvm_visc_initTensorRt, ArrayRef<Value*>(), "", InitCall);
+
+  Function* VC = M.getFunction("llvm.visc.cleanup");
+  assert(VC->getNumUses() == 1 && "__visc__clear should only be used once\n");
+  CleanupCall = cast<Instruction>(*VC->user_begin());
+  CallInst::Create(llvm_visc_cleanupTensorRt, ArrayRef<Value*>(), "", CleanupCall);
+
+}
+
+
+void CGT_CUDNN::codeGen(DFInternalNode* N) {
+  errs () << "Inside node: " << N->getFuncPointer()->getName() << "\n";
+  errs () << "Skipping internal node\n";
+}
+
+void CGT_CUDNN::codeGen(DFLeafNode* N) {
+
+  // Skip code generation if it is a dummy node
+  if(N->isDummyNode()) {
+    DEBUG(errs() << "Skipping dummy node\n");
+    return;
+  }
+
+  // Abort code generation if it is an allocation node
+  if(N->isAllocationNode()) {
+    assert(false && "Allocation Node not expected in ApproxHPVM");
+    return;
+  }
+
+  // Generate code only if it has the right hint
+  if (!checkPreferredTarget(N, visc::CUDNN_TARGET)) {
+    errs() << "Skipping node: "<< N->getFuncPointer()->getName() << "\n";
+    return;
+  }
+
+  // Get the function associated with the dataflow node
+  Function *F = N->getFuncPointer();
+
+  // Look up if we have visited this function before. If we have, then just
+  // get the cloned function pointer from DFNode. Otherwise, create the cloned
+  // function and add it to the DFNode GenFunc.
+  Function *F_cudnn = N->getGenFuncForTarget(visc::CUDNN_TARGET);
+
+  assert((F_cudnn == NULL) &&
+         "Error: Visiting a node for which code already generated");
+
+  // Clone the function
+  ValueToValueMapTy VMap;
+  Twine FName = F->getName();
+  F_cudnn = CloneFunction(F, VMap);
+  F_cudnn->setName(FName+"_cudnn");
+
+  N->addGenFunc(F_cudnn, visc::CUDNN_TARGET, true);
+
+  // Adding nounwind to generated function : FIXME: needed?
+  DEBUG(errs() << "Adding nounwind to generated function\n");
+  F_cudnn->addAttribute(AttributeSet::FunctionIndex, Attribute::NoUnwind);
+
+  // Add llvm_visc_requestTensor calls for every pointer argument of the function
+  // (they are all expected to be tensors), at the beginning of the function.
+  // This is the first instruction of the function, insert them before this
+  Instruction* FI = &*(F_cudnn->getEntryBlock().begin());
+
+  // In this backend, the target device is GPU.
+  // Create an LLVM Value that represents the visc::GPU_TARGET FIXME
+
+  for (Function::arg_iterator ai = F_cudnn->arg_begin(),
+       ae = F_cudnn->arg_end(); ai != ae; ++ai) {
+    Argument* Arg = &*ai;
+    if (Arg->getType()->isPointerTy()) {
+      CallInst::Create(llvm_visc_requestTensor,
+                       ArrayRef<Value*>(Arg), // FIXME: add second argument
+                       "", FI);
+    }
+  }
+
+  std::vector<IntrinsicInst *> IItoRemove;
+
+  for (inst_iterator i = inst_begin(F_cudnn), e = inst_end(F_cudnn); i != e; ++i) {
+    Instruction *I = &(*i);
+
+    if (BuildDFG::isViscIntrinsic(I)) {
+      IntrinsicInst* II = dyn_cast<IntrinsicInst>(I);
+      assert((II->getCalledFunction()->getName()).startswith("llvm.visc.tensor")
+        && "Only HPVM tensor intrinsics allowed in ApproxHPVM leaf nodes\n");
+
+      /********************* Handle VISC Tensor intrinsics ********************/
+      switch (II->getIntrinsicID()) {
+
+      case Intrinsic::visc_tensor_mul:
+      { /* llvm.hpvm.tensor.mul */
+        // Tensor mul is not in place.
+        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor mul\n");
+
+        // Argument list for the runtime call
+        std::vector<Value*> Args;
+        Args.push_back(II->getOperand(0));
+        Args.push_back(II->getOperand(1));
+
+        // Create cudnn runtime function call
+        Constant* tensorGemmGPU;
+        DECLARE(tensorGemmGPU);
+        CallInst* CI = CallInst::Create(tensorGemmGPU,
+                                        Args, "", II);
+        // We can replace the call to hpvm.tensor.mul with the runtime call
+        II->replaceAllUsesWith(CI);
+
+        // Mark to remove at the end
+        IItoRemove.push_back(II);
+      }
+      break;
+      case Intrinsic::visc_tensor_add:
+      { /* llvm.hpvm.tensor.add */
+        DEBUG(errs() << F_cudnn->getName() << "\t: Handling tensor add\n");
+        // Tensor add(a,b) is in place for argument a.
+        Value *Op = II->getOperand(0);
+
+        // First, test if it is a parameter of the function
+        if (Argument *Arg = dyn_cast<Argument>(Op)) {
+          DEBUG(errs() << *Arg << "\t: argument, candidate for in place\n");
+          assert((Arg->getParent() == F_cudnn) &&
+            "Extra Parameter in body of Function\n");
+          // In this case, consult the result of in place analysis
+          // Find position in arg list
+          unsigned pos = Arg->getArgNo();
+          // If this parameter cannot be used for in place operation
+          // code gen cannot continue
+          assert((IPP->at(N)[pos]) &&
+            "Only in place operation provided by CUDNN runtime but \
+             parameter is not eligible for in place operation\n");
+        } else {
+          // If it is not an argument, then it needs to be the result of
+          // another intrinsic. These are new objects that are allocated,
+          // and consumed by next intrinsic. 
+          DEBUG(errs() << *Op << "\t: Test for intrinsic operation\n");
+          assert((dyn_cast<IntrinsicInst>(Op)) &&
+            "Operand not valid for in place operation. Code gen aborted.\n");
+        }
+
+        // Argument list for the runtime call
+        std::vector<Value*> Args;
+        Args.push_back(II->getOperand(0));
+        Args.push_back(II->getOperand(1));
+
+        // Create cudnn runtime function call
+        Constant* tensorAdd;
+        DECLARE(tensorAdd);
+        CallInst* CI = CallInst::Create(tensorAdd,
+                                 Args, "", II);
+        // We can replace the call to hpvm.tensor.add with the 1st argument
+        // that, due to in place operation, now contains the result
+        II->replaceAllUsesWith(II->getOperand(0));
+
+        // Mark to remove at the end
+        IItoRemove.push_back(II);
+      }
+      break;
+      default:
+        llvm_unreachable("Unknown VISC Intrinsic!");
+        break;
+      }
+    }
+  }
+
+  // We need to do this explicitly: DCE pass may not remove them.
+  // Traverse the vector backwards, otherwise definitions are deleted while
+  // their subsequent uses are still around.
+  for (std::vector<IntrinsicInst *>::reverse_iterator ri = IItoRemove.rbegin(),
+       re = IItoRemove.rend(); ri != re; ++ri) {
+    DEBUG(errs() << "Erasing: " << **ri << "\n");
+    (*ri)->eraseFromParent();
+  }
+
+  return;
+}
+
+bool DFG2LLVM_CUDNN::runOnModule(Module &M) {
+  errs() << "\nDFG2LLVM_CUDNN PASS\n";
+
+  // Get the BuildDFG Analysis Results:
+  // - Dataflow graph
+  BuildDFG &DFG = getAnalysis<BuildDFG>();
+
+  // Get the In Place Analysis Results
+  InPlaceDFGAnalysis::InPlaceDFGParameter IPP =
+    (getAnalysis<InPlaceDFGAnalysisWrapper>()).getIPP();
+  // Print results
+  printInPlaceDFGParameter(IPP);
+
+  std::vector<DFInternalNode*> Roots = DFG.getRoots();
+ 
+  // Visitor for Code Generation Graph Traversal
+  CGT_CUDNN *CGTVisitor = new CGT_CUDNN(M, DFG, IPP);
+
+  // Iterate over all the DFGs and produce code for each one of them
+  for (auto rootNode: Roots) {
+    // Initiate code generation for root DFNode
+    CGTVisitor->visit(rootNode);
+  }
+
+  //TODO: Edit module epilogue to remove the VISC intrinsic declarations
+  delete CGTVisitor;
+
+  return true;
+}
+
+
+/******************************************************************************
+ *                              Helper functions                              *
+ ******************************************************************************/
+
+
+} // End of namespace
+
char DFG2LLVM_CUDNN::ID = 0;
// Register the pass so it is available as `opt -dfg2llvm-cudnn`.
// RegisterPass arguments: (arg, name, CFGOnly, is_analysis).
static RegisterPass<DFG2LLVM_CUDNN> X("dfg2llvm-cudnn",
                                      "Dataflow Graph to LLVM for CUDNN Pass",
                                      false /* CFGOnly: pass looks at more
                                             * than the CFG */,
                                      true /* is_analysis -- NOTE(review):
                                            * `true` marks this as an analysis
                                            * pass, yet runOnModule mutates the
                                            * module; confirm the flag */);
+
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports b/llvm/lib/Transforms/DFG2LLVM_CUDNN/DFG2LLVM_CUDNN.exports
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt b/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt
new file mode 100644
index 0000000000000000000000000000000000000000..1579b2fc47f527c7b10cc8e5f5b0e45fad742ad8
--- /dev/null
+++ b/llvm/lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt
@@ -0,0 +1,21 @@
+;===- ./lib/Transforms/DFG2LLVM_CUDNN/LLVMBuild.txt ------------*- Conf -*--===;
+;
+;                     The LLVM Compiler Infrastructure
+;
+; This file is distributed under the University of Illinois Open Source
+; License. See LICENSE.TXT for details.
+;
+;===------------------------------------------------------------------------===;
+;
+; This is an LLVMBuild description file for the components in this subdirectory.
+;
+; For more information on the LLVMBuild system, please see:
+;
+;   http://llvm.org/docs/LLVMBuild.html
+;
+;===------------------------------------------------------------------------===;
+
+[component_0]
+type = Library
+name = DFG2LLVM_CUDNN
+parent = Transforms