diff --git a/hpvm/llvm_patches/apply_patch.sh b/hpvm/llvm_patches/apply_patch.sh
deleted file mode 100644
index 289e5c11e319aa16262952d2d079f986c2e987b8..0000000000000000000000000000000000000000
--- a/hpvm/llvm_patches/apply_patch.sh
+++ /dev/null
@@ -1,34 +0,0 @@
-#!/bin/sh
-
-### File Copies
-cp include/IR/IntrinsicsHPVM.td  ${LLVM_SRC_ROOT}/include/llvm/IR/IntrinsicsHPVM.td
-
-
-## Header File Patches
-patch  ${LLVM_SRC_ROOT}/include/llvm/IR/Attributes.td  <  ./include/IR/Attributes.td.patch 
-
-patch  ${LLVM_SRC_ROOT}/include/llvm/IR/Intrinsics.td  <  ./include/IR/Intrinsics.td.patch
-
-patch  ${LLVM_SRC_ROOT}/include/llvm/Bitcode/LLVMBitCodes.h  <  ./include/Bitcode/LLVMBitCodes.h.patch
-
-patch  ${LLVM_SRC_ROOT}/include/llvm/Support/Debug.h   <   ./include/Support/Debug.h.patch
-
-
-#### Patching Sources 
-
-
-patch  ${LLVM_SRC_ROOT}/lib/AsmParser/LLLexer.cpp   <  ./lib/AsmParser/LLLexer.cpp.patch 
-
-patch  ${LLVM_SRC_ROOT}/lib/AsmParser/LLLexer.h   <  ./lib/AsmParser/LLLexer.h.patch
-
-patch  ${LLVM_SRC_ROOT}/lib/AsmParser/LLParser.cpp   <   ./lib/AsmParser/LLParser.cpp.patch
-
-patch  ${LLVM_SRC_ROOT}/lib/AsmParser/LLParser.h   <   ./lib/AsmParser/LLParser.h.patch
-
-patch  ${LLVM_SRC_ROOT}/lib/AsmParser/LLToken.h   <   ./lib/AsmParser/LLToken.h.patch
-
-patch  ${LLVM_SRC_ROOT}/lib/IR/Attributes.cpp  <  ./lib/IR/Attributes.cpp.patch
-
-patch  ${LLVM_SRC_ROOT}/lib/Bitcode/Reader/BitcodeReader.cpp   <   ./lib/Bitcode/Reader/BitcodeReader.cpp.patch
-
-patch  ${LLVM_SRC_ROOT}/lib/Bitcode/Writer/BitcodeWriter.cpp  <   ./lib/Bitcode/Writer/BitcodeWriter.cpp.patch
diff --git a/hpvm/llvm_patches/construct_patch.sh b/hpvm/llvm_patches/construct_patch.sh
deleted file mode 100644
index b957c853e71f59bc17e7def6d544c86eefd382b6..0000000000000000000000000000000000000000
--- a/hpvm/llvm_patches/construct_patch.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/bin/sh
-
-#### Computing Header Diff
-for file in Bitcode/LLVMBitCodes.h IR/Attributes.td IR/Intrinsics.td Support/Debug.h; do
-    diff -u $LLVM_SRC_ROOT/include/llvm/$file include/$file > include/$file.patch || true
-done
-#### Computing Source File Diff
-for file in AsmParser/LLLexer.cpp AsmParser/LLLexer.h AsmParser/LLParser.cpp \
-            AsmParser/LLParser.h AsmParser/LLToken.h IR/Attributes.cpp \
-            Bitcode/Reader/BitcodeReader.cpp Bitcode/Writer/BitcodeWriter.cpp; do
-    diff -u $LLVM_SRC_ROOT/lib/$file lib/$file > lib/$file.patch || true
-done
diff --git a/hpvm/llvm_patches/include/llvm/ADT/DirectedGraph.h b/hpvm/llvm_patches/include/llvm/ADT/DirectedGraph.h
new file mode 100644
index 0000000000000000000000000000000000000000..cfe98e178a91dfc413b13106d6ff41568562c71d
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/ADT/DirectedGraph.h
@@ -0,0 +1,273 @@
+//===- llvm/ADT/DirectedGraph.h - Directed Graph ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface and a base class implementation for a
+// directed graph.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_DIRECTEDGRAPH_H
+#define LLVM_ADT_DIRECTEDGRAPH_H
+
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+
+namespace llvm {
+
+/// Represent an edge in the directed graph.
+/// The edge contains the target node it connects to.
+template <class NodeType, class EdgeType> class DGEdge {
+public:
+  DGEdge() = delete;
+  /// Create an edge pointing to the given node \p N.
+  explicit DGEdge(NodeType &N) : TargetNode(N) {}
+  explicit DGEdge(const DGEdge<NodeType, EdgeType> &E)
+      : TargetNode(E.TargetNode) {}
+  DGEdge<NodeType, EdgeType> &operator=(const DGEdge<NodeType, EdgeType> &E) {
+    TargetNode = E.TargetNode;
+    return *this;
+  }
+
+  /// Static polymorphism: delegate implementation (via isEqualTo) to the
+  /// derived class.
+  bool operator==(const EdgeType &E) const { return getDerived().isEqualTo(E); }
+  bool operator!=(const EdgeType &E) const { return !operator==(E); }
+
+  /// Retrieve the target node this edge connects to.
+  const NodeType &getTargetNode() const { return TargetNode; }
+  NodeType &getTargetNode() {
+    return const_cast<NodeType &>(
+        static_cast<const DGEdge<NodeType, EdgeType> &>(*this).getTargetNode());
+  }
+
+  /// Set the target node this edge connects to.
+  void setTargetNode(const NodeType &N) { TargetNode = N; }
+
+protected:
+  // The default implementation uses address comparison for equality.
+  bool isEqualTo(const EdgeType &E) const { return this == &E; }
+
+  // Cast the 'this' pointer to the derived type and return a reference.
+  EdgeType &getDerived() { return *static_cast<EdgeType *>(this); }
+  const EdgeType &getDerived() const {
+    return *static_cast<const EdgeType *>(this);
+  }
+
+  // The target node this edge connects to.
+  NodeType &TargetNode;
+};
+
+/// Represent a node in the directed graph.
+/// The node has a (possibly empty) list of outgoing edges.
+template <class NodeType, class EdgeType> class DGNode {
+public:
+  using EdgeListTy = SetVector<EdgeType *>;
+  using iterator = typename EdgeListTy::iterator;
+  using const_iterator = typename EdgeListTy::const_iterator;
+
+  /// Create a node with a single outgoing edge \p E.
+  explicit DGNode(EdgeType &E) : Edges() { Edges.insert(&E); }
+  DGNode() = default;
+
+  explicit DGNode(const DGNode<NodeType, EdgeType> &N) : Edges(N.Edges) {}
+  DGNode(DGNode<NodeType, EdgeType> &&N) : Edges(std::move(N.Edges)) {}
+
+  DGNode<NodeType, EdgeType> &operator=(const DGNode<NodeType, EdgeType> &N) {
+    Edges = N.Edges;
+    return *this;
+  }
+  DGNode<NodeType, EdgeType> &operator=(const DGNode<NodeType, EdgeType> &&N) {
+    Edges = std::move(N.Edges);
+    return *this;
+  }
+
+  /// Static polymorphism: delegate implementation (via isEqualTo) to the
+  /// derived class.
+  bool operator==(const NodeType &N) const { return getDerived().isEqualTo(N); }
+  bool operator!=(const NodeType &N) const { return !operator==(N); }
+
+  const_iterator begin() const { return Edges.begin(); }
+  const_iterator end() const { return Edges.end(); }
+  iterator begin() { return Edges.begin(); }
+  iterator end() { return Edges.end(); }
+  const EdgeType &front() const { return *Edges.front(); }
+  EdgeType &front() { return *Edges.front(); }
+  const EdgeType &back() const { return *Edges.back(); }
+  EdgeType &back() { return *Edges.back(); }
+
+  /// Collect in \p EL, all the edges from this node to \p N.
+  /// Return true if at least one edge was found, and false otherwise.
+  /// Note that this implementation allows more than one edge to connect
+  /// a given pair of nodes.
+  bool findEdgesTo(const NodeType &N, SmallVectorImpl<EdgeType *> &EL) const {
+    assert(EL.empty() && "Expected the list of edges to be empty.");
+    for (auto *E : Edges)
+      if (E->getTargetNode() == N)
+        EL.push_back(E);
+    return !EL.empty();
+  }
+
+  /// Add the given edge \p E to this node, if it doesn't exist already. Returns
+  /// true if the edge is added and false otherwise.
+  bool addEdge(EdgeType &E) { return Edges.insert(&E); }
+
+  /// Remove the given edge \p E from this node, if it exists.
+  void removeEdge(EdgeType &E) { Edges.remove(&E); }
+
+  /// Test whether there is an edge that goes from this node to \p N.
+  bool hasEdgeTo(const NodeType &N) const {
+    return (findEdgeTo(N) != Edges.end());
+  }
+
+  /// Retrieve the outgoing edges for the node.
+  const EdgeListTy &getEdges() const { return Edges; }
+  EdgeListTy &getEdges() {
+    return const_cast<EdgeListTy &>(
+        static_cast<const DGNode<NodeType, EdgeType> &>(*this).Edges);
+  }
+
+  /// Clear the outgoing edges.
+  void clear() { Edges.clear(); }
+
+protected:
+  // The default implementation uses address comparison for equality.
+  bool isEqualTo(const NodeType &N) const { return this == &N; }
+
+  // Cast the 'this' pointer to the derived type and return a reference.
+  NodeType &getDerived() { return *static_cast<NodeType *>(this); }
+  const NodeType &getDerived() const {
+    return *static_cast<const NodeType *>(this);
+  }
+
+  /// Find an edge to \p N. If more than one edge exists, this will return
+  /// the first one in the list of edges.
+  const_iterator findEdgeTo(const NodeType &N) const {
+    return llvm::find_if(
+        Edges, [&N](const EdgeType *E) { return E->getTargetNode() == N; });
+  }
+
+  // The list of outgoing edges.
+  EdgeListTy Edges;
+};
+
+/// Directed graph
+///
+/// The graph is represented by a table of nodes.
+/// Each node contains a (possibly empty) list of outgoing edges.
+/// Each edge contains the target node it connects to.
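+///
+/// A minimal usage sketch (the MyNode/MyEdge names below are hypothetical,
+/// not part of this header): concrete graphs derive their node and edge
+/// types from DGNode and DGEdge via CRTP and instantiate DirectedGraph over
+/// them.
+///
+/// \code
+///   struct MyNode;
+///   struct MyEdge : DGEdge<MyNode, MyEdge> {
+///     explicit MyEdge(MyNode &N) : DGEdge<MyNode, MyEdge>(N) {}
+///   };
+///   struct MyNode : DGNode<MyNode, MyEdge> {};
+///
+///   MyNode A, B;
+///   MyEdge E(B);                     // an edge whose target is B
+///   DirectedGraph<MyNode, MyEdge> G;
+///   G.addNode(A);
+///   G.addNode(B);
+///   G.connect(A, B, E);              // A -> B
+/// \endcode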
+template <class NodeType, class EdgeType> class DirectedGraph {
+protected:
+  using NodeListTy = SmallVector<NodeType *, 10>;
+  using EdgeListTy = SmallVector<EdgeType *, 10>;
+public:
+  using iterator = typename NodeListTy::iterator;
+  using const_iterator = typename NodeListTy::const_iterator;
+  using DGraphType = DirectedGraph<NodeType, EdgeType>;
+
+  DirectedGraph() = default;
+  explicit DirectedGraph(NodeType &N) : Nodes() { addNode(N); }
+  DirectedGraph(const DGraphType &G) : Nodes(G.Nodes) {}
+  DirectedGraph(DGraphType &&RHS) : Nodes(std::move(RHS.Nodes)) {}
+  DGraphType &operator=(const DGraphType &G) {
+    Nodes = G.Nodes;
+    return *this;
+  }
+  DGraphType &operator=(const DGraphType &&G) {
+    Nodes = std::move(G.Nodes);
+    return *this;
+  }
+
+  const_iterator begin() const { return Nodes.begin(); }
+  const_iterator end() const { return Nodes.end(); }
+  iterator begin() { return Nodes.begin(); }
+  iterator end() { return Nodes.end(); }
+  const NodeType &front() const { return *Nodes.front(); }
+  NodeType &front() { return *Nodes.front(); }
+  const NodeType &back() const { return *Nodes.back(); }
+  NodeType &back() { return *Nodes.back(); }
+
+  size_t size() const { return Nodes.size(); }
+
+  /// Find the given node \p N in the table.
+  const_iterator findNode(const NodeType &N) const {
+    return llvm::find_if(Nodes,
+                         [&N](const NodeType *Node) { return *Node == N; });
+  }
+  iterator findNode(const NodeType &N) {
+    return const_cast<iterator>(
+        static_cast<const DGraphType &>(*this).findNode(N));
+  }
+
+  /// Add the given node \p N to the graph if it is not already present.
+  bool addNode(NodeType &N) {
+    if (findNode(N) != Nodes.end())
+      return false;
+    Nodes.push_back(&N);
+    return true;
+  }
+
+  /// Collect in \p EL all edges that are coming into node \p N. Return true
+  /// if at least one edge was found, and false otherwise.
+  bool findIncomingEdgesToNode(const NodeType &N,
+                               SmallVectorImpl<EdgeType *> &EL) const {
+    assert(EL.empty() && "Expected the list of edges to be empty.");
+    EdgeListTy TempList;
+    for (auto *Node : Nodes) {
+      if (*Node == N)
+        continue;
+      Node->findEdgesTo(N, TempList);
+      EL.insert(EL.end(), TempList.begin(), TempList.end());
+      TempList.clear();
+    }
+    return !EL.empty();
+  }
+
+  /// Remove the given node \p N from the graph. If the node has incoming or
+  /// outgoing edges, they are also removed. Return true if the node was found
+  /// and then removed, and false if the node was not found in the graph to
+  /// begin with.
+  bool removeNode(NodeType &N) {
+    iterator IT = findNode(N);
+    if (IT == Nodes.end())
+      return false;
+    // Remove incoming edges.
+    EdgeListTy EL;
+    for (auto *Node : Nodes) {
+      if (*Node == N)
+        continue;
+      Node->findEdgesTo(N, EL);
+      for (auto *E : EL)
+        Node->removeEdge(*E);
+      EL.clear();
+    }
+    N.clear();
+    Nodes.erase(IT);
+    return true;
+  }
+
+  /// Assuming nodes \p Src and \p Dst are already in the graph, connect node \p
+  /// Src to node \p Dst using the provided edge \p E. Return true if \p Src is
+  /// not already connected to \p Dst via \p E, and false otherwise.
+  bool connect(NodeType &Src, NodeType &Dst, EdgeType &E) {
+    assert(findNode(Src) != Nodes.end() && "Src node should be present.");
+    assert(findNode(Dst) != Nodes.end() && "Dst node should be present.");
+    assert((E.getTargetNode() == Dst) &&
+           "Target of the given edge does not match Dst.");
+    return Src.addEdge(E);
+  }
+
+protected:
+  // The list of nodes in the graph.
+  NodeListTy Nodes;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_DIRECTEDGRAPH_H
diff --git a/hpvm/llvm_patches/include/llvm/ADT/EnumeratedArray.h b/hpvm/llvm_patches/include/llvm/ADT/EnumeratedArray.h
new file mode 100644
index 0000000000000000000000000000000000000000..a9528115618cf62440a40fd68d79a20706105b3a
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/ADT/EnumeratedArray.h
@@ -0,0 +1,48 @@
+//===- llvm/ADT/EnumeratedArray.h - Enumerated Array-------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an array type that can be indexed using scoped enum values.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_ENUMERATEDARRAY_H
+#define LLVM_ADT_ENUMERATEDARRAY_H
+
+#include <cassert>
+
+namespace llvm {
+
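+/// A minimal usage sketch (the Color enum below is hypothetical, not part of
+/// this header). By default the enumeration is expected to expose its largest
+/// enumerator as Enumeration::Last; otherwise pass LargestEnum explicitly.
+///
+/// \code
+///   enum class Color { Red, Green, Blue, Last = Blue };
+///   EnumeratedArray<float, Color> Brightness(1.0f); // all entries start at 1.0
+///   Brightness[Color::Green] = 0.5f;
+/// \endcode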
+template <typename ValueType, typename Enumeration,
+          Enumeration LargestEnum = Enumeration::Last, typename IndexType = int,
+          IndexType Size = 1 + static_cast<IndexType>(LargestEnum)>
+class EnumeratedArray {
+public:
+  EnumeratedArray() = default;
+  EnumeratedArray(ValueType V) {
+    for (IndexType IX = 0; IX < Size; ++IX) {
+      Underlying[IX] = V;
+    }
+  }
+  inline const ValueType &operator[](const Enumeration Index) const {
+    auto IX = static_cast<const IndexType>(Index);
+    assert(IX >= 0 && IX < Size && "Index is out of bounds.");
+    return Underlying[IX];
+  }
+  inline ValueType &operator[](const Enumeration Index) {
+    return const_cast<ValueType &>(
+        static_cast<const EnumeratedArray<ValueType, Enumeration, LargestEnum,
+                                          IndexType, Size> &>(*this)[Index]);
+  }
+
+private:
+  ValueType Underlying[Size];
+};
+
+} // namespace llvm
+
+#endif // LLVM_ADT_ENUMERATEDARRAY_H
diff --git a/hpvm/llvm_patches/include/llvm/Analysis/DDG.h b/hpvm/llvm_patches/include/llvm/Analysis/DDG.h
new file mode 100644
index 0000000000000000000000000000000000000000..165efc97a480e06b8ea5e8031e97da469d1f6418
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Analysis/DDG.h
@@ -0,0 +1,623 @@
+//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the Data-Dependence Graph (DDG).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DDG_H
+#define LLVM_ANALYSIS_DDG_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/Analysis/LoopAnalysisManager.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/Support/GraphWriter.h"
+#include <sstream>
+
+
+namespace llvm {
+class DDGNode;
+class DDGEdge;
+using DDGNodeBase = DGNode<DDGNode, DDGEdge>;
+using DDGEdgeBase = DGEdge<DDGNode, DDGEdge>;
+using DDGBase = DirectedGraph<DDGNode, DDGEdge>;
+class LPMUpdater;
+
+/// Data Dependence Graph Node
+/// The graph can represent the following types of nodes:
+/// 1. Single instruction node containing just one instruction.
+/// 2. Multiple instruction node where two or more instructions from
+///    the same basic block are merged into one node.
+/// 3. Pi-block node which is a group of other DDG nodes that are part of a
+///    strongly-connected component of the graph.
+///    A pi-block node contains more than one single or multiple instruction
+///    nodes. The root node cannot be part of a pi-block.
+/// 4. Root node is a special node that connects to all components such that
+///    there is always a path from it to any node in the graph.
+class DDGNode : public DDGNodeBase {
+public:
+  using InstructionListType = SmallVectorImpl<Instruction *>;
+
+  enum class NodeKind {
+    Unknown,
+    SingleInstruction,
+    MultiInstruction,
+    PiBlock,
+    Root,
+  };
+
+  DDGNode() = delete;
+  DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {}
+  DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {}
+  DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {}
+  virtual ~DDGNode() = 0;
+
+  DDGNode &operator=(const DDGNode &N) {
+    DGNode::operator=(N);
+    Kind = N.Kind;
+    return *this;
+  }
+
+  DDGNode &operator=(DDGNode &&N) {
+    DGNode::operator=(std::move(N));
+    Kind = N.Kind;
+    return *this;
+  }
+
+  /// Getter for the kind of this node.
+  NodeKind getKind() const { return Kind; }
+
+  /// Collect a list of instructions, in \p IList, for which predicate \p Pred
+  /// evaluates to true when iterating over instructions of this node. Return
+  /// true if at least one instruction was collected, and false otherwise.
+  bool collectInstructions(llvm::function_ref<bool(Instruction *)> const &Pred,
+                           InstructionListType &IList) const;
+
+protected:
+  /// Setter for the kind of this node.
+  void setKind(NodeKind K) { Kind = K; }
+
+private:
+  NodeKind Kind;
+};
+
+/// Subclass of DDGNode representing the root node of the graph.
+/// There should only be one such node in a given graph.
+class RootDDGNode : public DDGNode {
+public:
+  RootDDGNode() : DDGNode(NodeKind::Root) {}
+  RootDDGNode(const RootDDGNode &N) = delete;
+  RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
+  ~RootDDGNode() {}
+
+  /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+  static bool classof(const DDGNode *N) {
+    return N->getKind() == NodeKind::Root;
+  }
+  static bool classof(const RootDDGNode *N) { return true; }
+};
+
+/// Subclass of DDGNode representing single or multi-instruction nodes.
+class SimpleDDGNode : public DDGNode {
+  friend class DDGBuilder;
+
+public:
+  SimpleDDGNode() = delete;
+  SimpleDDGNode(Instruction &I);
+  SimpleDDGNode(const SimpleDDGNode &N);
+  SimpleDDGNode(SimpleDDGNode &&N);
+  ~SimpleDDGNode();
+
+  SimpleDDGNode &operator=(const SimpleDDGNode &N) {
+    DDGNode::operator=(N);
+    InstList = N.InstList;
+    return *this;
+  }
+
+  SimpleDDGNode &operator=(SimpleDDGNode &&N) {
+    DDGNode::operator=(std::move(N));
+    InstList = std::move(N.InstList);
+    return *this;
+  }
+
+  /// Get the list of instructions in this node.
+  const InstructionListType &getInstructions() const {
+    assert(!InstList.empty() && "Instruction List is empty.");
+    return InstList;
+  }
+  InstructionListType &getInstructions() {
+    return const_cast<InstructionListType &>(
+        static_cast<const SimpleDDGNode *>(this)->getInstructions());
+  }
+
+  /// Get the first/last instruction in the node.
+  Instruction *getFirstInstruction() const { return getInstructions().front(); }
+  Instruction *getLastInstruction() const { return getInstructions().back(); }
+
+  /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+  static bool classof(const DDGNode *N) {
+    return N->getKind() == NodeKind::SingleInstruction ||
+           N->getKind() == NodeKind::MultiInstruction;
+  }
+  static bool classof(const SimpleDDGNode *N) { return true; }
+
+private:
+  /// Append the list of instructions in \p Input to this node.
+  void appendInstructions(const InstructionListType &Input) {
+    setKind((InstList.size() == 0 && Input.size() == 1)
+                ? NodeKind::SingleInstruction
+                : NodeKind::MultiInstruction);
+    InstList.insert(InstList.end(), Input.begin(), Input.end());
+  }
+  void appendInstructions(const SimpleDDGNode &Input) {
+    appendInstructions(Input.getInstructions());
+  }
+
+  /// List of instructions associated with a single or multi-instruction node.
+  SmallVector<Instruction *, 2> InstList;
+};
+
+/// Subclass of DDGNode representing a pi-block. A pi-block represents a group
+/// of DDG nodes that are part of a strongly-connected component of the graph.
+/// Replacing all the SCCs with pi-blocks results in an acyclic representation
+/// of the DDG. For example if we have:
+/// {a -> b}, {b -> c, d}, {c -> a}
+/// the cycle a -> b -> c -> a is abstracted into a pi-block "p" as follows:
+/// {p -> d} with "p" containing: {a -> b}, {b -> c}, {c -> a}
+class PiBlockDDGNode : public DDGNode {
+public:
+  using PiNodeList = SmallVector<DDGNode *, 4>;
+
+  PiBlockDDGNode() = delete;
+  PiBlockDDGNode(const PiNodeList &List);
+  PiBlockDDGNode(const PiBlockDDGNode &N);
+  PiBlockDDGNode(PiBlockDDGNode &&N);
+  ~PiBlockDDGNode();
+
+  PiBlockDDGNode &operator=(const PiBlockDDGNode &N) {
+    DDGNode::operator=(N);
+    NodeList = N.NodeList;
+    return *this;
+  }
+
+  PiBlockDDGNode &operator=(PiBlockDDGNode &&N) {
+    DDGNode::operator=(std::move(N));
+    NodeList = std::move(N.NodeList);
+    return *this;
+  }
+
+  /// Get the list of nodes in this pi-block.
+  const PiNodeList &getNodes() const {
+    assert(!NodeList.empty() && "Node list is empty.");
+    return NodeList;
+  }
+  PiNodeList &getNodes() {
+    return const_cast<PiNodeList &>(
+        static_cast<const PiBlockDDGNode *>(this)->getNodes());
+  }
+
+  /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+  static bool classof(const DDGNode *N) {
+    return N->getKind() == NodeKind::PiBlock;
+  }
+
+private:
+  /// List of nodes in this pi-block.
+  PiNodeList NodeList;
+};
+
+/// Data Dependency Graph Edge.
+/// An edge in the DDG can represent a def-use relationship or
+/// a memory dependence based on the result of DependenceAnalysis.
+/// A rooted edge connects the root node to one of the components
+/// of the graph.
+class DDGEdge : public DDGEdgeBase {
+public:
+  /// The kind of edge in the DDG
+  enum class EdgeKind {
+    Unknown,
+    RegisterDefUse,
+    MemoryDependence,
+    Rooted,
+    Last = Rooted // Must be equal to the largest enum value.
+  };
+
+  explicit DDGEdge(DDGNode &N) = delete;
+  DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
+  DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {}
+  DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {}
+  DDGEdge &operator=(const DDGEdge &E) {
+    DDGEdgeBase::operator=(E);
+    Kind = E.Kind;
+    return *this;
+  }
+
+  DDGEdge &operator=(DDGEdge &&E) {
+    DDGEdgeBase::operator=(std::move(E));
+    Kind = E.Kind;
+    return *this;
+  }
+
+  /// Get the edge kind
+  EdgeKind getKind() const { return Kind; }
+
+  /// Return true if this is a def-use edge, and false otherwise.
+  bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; }
+
+  /// Return true if this is a memory dependence edge, and false otherwise.
+  bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; }
+
+  /// Return true if this is an edge stemming from the root node, and false
+  /// otherwise.
+  bool isRooted() const { return Kind == EdgeKind::Rooted; }
+
+private:
+  EdgeKind Kind;
+};
+
+/// Encapsulate some common data and functionality needed for different
+/// variations of data dependence graphs.
+template <typename NodeType> class DependenceGraphInfo {
+public:
+  using DependenceList = SmallVector<std::unique_ptr<Dependence>, 1>;
+
+  DependenceGraphInfo() = delete;
+  DependenceGraphInfo(const DependenceGraphInfo &G) = delete;
+  DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo)
+      : Name(N), DI(DepInfo), Root(nullptr) {}
+  DependenceGraphInfo(DependenceGraphInfo &&G)
+      : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {}
+  virtual ~DependenceGraphInfo() {}
+
+  /// Return the label that is used to name this graph.
+  const StringRef getName() const { return Name; }
+
+  /// Return the root node of the graph.
+  NodeType &getRoot() const {
+    assert(Root && "Root node is not available yet. Graph construction may "
+                   "still be in progress\n");
+    return *Root;
+  }
+
+  /// Collect all the data dependency infos coming from any pair of memory
+  /// accesses from \p Src to \p Dst, and store them into \p Deps. Return true
+  /// if a dependence exists, and false otherwise.
+  bool getDependencies(const NodeType &Src, const NodeType &Dst,
+                       DependenceList &Deps) const;
+
+protected:
+  // Name of the graph.
+  std::string Name;
+
+  // Store a copy of DependenceInfo in the graph, so that individual memory
+  // dependencies don't need to be stored. Instead when the dependence is
+  // queried it is recomputed using @DI.
+  const DependenceInfo DI;
+
+  // A special node in the graph that has an edge to every connected component
+  // of the graph, to ensure all nodes are reachable in a graph walk.
+  NodeType *Root = nullptr;
+};
+
+//===--------------------------------------------------------------------===//
+// DependenceGraphInfo Implementation
+//===--------------------------------------------------------------------===//
+
+template <typename NodeType>
+bool DependenceGraphInfo<NodeType>::getDependencies(
+    const NodeType &Src, const NodeType &Dst, DependenceList &Deps) const {
+  assert(Deps.empty() && "Expected empty output list at the start.");
+
+  // List of memory access instructions from src and dst nodes.
+  SmallVector<Instruction *, 8> SrcIList, DstIList;
+  auto isMemoryAccess = [](const Instruction *I) {
+    return I->mayReadOrWriteMemory();
+  };
+  Src.collectInstructions(isMemoryAccess, SrcIList);
+  Dst.collectInstructions(isMemoryAccess, DstIList);
+
+  for (auto *SrcI : SrcIList)
+    for (auto *DstI : DstIList)
+      if (auto Dep =
+              const_cast<DependenceInfo *>(&DI)->depends(SrcI, DstI, true))
+        Deps.push_back(std::move(Dep));
+
+  return !Deps.empty();
+}
+
+using DDGInfo = DependenceGraphInfo<DDGNode>;
+
+/// Data Dependency Graph
+class DataDependenceGraph : public DDGBase, public DDGInfo {
+  friend AbstractDependenceGraphBuilder<DataDependenceGraph>;
+  friend class DDGBuilder;
+
+public:
+  using NodeType = DDGNode;
+  using EdgeType = DDGEdge;
+
+  DataDependenceGraph() = delete;
+  DataDependenceGraph(const DataDependenceGraph &G) = delete;
+  DataDependenceGraph(DataDependenceGraph &&G)
+      : DDGBase(std::move(G)), DDGInfo(std::move(G)) {}
+  DataDependenceGraph(Function &F, DependenceInfo &DI);
+  DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI);
+  ~DataDependenceGraph();
+
+  /// If node \p N belongs to a pi-block return a pointer to the pi-block,
+  /// otherwise return null.
+  const PiBlockDDGNode *getPiBlock(const NodeType &N) const;
+
+protected:
+  /// Add node \p N to the graph, if it's not added yet, and keep track of the
+  /// root node as well as pi-blocks and their members. Return true if node is
+  /// successfully added.
+  bool addNode(NodeType &N);
+
+private:
+  using PiBlockMapType = DenseMap<const NodeType *, const PiBlockDDGNode *>;
+
+  /// Mapping from graph nodes to their containing pi-blocks. If a node is not
+  /// part of a pi-block, it will not appear in this map.
+  PiBlockMapType PiBlockMap;
+};
+
+/// Concrete implementation of a pure data dependence graph builder. This class
+/// provides custom implementation for the pure-virtual functions used in the
+/// generic dependence graph build algorithm.
+///
+/// For information about time complexity of the build algorithm see the
+/// comments near the declaration of AbstractDependenceGraphBuilder.
+class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> {
+public:
+  DDGBuilder(DataDependenceGraph &G, DependenceInfo &D,
+             const BasicBlockListType &BBs)
+      : AbstractDependenceGraphBuilder(G, D, BBs) {}
+  DDGNode &createRootNode() final override {
+    auto *RN = new RootDDGNode();
+    assert(RN && "Failed to allocate memory for DDG root node.");
+    Graph.addNode(*RN);
+    return *RN;
+  }
+  DDGNode &createFineGrainedNode(Instruction &I) final override {
+    auto *SN = new SimpleDDGNode(I);
+    assert(SN && "Failed to allocate memory for simple DDG node.");
+    Graph.addNode(*SN);
+    return *SN;
+  }
+  DDGNode &createPiBlock(const NodeListType &L) final override {
+    auto *Pi = new PiBlockDDGNode(L);
+    assert(Pi && "Failed to allocate memory for pi-block node.");
+    Graph.addNode(*Pi);
+    return *Pi;
+  }
+  DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override {
+    auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse);
+    assert(E && "Failed to allocate memory for edge");
+    Graph.connect(Src, Tgt, *E);
+    return *E;
+  }
+  DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override {
+    auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence);
+    assert(E && "Failed to allocate memory for edge");
+    Graph.connect(Src, Tgt, *E);
+    return *E;
+  }
+  DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override {
+    auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted);
+    assert(E && "Failed to allocate memory for edge");
+    assert(isa<RootDDGNode>(Src) && "Expected root node");
+    Graph.connect(Src, Tgt, *E);
+    return *E;
+  }
+
+  const NodeListType &getNodesInPiBlock(const DDGNode &N) final override {
+    auto *PiNode = dyn_cast<const PiBlockDDGNode>(&N);
+    assert(PiNode && "Expected a pi-block node.");
+    return PiNode->getNodes();
+  }
+
+  /// Return true if the two nodes \p Src and \p Tgt are both simple nodes and
+  /// the consecutive instructions after merging belong to the same basic block.
+  bool areNodesMergeable(const DDGNode &Src,
+                         const DDGNode &Tgt) const final override;
+  void mergeNodes(DDGNode &Src, DDGNode &Tgt) final override;
+  bool shouldSimplify() const final override;
+  bool shouldCreatePiBlocks() const final override;
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N);
+raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E);
+raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K);
+raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G);
+
+//===--------------------------------------------------------------------===//
+// DDG Analysis Passes
+//===--------------------------------------------------------------------===//
+
+/// Analysis pass that builds the DDG for a loop.
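+///
+/// A minimal usage sketch from within a loop pass (hypothetical pass body;
+/// the analysis manager call follows the standard new-pass-manager pattern
+/// for loop analyses):
+///
+/// \code
+///   PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+///                         LoopStandardAnalysisResults &AR, LPMUpdater &U) {
+///     DDGAnalysis::Result &G = AM.getResult<DDGAnalysis>(L, AR);
+///     for (DDGNode *N : *G)
+///       dbgs() << *N << "\n";
+///     return PreservedAnalyses::all();
+///   }
+/// \endcode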
+class DDGAnalysis : public AnalysisInfoMixin<DDGAnalysis> {
+public:
+  using Result = std::unique_ptr<DataDependenceGraph>;
+  Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR);
+
+private:
+  friend AnalysisInfoMixin<DDGAnalysis>;
+  static AnalysisKey Key;
+};
+
+/// Textual printer pass for the DDG of a loop.
+class DDGAnalysisPrinterPass : public PassInfoMixin<DDGAnalysisPrinterPass> {
+public:
+  explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {}
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM,
+                        LoopStandardAnalysisResults &AR, LPMUpdater &U);
+
+private:
+  raw_ostream &OS;
+};
+
+
+//===--------------------------------------------------------------------===//
+// GraphTraits specializations for the DDG
+//===--------------------------------------------------------------------===//
+
+/// Non-const versions of the graph trait specializations for DDG.
+template <> struct GraphTraits<DDGNode *> {
+  using NodeRef = DDGNode *;
+
+  static DDGNode *DDGGetTargetNode(DGEdge<DDGNode, DDGEdge> *P) {
+    return &P->getTargetNode();
+  }
+
+  // Provide a mapped iterator so that the GraphTrait-based implementations can
+  // find the target nodes without having to explicitly go through the edges.
+  using ChildIteratorType =
+      mapped_iterator<DDGNode::iterator, decltype(&DDGGetTargetNode)>;
+  using ChildEdgeIteratorType = DDGNode::iterator;
+
+  static NodeRef getEntryNode(NodeRef N) { return N; }
+  static ChildIteratorType child_begin(NodeRef N) {
+    return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+  }
+  static ChildIteratorType child_end(NodeRef N) {
+    return ChildIteratorType(N->end(), &DDGGetTargetNode);
+  }
+
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    return N->begin();
+  }
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<DataDependenceGraph *> : public GraphTraits<DDGNode *> {
+  using nodes_iterator = DataDependenceGraph::iterator;
+  static NodeRef getEntryNode(DataDependenceGraph *DG) {
+    return &DG->getRoot();
+  }
+  static nodes_iterator nodes_begin(DataDependenceGraph *DG) {
+    return DG->begin();
+  }
+  static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); }
+};
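+
+// With the GraphTraits specializations above in scope, the generic graph
+// algorithms in ADT apply directly to the DDG. A brief sketch (assuming the
+// client also includes llvm/ADT/DepthFirstIterator.h, and G is a
+// DataDependenceGraph):
+//
+//   for (DDGNode *N : depth_first(&G))
+//     dbgs() << *N << "\n";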
+
+template <>
+struct DOTGraphTraits<DataDependenceGraph *> : public DefaultDOTGraphTraits {
+  DOTGraphTraits(bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {}
+
+  static std::string getGraphName(DataDependenceGraph *Graph) { return "DDG"; }
+
+  static std::string getGraphProperties(DataDependenceGraph *Graph) {
+    return "\tcompound=true;";
+  }
+
+  std::string getNodeLabel(DDGNode *Node, DataDependenceGraph *Graph) {
+    std::string Str;
+    raw_string_ostream SS(Str);
+    SS << Node << " : ";
+    switch (Node->getKind()) {
+    case DDGNode::NodeKind::Root:
+      SS << "Root Node\n";
+      break;
+    case DDGNode::NodeKind::SingleInstruction:
+    case DDGNode::NodeKind::MultiInstruction: {
+      SS << "Simple Node\n";
+      SimpleDDGNode *SN = dyn_cast<SimpleDDGNode>(Node);
+      for (auto *I : SN->getInstructions())
+        SS << *I << "\n";
+      break;
+    }
+    case DDGNode::NodeKind::PiBlock: {
+      SS << "Pi Block\n";
+      PiBlockDDGNode *PBN = dyn_cast<PiBlockDDGNode>(Node);
+      for (auto *N : PBN->getNodes())
+        SS << N << "\n";
+      break;
+    }
+    default:
+      SS << "Unknown\n";
+    }
+    return SS.str();
+  }
+
+  std::string getEdgeLabel(DDGEdge *Edge, DataDependenceGraph *Graph) {
+    std::string Str;
+    raw_string_ostream SS(Str);
+    switch (Edge->getKind()) {
+    case DDGEdge::EdgeKind::RegisterDefUse:
+      SS << "def-use";
+      break;
+    case DDGEdge::EdgeKind::MemoryDependence:
+      SS << "memory";
+      break;
+    default:
+      SS << "";
+    }
+    return SS.str();
+  }
+};
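+
+// A brief usage sketch for the DOT printer above: WriteGraph (declared in
+// llvm/Support/GraphWriter.h, which this header includes) picks up this
+// DOTGraphTraits specialization automatically; G is a DataDependenceGraph
+// assumed to be in scope.
+//
+//   std::error_code EC;
+//   raw_fd_ostream File("ddg.dot", EC);
+//   if (!EC)
+//     WriteGraph(File, &G, /*ShortNames=*/false, "DDG");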
+
+/// Const versions of the graph trait specializations for DDG.
+template <> struct GraphTraits<const DDGNode *> {
+  using NodeRef = const DDGNode *;
+
+  static const DDGNode *DDGGetTargetNode(const DGEdge<DDGNode, DDGEdge> *P) {
+    return &P->getTargetNode();
+  }
+
+  // Provide a mapped iterator so that the GraphTrait-based implementations can
+  // find the target nodes without having to explicitly go through the edges.
+  using ChildIteratorType =
+      mapped_iterator<DDGNode::const_iterator, decltype(&DDGGetTargetNode)>;
+  using ChildEdgeIteratorType = DDGNode::const_iterator;
+
+  static NodeRef getEntryNode(NodeRef N) { return N; }
+  static ChildIteratorType child_begin(NodeRef N) {
+    return ChildIteratorType(N->begin(), &DDGGetTargetNode);
+  }
+  static ChildIteratorType child_end(NodeRef N) {
+    return ChildIteratorType(N->end(), &DDGGetTargetNode);
+  }
+
+  static ChildEdgeIteratorType child_edge_begin(NodeRef N) {
+    return N->begin();
+  }
+  static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); }
+};
+
+template <>
+struct GraphTraits<const DataDependenceGraph *>
+    : public GraphTraits<const DDGNode *> {
+  using nodes_iterator = DataDependenceGraph::const_iterator;
+  static NodeRef getEntryNode(const DataDependenceGraph *DG) {
+    return &DG->getRoot();
+  }
+  static nodes_iterator nodes_begin(const DataDependenceGraph *DG) {
+    return DG->begin();
+  }
+  static nodes_iterator nodes_end(const DataDependenceGraph *DG) {
+    return DG->end();
+  }
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DDG_H
diff --git a/hpvm/llvm_patches/include/llvm/Analysis/DependenceGraphBuilder.h b/hpvm/llvm_patches/include/llvm/Analysis/DependenceGraphBuilder.h
new file mode 100644
index 0000000000000000000000000000000000000000..6f4e1be94164f797e4fc053dac754f8e28839e9e
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -0,0 +1,203 @@
+//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a builder interface that can be used to populate dependence
+// graphs such as DDG and PDG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
+
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/EquivalenceClasses.h"
+#include "llvm/ADT/SmallVector.h"
+
+namespace llvm {
+
+class BasicBlock;
+class DependenceInfo;
+class Instruction;
+
+/// This abstract builder class defines a set of high-level steps for creating
+/// DDG-like graphs. The client code is expected to inherit from this class and
+/// define concrete implementations for each of the pure virtual functions used
+/// in the high-level algorithm.
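+///
+/// A sketch of how a concrete graph drives this builder (simplified from the
+/// way DataDependenceGraph's constructor uses DDGBuilder in
+/// llvm/Analysis/DDG.h; not a standalone API):
+///
+/// \code
+///   // Inside the graph constructor, with DI and the loop L in scope:
+///   SmallVector<BasicBlock *, 8> BBList(L.getBlocks().begin(),
+///                                       L.getBlocks().end());
+///   DDGBuilder(*this, DI, BBList).populate();
+/// \endcode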
+template <class GraphType> class AbstractDependenceGraphBuilder {
+protected:
+  using BasicBlockListType = SmallVectorImpl<BasicBlock *>;
+
+private:
+  using NodeType = typename GraphType::NodeType;
+  using EdgeType = typename GraphType::EdgeType;
+
+public:
+  using ClassesType = EquivalenceClasses<BasicBlock *>;
+  using NodeListType = SmallVector<NodeType *, 4>;
+
+  AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D,
+                                 const BasicBlockListType &BBs)
+      : Graph(G), DI(D), BBList(BBs) {}
+  virtual ~AbstractDependenceGraphBuilder() {}
+
+  /// The main entry to the graph construction algorithm. It starts by
+  /// creating nodes in increasing order of granularity and then
+  /// adds def-use and memory edges. As one of the final stages, it
+  /// also creates pi-block nodes to facilitate codegen in transformations
+  /// that use dependence graphs.
+  ///
+  /// The algorithmic complexity of this implementation is O(V^2 * I^2), where
+  /// V is the number of vertices (nodes) and I is the number of instructions
+  /// in each node. The total number of instructions, N, is equal to V * I,
+  /// therefore the worst-case time complexity is O(N^2); on average roughly
+  /// half of the N^2 instruction pairs need to be examined.
+  void populate() {
+    computeInstructionOrdinals();
+    createFineGrainedNodes();
+    createDefUseEdges();
+    createMemoryDependencyEdges();
+    simplify();
+    createAndConnectRootNode();
+    createPiBlocks();
+    sortNodesTopologically();
+  }
+
+  /// Compute ordinal numbers for each instruction and store them in a map for
+  /// future look up. These ordinals are used to compute node ordinals which are
+  /// in turn used to order nodes that are part of a cycle.
+  /// Instruction ordinals are assigned based on lexical program order.
+  void computeInstructionOrdinals();
+
+  /// Create fine grained nodes. These are typically atomic nodes that
+  /// consist of a single instruction.
+  void createFineGrainedNodes();
+
+  /// Analyze the def-use chains and create edges from the nodes containing
+  /// definitions to the nodes containing the uses.
+  void createDefUseEdges();
+
+  /// Analyze data dependencies that exist between memory loads or stores
+  /// in the graph nodes and create edges between them.
+  void createMemoryDependencyEdges();
+
+  /// Create a root node and add edges such that each node in the graph is
+  /// reachable from the root.
+  void createAndConnectRootNode();
+
+  /// Apply graph abstraction to groups of nodes that belong to a strongly
+  /// connected component of the graph to create larger compound nodes
+  /// called pi-blocks. The purpose of this abstraction is to isolate sets of
+  /// program elements that need to stay together during codegen and turn
+  /// the dependence graph into an acyclic graph.
+  void createPiBlocks();
+
+  /// Go through all the nodes in the graph and collapse any two nodes
+  /// 'a' and 'b' if all of the following are true:
+  ///   - the only edge from 'a' is a def-use edge to 'b' and
+  ///   - the only edge to 'b' is a def-use edge from 'a' and
+  ///   - there is no cyclic edge from 'b' to 'a' and
+  ///   - all instructions in 'a' and 'b' belong to the same basic block and
+  ///   - both 'a' and 'b' are simple (single or multi instruction) nodes.
+  void simplify();
+
+  /// Topologically sort the graph nodes.
+  void sortNodesTopologically();
+
+protected:
+  /// Create the root node of the graph.
+  virtual NodeType &createRootNode() = 0;
+
+  /// Create an atomic node in the graph given a single instruction.
+  virtual NodeType &createFineGrainedNode(Instruction &I) = 0;
+
+  /// Create a pi-block node in the graph representing a group of nodes in an
+  /// SCC of the graph.
+  virtual NodeType &createPiBlock(const NodeListType &L) = 0;
+
+  /// Create a def-use edge going from \p Src to \p Tgt.
+  virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+  /// Create a memory dependence edge going from \p Src to \p Tgt.
+  virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+  /// Create a rooted edge going from \p Src to \p Tgt.
+  virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
+
+  /// Given a pi-block node, return a vector of all the nodes contained within
+  /// it.
+  virtual const NodeListType &getNodesInPiBlock(const NodeType &N) = 0;
+
+  /// Deallocate memory of edge \p E.
+  virtual void destroyEdge(EdgeType &E) { delete &E; }
+
+  /// Deallocate memory of node \p N.
+  virtual void destroyNode(NodeType &N) { delete &N; }
+
+  /// Return true if creation of pi-blocks is supported and desired,
+  /// and false otherwise.
+  virtual bool shouldCreatePiBlocks() const { return true; }
+
+  /// Return true if graph simplification step is requested, and false
+  /// otherwise.
+  virtual bool shouldSimplify() const { return true; }
+
+  /// Return true if it's safe to merge the two nodes.
+  virtual bool areNodesMergeable(const NodeType &A,
+                                 const NodeType &B) const = 0;
+
+  /// Append the content of node \p B into node \p A and remove \p B and
+  /// the edge between \p A and \p B from the graph.
+  virtual void mergeNodes(NodeType &A, NodeType &B) = 0;
+
+  /// Given an instruction \p I return its associated ordinal number.
+  size_t getOrdinal(Instruction &I) {
+    assert(InstOrdinalMap.find(&I) != InstOrdinalMap.end() &&
+           "No ordinal computed for this instruction.");
+    return InstOrdinalMap[&I];
+  }
+
+  /// Given a node \p N return its associated ordinal number.
+  size_t getOrdinal(NodeType &N) {
+    assert(NodeOrdinalMap.find(&N) != NodeOrdinalMap.end() &&
+           "No ordinal computed for this node.");
+    return NodeOrdinalMap[&N];
+  }
+
+  /// Map types to map instructions to nodes used when populating the graph.
+  using InstToNodeMap = DenseMap<Instruction *, NodeType *>;
+
+  /// Map types to map instructions/nodes to an ordinal number.
+  using InstToOrdinalMap = DenseMap<Instruction *, size_t>;
+  using NodeToOrdinalMap = DenseMap<NodeType *, size_t>;
+
+  /// Reference to the graph that gets built by a concrete implementation of
+  /// this builder.
+  GraphType &Graph;
+
+  /// Dependence information used to create memory dependence edges in the
+  /// graph.
+  DependenceInfo &DI;
+
+  /// The list of basic blocks to consider when building the graph.
+  const BasicBlockListType &BBList;
+
+  /// A mapping from instructions to the corresponding nodes in the graph.
+  InstToNodeMap IMap;
+
+  /// A mapping from each instruction to an ordinal number. This map is used to
+  /// populate the \p NodeOrdinalMap.
+  InstToOrdinalMap InstOrdinalMap;
+
+  /// A mapping from nodes to an ordinal number. This map is used to sort nodes
+  /// in a pi-block based on program order.
+  NodeToOrdinalMap NodeOrdinalMap;
+};
+
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H
diff --git a/hpvm/llvm_patches/include/Bitcode/LLVMBitCodes.h b/hpvm/llvm_patches/include/llvm/Bitcode/LLVMBitCodes.h
similarity index 99%
rename from hpvm/llvm_patches/include/Bitcode/LLVMBitCodes.h
rename to hpvm/llvm_patches/include/llvm/Bitcode/LLVMBitCodes.h
index 5e59ba96f2331663289a040326ebd4e453bd1e86..90ba564bd985079a8fa6589ac01b16cd8824e8c8 100644
--- a/hpvm/llvm_patches/include/Bitcode/LLVMBitCodes.h
+++ b/hpvm/llvm_patches/include/llvm/Bitcode/LLVMBitCodes.h
@@ -633,6 +633,11 @@ enum AttributeKindCodes {
   ATTR_KIND_IN = 65,
   ATTR_KIND_OUT = 66,
   ATTR_KIND_INOUT = 67,
+  ATTR_KIND_PRIV = 68,
+  ATTR_KIND_BUFFERIN = 69,
+  ATTR_KIND_BUFFEROUT = 70,
+  ATTR_KIND_CHANNEL = 71,
+
 };
 
 enum ComdatSelectionKindCodes {
diff --git a/hpvm/llvm_patches/include/llvm/IR/Attributes.h b/hpvm/llvm_patches/include/llvm/IR/Attributes.h
new file mode 100644
index 0000000000000000000000000000000000000000..63767f66922f9f67d97caff8e0ab0f49c02ad420
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/IR/Attributes.h
@@ -0,0 +1,895 @@
+//===- llvm/Attributes.h - Container for Attributes -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file contains the simple types necessary to represent the
+/// attributes associated with functions and their calls.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_IR_ATTRIBUTES_H
+#define LLVM_IR_ATTRIBUTES_H
+
+#include "llvm-c/Types.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Config/llvm-config.h"
+#include "llvm/Support/PointerLikeTypeTraits.h"
+#include <bitset>
+#include <cassert>
+#include <cstdint>
+#include <map>
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class AttrBuilder;
+class AttributeImpl;
+class AttributeListImpl;
+class AttributeSetNode;
+template<typename T> struct DenseMapInfo;
+class Function;
+class LLVMContext;
+class Type;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// Functions, function parameters, and return types can have attributes
+/// to indicate how they should be treated by optimizations and code
+/// generation. This class represents one of those attributes. It's light-weight
+/// and should be passed around by-value.
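+///
+/// A minimal usage sketch (Ctx is an LLVMContext assumed to be in scope; the
+/// HPVM-specific accessors added by this patch, getBuffSize and
+/// getChannelDepth, follow the same pattern as upstream integer attributes):
+///
+/// \code
+///   Attribute A = Attribute::get(Ctx, Attribute::NoUnwind);
+///   if (A.isEnumAttribute() && A.hasAttribute(Attribute::NoUnwind))
+///     std::string S = A.getAsString(); // "nounwind"
+/// \endcode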
+class Attribute {
+public:
+  /// This enumeration lists the attributes that can be associated with
+  /// parameters, function results, or the function itself.
+  ///
+  /// Note: The `uwtable' attribute is about the ABI or the user mandating an
+  /// entry in the unwind table. The `nounwind' attribute is about an exception
+  /// passing by the function.
+  ///
+  /// In a theoretical system that uses tables for profiling and SjLj for
+  /// exceptions, they would be fully independent. In a normal system that uses
+  /// tables for both, the semantics are:
+  ///
+  /// nil                = Needs an entry because an exception might pass by.
+  /// nounwind           = No need for an entry
+  /// uwtable            = Needs an entry because the ABI says so and because
+  ///                      an exception might pass by.
+  /// uwtable + nounwind = Needs an entry because the ABI says so.
+
+  enum AttrKind {
+    // IR-Level Attributes
+    None,                  ///< No attributes have been set
+    #define GET_ATTR_ENUM
+    #include "llvm/IR/Attributes.inc"
+    EndAttrKinds           ///< Sentinel value useful for loops
+  };
+
+private:
+  AttributeImpl *pImpl = nullptr;
+
+  Attribute(AttributeImpl *A) : pImpl(A) {}
+
+public:
+  Attribute() = default;
+
+  //===--------------------------------------------------------------------===//
+  // Attribute Construction
+  //===--------------------------------------------------------------------===//
+
+  /// Return a uniquified Attribute object.
+  static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0);
+  static Attribute get(LLVMContext &Context, StringRef Kind,
+                       StringRef Val = StringRef());
+  static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty);
+
+  /// Return a uniquified Attribute object that has the specific
+  /// alignment set.
+  static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align);
+  static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align);
+  static Attribute getWithDereferenceableBytes(LLVMContext &Context,
+                                              uint64_t Bytes);
+  static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context,
+                                                     uint64_t Bytes);
+  static Attribute getWithAllocSizeArgs(LLVMContext &Context,
+                                        unsigned ElemSizeArg,
+                                        const Optional<unsigned> &NumElemsArg);
+  static Attribute getWithByValType(LLVMContext &Context, Type *Ty);
+
+  //===--------------------------------------------------------------------===//
+  // Attribute Accessors
+  //===--------------------------------------------------------------------===//
+
+  /// Return true if the attribute is an Attribute::AttrKind type.
+  bool isEnumAttribute() const;
+
+  /// Return true if the attribute is an integer attribute.
+  bool isIntAttribute() const;
+
+  /// Return true if the attribute is a string (target-dependent)
+  /// attribute.
+  bool isStringAttribute() const;
+
+  /// Return true if the attribute is a type attribute.
+  bool isTypeAttribute() const;
+
+  /// Return true if the attribute is present.
+  bool hasAttribute(AttrKind Val) const;
+
+  /// Return true if the target-dependent attribute is present.
+  bool hasAttribute(StringRef Val) const;
+
+  /// Return the attribute's kind as an enum (Attribute::AttrKind). This
+  /// requires the attribute to be an enum or integer attribute.
+  Attribute::AttrKind getKindAsEnum() const;
+
+  /// Return the attribute's value as an integer. This requires that the
+  /// attribute be an integer attribute.
+  uint64_t getValueAsInt() const;
+
+  /// Return the attribute's kind as a string. This requires the
+  /// attribute to be a string attribute.
+  StringRef getKindAsString() const;
+
+  /// Return the attribute's value as a string. This requires the
+  /// attribute to be a string attribute.
+  StringRef getValueAsString() const;
+
+  /// Return the attribute's value as a Type. This requires the attribute to be
+  /// a type attribute.
+  Type *getValueAsType() const;
+
+  /// Returns the alignment field of an attribute as a byte alignment
+  /// value.
+  unsigned getAlignment() const;
+
+  /// Returns the BuffSize field of an attribute.
+  unsigned getBuffSize() const;
+
+  /// Returns the ChannelDepth field of an attribute.
+  unsigned getChannelDepth() const;
+
+  /// Returns the stack alignment field of an attribute as a byte
+  /// alignment value.
+  unsigned getStackAlignment() const;
+
+  /// Returns the number of dereferenceable bytes from the
+  /// dereferenceable attribute.
+  uint64_t getDereferenceableBytes() const;
+
+  /// Returns the number of dereferenceable_or_null bytes from the
+  /// dereferenceable_or_null attribute.
+  uint64_t getDereferenceableOrNullBytes() const;
+
+  /// Returns the argument numbers for the allocsize attribute (or pair(0, 0)
+  /// if not known).
+  std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
+
+  /// The Attribute is converted to a string of equivalent mnemonic. This
+  /// is, presumably, for writing out the mnemonics for the assembly writer.
+  std::string getAsString(bool InAttrGrp = false) const;
+
+  /// Equality and non-equality operators.
+  bool operator==(Attribute A) const { return pImpl == A.pImpl; }
+  bool operator!=(Attribute A) const { return pImpl != A.pImpl; }
+
+  /// Less-than operator. Useful for sorting the attributes list.
+  bool operator<(Attribute A) const;
+
+  void Profile(FoldingSetNodeID &ID) const {
+    ID.AddPointer(pImpl);
+  }
+
+  /// Return a raw pointer that uniquely identifies this attribute.
+  void *getRawPointer() const {
+    return pImpl;
+  }
+
+  /// Get an attribute from a raw pointer created by getRawPointer.
+  static Attribute fromRawPointer(void *RawPtr) {
+    return Attribute(reinterpret_cast<AttributeImpl*>(RawPtr));
+  }
+};
+
+// Specialized opaque value conversions.
+inline LLVMAttributeRef wrap(Attribute Attr) {
+  return reinterpret_cast<LLVMAttributeRef>(Attr.getRawPointer());
+}
+
+// Specialized opaque value conversions.
+inline Attribute unwrap(LLVMAttributeRef Attr) {
+  return Attribute::fromRawPointer(Attr);
+}
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class holds the attributes for a particular argument, parameter,
+/// function, or return value. It is an immutable value type that is cheap to
+/// copy. Adding and removing enum attributes is intended to be fast, but adding
+/// and removing string or integer attributes involves a FoldingSet lookup.
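+///
+/// A minimal usage sketch (Ctx is an LLVMContext assumed to be in scope):
+/// every modification returns a new set, so the result must be reassigned.
+///
+/// \code
+///   AttributeSet AS =
+///       AttributeSet::get(Ctx, {Attribute::get(Ctx, Attribute::NoAlias)});
+///   AS = AS.addAttribute(Ctx, Attribute::NonNull); // original AS unchanged
+///   bool B = AS.hasAttribute(Attribute::NonNull);  // true
+/// \endcode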
+class AttributeSet {
+  friend AttributeListImpl;
+  template <typename Ty> friend struct DenseMapInfo;
+
+  // TODO: Extract AvailableAttrs from AttributeSetNode and store them here.
+  // This will allow an efficient implementation of addAttribute and
+  // removeAttribute for enum attrs.
+
+  /// Private implementation pointer.
+  AttributeSetNode *SetNode = nullptr;
+
+private:
+  explicit AttributeSet(AttributeSetNode *ASN) : SetNode(ASN) {}
+
+public:
+  /// AttributeSet is a trivially copyable value type.
+  AttributeSet() = default;
+  AttributeSet(const AttributeSet &) = default;
+  ~AttributeSet() = default;
+
+  static AttributeSet get(LLVMContext &C, const AttrBuilder &B);
+  static AttributeSet get(LLVMContext &C, ArrayRef<Attribute> Attrs);
+
+  bool operator==(const AttributeSet &O) const { return SetNode == O.SetNode; }
+  bool operator!=(const AttributeSet &O) const { return !(*this == O); }
+
+  /// Add an argument attribute. Returns a new set because attribute sets are
+  /// immutable.
+  LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C,
+                                           Attribute::AttrKind Kind) const;
+
+  /// Add a target-dependent attribute. Returns a new set because attribute sets
+  /// are immutable.
+  LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C, StringRef Kind,
+                                           StringRef Value = StringRef()) const;
+
+  /// Add attributes to the attribute set. Returns a new set because attribute
+  /// sets are immutable.
+  LLVM_NODISCARD AttributeSet addAttributes(LLVMContext &C,
+                                            AttributeSet AS) const;
+
+  /// Remove the specified attribute from this set. Returns a new set because
+  /// attribute sets are immutable.
+  LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
+                                              Attribute::AttrKind Kind) const;
+
+  /// Remove the specified attribute from this set. Returns a new set because
+  /// attribute sets are immutable.
+  LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C,
+                                              StringRef Kind) const;
+
+  /// Remove the specified attributes from this set. Returns a new set because
+  /// attribute sets are immutable.
+  LLVM_NODISCARD AttributeSet
+  removeAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const;
+
+  /// Return the number of attributes in this set.
+  unsigned getNumAttributes() const;
+
+  /// Return true if any attributes exist in this set.
+  bool hasAttributes() const { return SetNode != nullptr; }
+
+  /// Return true if the attribute exists in this set.
+  bool hasAttribute(Attribute::AttrKind Kind) const;
+
+  /// Return true if the attribute exists in this set.
+  bool hasAttribute(StringRef Kind) const;
+
+  /// Return the attribute object.
+  Attribute getAttribute(Attribute::AttrKind Kind) const;
+
+  /// Return the target-dependent attribute object.
+  Attribute getAttribute(StringRef Kind) const;
+
+  unsigned getAlignment() const;
+  unsigned getStackAlignment() const;
+  uint64_t getDereferenceableBytes() const;
+  uint64_t getDereferenceableOrNullBytes() const;
+  Type *getByValType() const;
+  std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
+  std::string getAsString(bool InAttrGrp = false) const;
+
+  using iterator = const Attribute *;
+
+  iterator begin() const;
+  iterator end() const;
+#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
+  void dump() const;
+#endif
+};
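+// Example (illustrative sketch, not part of the patch): mutators return a
+// fresh set and leave the original untouched, since AttributeSet is an
+// immutable value type. Ctx is assumed to be an LLVMContext:
+//
+//   AttributeSet AS =
+//       AttributeSet::get(Ctx, {Attribute::get(Ctx, Attribute::NoAlias)});
+//   AttributeSet AS2 = AS.addAttribute(Ctx, Attribute::ReadOnly);
+//   // AS is unchanged; AS2 holds both NoAlias and ReadOnly.
+//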
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// Provide DenseMapInfo for AttributeSet.
+template <> struct DenseMapInfo<AttributeSet> {
+  static AttributeSet getEmptyKey() {
+    auto Val = static_cast<uintptr_t>(-1);
+    Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
+    return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val));
+  }
+
+  static AttributeSet getTombstoneKey() {
+    auto Val = static_cast<uintptr_t>(-2);
+    Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable;
+    return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val));
+  }
+
+  static unsigned getHashValue(AttributeSet AS) {
+    return (unsigned((uintptr_t)AS.SetNode) >> 4) ^
+           (unsigned((uintptr_t)AS.SetNode) >> 9);
+  }
+
+  static bool isEqual(AttributeSet LHS, AttributeSet RHS) { return LHS == RHS; }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class holds the attributes for a function, its return value, and
+/// its parameters. You access the attributes for each of them via an index into
+/// the AttributeList object. The function attributes are at index
+/// `AttributeList::FunctionIndex', the return value is at index
+/// `AttributeList::ReturnIndex', and the attributes for the parameters start at
+/// index `AttributeList::FirstArgIndex'.
+class AttributeList {
+public:
+  enum AttrIndex : unsigned {
+    ReturnIndex = 0U,
+    FunctionIndex = ~0U,
+    FirstArgIndex = 1,
+  };
+
+private:
+  friend class AttrBuilder;
+  friend class AttributeListImpl;
+  friend class AttributeSet;
+  friend class AttributeSetNode;
+  template <typename Ty> friend struct DenseMapInfo;
+
+  /// The attributes that we are managing. This can be null to represent
+  /// the empty attributes list.
+  AttributeListImpl *pImpl = nullptr;
+
+public:
+  /// Create an AttributeList with the specified parameters in it.
+  static AttributeList get(LLVMContext &C,
+                           ArrayRef<std::pair<unsigned, Attribute>> Attrs);
+  static AttributeList get(LLVMContext &C,
+                           ArrayRef<std::pair<unsigned, AttributeSet>> Attrs);
+
+  /// Create an AttributeList from attribute sets for a function, its
+  /// return value, and all of its arguments.
+  static AttributeList get(LLVMContext &C, AttributeSet FnAttrs,
+                           AttributeSet RetAttrs,
+                           ArrayRef<AttributeSet> ArgAttrs);
+
+private:
+  explicit AttributeList(AttributeListImpl *LI) : pImpl(LI) {}
+
+  static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets);
+
+public:
+  AttributeList() = default;
+
+  //===--------------------------------------------------------------------===//
+  // AttributeList Construction and Mutation
+  //===--------------------------------------------------------------------===//
+
+  /// Return an AttributeList with the specified parameters in it.
+  static AttributeList get(LLVMContext &C, ArrayRef<AttributeList> Attrs);
+  static AttributeList get(LLVMContext &C, unsigned Index,
+                           ArrayRef<Attribute::AttrKind> Kinds);
+  static AttributeList get(LLVMContext &C, unsigned Index,
+                           ArrayRef<StringRef> Kind);
+  static AttributeList get(LLVMContext &C, unsigned Index,
+                           const AttrBuilder &B);
+
+  /// Add an attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
+                                            Attribute::AttrKind Kind) const;
+
+  /// Add an attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList
+  addAttribute(LLVMContext &C, unsigned Index, StringRef Kind,
+               StringRef Value = StringRef()) const;
+
+  /// Add an attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index,
+                                            Attribute A) const;
+
+  /// Add attributes to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index,
+                                             const AttrBuilder &B) const;
+
+  /// Add an argument attribute to the list. Returns a new list because
+  /// attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addParamAttribute(
+      LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return addAttribute(C, ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Add an argument attribute to the list. Returns a new list because
+  /// attribute lists are immutable.
+  LLVM_NODISCARD AttributeList
+  addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind,
+                    StringRef Value = StringRef()) const {
+    return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value);
+  }
+
+  /// Add an attribute to the attribute list at the given arg indices. Returns a
+  /// new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addParamAttribute(LLVMContext &C,
+                                                 ArrayRef<unsigned> ArgNos,
+                                                 Attribute A) const;
+
+  /// Add an argument attribute to the list. Returns a new list because
+  /// attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C,
+                                                  unsigned ArgNo,
+                                                  const AttrBuilder &B) const {
+    return addAttributes(C, ArgNo + FirstArgIndex, B);
+  }
+
+  /// Remove the specified attribute at the specified index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
+                                               Attribute::AttrKind Kind) const;
+
+  /// Remove the specified attribute at the specified index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index,
+                                               StringRef Kind) const;
+
+  /// Remove the specified attributes at the specified index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeAttributes(
+      LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const;
+
+  /// Remove all attributes at the specified index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C,
+                                                unsigned Index) const;
+
+  /// Remove the specified attribute at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeParamAttribute(
+      LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Remove the specified attribute at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C,
+                                                    unsigned ArgNo,
+                                                    StringRef Kind) const {
+    return removeAttribute(C, ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Remove the specified attribute at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeParamAttributes(
+      LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const {
+    return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove);
+  }
+
+  /// Remove all attributes at the specified arg index from this
+  /// attribute list. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C,
+                                                     unsigned ArgNo) const {
+    return removeAttributes(C, ArgNo + FirstArgIndex);
+  }
+
+  /// \brief Add the dereferenceable attribute to the attribute set at the given
+  /// index. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C,
+                                                      unsigned Index,
+                                                      uint64_t Bytes) const;
+
+  /// \brief Add the dereferenceable attribute to the attribute set at the given
+  /// arg index. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addDereferenceableParamAttr(
+      LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
+    return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes);
+  }
+
+  /// Add the dereferenceable_or_null attribute to the attribute set at
+  /// the given index. Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr(
+      LLVMContext &C, unsigned Index, uint64_t Bytes) const;
+
+  /// Add the dereferenceable_or_null attribute to the attribute set at
+  /// the given arg index. Returns a new list because attribute lists are
+  /// immutable.
+  LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr(
+      LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
+    return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
+  }
+
+  /// Add the allocsize attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList
+  addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg,
+                   const Optional<unsigned> &NumElemsArg);
+
+  /// Add the allocsize attribute to the attribute set at the given arg index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList
+  addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg,
+                        const Optional<unsigned> &NumElemsArg) {
+    return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // AttributeList Accessors
+  //===--------------------------------------------------------------------===//
+
+  /// Retrieve the LLVM context.
+  LLVMContext &getContext() const;
+
+  /// The attributes for the specified index are returned.
+  AttributeSet getAttributes(unsigned Index) const;
+
+  /// The attributes for the argument or parameter at the given index are
+  /// returned.
+  AttributeSet getParamAttributes(unsigned ArgNo) const;
+
+  /// The attributes for the ret value are returned.
+  AttributeSet getRetAttributes() const;
+
+  /// The function attributes are returned.
+  AttributeSet getFnAttributes() const;
+
+  /// Return true if the attribute exists at the given index.
+  bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+
+  /// Return true if the attribute exists at the given index.
+  bool hasAttribute(unsigned Index, StringRef Kind) const;
+
+  /// Return true if any attributes exist at the given index.
+  bool hasAttributes(unsigned Index) const;
+
+  /// Return true if the attribute exists for the given argument.
+  bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return hasAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Return true if the attribute exists for the given argument.
+  bool hasParamAttr(unsigned ArgNo, StringRef Kind) const {
+    return hasAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Return true if any attributes exist for the given argument.
+  bool hasParamAttrs(unsigned ArgNo) const {
+    return hasAttributes(ArgNo + FirstArgIndex);
+  }
+
+  /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
+  /// may be faster.
+  bool hasFnAttribute(Attribute::AttrKind Kind) const;
+
+  /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
+  /// may be faster.
+  bool hasFnAttribute(StringRef Kind) const;
+
+  /// Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
+  bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
+
+  /// Return true if the specified attribute is set for at least one
+  /// parameter or for the return value. If Index is not nullptr, the index
+  /// of a parameter with the specified attribute is provided.
+  bool hasAttrSomewhere(Attribute::AttrKind Kind,
+                        unsigned *Index = nullptr) const;
+
+  /// Return the attribute object that exists at the given index.
+  Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+
+  /// Return the attribute object that exists at the given index.
+  Attribute getAttribute(unsigned Index, StringRef Kind) const;
+
+  /// Return the attribute object that exists at the arg index.
+  Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return getAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Return the attribute object that exists at the arg index.
+  Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const {
+    return getAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Return the alignment of the return value.
+  unsigned getRetAlignment() const;
+
+  /// Return the alignment for the specified function parameter.
+  unsigned getParamAlignment(unsigned ArgNo) const;
+
+  /// Return the byval type for the specified function parameter.
+  Type *getParamByValType(unsigned ArgNo) const;
+
+  /// Get the stack alignment.
+  unsigned getStackAlignment(unsigned Index) const;
+
+  /// Get the number of dereferenceable bytes (or zero if unknown).
+  uint64_t getDereferenceableBytes(unsigned Index) const;
+
+  /// Get the number of dereferenceable bytes (or zero if unknown) of an
+  /// arg.
+  uint64_t getParamDereferenceableBytes(unsigned ArgNo) const {
+    return getDereferenceableBytes(ArgNo + FirstArgIndex);
+  }
+
+  /// Get the number of dereferenceable_or_null bytes (or zero if
+  /// unknown).
+  uint64_t getDereferenceableOrNullBytes(unsigned Index) const;
+
+  /// Get the number of dereferenceable_or_null bytes (or zero if
+  /// unknown) of an arg.
+  uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const {
+    return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex);
+  }
+
+  /// Get the allocsize argument numbers (or pair(0, 0) if unknown).
+  std::pair<unsigned, Optional<unsigned>>
+  getAllocSizeArgs(unsigned Index) const;
+
+  /// Return the attributes at the index as a string.
+  std::string getAsString(unsigned Index, bool InAttrGrp = false) const;
+
+  //===--------------------------------------------------------------------===//
+  // AttributeList Introspection
+  //===--------------------------------------------------------------------===//
+
+  using iterator = const AttributeSet *;
+
+  iterator begin() const;
+  iterator end() const;
+
+  unsigned getNumAttrSets() const;
+
+  /// Use these to iterate over the valid attribute indices.
+  unsigned index_begin() const { return AttributeList::FunctionIndex; }
+  unsigned index_end() const { return getNumAttrSets() - 1; }
+
+  /// operator==/!= - Provide equality predicates.
+  bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; }
+  bool operator!=(const AttributeList &RHS) const { return pImpl != RHS.pImpl; }
+
+  /// Return a raw pointer that uniquely identifies this attribute list.
+  void *getRawPointer() const {
+    return pImpl;
+  }
+
+  /// Return true if there are no attributes.
+  bool isEmpty() const { return pImpl == nullptr; }
+
+  void dump() const;
+};
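+// Example (illustrative sketch): the Param helpers above shift the argument
+// number by FirstArgIndex, so callers never compute raw indices themselves.
+// Ctx and F are assumed to be an LLVMContext and a Function*:
+//
+//   AttributeList AL = F->getAttributes();
+//   AL = AL.addParamAttribute(Ctx, /*ArgNo=*/0, Attribute::NoCapture);
+//   bool HasIt = AL.hasParamAttr(0, Attribute::NoCapture); // true
+//   F->setAttributes(AL); // lists are immutable, so reassign explicitly
+//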
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// Provide DenseMapInfo for AttributeList.
+template <> struct DenseMapInfo<AttributeList> {
+  static AttributeList getEmptyKey() {
+    auto Val = static_cast<uintptr_t>(-1);
+    Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
+    return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
+  }
+
+  static AttributeList getTombstoneKey() {
+    auto Val = static_cast<uintptr_t>(-2);
+    Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
+    return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
+  }
+
+  static unsigned getHashValue(AttributeList AS) {
+    return (unsigned((uintptr_t)AS.pImpl) >> 4) ^
+           (unsigned((uintptr_t)AS.pImpl) >> 9);
+  }
+
+  static bool isEqual(AttributeList LHS, AttributeList RHS) {
+    return LHS == RHS;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class is used in conjunction with the Attribute::get method to
+/// create an Attribute object. The object itself is uniquified. The Builder's
+/// value, however, is not. So this can be used as a quick way to test for
+/// equality, presence of attributes, etc.
+class AttrBuilder {
+  std::bitset<Attribute::EndAttrKinds> Attrs;
+  std::map<std::string, std::string> TargetDepAttrs;
+  uint64_t Alignment = 0;
+  uint64_t StackAlignment = 0;
+  uint64_t DerefBytes = 0;
+  uint64_t DerefOrNullBytes = 0;
+  uint64_t AllocSizeArgs = 0;
+  // BuffSize stores the size of the buffer that will be used in FPGA
+  // transformations.
+  uint64_t BuffSize = 0;
+  // ChannelDepth stores the depth of the channel for FPGA transformations.
+  uint64_t ChannelDepth = 0;
+
+  Type *ByValType = nullptr;
+
+public:
+  AttrBuilder() = default;
+
+  AttrBuilder(const Attribute &A) {
+    addAttribute(A);
+  }
+
+  AttrBuilder(AttributeList AS, unsigned Idx);
+  AttrBuilder(AttributeSet AS);
+
+  void clear();
+
+  /// Add an attribute to the builder.
+  AttrBuilder &addAttribute(Attribute::AttrKind Val);
+
+  /// Add the Attribute object to the builder.
+  AttrBuilder &addAttribute(Attribute A);
+
+  /// Add the target-dependent attribute to the builder.
+  AttrBuilder &addAttribute(StringRef A, StringRef V = StringRef());
+
+  /// Remove an attribute from the builder.
+  AttrBuilder &removeAttribute(Attribute::AttrKind Val);
+
+  /// Remove the attributes from the builder.
+  AttrBuilder &removeAttributes(AttributeList A, uint64_t WithoutIndex);
+
+  /// Remove the target-dependent attribute from the builder.
+  AttrBuilder &removeAttribute(StringRef A);
+
+  /// Add the attributes from the builder.
+  AttrBuilder &merge(const AttrBuilder &B);
+
+  /// Remove the attributes from the builder.
+  AttrBuilder &remove(const AttrBuilder &B);
+
+  /// Return true if the builder has any attribute that's in the
+  /// specified builder.
+  bool overlaps(const AttrBuilder &B) const;
+
+  /// Return true if the builder has the specified attribute.
+  bool contains(Attribute::AttrKind A) const {
+    assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!");
+    return Attrs[A];
+  }
+
+  /// Return true if the builder has the specified target-dependent
+  /// attribute.
+  bool contains(StringRef A) const;
+
+  /// Return true if the builder has IR-level attributes.
+  bool hasAttributes() const;
+
+  /// Return true if the builder has any attribute that's in the
+  /// specified attribute.
+  bool hasAttributes(AttributeList A, uint64_t Index) const;
+
+  /// Return true if the builder has an alignment attribute.
+  bool hasAlignmentAttr() const;
+
+  /// Retrieve the alignment attribute, if it exists.
+  uint64_t getAlignment() const { return Alignment; }
+
+  /// Retrieve the stack alignment attribute, if it exists.
+  uint64_t getStackAlignment() const { return StackAlignment; }
+
+  /// Retrieve the number of dereferenceable bytes, if the
+  /// dereferenceable attribute exists (zero is returned otherwise).
+  uint64_t getDereferenceableBytes() const { return DerefBytes; }
+
+  /// Retrieve the number of dereferenceable_or_null bytes, if the
+  /// dereferenceable_or_null attribute exists (zero is returned otherwise).
+  uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; }
+
+  /// Retrieve the maximum buffer size BuffSize, if the buffer or private
+  /// attribute exists (zero is returned otherwise).
+  uint64_t getBuffSize() const { return BuffSize; }
+
+  /// Retrieve the channel depth ChannelDepth, if the channel
+  /// attribute exists (zero is returned otherwise).
+  uint64_t getChannelDepth() const { return ChannelDepth; }
+
+  /// Retrieve the byval type.
+  Type *getByValType() const { return ByValType; }
+
+  /// Retrieve the allocsize args, if the allocsize attribute exists.  If it
+  /// doesn't exist, pair(0, 0) is returned.
+  std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
+
+  /// This turns an int buffer size into the form used internally in Attribute.
+  AttrBuilder &addBufferOrPrivAttr(Attribute::AttrKind kind, unsigned size);
+
+  /// This turns an int channel depth into the form used internally in
+  /// Attribute.
+  AttrBuilder &addChannelAttr(Attribute::AttrKind kind, unsigned depth);
+
+  /// This turns an int alignment (which must be a power of 2) into the
+  /// form used internally in Attribute.
+  AttrBuilder &addAlignmentAttr(unsigned Align);
+
+  /// This turns an int stack alignment (which must be a power of 2) into
+  /// the form used internally in Attribute.
+  AttrBuilder &addStackAlignmentAttr(unsigned Align);
+
+  /// This turns the number of dereferenceable bytes into the form used
+  /// internally in Attribute.
+  AttrBuilder &addDereferenceableAttr(uint64_t Bytes);
+
+  /// This turns the number of dereferenceable_or_null bytes into the
+  /// form used internally in Attribute.
+  AttrBuilder &addDereferenceableOrNullAttr(uint64_t Bytes);
+
+  /// This turns one (or two) ints into the form used internally in Attribute.
+  AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg,
+                                const Optional<unsigned> &NumElemsArg);
+
+  /// This turns a byval type into the form used internally in Attribute.
+  AttrBuilder &addByValAttr(Type *Ty);
+
+  /// Add an allocsize attribute, using the representation returned by
+  /// Attribute.getIntValue().
+  AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr);
+
+  /// Return true if the builder contains no target-independent
+  /// attributes.
+  bool empty() const { return Attrs.none(); }
+
+  // Iterators for target-dependent attributes.
+  using td_type = std::pair<std::string, std::string>;
+  using td_iterator = std::map<std::string, std::string>::iterator;
+  using td_const_iterator = std::map<std::string, std::string>::const_iterator;
+  using td_range = iterator_range<td_iterator>;
+  using td_const_range = iterator_range<td_const_iterator>;
+
+  td_iterator td_begin() { return TargetDepAttrs.begin(); }
+  td_iterator td_end() { return TargetDepAttrs.end(); }
+
+  td_const_iterator td_begin() const { return TargetDepAttrs.begin(); }
+  td_const_iterator td_end() const { return TargetDepAttrs.end(); }
+
+  td_range td_attrs() { return td_range(td_begin(), td_end()); }
+
+  td_const_range td_attrs() const {
+    return td_const_range(td_begin(), td_end());
+  }
+
+  bool td_empty() const { return TargetDepAttrs.empty(); }
+
+  bool operator==(const AttrBuilder &B);
+  bool operator!=(const AttrBuilder &B) {
+    return !(*this == B);
+  }
+};
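+// Example (illustrative sketch): an AttrBuilder accumulates attributes
+// cheaply before they are uniqued into an AttributeList. addChannelAttr is
+// one of the FPGA helpers added by this patch; its exact encoding is defined
+// by the accompanying Attributes.cpp changes:
+//
+//   AttrBuilder B;
+//   B.addAttribute(Attribute::NoUnwind)
+//       .addAlignmentAttr(16)
+//       .addChannelAttr(Attribute::Channel, /*depth=*/4);
+//   AttributeList AL =
+//       AttributeList::get(Ctx, AttributeList::FunctionIndex, B);
+//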
+
+namespace AttributeFuncs {
+
+/// Which attributes cannot be applied to a type.
+AttrBuilder typeIncompatible(Type *Ty);
+
+/// \returns True if the two functions have compatible target-independent
+/// attributes for inlining purposes.
+bool areInlineCompatible(const Function &Caller, const Function &Callee);
+
+/// Merge caller's and callee's attributes.
+void mergeAttributesForInlining(Function &Caller, const Function &Callee);
+
+} // end namespace AttributeFuncs
+
+} // end namespace llvm
+
+#endif // LLVM_IR_ATTRIBUTES_H
diff --git a/hpvm/llvm_patches/include/IR/Attributes.td b/hpvm/llvm_patches/include/llvm/IR/Attributes.td
similarity index 97%
rename from hpvm/llvm_patches/include/IR/Attributes.td
rename to hpvm/llvm_patches/include/llvm/IR/Attributes.td
index c6ff8ef3c6c962f5444d718ff5a7e16ce392a522..8b7bf371cd904f410937c28d93be740fb46c01f9 100644
--- a/hpvm/llvm_patches/include/IR/Attributes.td
+++ b/hpvm/llvm_patches/include/llvm/IR/Attributes.td
@@ -161,6 +161,15 @@ def Out : EnumAttr<"out">;
 /// Pointer to read/write memory
 def InOut : EnumAttr<"inout">;
 
+/// Pointer to private memory
+def Priv : EnumAttr<"priv">;
+
+/// Pointer to bufferable memory
+def BufferIn : EnumAttr<"bufferin">;
+def BufferOut : EnumAttr<"bufferout">;
+
+/// Pointer to potential channel
+def Channel : EnumAttr<"channel">;
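+
+// Illustrative only: with these definitions the new mnemonics can appear in
+// textual IR, e.g.
+//   define void @f(i32* priv %p, i32* channel %c)
+// (lexing/parsing support comes from the AsmParser patches in this change).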
 
 /// Alignment of stack for function (3 bits)  stored as log2 of alignment with
 /// +1 bias 0 means unaligned (different from alignstack=(1)).
diff --git a/hpvm/llvm_patches/include/IR/Intrinsics.td b/hpvm/llvm_patches/include/llvm/IR/Intrinsics.td
similarity index 100%
rename from hpvm/llvm_patches/include/IR/Intrinsics.td
rename to hpvm/llvm_patches/include/llvm/IR/Intrinsics.td
diff --git a/hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td b/hpvm/llvm_patches/include/llvm/IR/IntrinsicsHPVM.td
similarity index 90%
rename from hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td
rename to hpvm/llvm_patches/include/llvm/IR/IntrinsicsHPVM.td
index 0287f083849d9385faa00cc2943834b5b58439b0..e792c98e8ccf1c10b79b75318d258c0788bdfb8a 100644
--- a/hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td
+++ b/hpvm/llvm_patches/include/llvm/IR/IntrinsicsHPVM.td
@@ -64,27 +64,27 @@ let TargetPrefix = "hpvm" in {
   def int_hpvm_requestMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>;
 
   /* Create Node intrinsic -
-   * i8* llvm.hpvm.createNode(function*);
+   * i8* llvm.hpvm.createNode(function*, i64);
    */
-  def int_hpvm_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+  def int_hpvm_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty], []>;
 
   /* Create Node 1D array intrinsic -
-   * i8* llvm.hpvm.createNode1D(function*, i64);
+   * i8* llvm.hpvm.createNode1D(function*, i64, i64);
    */
   def int_hpvm_createNode1D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
-                                        llvm_i64_ty], []>;
+                                        llvm_i64_ty, llvm_i32_ty], []>;
 
   /* Create Node 2D array intrinsic -
-   * i8* llvm.hpvm.createNode2D(function*, i64, i64);
+   * i8* llvm.hpvm.createNode2D(function*, i64, i64, i64);
    */
   def int_hpvm_createNode2D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
-                                        llvm_i64_ty, llvm_i64_ty], []>;
+                                        llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>;
 
   /* Create Node 3D array intrinsic -
-   * i8* llvm.hpvm.createNode2D(function*, i64, i64, i64);
+   * i8* llvm.hpvm.createNode3D(function*, i64, i64, i64, i64);
    */
   def int_hpvm_createNode3D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
-                                        llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
+                                        llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
                                         []>;
 
   /* Create dataflow edge intrinsic -
@@ -204,9 +204,28 @@ let TargetPrefix = "hpvm" in {
   /* i32 llvm.hpvm.atomic.xor(i32*, i32)*/
   def int_hpvm_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty, llvm_i32_ty],
                                     []>;
+  /* ============ HPVM-EPOCH Intrinsics ============= */
+  // Intrinsics for hpvm-EPOCH
+
+  /*  void llvm.hpvm.task(i32) */
+  def int_hpvm_task: Intrinsic<[], [llvm_i32_ty], []>;
+
+  /* ============ HPVM-FPGA Intrinsics ============= */
+  // Intrinsics for HPVM-FPGA
+
+  /*  void llvm.hpvm.nz.loop(i64, i32) */
+  def int_hpvm_nz_loop: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty], []>;
+
+  /* int/float llvm.hpvm.ch.read(i32) */
+  def int_hpvm_ch_read: Intrinsic<[llvm_any_ty], [llvm_i32_ty], []>;
+
+  /* void llvm.hpvm.ch.write.x(int/float, i32) */
+  def int_hpvm_ch_write: Intrinsic<[], [llvm_any_ty, llvm_i32_ty], []>;
 
+  /* void llvm.hpvm.sequentialized.loop(i32 depth, i32 dimension, i64 indvar, i64 bound) */
+  def int_hpvm_sequentialized_loop: Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>;
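+
+  /* Illustrative lowering (not defined by this patch): a producer/consumer
+   * pair over channel 0 might appear in IR as
+   *   call void @llvm.hpvm.ch.write.f32(float %v, i32 0)
+   *   %r = call float @llvm.hpvm.ch.read.f32(i32 0)
+   * with the .f32 suffix being the usual mangling for llvm_any_ty overloads.
+   */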
 
- /***************************************************************************/
+  /***************************************************************************/
   /*                            ApproxHPVM intrinsics                        */
   /***************************************************************************/
 
diff --git a/hpvm/llvm_patches/include/Support/Debug.h b/hpvm/llvm_patches/include/llvm/Support/Debug.h
similarity index 100%
rename from hpvm/llvm_patches/include/Support/Debug.h
rename to hpvm/llvm_patches/include/llvm/Support/Debug.h
diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Scalar/ADCE.h b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/ADCE.h
new file mode 100644
index 0000000000000000000000000000000000000000..535049bd8b87250de76f36754b525b5cd220a8a3
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/ADCE.h
@@ -0,0 +1,167 @@
+//===- ADCE.h - Aggressive dead code elimination ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface for the Aggressive Dead Code Elimination
+// pass. This pass optimistically assumes that all instructions are dead until
+// proven otherwise, allowing it to eliminate dead computations that other DCE
+// passes do not catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ADCE_H
+#define LLVM_TRANSFORMS_SCALAR_ADCE_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+/// A DCE pass that assumes instructions are dead until proven otherwise.
+///
+/// This pass eliminates dead code by optimistically assuming that all
+/// instructions are dead until proven otherwise. This allows it to eliminate
+/// dead computations that other DCE passes do not catch, particularly involving
+/// loop computations.
+struct ADCEPass : PassInfoMixin<ADCEPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
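+// Example (illustrative): with the new pass manager, the pass is scheduled
+// through a FunctionPassManager; FAM is assumed to be a populated
+// FunctionAnalysisManager:
+//
+//   FunctionPassManager FPM;
+//   FPM.addPass(ADCEPass());
+//   FPM.run(F, FAM);
+//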
+
+/// Information about Instructions
+struct InstInfoType {
+  /// True if the associated instruction is live.
+  bool Live = false;
+
+  /// Quick access to information for block containing associated Instruction.
+  struct BlockInfoType *Block = nullptr;
+};
+
+/// Information about basic blocks relevant to dead code elimination.
+struct BlockInfoType {
+  /// True when this block contains a live instructions.
+  bool Live = false;
+
+  /// True when this block ends in an unconditional branch.
+  bool UnconditionalBranch = false;
+
+  /// True when this block is known to have live PHI nodes.
+  bool HasLivePhiNodes = false;
+
+  /// Control dependence sources need to be live for this block.
+  bool CFLive = false;
+
+  /// Quick access to the LiveInfo for the terminator,
+  /// holds the value &InstInfo[Terminator]
+  InstInfoType *TerminatorLiveInfo = nullptr;
+
+  /// Corresponding BasicBlock.
+  BasicBlock *BB = nullptr;
+
+  /// Cache of BB->getTerminator().
+  Instruction *Terminator = nullptr;
+
+  /// Post-order numbering of reverse control flow graph.
+  unsigned PostOrder;
+
+  bool terminatorIsLive() const { return TerminatorLiveInfo->Live; }
+};
+
+class AggressiveDeadCodeElimination {
+  Function &F;
+
+  // ADCE does not use DominatorTree per se, but it updates it to preserve the
+  // analysis.
+  DominatorTree *DT;
+  PostDominatorTree &PDT;
+
+  /// Mapping of blocks to associated information.
+  /// Use MapVector to get deterministic iteration order.
+  MapVector<BasicBlock *, BlockInfoType> BlockInfo;
+  bool isLive(BasicBlock *BB) { return BlockInfo[BB].Live; }
+
+  /// Mapping of instructions to associated information.
+  DenseMap<Instruction *, InstInfoType> InstInfo;
+  bool isLive(Instruction *I) { return InstInfo[I].Live; }
+
+  /// Instructions known to be live where we need to mark
+  /// reaching definitions as live.
+  SmallVector<Instruction *, 128> Worklist;
+
+  /// Debug info scopes around a live instruction.
+  SmallPtrSet<const Metadata *, 32> AliveScopes;
+
+  /// Set of blocks not known to have live terminators.
+  SmallSetVector<BasicBlock *, 16> BlocksWithDeadTerminators;
+
+  /// The set of blocks whose control dependence sources we have determined
+  /// must be live, but whose dependences have not yet been analyzed.
+  SmallPtrSet<BasicBlock *, 16> NewLiveBlocks;
+
+  /// Set up auxiliary data structures for Instructions and BasicBlocks and
+  /// initialize the Worklist to the set of must-be-live Instructions.
+  void initialize();
+
+  /// Return true for operations which are always treated as live.
+  bool isAlwaysLive(Instruction &I);
+
+  /// Return true for instrumentation instructions for value profiling.
+  bool isInstrumentsConstant(Instruction &I);
+
+  /// Propagate liveness to reaching definitions.
+  void markLiveInstructions();
+
+  /// Mark an instruction as live.
+  void markLive(Instruction *I);
+
+  /// Mark a block as live.
+  void markLive(BlockInfoType &BB);
+  void markLive(BasicBlock *BB) { markLive(BlockInfo[BB]); }
+
+  /// Mark terminators of control predecessors of a PHI node live.
+  void markPhiLive(PHINode *PN);
+
+  /// Record the Debug Scopes which surround live debug information.
+  void collectLiveScopes(const DILocalScope &LS);
+  void collectLiveScopes(const DILocation &DL);
+
+  /// Analyze dead branches to find those whose branches are the sources
+  /// of control dependences impacting a live block. Those branches are
+  /// marked live.
+  void markLiveBranchesFromControlDependences();
+
+  /// Remove instructions not marked live; return true if any instruction
+  /// was removed.
+  bool removeDeadInstructions();
+
+  /// Identify connected sections of the control flow graph which have
+  /// dead terminators and rewrite the control flow graph to remove them.
+  void updateDeadRegions();
+
+  /// Set the BlockInfo::PostOrder field based on a post-order
+  /// numbering of the reverse control flow graph.
+  void computeReversePostOrder();
+
+  /// Make the terminator of this block an unconditional branch to \p Target.
+  void makeUnconditional(BasicBlock *BB, BasicBlock *Target);
+
+public:
+  AggressiveDeadCodeElimination(Function &F, DominatorTree *DT,
+                                PostDominatorTree &PDT)
+      : F(F), DT(DT), PDT(PDT) {}
+
+  bool performDeadCodeElimination();
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_ADCE_H
diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Scalar/EarlyCSE.h b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/EarlyCSE.h
new file mode 100644
index 0000000000000000000000000000000000000000..67094d7547ca43fa82276b981b12b02bec139eed
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -0,0 +1,81 @@
+//===- EarlyCSE.h - Simple and fast CSE pass --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides the interface for a simple, fast CSE pass.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_EARLYCSE_H
+#define LLVM_TRANSFORMS_SCALAR_EARLYCSE_H
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+/// A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> {
+  EarlyCSEPass(bool UseMemorySSA = false) : UseMemorySSA(UseMemorySSA) {}
+
+  /// Run the pass over the function.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+
+  bool UseMemorySSA;
+};
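+// Example (illustrative): the MemorySSA-backed variant can also eliminate
+// redundant loads and stores across basic blocks:
+//
+//   FPM.addPass(EarlyCSEPass(/*UseMemorySSA=*/true));
+//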
+
+/// A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+template <bool UseMemorySSA>
+class EarlyCSELegacyCommonPass : public FunctionPass {
+public:
+  static char ID;
+
+  EarlyCSELegacyCommonPass() : FunctionPass(ID) {
+    if (UseMemorySSA)
+      initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
+    else
+      initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addRequired<TargetLibraryInfoWrapperPass>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    if (UseMemorySSA) {
+      AU.addRequired<MemorySSAWrapperPass>();
+      AU.addPreserved<MemorySSAWrapperPass>();
+    }
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.setPreservesCFG();
+  }
+};
+
+} // namespace llvm
+#endif // LLVM_TRANSFORMS_SCALAR_EARLYCSE_H
diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Scalar/SimplifyCFG.h b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/SimplifyCFG.h
new file mode 100644
index 0000000000000000000000000000000000000000..d40099ef0dc5568f1fbc81941f7b582a9b352fca
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/SimplifyCFG.h
@@ -0,0 +1,86 @@
+//===- SimplifyCFG.h - Simplify and canonicalize the CFG --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+/// This file provides the interface for the pass responsible for both
+/// simplifying and canonicalizing the CFG.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H
+#define LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H
+
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+
+using namespace llvm;
+
+namespace llvm {
+
+/// A pass to simplify and canonicalize the CFG of a function.
+///
+/// This pass iteratively simplifies the entire CFG of a function. It may change
+/// or remove control flow to put the CFG into a canonical form expected by
+/// other passes of the mid-level optimizer. Depending on the specified options,
+/// it may further optimize control-flow to create non-canonical forms.
+class SimplifyCFGPass : public PassInfoMixin<SimplifyCFGPass> {
+  SimplifyCFGOptions Options;
+
+public:
+  /// The default constructor sets the pass options to create canonical IR,
+  /// rather than optimal IR. That is, by default we bypass transformations that
+  /// are likely to improve performance but make analysis for other passes more
+  /// difficult.
+  SimplifyCFGPass()
+      : SimplifyCFGPass(SimplifyCFGOptions()
+                            .forwardSwitchCondToPhi(false)
+                            .convertSwitchToLookupTable(false)
+                            .needCanonicalLoops(true)
+                            .sinkCommonInsts(false)) {}
+
+  /// Construct a pass with optional optimizations.
+  SimplifyCFGPass(const SimplifyCFGOptions &PassOptions);
+
+  /// Run the pass over the function.
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
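+// Example (illustrative): late in a pipeline, a client might opt into the
+// non-canonical transforms that the default constructor disables:
+//
+//   FPM.addPass(SimplifyCFGPass(SimplifyCFGOptions()
+//                                   .convertSwitchToLookupTable(true)
+//                                   .sinkCommonInsts(true)));
+//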
+
+bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
+                         const SimplifyCFGOptions &Options);
+
+struct CFGSimplifyPass : public FunctionPass {
+  static char ID;
+  SimplifyCFGOptions Options;
+  std::function<bool(const Function &)> PredicateFtor;
+
+  CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
+                  bool ConvertSwitch = false, bool KeepLoops = true,
+                  bool SinkCommon = false,
+                  std::function<bool(const Function &)> Ftor = nullptr);
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F)))
+      return false;
+
+    Options.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+    auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+    return llvm::simplifyFunctionCFG(F, TTI, Options);
+  }
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionCacheTracker>();
+    AU.addRequired<TargetTransformInfoWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+} // namespace llvm
+
+#endif
diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Utils/LoopSimplify.h b/hpvm/llvm_patches/include/llvm/Transforms/Utils/LoopSimplify.h
new file mode 100644
index 0000000000000000000000000000000000000000..96a5e571f0f2d2ec45c9cea68f3fe897e9dedc16
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Transforms/Utils/LoopSimplify.h
@@ -0,0 +1,109 @@
+//===- LoopSimplify.h - Loop Canonicalization Pass --------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header.  This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header).  This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H
+#define LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H
+
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/Transforms/Utils.h"
+
+namespace llvm {
+
+class MemorySSAUpdater;
+
+/// This pass is responsible for loop canonicalization.
+class LoopSimplifyPass : public PassInfoMixin<LoopSimplifyPass> {
+public:
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+/// Simplify each loop in a loop nest recursively.
+///
+/// This takes a potentially un-simplified loop L (and its children) and turns
+/// it into a simplified loop nest with preheaders and single backedges. It will
+/// update \c DominatorTree, \c LoopInfo, \c ScalarEvolution and \c MemorySSA
+/// analyses if they're non-null, and LCSSA if \c PreserveLCSSA is true.
+bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE,
+                  AssumptionCache *AC, MemorySSAUpdater *MSSAU,
+                  bool PreserveLCSSA);
+
+struct LoopSimplify : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+  LoopSimplify() : FunctionPass(ID) {
+    initializeLoopSimplifyPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override;
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<AssumptionCacheTracker>();
+
+    // We need loop information to identify the loops...
+    AU.addRequired<DominatorTreeWrapperPass>();
+    AU.addPreserved<DominatorTreeWrapperPass>();
+
+    AU.addRequired<LoopInfoWrapperPass>();
+    AU.addPreserved<LoopInfoWrapperPass>();
+
+    AU.addPreserved<BasicAAWrapperPass>();
+    AU.addPreserved<AAResultsWrapperPass>();
+    AU.addPreserved<GlobalsAAWrapperPass>();
+    AU.addPreserved<ScalarEvolutionWrapperPass>();
+    AU.addPreserved<SCEVAAWrapperPass>();
+    AU.addPreservedID(LCSSAID);
+    AU.addPreserved<DependenceAnalysisWrapperPass>();
+    AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
+    AU.addPreserved<BranchProbabilityInfoWrapperPass>();
+    if (EnableMSSALoopDependency)
+      AU.addPreserved<MemorySSAWrapperPass>();
+  }
+
+  /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+  void verifyAnalysis() const override;
+};
+} // namespace llvm
+
+#endif // LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H
diff --git a/hpvm/llvm_patches/lib/Analysis/CMakeLists.txt b/hpvm/llvm_patches/lib/Analysis/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..0f1c6014a329fab214630a8f102f51d47279750c
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Analysis/CMakeLists.txt
@@ -0,0 +1,105 @@
+add_llvm_library(LLVMAnalysis
+  AliasAnalysis.cpp
+  AliasAnalysisEvaluator.cpp
+  AliasAnalysisSummary.cpp
+  AliasSetTracker.cpp
+  Analysis.cpp
+  AssumptionCache.cpp
+  BasicAliasAnalysis.cpp
+  BlockFrequencyInfo.cpp
+  BlockFrequencyInfoImpl.cpp
+  BranchProbabilityInfo.cpp
+  CFG.cpp
+  CFGPrinter.cpp
+  CFLAndersAliasAnalysis.cpp
+  CFLSteensAliasAnalysis.cpp
+  CGSCCPassManager.cpp
+  CallGraph.cpp
+  CallGraphSCCPass.cpp
+  CallPrinter.cpp
+  CaptureTracking.cpp
+  CmpInstAnalysis.cpp
+  CostModel.cpp
+  CodeMetrics.cpp
+  ConstantFolding.cpp
+  DDG.cpp
+  Delinearization.cpp
+  DemandedBits.cpp
+  DependenceAnalysis.cpp
+  DependenceGraphBuilder.cpp
+  DivergenceAnalysis.cpp
+  DomPrinter.cpp
+  DomTreeUpdater.cpp
+  DominanceFrontier.cpp
+  EHPersonalities.cpp
+  GlobalsModRef.cpp
+  GuardUtils.cpp
+  IVDescriptors.cpp
+  IVUsers.cpp
+  IndirectCallPromotionAnalysis.cpp
+  InlineCost.cpp
+  InstCount.cpp
+  InstructionPrecedenceTracking.cpp
+  InstructionSimplify.cpp
+  Interval.cpp
+  IntervalPartition.cpp
+  LazyBranchProbabilityInfo.cpp
+  LazyBlockFrequencyInfo.cpp
+  LazyCallGraph.cpp
+  LazyValueInfo.cpp
+  LegacyDivergenceAnalysis.cpp
+  Lint.cpp
+  Loads.cpp
+  LoopAccessAnalysis.cpp
+  LoopAnalysisManager.cpp
+  LoopUnrollAnalyzer.cpp
+  LoopInfo.cpp
+  LoopPass.cpp
+  MemDepPrinter.cpp
+  MemDerefPrinter.cpp
+  MemoryBuiltins.cpp
+  MemoryDependenceAnalysis.cpp
+  MemoryLocation.cpp
+  MemorySSA.cpp
+  MemorySSAUpdater.cpp
+  ModuleDebugInfoPrinter.cpp
+  ModuleSummaryAnalysis.cpp
+  MustExecute.cpp
+  ObjCARCAliasAnalysis.cpp
+  ObjCARCAnalysisUtils.cpp
+  ObjCARCInstKind.cpp
+  OptimizationRemarkEmitter.cpp
+  OrderedBasicBlock.cpp
+  OrderedInstructions.cpp
+  PHITransAddr.cpp
+  PhiValues.cpp
+  PostDominators.cpp
+  ProfileSummaryInfo.cpp
+  PtrUseVisitor.cpp
+  RegionInfo.cpp
+  RegionPass.cpp
+  RegionPrinter.cpp
+  ScalarEvolution.cpp
+  ScalarEvolutionAliasAnalysis.cpp
+  ScalarEvolutionExpander.cpp
+  ScalarEvolutionNormalization.cpp
+  StackSafetyAnalysis.cpp
+  SyncDependenceAnalysis.cpp
+  SyntheticCountsUtils.cpp
+  TargetLibraryInfo.cpp
+  TargetTransformInfo.cpp
+  Trace.cpp
+  TypeBasedAliasAnalysis.cpp
+  TypeMetadataUtils.cpp
+  ScopedNoAliasAA.cpp
+  ValueLattice.cpp
+  ValueLatticeUtils.cpp
+  ValueTracking.cpp
+  VectorUtils.cpp
+
+  ADDITIONAL_HEADER_DIRS
+  ${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis
+
+  DEPENDS
+  intrinsics_gen
+  )
diff --git a/hpvm/llvm_patches/lib/Analysis/DDG.cpp b/hpvm/llvm_patches/lib/Analysis/DDG.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7bd3044918492ee29aae21599c77f6e66f38804c
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Analysis/DDG.cpp
@@ -0,0 +1,326 @@
+//===- DDG.cpp - Data Dependence Graph ------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// The implementation for the data dependence graph.
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/DDG.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/LoopIterator.h"
+#include "llvm/Support/CommandLine.h"
+#include <memory>
+
+using namespace llvm;
+
+static cl::opt<bool> SimplifyDDG(
+    "ddg-simplify", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+    cl::desc(
+        "Simplify DDG by merging nodes that have less interesting edges."));
+
+static cl::opt<bool>
+    CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore,
+                   cl::desc("Create pi-block nodes."));
+
+#define DEBUG_TYPE "ddg"
+
+template class llvm::DGEdge<DDGNode, DDGEdge>;
+template class llvm::DGNode<DDGNode, DDGEdge>;
+template class llvm::DirectedGraph<DDGNode, DDGEdge>;
+
+//===--------------------------------------------------------------------===//
+// DDGNode implementation
+//===--------------------------------------------------------------------===//
+DDGNode::~DDGNode() {}
+
+bool DDGNode::collectInstructions(
+    llvm::function_ref<bool(Instruction *)> const &Pred,
+    InstructionListType &IList) const {
+  assert(IList.empty() && "Expected the IList to be empty on entry.");
+  if (isa<SimpleDDGNode>(this)) {
+    for (Instruction *I : cast<const SimpleDDGNode>(this)->getInstructions())
+      if (Pred(I))
+        IList.push_back(I);
+  } else if (isa<PiBlockDDGNode>(this)) {
+    for (const DDGNode *PN : cast<const PiBlockDDGNode>(this)->getNodes()) {
+      assert(!isa<PiBlockDDGNode>(PN) && "Nested PiBlocks are not supported.");
+      SmallVector<Instruction *, 8> TmpIList;
+      PN->collectInstructions(Pred, TmpIList);
+      IList.insert(IList.end(), TmpIList.begin(), TmpIList.end());
+    }
+  } else
+    llvm_unreachable("unimplemented type of node");
+  return !IList.empty();
+}
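+// Example (illustrative): callers filter with a predicate, e.g. to gather
+// only the stores represented by a node N:
+//
+//   SmallVector<Instruction *, 8> Stores;
+//   N->collectInstructions(
+//       [](Instruction *I) { return isa<StoreInst>(I); }, Stores);
+//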
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) {
+  const char *Out;
+  switch (K) {
+  case DDGNode::NodeKind::SingleInstruction:
+    Out = "single-instruction";
+    break;
+  case DDGNode::NodeKind::MultiInstruction:
+    Out = "multi-instruction";
+    break;
+  case DDGNode::NodeKind::PiBlock:
+    Out = "pi-block";
+    break;
+  case DDGNode::NodeKind::Root:
+    Out = "root";
+    break;
+  case DDGNode::NodeKind::Unknown:
+    Out = "?? (error)";
+    break;
+  }
+  OS << Out;
+  return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) {
+  OS << "Node Address:" << &N << ":" << N.getKind() << "\n";
+  if (isa<SimpleDDGNode>(N)) {
+    OS << " Instructions:\n";
+    for (const Instruction *I : cast<const SimpleDDGNode>(N).getInstructions())
+      OS.indent(2) << *I << "\n";
+  } else if (isa<PiBlockDDGNode>(&N)) {
+    OS << "--- start of nodes in pi-block ---\n";
+    auto &Nodes = cast<const PiBlockDDGNode>(&N)->getNodes();
+    unsigned Count = 0;
+    for (const DDGNode *N : Nodes)
+      OS << *N << (++Count == Nodes.size() ? "" : "\n");
+    OS << "--- end of nodes in pi-block ---\n";
+  } else if (!isa<RootDDGNode>(N))
+    llvm_unreachable("unimplemented type of node");
+
+  OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n");
+  for (auto &E : N.getEdges())
+    OS.indent(2) << *E;
+  return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// SimpleDDGNode implementation
+//===--------------------------------------------------------------------===//
+
+SimpleDDGNode::SimpleDDGNode(Instruction &I)
+  : DDGNode(NodeKind::SingleInstruction), InstList() {
+  assert(InstList.empty() && "Expected empty list.");
+  InstList.push_back(&I);
+}
+
+SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N)
+    : DDGNode(N), InstList(N.InstList) {
+  assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) ||
+          (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) &&
+         "constructing from invalid simple node.");
+}
+
+SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N)
+    : DDGNode(std::move(N)), InstList(std::move(N.InstList)) {
+  assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) ||
+          (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) &&
+         "constructing from invalid simple node.");
+}
+
+SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); }
+
+//===--------------------------------------------------------------------===//
+// PiBlockDDGNode implementation
+//===--------------------------------------------------------------------===//
+
+PiBlockDDGNode::PiBlockDDGNode(const PiNodeList &List)
+    : DDGNode(NodeKind::PiBlock), NodeList(List) {
+  assert(!NodeList.empty() && "pi-block node constructed with an empty list.");
+}
+
+PiBlockDDGNode::PiBlockDDGNode(const PiBlockDDGNode &N)
+    : DDGNode(N), NodeList(N.NodeList) {
+  assert(getKind() == NodeKind::PiBlock && !NodeList.empty() &&
+         "constructing from invalid pi-block node.");
+}
+
+PiBlockDDGNode::PiBlockDDGNode(PiBlockDDGNode &&N)
+    : DDGNode(std::move(N)), NodeList(std::move(N.NodeList)) {
+  assert(getKind() == NodeKind::PiBlock && !NodeList.empty() &&
+         "constructing from invalid pi-block node.");
+}
+
+PiBlockDDGNode::~PiBlockDDGNode() { NodeList.clear(); }
+
+//===--------------------------------------------------------------------===//
+// DDGEdge implementation
+//===--------------------------------------------------------------------===//
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) {
+  const char *Out;
+  switch (K) {
+  case DDGEdge::EdgeKind::RegisterDefUse:
+    Out = "def-use";
+    break;
+  case DDGEdge::EdgeKind::MemoryDependence:
+    Out = "memory";
+    break;
+  case DDGEdge::EdgeKind::Rooted:
+    Out = "rooted";
+    break;
+  case DDGEdge::EdgeKind::Unknown:
+    Out = "?? (error)";
+    break;
+  }
+  OS << Out;
+  return OS;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) {
+  OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n";
+  return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// DataDependenceGraph implementation
+//===--------------------------------------------------------------------===//
+using BasicBlockListType = SmallVector<BasicBlock *, 8>;
+
+DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D)
+    : DependenceGraphInfo(F.getName().str(), D) {
+  // Put the basic blocks in program order for correct dependence
+  // directions.
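+  // Note: scc_begin/scc_end visit the SCCs in reverse topological order, so
+  // reversing the collected list below yields a topological (program) order.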
+  BasicBlockListType BBList;
+  for (auto &SCC : make_range(scc_begin(&F), scc_end(&F)))
+    for (BasicBlock * BB : SCC)
+      BBList.push_back(BB);
+  std::reverse(BBList.begin(), BBList.end());
+  DDGBuilder(*this, D, BBList).populate();
+}
+
+DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI,
+                                         DependenceInfo &D)
+    : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." +
+                                L.getHeader()->getName())
+                              .str(),
+                          D) {
+  // Put the basic blocks in program order for correct dependence
+  // directions.
+  LoopBlocksDFS DFS(&L);
+  DFS.perform(&LI);
+  BasicBlockListType BBList;
+  for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO()))
+    BBList.push_back(BB);
+  DDGBuilder(*this, D, BBList).populate();
+}
+
+DataDependenceGraph::~DataDependenceGraph() {
+  for (auto *N : Nodes) {
+    for (auto *E : *N)
+      delete E;
+    delete N;
+  }
+}
+
+bool DataDependenceGraph::addNode(DDGNode &N) {
+  if (!DDGBase::addNode(N))
+    return false;
+
+  // In general, if the root node is already created and linked, it is not safe
+  // to add new nodes since they may be unreachable by the root. However,
+  // pi-block nodes need to be added after the root node is linked, and they are
+  // always reachable by the root, because they represent components that are
+  // already reachable by root.
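+  // Put differently: once the root node has been created and linked, the only
+  // kind of node that may still be added to this graph is a pi-block node.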
+  auto *Pi = dyn_cast<PiBlockDDGNode>(&N);
+  assert((!Root || Pi) &&
+         "Root node is already added. No more nodes can be added.");
+
+  if (isa<RootDDGNode>(N))
+    Root = &N;
+
+  if (Pi)
+    for (DDGNode *NI : Pi->getNodes())
+      PiBlockMap.insert(std::make_pair(NI, Pi));
+
+  return true;
+}
+
+const PiBlockDDGNode *DataDependenceGraph::getPiBlock(const NodeType &N) const {
+  if (PiBlockMap.find(&N) == PiBlockMap.end())
+    return nullptr;
+  auto *Pi = PiBlockMap.find(&N)->second;
+  assert(PiBlockMap.find(Pi) == PiBlockMap.end() &&
+         "Nested pi-blocks detected.");
+  return Pi;
+}
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) {
+  for (DDGNode *Node : G)
+    // Avoid printing nodes that are part of a pi-block twice. They will get
+    // printed when the pi-block is printed.
+    if (!G.getPiBlock(*Node))
+      OS << *Node << "\n";
+  OS << "\n";
+  return OS;
+}
+
+//===--------------------------------------------------------------------===//
+// DDGBuilder implementation
+//===--------------------------------------------------------------------===//
+
+bool DDGBuilder::areNodesMergeable(const DDGNode &Src,
+                                   const DDGNode &Tgt) const {
+  // Only merge two nodes if they are both simple nodes and the consecutive
+  // instructions after merging belong to the same BB.
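+  // For illustration: two single-instruction nodes whose instructions live in
+  // the same basic block can be merged; nodes whose combined instructions
+  // would span two different blocks cannot.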
+  const auto *SimpleSrc = dyn_cast<const SimpleDDGNode>(&Src);
+  const auto *SimpleTgt = dyn_cast<const SimpleDDGNode>(&Tgt);
+  if (!SimpleSrc || !SimpleTgt)
+    return false;
+
+  return SimpleSrc->getLastInstruction()->getParent() ==
+         SimpleTgt->getFirstInstruction()->getParent();
+}
+
+void DDGBuilder::mergeNodes(DDGNode &A, DDGNode &B) {
+  DDGEdge &EdgeToFold = A.back();
+  assert(A.getEdges().size() == 1 && EdgeToFold.getTargetNode() == B &&
+         "Expected A to have a single edge to B.");
+  assert(isa<SimpleDDGNode>(&A) && isa<SimpleDDGNode>(&B) &&
+         "Expected simple nodes");
+
+  // Copy instructions from B to the end of A.
+  cast<SimpleDDGNode>(&A)->appendInstructions(*cast<SimpleDDGNode>(&B));
+
+  // Move to A any outgoing edges from B.
+  for (DDGEdge *BE : B)
+    Graph.connect(A, BE->getTargetNode(), *BE);
+
+  A.removeEdge(EdgeToFold);
+  destroyEdge(EdgeToFold);
+  Graph.removeNode(B);
+  destroyNode(B);
+}
+
+bool DDGBuilder::shouldSimplify() const { return SimplifyDDG; }
+
+bool DDGBuilder::shouldCreatePiBlocks() const { return CreatePiBlocks; }
+
+//===--------------------------------------------------------------------===//
+// DDG Analysis Passes
+//===--------------------------------------------------------------------===//
+
+/// DDG as a loop pass.
+DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM,
+                                     LoopStandardAnalysisResults &AR) {
+  Function *F = L.getHeader()->getParent();
+  DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI);
+  return std::make_unique<DataDependenceGraph>(L, AR.LI, DI);
+}
+AnalysisKey DDGAnalysis::Key;
+
+PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM,
+                                              LoopStandardAnalysisResults &AR,
+                                              LPMUpdater &U) {
+  OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n";
+  OS << *AM.getResult<DDGAnalysis>(L, AR);
+  return PreservedAnalyses::all();
+}
diff --git a/hpvm/llvm_patches/lib/Analysis/DependenceGraphBuilder.cpp b/hpvm/llvm_patches/lib/Analysis/DependenceGraphBuilder.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..95a39f984d6378af4d7a8042954a5913bbf1a971
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Analysis/DependenceGraphBuilder.cpp
@@ -0,0 +1,535 @@
+//===- DependenceGraphBuilder.cpp ------------------------------------------==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+// This file implements common steps of the build algorithm for construction
+// of dependence graphs such as DDG and PDG.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/DependenceGraphBuilder.h"
+#include "llvm/ADT/EnumeratedArray.h"
+#include "llvm/ADT/SCCIterator.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DDG.h"
+
+using namespace llvm;
+
+#define DEBUG_TYPE "dgb"
+
+STATISTIC(TotalGraphs, "Number of dependence graphs created.");
+STATISTIC(TotalDefUseEdges, "Number of def-use edges created.");
+STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created.");
+STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created.");
+STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created.");
+STATISTIC(TotalConfusedEdges,
+          "Number of confused memory dependencies between two nodes.");
+STATISTIC(TotalEdgeReversals,
+          "Number of times the source and sink of dependence was reversed to "
+          "expose cycles in the graph.");
+
+using InstructionListType = SmallVector<Instruction *, 2>;
+
+//===--------------------------------------------------------------------===//
+// AbstractDependenceGraphBuilder implementation
+//===--------------------------------------------------------------------===//
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() {
+  // The BBList is expected to be in program order.
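+  // The ordinals assigned here are used later (see createPiBlocks) to restore
+  // program order among the members of an SCC.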
+  size_t NextOrdinal = 1;
+  for (auto *BB : BBList)
+    for (auto &I : *BB)
+      InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
+  ++TotalGraphs;
+  assert(IMap.empty() && "Expected empty instruction map at start");
+  for (BasicBlock *BB : BBList)
+    for (Instruction &I : *BB) {
+      auto &NewNode = createFineGrainedNode(I);
+      IMap.insert(std::make_pair(&I, &NewNode));
+      NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
+      ++TotalFineGrainedNodes;
+    }
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
+  // Create a root node that connects to every connected component of the graph.
+  // This is done to allow graph iterators to visit all the disjoint components
+  // of the graph, in a single walk.
+  //
+  // This algorithm works by going through each node of the graph and for each
+  // node N, do a DFS starting from N. A rooted edge is established between the
+  // root node and N (if N is not yet visited). All the nodes reachable from N
+  // are marked as visited and are skipped in the DFS of subsequent nodes.
+  //
+  // Note: This algorithm tries to limit the number of edges out of the root
+  // node to some extent, but there may be redundant edges created depending on
+  // the iteration order. For example for a graph {A -> B}, an edge from the
+  // root node is added to both nodes if B is visited before A. While it does
+  // not result in minimal number of edges, this approach saves compile-time
+  // while keeping the number of edges in check.
+  auto &RootNode = createRootNode();
+  df_iterator_default_set<const NodeType *, 4> Visited;
+  for (auto *N : Graph) {
+    if (*N == RootNode)
+      continue;
+    for (auto I : depth_first_ext(N, Visited))
+      if (I == N)
+        createRootedEdge(RootNode, *N);
+  }
+}
+
+template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
+  if (!shouldCreatePiBlocks())
+    return;
+
+  LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
+
+  // The overall algorithm is as follows:
+  // 1. Identify SCCs and for each SCC create a pi-block node containing all
+  //    the nodes in that SCC.
+  // 2. Identify incoming edges incident to the nodes inside of the SCC and
+  //    reconnect them to the pi-block node.
+  // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
+  //    outside of it and reconnect them so that the edges are coming out of the
+  //    SCC node instead.
+
+  // Adding nodes as we iterate through the SCCs causes the SCC
+  // iterators to get invalidated. To prevent this invalidation, we first
+  // collect a list of nodes that are part of an SCC, and then iterate over
+  // those lists to create the pi-block nodes. Each element of the list is a
+  // list of nodes in an SCC. Note: trivial SCCs containing a single node are
+  // ignored.
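+  //
+  // For illustration, using a hypothetical graph: in {A->B, B->C, C->B, C->D},
+  // the SCC {B, C} becomes a pi-block P; after reconnection the graph contains
+  // A->P and P->D, while B, C, and the edges between them are kept inside P.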
+  SmallVector<NodeListType, 4> ListOfSCCs;
+  for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
+    if (SCC.size() > 1)
+      ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
+  }
+
+  for (NodeListType &NL : ListOfSCCs) {
+    LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
+                      << " nodes in it.\n");
+
+    // SCC iterator may put the nodes in an order that's different from the
+    // program order. To preserve original program order, we sort the list of
+    // nodes based on ordinal numbers computed earlier.
+    llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
+      return getOrdinal(*LHS) < getOrdinal(*RHS);
+    });
+
+    NodeType &PiNode = createPiBlock(NL);
+    ++TotalPiBlockNodes;
+
+    // Build a set to speed up the lookup for edges whose targets
+    // are inside the SCC.
+    SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
+
+    // We have the set of nodes in the SCC. We go through the set of nodes
+    // that are outside of the SCC and look for edges that cross the two sets.
+    for (NodeType *N : Graph) {
+
+      // Skip the SCC node and all the nodes inside of it.
+      if (*N == PiNode || NodesInSCC.count(N))
+        continue;
+
+      for (NodeType *SCCNode : NL) {
+
+        enum Direction {
+          Incoming,      // Incoming edges to the SCC
+          Outgoing,      // Edges going out of the SCC
+          DirectionCount // To make the enum usable as an array index.
+        };
+
+        // Use these flags to help us avoid creating redundant edges. If there
+        // is more than one edge from an outside node to inside nodes, we only
+        // keep one edge from that node to the pi-block node. Similarly, if
+        // there is more than one edge from inside nodes to an outside node,
+        // we only keep one edge from the pi-block node to the outside node.
+        // There is a flag defined for each direction (incoming vs outgoing) and
+        // for each type of edge supported, using a two-dimensional boolean
+        // array.
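+        // For example, if an outside node X has def-use edges to two nodes
+        // inside the SCC, only a single def-use edge X->PiNode is created.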
+        using EdgeKind = typename EdgeType::EdgeKind;
+        EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{
+            false, false};
+
+        auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
+                                       const EdgeKind K) {
+          switch (K) {
+          case EdgeKind::RegisterDefUse:
+            createDefUseEdge(Src, Dst);
+            break;
+          case EdgeKind::MemoryDependence:
+            createMemoryEdge(Src, Dst);
+            break;
+          case EdgeKind::Rooted:
+            createRootedEdge(Src, Dst);
+            break;
+          default:
+            llvm_unreachable("Unsupported type of edge.");
+          }
+        };
+
+        auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
+                                  const Direction Dir) {
+          if (!Src->hasEdgeTo(*Dst))
+            return;
+          LLVM_DEBUG(dbgs()
+                     << "reconnecting("
+                     << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
+                     << ":\nSrc:" << *Src << "\nDst:" << *Dst
+                     << "\nNew:" << *New << "\n");
+          assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
+                 "Invalid direction.");
+
+          SmallVector<EdgeType *, 10> EL;
+          Src->findEdgesTo(*Dst, EL);
+          for (EdgeType *OldEdge : EL) {
+            EdgeKind Kind = OldEdge->getKind();
+            if (!EdgeAlreadyCreated[Dir][Kind]) {
+              if (Dir == Direction::Incoming) {
+                createEdgeOfKind(*Src, *New, Kind);
+                LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
+              } else if (Dir == Direction::Outgoing) {
+                createEdgeOfKind(*New, *Dst, Kind);
+                LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
+              }
+              EdgeAlreadyCreated[Dir][Kind] = true;
+            }
+            Src->removeEdge(*OldEdge);
+            destroyEdge(*OldEdge);
+            LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
+          }
+        };
+
+        // Process incoming edges incident to the pi-block node.
+        reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
+
+        // Process edges that are coming out of the pi-block node.
+        reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
+      }
+    }
+  }
+
+  // Ordinal maps are no longer needed.
+  InstOrdinalMap.clear();
+  NodeOrdinalMap.clear();
+
+  LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
+}
+
+template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
+  for (NodeType *N : Graph) {
+    InstructionListType SrcIList;
+    N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
+
+    // Use a set to mark the targets that we link to N, so we don't add
+    // duplicate def-use edges when more than one instruction in a target node
+    // uses results of instructions that are contained in N.
+    SmallPtrSet<NodeType *, 4> VisitedTargets;
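+    // E.g., if two different instructions in N define values consumed inside
+    // the same target node, only a single def-use edge is created to it.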
+
+    for (Instruction *II : SrcIList) {
+      for (User *U : II->users()) {
+        Instruction *UI = dyn_cast<Instruction>(U);
+        if (!UI)
+          continue;
+        NodeType *DstNode = nullptr;
+        if (IMap.find(UI) != IMap.end())
+          DstNode = IMap.find(UI)->second;
+
+        // In the case of loops, the scope of the subgraph is all the
+        // basic blocks (and instructions within them) belonging to the loop. We
+        // simply ignore all the edges coming from (or going into) instructions
+        // or basic blocks outside of this range.
+        if (!DstNode) {
+          LLVM_DEBUG(
+              dbgs()
+              << "skipped def-use edge since the sink" << *UI
+              << " is outside the range of instructions being considered.\n");
+          continue;
+        }
+
+        // Self dependencies are ignored because they are redundant and
+        // uninteresting.
+        if (DstNode == N) {
+          LLVM_DEBUG(dbgs()
+                     << "skipped def-use edge since the sink and the source ("
+                     << N << ") are the same.\n");
+          continue;
+        }
+
+        if (VisitedTargets.insert(DstNode).second) {
+          createDefUseEdge(*N, *DstNode);
+          ++TotalDefUseEdges;
+        }
+      }
+    }
+  }
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
+  using DGIterator = typename G::iterator;
+  auto isMemoryAccess = [](const Instruction *I) {
+    return I->mayReadOrWriteMemory();
+  };
+  for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
+    LLVM_DEBUG(errs() << "Src Node: " << *SrcIt << "\n");
+    InstructionListType SrcIList;
+    (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
+    if (SrcIList.empty())
+      continue;
+
+    for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
+      if (**SrcIt == **DstIt)
+        continue;
+      InstructionListType DstIList;
+      (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
+      if (DstIList.empty())
+        continue;
+      bool ForwardEdgeCreated = false;
+      bool BackwardEdgeCreated = false;
+      LLVM_DEBUG(errs() << "***********************************************\n");
+      for (Instruction *ISrc : SrcIList) {
+        LLVM_DEBUG(errs() << "Src: " << *ISrc << "\n");
+        for (Instruction *IDst : DstIList) {
+          LLVM_DEBUG(errs() << "Dst: " << *IDst << "\n");
+          auto D = DI.depends(ISrc, IDst, true);
+          if (!D) {
+            LLVM_DEBUG(errs() << "--> No Dependence, moving on!\n");
+            continue;
+          }
+          LLVM_DEBUG(D->dump(errs()));
+
+          // If we have a dependence with its left-most non-'=' direction
+          // being '>' we need to reverse the direction of the edge, because
+          // the source of the dependence cannot occur after the sink. For
+          // confused dependencies, we will create edges in both directions to
+          // represent the possibility of a cycle.
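+          // For illustration: a direction vector of (=, >) means the sink
+          // iteration precedes the source iteration at level 2, so the edge
+          // is emitted as a backward edge (Dst to Src) rather than forward.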
+
+          auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) {
+            if (!ForwardEdgeCreated) {
+              createMemoryEdge(Src, Dst);
+              ++TotalMemoryEdges;
+            }
+            if (!BackwardEdgeCreated) {
+              createMemoryEdge(Dst, Src);
+              ++TotalMemoryEdges;
+            }
+            ForwardEdgeCreated = BackwardEdgeCreated = true;
+            ++TotalConfusedEdges;
+          };
+
+          auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) {
+            if (!ForwardEdgeCreated) {
+              createMemoryEdge(Src, Dst);
+              ++TotalMemoryEdges;
+            }
+            ForwardEdgeCreated = true;
+          };
+
+          auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) {
+            if (!BackwardEdgeCreated) {
+              createMemoryEdge(Dst, Src);
+              ++TotalMemoryEdges;
+            }
+            BackwardEdgeCreated = true;
+          };
+
+          if (D->isConfused()) {
+            LLVM_DEBUG(errs()
+                       << "--> Confused Dependence: creating Confused Edge\n");
+            createConfusedEdges(**SrcIt, **DstIt);
+          } else if (D->isOrdered() && !D->isLoopIndependent()) {
+            LLVM_DEBUG(errs() << "--> Ordered, Loop-Dependent Dependence:\n");
+            bool ReversedEdge = false;
+            for (unsigned Level = 1; Level <= D->getLevels(); ++Level) {
+              LLVM_DEBUG(errs() << "----> Lvl: " << Level << ": ");
+              if (D->getDirection(Level) == Dependence::DVEntry::EQ) {
+                LLVM_DEBUG(errs() << "EQ\n");
+                continue;
+              } else if (D->getDirection(Level) == Dependence::DVEntry::GT) {
+                LLVM_DEBUG(errs() << "GT\n");
+                LLVM_DEBUG(
+                    errs()
+                    << "------> Invalid Dependence. Creating Backward Edge!\n");
+                createBackwardEdge(**SrcIt, **DstIt);
+                ReversedEdge = true;
+                ++TotalEdgeReversals;
+                break;
+              } else if (D->getDirection(Level) == Dependence::DVEntry::LT) {
+                LLVM_DEBUG(errs() << "LT\n");
+                break;
+              } else {
+                LLVM_DEBUG(errs() << " Confused\n");
+                createConfusedEdges(**SrcIt, **DstIt);
+                break;
+              }
+            }
+            if (!ReversedEdge) {
+              LLVM_DEBUG(errs() << "------> Creating Forward Edge!\n");
+              createForwardEdge(**SrcIt, **DstIt);
+            }
+          } else {
+            LLVM_DEBUG(errs() << "--> Creating Forward Edge!\n");
+            createForwardEdge(**SrcIt, **DstIt);
+          }
+          // Avoid creating duplicate edges.
+          if (ForwardEdgeCreated && BackwardEdgeCreated) {
+            LLVM_DEBUG(
+                errs()
+                << "-->  Created all possible edges between Src and Dst!\n");
+            break;
+          }
+        }
+
+        // If we've created edges in both directions, there is no more
+        // unique edge that we can create between these two nodes, so we
+        // can exit early.
+        if (ForwardEdgeCreated && BackwardEdgeCreated) {
+          LLVM_DEBUG(errs() << "No more unique edges possible!\n");
+          break;
+        }
+      }
+    }
+  }
+}
+
+template <class G> void AbstractDependenceGraphBuilder<G>::simplify() {
+  if (!shouldSimplify())
+    return;
+  LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n");
+
+  // This algorithm works by first collecting a set of candidate nodes that
+  // have an out-degree of one (in terms of def-use edges), and then ignoring
+  // those whose targets have an in-degree more than one. Each node in the
+  // resulting set can then be merged with its corresponding target and put
+  // back into the worklist until no further merge candidates are available.
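+  // For illustration: in a chain {(a)->(b)->(c)} where each def-use edge is
+  // the only edge between its endpoints, (a) first merges with (b), and the
+  // merged node then merges with (c), leaving the single node {a,b,c}.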
+  SmallPtrSet<NodeType *, 32> CandidateSourceNodes;
+
+  // A mapping between nodes and their in-degree. To save space, this map
+  // only contains nodes that are targets of nodes in the CandidateSourceNodes.
+  DenseMap<NodeType *, unsigned> TargetInDegreeMap;
+
+  for (NodeType *N : Graph) {
+    if (N->getEdges().size() != 1)
+      continue;
+    EdgeType &Edge = N->back();
+    if (!Edge.isDefUse())
+      continue;
+    CandidateSourceNodes.insert(N);
+
+    // Insert an element into the in-degree map and initialize to zero. The
+    // count will get updated in the next step.
+    TargetInDegreeMap.insert({&Edge.getTargetNode(), 0});
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size()
+           << "\nNode with single outgoing def-use edge:\n";
+    for (NodeType *N : CandidateSourceNodes)
+      dbgs() << N << "\n";
+  });
+
+  for (NodeType *N : Graph) {
+    for (EdgeType *E : *N) {
+      NodeType *Tgt = &E->getTargetNode();
+      auto TgtIT = TargetInDegreeMap.find(Tgt);
+      if (TgtIT != TargetInDegreeMap.end())
+        ++(TgtIT->second);
+    }
+  }
+
+  LLVM_DEBUG({
+    dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size()
+           << "\nContent of in-degree map:\n";
+    for (auto &I : TargetInDegreeMap)
+      dbgs() << I.first << " --> " << I.second << "\n";
+  });
+
+  SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(),
+                                       CandidateSourceNodes.end());
+  while (!Worklist.empty()) {
+    NodeType &Src = *Worklist.pop_back_val();
+    // As nodes get merged, we need to skip any node that has been removed from
+    // the candidate set (see below).
+    if (CandidateSourceNodes.find(&Src) == CandidateSourceNodes.end())
+      continue;
+    CandidateSourceNodes.erase(&Src);
+
+    assert(Src.getEdges().size() == 1 &&
+           "Expected a single edge from the candidate src node.");
+    NodeType &Tgt = Src.back().getTargetNode();
+    assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() &&
+           "Expected target to be in the in-degree map.");
+
+    if (TargetInDegreeMap[&Tgt] != 1)
+      continue;
+
+    if (!areNodesMergeable(Src, Tgt))
+      continue;
+
+    // Do not merge if there is also an edge from target to src (immediate
+    // cycle).
+    if (Tgt.hasEdgeTo(Src))
+      continue;
+
+    LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n");
+
+    mergeNodes(Src, Tgt);
+
+    // If the target node is in the candidate set itself, we need to put the
+    // src node back into the worklist again so it gives the target a chance
+    // to get merged into it. For example if we have:
+    // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially
+    // {b, a}, then after merging (a) and (b) together, we need to put (a,b)
+    // back in the worklist so that (c) can get merged in as well resulting in
+    // {(a,b,c) -> d}
+    // We also need to remove the old target (b), from the worklist. We first
+    // remove it from the candidate set here, and skip any item from the
+    // worklist that is not in the set.
+    if (CandidateSourceNodes.find(&Tgt) != CandidateSourceNodes.end()) {
+      Worklist.push_back(&Src);
+      CandidateSourceNodes.insert(&Src);
+      CandidateSourceNodes.erase(&Tgt);
+      LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n");
+    }
+  }
+  LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n");
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() {
+
+  // If we don't create pi-blocks, then we may not have a DAG.
+  if (!shouldCreatePiBlocks())
+    return;
+
+  SmallVector<NodeType *, 64> NodesInPO;
+  using NodeKind = typename NodeType::NodeKind;
+  for (NodeType *N : post_order(&Graph)) {
+    if (N->getKind() == NodeKind::PiBlock) {
+      // Put members of the pi-block right after the pi-block itself, for
+      // convenience.
+      const NodeListType &PiBlockMembers = getNodesInPiBlock(*N);
+      NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(),
+                       PiBlockMembers.end());
+    }
+    NodesInPO.push_back(N);
+  }
+
+  size_t OldSize = Graph.Nodes.size();
+  Graph.Nodes.clear();
+  for (NodeType *N : reverse(NodesInPO))
+    Graph.Nodes.push_back(N);
+  if (Graph.Nodes.size() != OldSize)
+    assert(false &&
+           "Expected the number of nodes to stay the same after the sort");
+}
+
+template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>;
+template class llvm::DependenceGraphInfo<DDGNode>;
diff --git a/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp b/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp
index 2c54392f8020ac7334117f1343214d085dbd6b84..211cefb8a17715ac77eca9d63d64a760a3fbd18a 100644
--- a/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp
+++ b/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp
@@ -859,6 +859,11 @@ lltok::Kind LLLexer::LexIdentifier() {
   KEYWORD(in);
   KEYWORD(out);
   KEYWORD(inout);
+  KEYWORD(priv);
+  KEYWORD(bufferin);
+  KEYWORD(bufferout);
+  KEYWORD(channel);
+
 
 #undef KEYWORD
 
diff --git a/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp b/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp
index 7446ff1e32dd79a18fd678446af56e6d193468ad..ed64537935c5cf8401303715f5fe761073894ea6 100644
--- a/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp
+++ b/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp
@@ -1474,6 +1474,10 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B,
     case lltok::kw_in:
     case lltok::kw_out:
     case lltok::kw_inout:
+    case lltok::kw_priv:
+    case lltok::kw_bufferin:
+    case lltok::kw_bufferout:
+    case lltok::kw_channel:
       HaveError |=
           Error(Lex.getLoc(),
                 "invalid use of parameter-only attribute on a function");
@@ -1818,6 +1822,41 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) {
     case lltok::kw_inout:
       B.addAttribute(Attribute::InOut);
       break;
+    case lltok::kw_priv: {
+      unsigned BuffSize;
+      if (ParseOptionalBufferSize(lltok::kw_priv, BuffSize)) {
+        // TODO: This has to change if we want to have a default buffer size
+        return true;
+      }
+      B.addBufferOrPrivAttr(Attribute::Priv, BuffSize);
+      continue;
+    }
+    case lltok::kw_bufferin: {
+      unsigned BuffSize;
+      if (ParseOptionalBufferSize(lltok::kw_bufferin, BuffSize)) {
+        // TODO: This has to change if we want to have a default buffer size
+        return true;
+      }
+      B.addBufferOrPrivAttr(Attribute::BufferIn, BuffSize);
+      continue;
+    }
+    case lltok::kw_bufferout: {
+      unsigned BuffSize;
+      if (ParseOptionalBufferSize(lltok::kw_bufferout, BuffSize)) {
+        // TODO: This has to change if we want to have a default buffer size
+        return true;
+      }
+      B.addBufferOrPrivAttr(Attribute::BufferOut, BuffSize);
+      continue;
+    }
+    case lltok::kw_channel: {
+      unsigned ChannelDepth;
+      if (ParseOptionalChannelDepth(lltok::kw_channel, ChannelDepth)) {
+        return true;
+      }
+      B.addChannelAttr(Attribute::Channel, ChannelDepth);
+      continue;
+    }
 
     case lltok::kw_alignstack:
     case lltok::kw_alwaysinline:
@@ -1931,6 +1970,11 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) {
     case lltok::kw_in:
     case lltok::kw_out:
     case lltok::kw_inout:
+    case lltok::kw_bufferin:
+    case lltok::kw_bufferout:
+    case lltok::kw_priv:
+    case lltok::kw_channel:
+
       HaveError |=
           Error(Lex.getLoc(), "invalid use of parameter-only attribute");
       break;
@@ -2354,6 +2398,30 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) {
   return false;
 }
 
+/// ParseOptionalBufferSize
+///   ::= /* empty */
+///   ::= AttrKind 4
+bool LLParser::ParseOptionalBufferSize(lltok::Kind AttrKind, unsigned &BuffSize) {
+  BuffSize = 0;
+  if (!EatIfPresent(AttrKind))
+    return false;
+  if (ParseUInt32(BuffSize))
+    return true;
+  return false;
+}
+
+/// ParseOptionalChannelDepth
+///   ::= /* empty */
+///   ::= AttrKind 4
+bool LLParser::ParseOptionalChannelDepth(lltok::Kind AttrKind, unsigned &ChannelDepth) {
+  ChannelDepth = 0;
+  if (!EatIfPresent(AttrKind))
+    return false;
+  if (ParseUInt32(ChannelDepth))
+    return true;
+  return false;
+}
+
 /// ParseOptionalDerefAttrBytes
 ///   ::= /* empty */
 ///   ::= AttrKind '(' 4 ')'
@@ -2874,6 +2942,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList,
                          std::move(Name));
 
     while (EatIfPresent(lltok::comma)) {
+      Attrs.clear();
       // Handle ... at end of arg list.
       if (EatIfPresent(lltok::dotdotdot)) {
         isVarArg = true;
diff --git a/hpvm/llvm_patches/lib/AsmParser/LLParser.h b/hpvm/llvm_patches/lib/AsmParser/LLParser.h
index bc1983232f0570d816a23bc1f92ce490a44dee59..566360436ff0bec4a50add03bc195634446fb821 100644
--- a/hpvm/llvm_patches/lib/AsmParser/LLParser.h
+++ b/hpvm/llvm_patches/lib/AsmParser/LLParser.h
@@ -307,6 +307,8 @@ private:
   bool ParseOptionalCallingConv(unsigned &CC);
   bool ParseOptionalAlignment(unsigned &Alignment);
   bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
+  bool ParseOptionalBufferSize(lltok::Kind AttrKind, unsigned &Size);
+  bool ParseOptionalChannelDepth(lltok::Kind AttrKind, unsigned &Depth);
   bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID,
                              AtomicOrdering &Ordering);
   bool ParseScope(SyncScope::ID &SSID);
diff --git a/hpvm/llvm_patches/lib/AsmParser/LLToken.h b/hpvm/llvm_patches/lib/AsmParser/LLToken.h
index cb0479b41c3b9e68d9697cd9d8adce4c80fa5c25..8eda688628cc54575b580d7cece7e98a2cec716b 100644
--- a/hpvm/llvm_patches/lib/AsmParser/LLToken.h
+++ b/hpvm/llvm_patches/lib/AsmParser/LLToken.h
@@ -355,6 +355,10 @@ enum Kind {
   kw_in,
   kw_out,
   kw_inout,
+  kw_priv,
+  kw_bufferin,
+  kw_bufferout,
+  kw_channel,
 
   // Metadata types.
   kw_distinct,
diff --git a/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp b/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp
index a1e64472850911013250976312a8dd7d8b879c98..557cae44c637af1414a96d8894f5c617d873149f 100644
--- a/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -1402,6 +1402,14 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) {
     return 3ULL << 1;
   case Attribute::InOut:
     return 3ULL << 2;
+  case Attribute::Priv:
+    return 3ULL << 3;
+  case Attribute::BufferIn:
+    return 3ULL << 4;
+  case Attribute::BufferOut:
+    return 3ULL << 5;
+  case Attribute::Channel:
+    return 3ULL << 6;
 
   case Attribute::NoSync:
     llvm_unreachable("nosync attribute not supported in raw format");
@@ -1441,7 +1449,11 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) {
         B.addAlignmentAttr(1ULL << ((A >> 16) - 1));
       else if (I == Attribute::StackAlignment)
         B.addStackAlignmentAttr(1ULL << ((A >> 26) - 1));
-      else
+      else if (I == Attribute::BufferIn || I == Attribute::BufferOut ||
+               I == Attribute::Priv || I == Attribute::Channel) {
+        errs() << "Bufferin/Bufferout/Priv/Channel record Val: " << Val << "\n";
+        llvm_unreachable("Should not reach here!");
+      } else
         B.addAttribute(I);
     }
   }
diff --git a/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp b/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp
index fd671c397583fad6ec8a9998635705417f59eed1..ea0460142cd7b40b2b882c12404d09d02abe2166 100644
--- a/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -780,6 +780,15 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) {
     return bitc::ATTR_KIND_OUT;
   case Attribute::InOut:
     return bitc::ATTR_KIND_INOUT;
+  case Attribute::Priv:
+    return bitc::ATTR_KIND_PRIV;
+  case Attribute::BufferIn:
+    return bitc::ATTR_KIND_BUFFERIN;
+  case Attribute::BufferOut:
+    return bitc::ATTR_KIND_BUFFEROUT;
+  case Attribute::Channel:
+    return bitc::ATTR_KIND_CHANNEL;
+
 
   case Attribute::EndAttrKinds:
     llvm_unreachable("Can not encode end-attribute kinds marker.");
diff --git a/hpvm/llvm_patches/lib/IR/AttributeImpl.h b/hpvm/llvm_patches/lib/IR/AttributeImpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..19f27ade2969ed1e5afc139e46c0c5db7b467864
--- /dev/null
+++ b/hpvm/llvm_patches/lib/IR/AttributeImpl.h
@@ -0,0 +1,290 @@
+//===- AttributeImpl.h - Attribute Internals --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines various helper methods and classes used by
+/// LLVMContextImpl for creating and managing attributes.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_IR_ATTRIBUTEIMPL_H
+#define LLVM_LIB_IR_ATTRIBUTEIMPL_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class LLVMContext;
+class Type;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class represents a single, uniqued attribute. That attribute
+/// could be a single enum, a tuple, or a string.
+class AttributeImpl : public FoldingSetNode {
+  unsigned char KindID; ///< Holds the AttrEntryKind of the attribute
+
+protected:
+  enum AttrEntryKind {
+    EnumAttrEntry,
+    IntAttrEntry,
+    StringAttrEntry,
+    TypeAttrEntry,
+  };
+
+  AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
+
+public:
+  // AttributesImpl is uniqued, these should not be available.
+  AttributeImpl(const AttributeImpl &) = delete;
+  AttributeImpl &operator=(const AttributeImpl &) = delete;
+
+  virtual ~AttributeImpl();
+
+  bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
+  bool isIntAttribute() const { return KindID == IntAttrEntry; }
+  bool isStringAttribute() const { return KindID == StringAttrEntry; }
+  bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
+
+  bool hasAttribute(Attribute::AttrKind A) const;
+  bool hasAttribute(StringRef Kind) const;
+
+  Attribute::AttrKind getKindAsEnum() const;
+  uint64_t getValueAsInt() const;
+
+  StringRef getKindAsString() const;
+  StringRef getValueAsString() const;
+
+  Type *getValueAsType() const;
+
+  /// Used when sorting the attributes.
+  bool operator<(const AttributeImpl &AI) const;
+
+  void Profile(FoldingSetNodeID &ID) const {
+    if (isEnumAttribute())
+      Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
+    else if (isIntAttribute())
+      Profile(ID, getKindAsEnum(), getValueAsInt());
+    else if (isStringAttribute())
+      Profile(ID, getKindAsString(), getValueAsString());
+    else
+      Profile(ID, getKindAsEnum(), getValueAsType());
+  }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      uint64_t Val) {
+    ID.AddInteger(Kind);
+    if (Val) ID.AddInteger(Val);
+  }
+
+  static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) {
+    ID.AddString(Kind);
+    if (!Values.empty()) ID.AddString(Values);
+  }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      Type *Ty) {
+    ID.AddInteger(Kind);
+    ID.AddPointer(Ty);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// A set of classes that contain the value of the
+/// attribute object. There are three main categories: enum attribute entries,
+/// represented by Attribute::AttrKind; alignment attribute entries; and string
+/// attribute entries, which are for target-dependent attributes.
+
+class EnumAttributeImpl : public AttributeImpl {
+  virtual void anchor();
+
+  Attribute::AttrKind Kind;
+
+protected:
+  EnumAttributeImpl(AttrEntryKind ID, Attribute::AttrKind Kind)
+      : AttributeImpl(ID), Kind(Kind) {}
+
+public:
+  EnumAttributeImpl(Attribute::AttrKind Kind)
+      : AttributeImpl(EnumAttrEntry), Kind(Kind) {}
+
+  Attribute::AttrKind getEnumKind() const { return Kind; }
+};
+
+class IntAttributeImpl : public EnumAttributeImpl {
+  uint64_t Val;
+
+  void anchor() override;
+
+public:
+  IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val)
+      : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) {
+    assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment ||
+            Kind == Attribute::Dereferenceable ||
+            Kind == Attribute::DereferenceableOrNull ||
+            Kind == Attribute::AllocSize ||
+            Kind == Attribute::BufferIn ||
+            Kind == Attribute::BufferOut ||
+            Kind == Attribute::Priv ||
+            Kind == Attribute::Channel) &&
+           "Wrong kind for int attribute!");
+  }
+
+  uint64_t getValue() const { return Val; }
+};
+
+class StringAttributeImpl : public AttributeImpl {
+  virtual void anchor();
+
+  std::string Kind;
+  std::string Val;
+
+public:
+  StringAttributeImpl(StringRef Kind, StringRef Val = StringRef())
+      : AttributeImpl(StringAttrEntry), Kind(Kind), Val(Val) {}
+
+  StringRef getStringKind() const { return Kind; }
+  StringRef getStringValue() const { return Val; }
+};
+
+class TypeAttributeImpl : public EnumAttributeImpl {
+  virtual void anchor();
+
+  Type *Ty;
+
+public:
+  TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
+      : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
+
+  Type *getTypeValue() const { return Ty; }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class represents a group of attributes that apply to one
+/// element: function, return type, or parameter.
+class AttributeSetNode final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeSetNode, Attribute> {
+  friend TrailingObjects;
+
+  unsigned NumAttrs; ///< Number of attributes in this node.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableAttrs[12] = {};
+
+  AttributeSetNode(ArrayRef<Attribute> Attrs);
+
+public:
+  // AttributesSetNode is uniqued, these should not be available.
+  AttributeSetNode(const AttributeSetNode &) = delete;
+  AttributeSetNode &operator=(const AttributeSetNode &) = delete;
+
+  void operator delete(void *p) { ::operator delete(p); }
+
+  static AttributeSetNode *get(LLVMContext &C, const AttrBuilder &B);
+
+  static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
+
+  /// Return the number of attributes this AttributeList contains.
+  unsigned getNumAttributes() const { return NumAttrs; }
+
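+  // For illustration: an AttrKind with numeric value 12 is tracked in
+  // AvailableAttrs[1], bit 4 (since 12 / 8 == 1 and 12 % 8 == 4).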
+  bool hasAttribute(Attribute::AttrKind Kind) const {
+    return AvailableAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
+  }
+  bool hasAttribute(StringRef Kind) const;
+  bool hasAttributes() const { return NumAttrs != 0; }
+
+  Attribute getAttribute(Attribute::AttrKind Kind) const;
+  Attribute getAttribute(StringRef Kind) const;
+
+  unsigned getAlignment() const;
+  unsigned getStackAlignment() const;
+  uint64_t getDereferenceableBytes() const;
+  uint64_t getDereferenceableOrNullBytes() const;
+  std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
+  std::string getAsString(bool InAttrGrp) const;
+  Type *getByValType() const;
+
+  using iterator = const Attribute *;
+
+  iterator begin() const { return getTrailingObjects<Attribute>(); }
+  iterator end() const { return begin() + NumAttrs; }
+
+  void Profile(FoldingSetNodeID &ID) const {
+    Profile(ID, makeArrayRef(begin(), end()));
+  }
+
+  static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
+    for (const auto &Attr : AttrList)
+      Attr.Profile(ID);
+  }
+};
+
+using IndexAttrPair = std::pair<unsigned, AttributeSet>;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class represents a set of attributes that apply to the function,
+/// return type, and parameters.
+class AttributeListImpl final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeListImpl, AttributeSet> {
+  friend class AttributeList;
+  friend TrailingObjects;
+
+private:
+  LLVMContext &Context;
+  unsigned NumAttrSets; ///< Number of entries in this set.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableFunctionAttrs[12] = {};
+
+  // Helper fn for TrailingObjects class.
+  size_t numTrailingObjects(OverloadToken<AttributeSet>) { return NumAttrSets; }
+
+public:
+  AttributeListImpl(LLVMContext &C, ArrayRef<AttributeSet> Sets);
+
+  // AttributesSetImpt is uniqued, these should not be available.
+  AttributeListImpl(const AttributeListImpl &) = delete;
+  AttributeListImpl &operator=(const AttributeListImpl &) = delete;
+
+  void operator delete(void *p) { ::operator delete(p); }
+
+  /// Get the context that created this AttributeListImpl.
+  LLVMContext &getContext() { return Context; }
+
+  /// Return true if the AttributeSet or the FunctionIndex has an
+  /// enum attribute of the given kind.
+  bool hasFnAttribute(Attribute::AttrKind Kind) const {
+    return AvailableFunctionAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
+  }
+
+  using iterator = const AttributeSet *;
+
+  iterator begin() const { return getTrailingObjects<AttributeSet>(); }
+  iterator end() const { return begin() + NumAttrSets; }
+
+  void Profile(FoldingSetNodeID &ID) const;
+  static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeSet> Nodes);
+
+  void dump() const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_IR_ATTRIBUTEIMPL_H
diff --git a/hpvm/llvm_patches/lib/IR/Attributes.cpp b/hpvm/llvm_patches/lib/IR/Attributes.cpp
index 29c47a9e1107524278dcc57c188b320821ba7d86..5c18b8d294920118b8e3ab74aff6df79c90311c2 100644
--- a/hpvm/llvm_patches/lib/IR/Attributes.cpp
+++ b/hpvm/llvm_patches/lib/IR/Attributes.cpp
@@ -251,6 +251,18 @@ bool Attribute::hasAttribute(StringRef Kind) const {
   return pImpl && pImpl->hasAttribute(Kind);
 }
 
+unsigned Attribute::getBuffSize() const {
+  assert((hasAttribute(Attribute::BufferIn) ||
+          hasAttribute(Attribute::BufferOut) ||
+          hasAttribute(Attribute::Priv)) &&
+         "Trying to get BuffSize from non-buffering attribute!");
+  return pImpl->getValueAsInt();
+}
+
+unsigned Attribute::getChannelDepth() const {
+  assert(hasAttribute(Attribute::Channel) &&
+         "Trying to get ChannelDepth from non-channel attribute!");
+  return pImpl->getValueAsInt();
+}
+
 unsigned Attribute::getAlignment() const {
   assert(hasAttribute(Attribute::Alignment) &&
          "Trying to get alignment from non-alignment attribute!");
@@ -411,6 +423,30 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "out";
   if (hasAttribute(Attribute::InOut))
     return "inout";
+  if (hasAttribute(Attribute::Priv)) {
+    std::string Result;
+    Result += "priv ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
+  if (hasAttribute(Attribute::BufferIn)) {
+    std::string Result;
+    Result += "bufferin ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
+  if (hasAttribute(Attribute::BufferOut)) {
+    std::string Result;
+    Result += "bufferout ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
+  if (hasAttribute(Attribute::Channel)) {
+    std::string Result;
+    Result += "channel ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
 
   if (hasAttribute(Attribute::ByVal)) {
     std::string Result;
@@ -837,6 +873,14 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
       Attr = Attribute::getWithAllocSizeArgs(C, A.first, A.second);
       break;
     }
+    case Attribute::BufferIn:
+    case Attribute::BufferOut:
+    case Attribute::Priv:
+      Attr = Attribute::get(C, Kind, B.getBuffSize());
+      break;
+    case Attribute::Channel:
+      Attr = Attribute::get(C, Kind, B.getChannelDepth());
+      break;
     default:
       Attr = Attribute::get(C, Kind);
     }
@@ -1483,12 +1527,16 @@ void AttrBuilder::clear() {
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
   AllocSizeArgs = 0;
   ByValType = nullptr;
+  BuffSize = 0;
+  ChannelDepth = 0;
 }
 
 AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
   assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
   assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment &&
          Val != Attribute::Dereferenceable && Val != Attribute::AllocSize &&
+         Val != Attribute::BufferIn && Val != Attribute::BufferOut &&
+         Val != Attribute::Priv && Val != Attribute::Channel &&
          "Adding integer attribute without adding a value!");
   Attrs[Val] = true;
   return *this;
@@ -1515,6 +1563,10 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
     DerefOrNullBytes = Attr.getDereferenceableOrNullBytes();
   else if (Kind == Attribute::AllocSize)
     AllocSizeArgs = Attr.getValueAsInt();
+  else if (Kind == Attribute::BufferIn || Kind == Attribute::BufferOut || Kind == Attribute::Priv)
+    BuffSize = Attr.getValueAsInt();
+  else if (Kind == Attribute::Channel)
+    ChannelDepth = Attr.getValueAsInt();
   return *this;
 }
 
@@ -1539,6 +1591,10 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
     DerefOrNullBytes = 0;
   else if (Val == Attribute::AllocSize)
     AllocSizeArgs = 0;
+  else if (Val == Attribute::BufferIn || Val == Attribute::BufferOut || Val == Attribute::Priv)
+    BuffSize = 0;
+  else if (Val == Attribute::Channel)
+    ChannelDepth = 0;
 
   return *this;
 }
@@ -1559,6 +1615,20 @@ std::pair<unsigned, Optional<unsigned>> AttrBuilder::getAllocSizeArgs() const {
   return unpackAllocSizeArgs(AllocSizeArgs);
 }
 
+AttrBuilder &AttrBuilder::addBufferOrPrivAttr(Attribute::AttrKind kind,
+                                              unsigned size) {
+  assert(BuffSize == 0 &&
+         "Another mutually-exclusive buffering attribute has already been set!");
+  Attrs[kind] = true;
+  BuffSize = size;
+  return *this;
+}
+
+AttrBuilder &AttrBuilder::addChannelAttr(Attribute::AttrKind kind,
+                                         unsigned depth) {
+  assert(ChannelDepth == 0 && "Channel depth has already been set!");
+  Attrs[kind] = true;
+  ChannelDepth = depth;
+  return *this;
+}
+
 AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
   if (Align == 0)
     return *this;
@@ -1644,6 +1714,12 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   if (!ByValType)
     ByValType = B.ByValType;
 
+  if (!BuffSize)
+    BuffSize = B.BuffSize;
+
+  if (!ChannelDepth)
+    ChannelDepth = B.ChannelDepth;
+
   Attrs |= B.Attrs;
 
   for (auto I : B.td_attrs())
@@ -1672,6 +1748,12 @@ AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
   if (B.ByValType)
     ByValType = nullptr;
 
+  if (B.BuffSize)
+    BuffSize = 0;
+
+  if (B.ChannelDepth)
+    ChannelDepth = 0;
+
   Attrs &= ~B.Attrs;
 
   for (auto I : B.td_attrs())
diff --git a/hpvm/llvm_patches/lib/Passes/PassBuilder.cpp b/hpvm/llvm_patches/lib/Passes/PassBuilder.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..aac06593efd9fb0cc1c4f06ba3af6caa688da64e
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Passes/PassBuilder.cpp
@@ -0,0 +1,2301 @@
+//===- Parsing, selection, and construction of pass pipelines -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of the PassBuilder based on our
+/// static pass registry as well as related functionality. It also provides
+/// helpers to aid in analyzing, debugging, and testing passes and pass
+/// pipelines.
+///
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/ADT/StringSwitch.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AliasAnalysisEvaluator.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Analysis/CFGPrinter.h"
+#include "llvm/Analysis/CFLAndersAliasAnalysis.h"
+#include "llvm/Analysis/CFLSteensAliasAnalysis.h"
+#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Analysis/DDG.h"
+#include "llvm/Analysis/DemandedBits.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DominanceFrontier.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IVUsers.h"
+#include "llvm/Analysis/LazyCallGraph.h"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Analysis/LoopAccessAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/ModuleSummaryAnalysis.h"
+#include "llvm/Analysis/OptimizationRemarkEmitter.h"
+#include "llvm/Analysis/PhiValues.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/Analysis/RegionInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/Analysis/ScopedNoAliasAA.h"
+#include "llvm/Analysis/StackSafetyAnalysis.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
+#include "llvm/CodeGen/PreISelIntrinsicLowering.h"
+#include "llvm/CodeGen/UnreachableBlockElim.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/IRPrintingPasses.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/SafepointIRVerifier.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/FormatVariadic.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
+#include "llvm/Transforms/IPO/AlwaysInliner.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
+#include "llvm/Transforms/IPO/Attributor.h"
+#include "llvm/Transforms/IPO/CalledValuePropagation.h"
+#include "llvm/Transforms/IPO/ConstantMerge.h"
+#include "llvm/Transforms/IPO/CrossDSOCFI.h"
+#include "llvm/Transforms/IPO/DeadArgumentElimination.h"
+#include "llvm/Transforms/IPO/ElimAvailExtern.h"
+#include "llvm/Transforms/IPO/ForceFunctionAttrs.h"
+#include "llvm/Transforms/IPO/FunctionAttrs.h"
+#include "llvm/Transforms/IPO/FunctionImport.h"
+#include "llvm/Transforms/IPO/GlobalDCE.h"
+#include "llvm/Transforms/IPO/GlobalOpt.h"
+#include "llvm/Transforms/IPO/GlobalSplit.h"
+#include "llvm/Transforms/IPO/HotColdSplitting.h"
+#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
+#include "llvm/Transforms/IPO/Inliner.h"
+#include "llvm/Transforms/IPO/Internalize.h"
+#include "llvm/Transforms/IPO/LowerTypeTests.h"
+#include "llvm/Transforms/IPO/PartialInlining.h"
+#include "llvm/Transforms/IPO/SCCP.h"
+#include "llvm/Transforms/IPO/SampleProfile.h"
+#include "llvm/Transforms/IPO/StripDeadPrototypes.h"
+#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h"
+#include "llvm/Transforms/IPO/WholeProgramDevirt.h"
+#include "llvm/Transforms/InstCombine/InstCombine.h"
+#include "llvm/Transforms/Instrumentation.h"
+#include "llvm/Transforms/Instrumentation/AddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/BoundsChecking.h"
+#include "llvm/Transforms/Instrumentation/CGProfile.h"
+#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
+#include "llvm/Transforms/Instrumentation/GCOVProfiler.h"
+#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h"
+#include "llvm/Transforms/Instrumentation/InstrOrderFile.h"
+#include "llvm/Transforms/Instrumentation/InstrProfiling.h"
+#include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
+#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h"
+#include "llvm/Transforms/Instrumentation/PoisonChecking.h"
+#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h"
+#include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h"
+#include "llvm/Transforms/Scalar/BDCE.h"
+#include "llvm/Transforms/Scalar/CallSiteSplitting.h"
+#include "llvm/Transforms/Scalar/ConstantHoisting.h"
+#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h"
+#include "llvm/Transforms/Scalar/DCE.h"
+#include "llvm/Transforms/Scalar/DeadStoreElimination.h"
+#include "llvm/Transforms/Scalar/DivRemPairs.h"
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/Transforms/Scalar/Float2Int.h"
+#include "llvm/Transforms/Scalar/GVN.h"
+#include "llvm/Transforms/Scalar/GuardWidening.h"
+#include "llvm/Transforms/Scalar/IVUsersPrinter.h"
+#include "llvm/Transforms/Scalar/IndVarSimplify.h"
+#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h"
+#include "llvm/Transforms/Scalar/InstSimplifyPass.h"
+#include "llvm/Transforms/Scalar/JumpThreading.h"
+#include "llvm/Transforms/Scalar/LICM.h"
+#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h"
+#include "llvm/Transforms/Scalar/LoopDataPrefetch.h"
+#include "llvm/Transforms/Scalar/LoopDeletion.h"
+#include "llvm/Transforms/Scalar/LoopDistribute.h"
+#include "llvm/Transforms/Scalar/LoopFuse.h"
+#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h"
+#include "llvm/Transforms/Scalar/LoopInstSimplify.h"
+#include "llvm/Transforms/Scalar/LoopLoadElimination.h"
+#include "llvm/Transforms/Scalar/LoopPassManager.h"
+#include "llvm/Transforms/Scalar/LoopPredication.h"
+#include "llvm/Transforms/Scalar/LoopRotation.h"
+#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h"
+#include "llvm/Transforms/Scalar/LoopSink.h"
+#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
+#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h"
+#include "llvm/Transforms/Scalar/LoopUnrollPass.h"
+#include "llvm/Transforms/Scalar/LowerAtomic.h"
+#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h"
+#include "llvm/Transforms/Scalar/LowerWidenableCondition.h"
+#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h"
+#include "llvm/Transforms/Scalar/MemCpyOptimizer.h"
+#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h"
+#include "llvm/Transforms/Scalar/MergeICmps.h"
+#include "llvm/Transforms/Scalar/NaryReassociate.h"
+#include "llvm/Transforms/Scalar/NewGVN.h"
+#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h"
+#include "llvm/Transforms/Scalar/Reassociate.h"
+#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"
+#include "llvm/Transforms/Scalar/SCCP.h"
+#include "llvm/Transforms/Scalar/SROA.h"
+#include "llvm/Transforms/Scalar/Scalarizer.h"
+#include "llvm/Transforms/Scalar/SimpleLoopUnswitch.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include "llvm/Transforms/Scalar/Sink.h"
+#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h"
+#include "llvm/Transforms/Scalar/SpeculativeExecution.h"
+#include "llvm/Transforms/Scalar/TailRecursionElimination.h"
+#include "llvm/Transforms/Scalar/WarnMissedTransforms.h"
+#include "llvm/Transforms/Utils/AddDiscriminators.h"
+#include "llvm/Transforms/Utils/BreakCriticalEdges.h"
+#include "llvm/Transforms/Utils/CanonicalizeAliases.h"
+#include "llvm/Transforms/Utils/EntryExitInstrumenter.h"
+#include "llvm/Transforms/Utils/LCSSA.h"
+#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h"
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/Transforms/Utils/LowerInvoke.h"
+#include "llvm/Transforms/Utils/Mem2Reg.h"
+#include "llvm/Transforms/Utils/NameAnonGlobals.h"
+#include "llvm/Transforms/Utils/SymbolRewriter.h"
+#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h"
+#include "llvm/Transforms/Vectorize/LoopVectorize.h"
+#include "llvm/Transforms/Vectorize/SLPVectorizer.h"
+
+using namespace llvm;
+
+static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations",
+                                             cl::ReallyHidden, cl::init(4));
+static cl::opt<bool>
+    RunPartialInlining("enable-npm-partial-inlining", cl::init(false),
+                       cl::Hidden, cl::ZeroOrMore,
+                       cl::desc("Run Partial inlinining pass"));
+
+static cl::opt<bool>
+    RunNewGVN("enable-npm-newgvn", cl::init(false),
+              cl::Hidden, cl::ZeroOrMore,
+              cl::desc("Run NewGVN instead of GVN"));
+
+static cl::opt<bool> EnableGVNHoist(
+    "enable-npm-gvn-hoist", cl::init(false), cl::Hidden,
+    cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
+
+static cl::opt<bool> EnableGVNSink(
+    "enable-npm-gvn-sink", cl::init(false), cl::Hidden,
+    cl::desc("Enable the GVN hoisting pass for the new PM (default = off)"));
+
+static cl::opt<bool> EnableUnrollAndJam(
+    "enable-npm-unroll-and-jam", cl::init(false), cl::Hidden,
+    cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)"));
+
+static cl::opt<bool> EnableSyntheticCounts(
+    "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore,
+    cl::desc("Run synthetic function entry count generation "
+             "pass"));
+
+static Regex DefaultAliasRegex(
+    "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$");
+
+// This option is used to simplify testing of SampleFDO optimizations for
+// profile loading.
+static cl::opt<bool>
+    EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden,
+              cl::desc("Enable control height reduction optimization (CHR)"));
+
+PipelineTuningOptions::PipelineTuningOptions() {
+  LoopInterleaving = EnableLoopInterleaving;
+  LoopVectorization = EnableLoopVectorization;
+  SLPVectorization = RunSLPVectorization;
+  LoopUnrolling = true;
+  ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll;
+  LicmMssaOptCap = SetLicmMssaOptCap;
+  LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap;
+}
+
+extern cl::opt<bool> EnableHotColdSplit;
+extern cl::opt<bool> EnableOrderFileInstrumentation;
+
+extern cl::opt<bool> FlattenedProfileUsed;
+
+static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) {
+  switch (Level) {
+  case PassBuilder::O0:
+  case PassBuilder::O1:
+  case PassBuilder::O2:
+  case PassBuilder::O3:
+    return false;
+
+  case PassBuilder::Os:
+  case PassBuilder::Oz:
+    return true;
+  }
+  llvm_unreachable("Invalid optimization level!");
+}
+
+namespace {
+
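+// The no-op passes and analyses below let the pass-manager machinery itself
+// be exercised and tested from a textual pipeline without performing any
+// actual transformation or computation.
+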
+/// No-op module pass which does nothing.
+struct NoOpModulePass {
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &) {
+    return PreservedAnalyses::all();
+  }
+  static StringRef name() { return "NoOpModulePass"; }
+};
+
+/// No-op module analysis.
+class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> {
+  friend AnalysisInfoMixin<NoOpModuleAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  struct Result {};
+  Result run(Module &, ModuleAnalysisManager &) { return Result(); }
+  static StringRef name() { return "NoOpModuleAnalysis"; }
+};
+
+/// No-op CGSCC pass which does nothing.
+struct NoOpCGSCCPass {
+  PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &,
+                        LazyCallGraph &, CGSCCUpdateResult &UR) {
+    return PreservedAnalyses::all();
+  }
+  static StringRef name() { return "NoOpCGSCCPass"; }
+};
+
+/// No-op CGSCC analysis.
+class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> {
+  friend AnalysisInfoMixin<NoOpCGSCCAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  struct Result {};
+  Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) {
+    return Result();
+  }
+  static StringRef name() { return "NoOpCGSCCAnalysis"; }
+};
+
+/// No-op function pass which does nothing.
+struct NoOpFunctionPass {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &) {
+    return PreservedAnalyses::all();
+  }
+  static StringRef name() { return "NoOpFunctionPass"; }
+};
+
+/// No-op function analysis.
+class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> {
+  friend AnalysisInfoMixin<NoOpFunctionAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  struct Result {};
+  Result run(Function &, FunctionAnalysisManager &) { return Result(); }
+  static StringRef name() { return "NoOpFunctionAnalysis"; }
+};
+
+/// No-op loop pass which does nothing.
+struct NoOpLoopPass {
+  PreservedAnalyses run(Loop &L, LoopAnalysisManager &,
+                        LoopStandardAnalysisResults &, LPMUpdater &) {
+    return PreservedAnalyses::all();
+  }
+  static StringRef name() { return "NoOpLoopPass"; }
+};
+
+/// No-op loop analysis.
+class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> {
+  friend AnalysisInfoMixin<NoOpLoopAnalysis>;
+  static AnalysisKey Key;
+
+public:
+  struct Result {};
+  Result run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &) {
+    return Result();
+  }
+  static StringRef name() { return "NoOpLoopAnalysis"; }
+};
+
+AnalysisKey NoOpModuleAnalysis::Key;
+AnalysisKey NoOpCGSCCAnalysis::Key;
+AnalysisKey NoOpFunctionAnalysis::Key;
+AnalysisKey NoOpLoopAnalysis::Key;
+
+} // End anonymous namespace.
+
+void PassBuilder::invokePeepholeEPCallbacks(
+    FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) {
+  for (auto &C : PeepholeEPCallbacks)
+    C(FPM, Level);
+}
+
+void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) {
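+  // Each MODULE_ANALYSIS(NAME, CREATE_PASS) entry in PassRegistry.def expands
+  // through the macro below into one registration; e.g. an entry such as
+  // MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis()) becomes
+  // MAM.registerPass([&] { return NoOpModuleAnalysis(); });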
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
+  MAM.registerPass([&] { return CREATE_PASS; });
+#include "PassRegistry.def"
+
+  for (auto &C : ModuleAnalysisRegistrationCallbacks)
+    C(MAM);
+}
+
+void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) {
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)                                      \
+  CGAM.registerPass([&] { return CREATE_PASS; });
+#include "PassRegistry.def"
+
+  for (auto &C : CGSCCAnalysisRegistrationCallbacks)
+    C(CGAM);
+}
+
+void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) {
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)                                   \
+  FAM.registerPass([&] { return CREATE_PASS; });
+#include "PassRegistry.def"
+
+  for (auto &C : FunctionAnalysisRegistrationCallbacks)
+    C(FAM);
+}
+
+void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) {
+#define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
+  LAM.registerPass([&] { return CREATE_PASS; });
+#include "PassRegistry.def"
+
+  for (auto &C : LoopAnalysisRegistrationCallbacks)
+    C(LAM);
+}
+
+FunctionPassManager
+PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level,
+                                                 ThinLTOPhase Phase,
+                                                 bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations!");
+  FunctionPassManager FPM(DebugLogging);
+
+  // Form SSA out of local memory accesses after breaking apart aggregates into
+  // scalars.
+  FPM.addPass(SROA());
+
+  // Catch trivial redundancies
+  FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */));
+
+  // Hoisting of scalars and load expressions.
+  if (EnableGVNHoist)
+    FPM.addPass(GVNHoistPass());
+
+  // Global value numbering based sinking.
+  if (EnableGVNSink) {
+    FPM.addPass(GVNSinkPass());
+    FPM.addPass(SimplifyCFGPass());
+  }
+
+  // Speculative execution if the target has divergent branches; otherwise nop.
+  FPM.addPass(SpeculativeExecutionPass());
+
+  // Optimize based on known information about branches, and cleanup afterward.
+  FPM.addPass(JumpThreadingPass());
+  FPM.addPass(CorrelatedValuePropagationPass());
+  FPM.addPass(SimplifyCFGPass());
+  if (Level == O3)
+    FPM.addPass(AggressiveInstCombinePass());
+  FPM.addPass(InstCombinePass());
+
+  if (!isOptimizingForSize(Level))
+    FPM.addPass(LibCallsShrinkWrapPass());
+
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  // For PGO use pipeline, try to optimize memory intrinsics such as memcpy
+  // using the size value profile. Don't perform this when optimizing for size.
+  if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse &&
+      !isOptimizingForSize(Level))
+    FPM.addPass(PGOMemOPSizeOpt());
+
+  FPM.addPass(TailCallElimPass());
+  FPM.addPass(SimplifyCFGPass());
+
+  // Form canonically associated expression trees, and simplify the trees using
+  // basic mathematical properties. For example, this will form (nearly)
+  // minimal multiplication trees.
+  FPM.addPass(ReassociatePass());
+
+  // Add the primary loop simplification pipeline.
+  // FIXME: Currently this is split into two loop pass pipelines because we run
+  // some function passes in between them. These can and should be removed
+  // and/or replaced by scheduling the loop pass equivalents in the correct
+  // positions. But those equivalent passes aren't powerful enough yet.
+  // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still
+  // used. We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+  // `LoopInstSimplify`.
+  LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
+
+  // Simplify the loop body. We do this initially to clean up after other loop
+  // passes run, either when iterating on a loop or on inner loops with
+  // implications on the outer loop.
+  LPM1.addPass(LoopInstSimplifyPass());
+  LPM1.addPass(LoopSimplifyCFGPass());
+
+  // Rotate Loop - disable header duplication at -Oz
+  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(SimpleLoopUnswitchPass());
+  LPM2.addPass(IndVarSimplifyPass());
+  LPM2.addPass(LoopIdiomRecognizePass());
+
+  for (auto &C : LateLoopOptimizationsEPCallbacks)
+    C(LPM2, Level);
+
+  LPM2.addPass(LoopDeletionPass());
+  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
+  // because it changes the IR, which makes profile annotation in the backend
+  // compile inaccurate.
+  if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+       PGOOpt->Action != PGOOptions::SampleUse) &&
+      PTO.LoopUnrolling)
+    LPM2.addPass(
+        LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll));
+
+  for (auto &C : LoopOptimizerEndEPCallbacks)
+    C(LPM2, Level);
+
+  // We provide the opt remark emitter pass for LICM to use. We only need to do
+  // this once as it is immutable.
+  FPM.addPass(
+      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging));
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging));
+
+  // Eliminate redundancies.
+  if (Level != O1) {
+    // These passes add substantial compile time so skip them at O1.
+    FPM.addPass(MergedLoadStoreMotionPass());
+    if (RunNewGVN)
+      FPM.addPass(NewGVNPass());
+    else
+      FPM.addPass(GVN());
+  }
+
+  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+  FPM.addPass(MemCpyOptPass());
+
+  // Sparse conditional constant propagation.
+  // FIXME: It isn't clear why we do this *after* loop passes rather than
+  // before...
+  FPM.addPass(SCCPPass());
+
+  // Delete dead bit computations (instcombine runs after to fold away the dead
+  // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that creates).
+  FPM.addPass(BDCEPass());
+
+  // Run instcombine after redundancy and dead bit elimination to exploit
+  // opportunities opened up by them.
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  // Re-consider control flow based optimizations after redundancy elimination,
+  // redo DCE, etc.
+  FPM.addPass(JumpThreadingPass());
+  FPM.addPass(CorrelatedValuePropagationPass());
+  FPM.addPass(DSEPass());
+  FPM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      DebugLogging));
+
+  for (auto &C : ScalarOptimizerLateEPCallbacks)
+    C(FPM, Level);
+
+  // Finally, do an expensive DCE pass to catch all the dead code exposed by
+  // the simplifications and basic cleanup after all the simplifications.
+  FPM.addPass(ADCEPass());
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  if (EnableCHR && Level == O3 && PGOOpt &&
+      (PGOOpt->Action == PGOOptions::IRUse ||
+       PGOOpt->Action == PGOOptions::SampleUse))
+    FPM.addPass(ControlHeightReductionPass());
+
+  return FPM;
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
+                                    PassBuilder::OptimizationLevel Level,
+                                    bool RunProfileGen, bool IsCS,
+                                    std::string ProfileFile,
+                                    std::string ProfileRemappingFile) {
+  // Generally, running simplification passes and the inliner with a high
+  // threshold results in smaller executables, but there may be cases where
+  // the size grows, so let's be conservative here and skip this simplification
+  // at -Os/Oz. We will not do this inlining for context-sensitive PGO (when
+  // IsCS is true).
+  if (!isOptimizingForSize(Level) && !IsCS) {
+    InlineParams IP;
+
+    // In the old pass manager, this is a cl::opt. Should this still be one?
+    IP.DefaultThreshold = 75;
+
+    // FIXME: The hint threshold has the same value used by the regular inliner.
+    // This should probably be lowered after performance testing.
+    // FIXME: this comment is cargo-culted from the old pass manager; revisit.
+    IP.HintThreshold = 325;
+
+    CGSCCPassManager CGPipeline(DebugLogging);
+
+    CGPipeline.addPass(InlinerPass(IP));
+
+    FunctionPassManager FPM;
+    FPM.addPass(SROA());
+    FPM.addPass(EarlyCSEPass());    // Catch trivial redundancies.
+    FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+    FPM.addPass(InstCombinePass()); // Combine silly sequences.
+    invokePeepholeEPCallbacks(FPM, Level);
+
+    CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+
+    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline)));
+  }
+
+  // Delete anything that is now dead to make sure that we don't instrument
+  // dead code. Instrumentation can end up keeping dead code around and
+  // dramatically increase code size.
+  MPM.addPass(GlobalDCEPass());
+
+  if (RunProfileGen) {
+    MPM.addPass(PGOInstrumentationGen(IsCS));
+
+    FunctionPassManager FPM;
+    FPM.addPass(
+        createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+    // Add the profile lowering pass.
+    InstrProfOptions Options;
+    if (!ProfileFile.empty())
+      Options.InstrProfileOutput = ProfileFile;
+    Options.DoCounterPromotion = true;
+    Options.UseBFIInPromotion = IsCS;
+    MPM.addPass(InstrProfiling(Options, IsCS));
+  } else if (!ProfileFile.empty()) {
+    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+  }
+}
+
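+// Note: Os and Oz follow O3 in PassBuilder::OptimizationLevel, so levels above
+// O3 map to inliner OptLevel 2 with SizeLevel 1 (Os) or 2 (Oz), while O0-O3
+// use SizeLevel 0.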
+static InlineParams
+getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
+  auto O3 = PassBuilder::O3;
+  unsigned OptLevel = Level > O3 ? 2 : Level;
+  unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
+  return getInlineParams(OptLevel, SizeLevel);
+}
+
+ModulePassManager
+PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
+                                               ThinLTOPhase Phase,
+                                               bool DebugLogging) {
+  ModulePassManager MPM(DebugLogging);
+
+  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
+
+  // In ThinLTO mode, when a flattened profile is used, all the available
+  // profile information will be annotated in the PreLink phase, so there is
+  // no need to load the profile again in PostLink.
+  bool LoadSampleProfile =
+      HasSampleProfile &&
+      !(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink);
+
+  // During the ThinLTO backend phase we perform early indirect call promotion
+  // here, before globalopt. Otherwise imported available_externally functions
+  // look unreferenced and are removed. If we are going to load the sample
+  // profile then defer until later.
+  // TODO: See if we can move later and consolidate with the location where
+  // we perform ICP when we are loading a sample profile.
+  // TODO: We pass HasSampleProfile (whether there was a sample profile file
+  // passed to the compile) to the SamplePGO flag of ICP. This is used to
+  // determine whether the new direct calls are annotated with prof metadata.
+  // Ideally this should be determined from whether the IR is annotated with
+  // sample profile, and not whether a sample profile was provided on the
+  // command line. E.g. for flattened profiles where we will not be reloading
+  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
+  // provide the sample profile file.
+  if (Phase == ThinLTOPhase::PostLink && !LoadSampleProfile)
+    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
+
+  // Do basic inference of function attributes from known properties of system
+  // libraries and other oracles.
+  MPM.addPass(InferFunctionAttrsPass());
+
+  // Create an early function pass manager to clean up the output of the
+  // frontend.
+  FunctionPassManager EarlyFPM(DebugLogging);
+  EarlyFPM.addPass(SimplifyCFGPass());
+  EarlyFPM.addPass(SROA());
+  EarlyFPM.addPass(EarlyCSEPass());
+  EarlyFPM.addPass(LowerExpectIntrinsicPass());
+  if (Level == O3)
+    EarlyFPM.addPass(CallSiteSplittingPass());
+
+  // In the SamplePGO ThinLTO backend, we need instcombine before profile
+  // annotation to convert bitcasts to direct calls so that they can be inlined
+  // during the profile annotation preparation step.
+  // More details about SamplePGO design can be found in:
+  // https://research.google.com/pubs/pub45290.html
+  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
+  if (LoadSampleProfile)
+    EarlyFPM.addPass(InstCombinePass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+  if (LoadSampleProfile) {
+    // Annotate sample profile right after early FPM to ensure freshness of
+    // the debug info.
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+                                        PGOOpt->ProfileRemappingFile,
+                                        Phase == ThinLTOPhase::PreLink));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+    // Do not invoke ICP in the ThinLTOPrelink phase as it makes it hard
+    // for the profile annotation to be accurate in the ThinLTO backend.
+    if (Phase != ThinLTOPhase::PreLink)
+      // We perform early indirect call promotion here, before globalopt.
+      // This is important for the ThinLTO backend phase because otherwise
+      // imported available_externally functions look unreferenced and are
+      // removed.
+      MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink,
+                                           true /* SamplePGO */));
+  }
+
+  // Interprocedural constant propagation now that basic cleanup has occurred
+  // and prior to optimizing globals.
+  // FIXME: This position in the pipeline hasn't been carefully considered in
+  // years, it should be re-analyzed.
+  MPM.addPass(IPSCCPPass());
+
+  // Attach metadata to indirect call sites indicating the set of functions
+  // they may target at run-time. This should follow IPSCCP.
+  MPM.addPass(CalledValuePropagationPass());
+
+  // Optimize globals to try and fold them into constants.
+  MPM.addPass(GlobalOptPass());
+
+  // Promote any localized globals to SSA registers.
+  // FIXME: Should this instead be a run of SROA?
+  // FIXME: We should probably run instcombine and simplify-cfg afterward to
+  // delete control flows that are dead once globals have been folded to
+  // constants.
+  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+  // Remove any dead arguments exposed by cleanups and constant folding of
+  // globals.
+  MPM.addPass(DeadArgumentEliminationPass());
+
+  // Create a small function pass pipeline to cleanup after all the global
+  // optimizations.
+  FunctionPassManager GlobalCleanupPM(DebugLogging);
+  GlobalCleanupPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
+
+  GlobalCleanupPM.addPass(SimplifyCFGPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
+
+  // Add all the requested passes for instrumentation PGO, if requested.
+  if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+      (PGOOpt->Action == PGOOptions::IRInstr ||
+       PGOOpt->Action == PGOOptions::IRUse)) {
+    addPGOInstrPasses(MPM, DebugLogging, Level,
+                      /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
+                      /* IsCS */ false, PGOOpt->ProfileFile,
+                      PGOOpt->ProfileRemappingFile);
+    MPM.addPass(PGOIndirectCallPromotion(false, false));
+  }
+  if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+      PGOOpt->CSAction == PGOOptions::CSIRInstr)
+    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
+
+  // Synthesize function entry counts for non-PGO compilation.
+  if (EnableSyntheticCounts && !PGOOpt)
+    MPM.addPass(SyntheticCountsPropagation());
+
+  // Require the GlobalsAA analysis for the module so we can query it within
+  // the CGSCC pipeline.
+  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+
+  // Require the ProfileSummaryAnalysis for the module so we can query it within
+  // the inliner pass.
+  MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+
+  // Now begin the main postorder CGSCC pipeline.
+  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+  // manager and trying to emulate its precise behavior. Much of this doesn't
+  // make a lot of sense and we should revisit the core CGSCC structure.
+  CGSCCPassManager MainCGPipeline(DebugLogging);
+
+  // Note: historically, the PruneEH pass was run first to deduce nounwind and
+  // generally clean up exception handling overhead. It isn't clear this is
+  // valuable as the inliner doesn't currently care whether it is inlining an
+  // invoke or a call.
+
+  // Run the inliner first. The theory is that we are walking bottom-up and so
+  // the callees have already been fully optimized, and we want to inline them
+  // into the callers so that our optimizations can reflect that.
+  // For the PreLinkThinLTO phase, we disable the hot-caller heuristic for
+  // sample PGO because it makes profile annotation in the backend inaccurate.
+  InlineParams IP = getInlineParamsFromOptLevel(Level);
+  if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
+      PGOOpt->Action == PGOOptions::SampleUse)
+    IP.HotCallSiteThreshold = 0;
+  MainCGPipeline.addPass(InlinerPass(IP));
+
+  // Now deduce any function attributes based on the current code.
+  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+  // When at O3 add argument promotion to the pass pipeline.
+  // FIXME: It isn't at all clear why this should be limited to O3.
+  if (Level == O3)
+    MainCGPipeline.addPass(ArgumentPromotionPass());
+
+  // Lastly, add the core function simplification pipeline nested inside the
+  // CGSCC walk.
+  MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+      buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
+
+  for (auto &C : CGSCCOptimizerLateEPCallbacks)
+    C(MainCGPipeline, Level);
+
+  // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
+  // to detect when we devirtualize indirect calls and iterate the SCC passes
+  // in that case to try and catch knock-on inlining or function attrs
+  // opportunities. Then we add it to the module pipeline by walking the SCCs
+  // in postorder (or bottom-up).
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
+          std::move(MainCGPipeline), MaxDevirtIterations)));
+
+  return MPM;
+}
+
+ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
+    OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) {
+  ModulePassManager MPM(DebugLogging);
+
+  // Optimize globals now that the module is fully simplified.
+  MPM.addPass(GlobalOptPass());
+  MPM.addPass(GlobalDCEPass());
+
+  // Run partial inlining pass to partially inline functions that have
+  // large bodies.
+  if (RunPartialInlining)
+    MPM.addPass(PartialInlinerPass());
+
+  // Remove available_externally function and global definitions since we
+  // aren't compiling an object file for later LTO. For LTO we want to preserve
+  // these so they are eligible for inlining at link time. Note that if they
+  // are unreferenced they will be removed by GlobalDCE later, so this only
+  // impacts referenced available_externally globals. Eventually they will be
+  // suppressed during codegen, but eliminating them here enables more
+  // opportunity for GlobalDCE as it may make globals referenced by available
+  // external functions dead and saves running the remaining passes on the
+  // eliminated functions. These should be preserved during prelinking for
+  // link-time inlining decisions.
+  if (!LTOPreLink)
+    MPM.addPass(EliminateAvailableExternallyPass());
+
+  if (EnableOrderFileInstrumentation)
+    MPM.addPass(InstrOrderFilePass());
+
+  // Do RPO function attribute inference across the module to forward-propagate
+  // attributes where applicable.
+  // FIXME: Is this really an optimization rather than a canonicalization?
+  MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
+  // cross-module inline has not been done yet. The context sensitive
+  // instrumentation is after all the inlines are done.
+  if (!LTOPreLink && PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
+  // Re-require GlobalsAA here prior to function passes. This is particularly
+  // useful as the above will have inlined, DCE'ed, and function-attr
+  // propagated everything. We should at this point have a reasonably minimal
+  // and richly annotated call graph. By computing aliasing and mod/ref
+  // information for all local globals here, the late loop passes and notably
+  // the vectorizer will be able to use them to help recognize vectorizable
+  // memory operations.
+  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+
+  FunctionPassManager OptimizePM(DebugLogging);
+  OptimizePM.addPass(Float2IntPass());
+  // FIXME: We need to run some loop optimizations to re-rotate loops after
+  // simplify-cfg and others undo their rotation.
+
+  // Optimize the loop execution. These passes operate on entire loop nests
+  // rather than on each loop in an inside-out manner, and so they are actually
+  // function passes.
+
+  for (auto &C : VectorizerStartEPCallbacks)
+    C(OptimizePM, Level);
+
+  // First rotate loops that may have been un-rotated by prior passes.
+  OptimizePM.addPass(
+      createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
+
+  // Distribute loops to allow partial vectorization, i.e. isolate dependences
+  // into a separate loop that would otherwise inhibit vectorization. This is
+  // currently only performed for loops marked with the metadata
+  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
+  OptimizePM.addPass(LoopDistributePass());
+
+  // Now run the core loop vectorizer.
+  OptimizePM.addPass(LoopVectorizePass(
+      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+
+  // Eliminate loads by forwarding stores from the previous iteration to loads
+  // of the current iteration.
+  OptimizePM.addPass(LoopLoadEliminationPass());
+
+  // Cleanup after the loop optimization passes.
+  OptimizePM.addPass(InstCombinePass());
+
+  // Now that we've formed fast-to-execute loop structures, we do further
+  // optimizations. These are run afterward as they might block doing complex
+  // analyses and transforms such as those needed for loop vectorization.
+
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
+  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
+                                     forwardSwitchCondToPhi(true).
+                                     convertSwitchToLookupTable(true).
+                                     needCanonicalLoops(false).
+                                     sinkCommonInsts(true)));
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  if (PTO.SLPVectorization)
+    OptimizePM.addPass(SLPVectorizerPass());
+
+  OptimizePM.addPass(InstCombinePass());
+
+  // Unroll small loops to hide loop backedge latency and saturate any parallel
+  // execution resources of an out-of-order processor. We also then need to
+  // clean up redundancies and loop invariant code.
+  // FIXME: It would be really good to use a loop-integrated instruction
+  // combiner for cleanup here so that the unrolling and LICM can be pipelined
+  // across the loop nests.
+  // We do UnrollAndJam in a separate LPM to ensure it happens before unroll.
+  if (EnableUnrollAndJam) {
+    OptimizePM.addPass(
+        createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
+  }
+  if (PTO.LoopUnrolling)
+    OptimizePM.addPass(LoopUnrollPass(
+        LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll)));
+  OptimizePM.addPass(WarnMissedTransformationsPass());
+  OptimizePM.addPass(InstCombinePass());
+  OptimizePM.addPass(
+      RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+  OptimizePM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      DebugLogging));
+
+  // Now that we've vectorized and unrolled loops, we may have more refined
+  // alignment information, try to re-derive it here.
+  OptimizePM.addPass(AlignmentFromAssumptionsPass());
+
+  // Split out cold code. Splitting is done late to avoid hiding context from
+  // other optimizations and inadvertently regressing performance. The tradeoff
+  // is that this has a higher code size cost than splitting early.
+  if (EnableHotColdSplit && !LTOPreLink)
+    MPM.addPass(HotColdSplittingPass());
+
+  // The LoopSink pass sinks instructions hoisted by LICM, which serves as a
+  // canonicalization pass that enables other optimizations. As a result, the
+  // LoopSink pass needs to be a very late IR pass to avoid undoing LICM
+  // results too early.
+  OptimizePM.addPass(LoopSinkPass());
+
+  // And finally clean up LCSSA form before generating code.
+  OptimizePM.addPass(InstSimplifyPass());
+
+  // This hoists/decomposes div/rem ops. It should run after other sink/hoist
+  // passes to avoid re-sinking, but before SimplifyCFG because it can allow
+  // flattening of blocks.
+  OptimizePM.addPass(DivRemPairsPass());
+
+  // LoopSink (and other loop passes since the last simplifyCFG) might have
+  // resulted in single-entry-single-exit or empty blocks. Clean up the CFG.
+  OptimizePM.addPass(SimplifyCFGPass());
+
+  // Optimize PHIs by speculating around them when profitable. Note that this
+  // pass needs to be run after any PRE or similar pass as it is essentially
+  // inserting redundancies into the program. This even includes SimplifyCFG.
+  OptimizePM.addPass(SpeculateAroundPHIsPass());
+
+  for (auto &C : OptimizerLastEPCallbacks)
+    C(OptimizePM, Level);
+
+  // Add the core optimizing pipeline.
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM)));
+
+  MPM.addPass(CGProfilePass());
+
+  // Now we need to do some global optimization transforms.
+  // FIXME: It would seem like these should come first in the optimization
+  // pipeline and maybe be the bottom of the canonicalization pipeline? Weird
+  // ordering here.
+  MPM.addPass(GlobalDCEPass());
+  MPM.addPass(ConstantMergePass());
+
+  return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level,
+                                           bool DebugLogging, bool LTOPreLink) {
+  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+
+  ModulePassManager MPM(DebugLogging);
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Apply module pipeline start EP callback.
+  for (auto &C : PipelineStartEPCallbacks)
+    C(MPM);
+
+  if (PGOOpt && PGOOpt->SamplePGOSupport)
+    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+  // Add the core simplification pipeline.
+  MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None,
+                                                DebugLogging));
+
+  // Now add the optimization pipeline.
+  MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink));
+
+  return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level,
+                                                bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+
+  ModulePassManager MPM(DebugLogging);
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  if (PGOOpt && PGOOpt->SamplePGOSupport)
+    MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass()));
+
+  // Apply module pipeline start EP callback.
+  for (auto &C : PipelineStartEPCallbacks)
+    C(MPM);
+
+  // If we are planning to perform ThinLTO later, we don't bloat the code with
+  // unrolling/vectorization/... now. Just simplify the module as much as we
+  // can.
+  MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PreLink,
+                                                DebugLogging));
+
+  // Run partial inlining pass to partially inline functions that have
+  // large bodies.
+  // FIXME: It isn't clear whether this is really the right place to run this
+  // in ThinLTO. Because there is another canonicalization and simplification
+  // phase that will run after the thin link, running this here ends up with
+  // less information than will be available later and it may grow functions in
+  // ways that aren't beneficial.
+  if (RunPartialInlining)
+    MPM.addPass(PartialInlinerPass());
+
+  // Reduce the size of the IR as much as possible.
+  MPM.addPass(GlobalOptPass());
+
+  return MPM;
+}
+
+ModulePassManager PassBuilder::buildThinLTODefaultPipeline(
+    OptimizationLevel Level, bool DebugLogging,
+    const ModuleSummaryIndex *ImportSummary) {
+  ModulePassManager MPM(DebugLogging);
+
+  if (ImportSummary) {
+    // These passes import type identifier resolutions for whole-program
+    // devirtualization and CFI. They must run early because other passes may
+    // disturb the specific instruction patterns that these passes look for,
+    // creating dependencies on resolutions that may not appear in the summary.
+    //
+    // For example, GVN may transform the pattern assume(type.test) appearing in
+    // two basic blocks into assume(phi(type.test, type.test)), which would
+    // transform a dependency on a WPD resolution into a dependency on a type
+    // identifier resolution for CFI.
+    //
+    // Also, WPD has access to more precise information than ICP and can
+    // devirtualize more effectively, so it should operate on the IR first.
+    //
+    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+    // metadata and intrinsics.
+    MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary));
+    MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary));
+  }
+
+  if (Level == O0)
+    return MPM;
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Add the core simplification pipeline.
+  MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink,
+                                                DebugLogging));
+
+  // Now add the optimization pipeline.
+  MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
+
+  return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
+                                            bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  // FIXME: We should use a customized pre-link pipeline!
+  return buildPerModuleDefaultPipeline(Level, DebugLogging,
+                                       /* LTOPreLink */true);
+}
+
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+                                     ModuleSummaryIndex *ExportSummary) {
+  ModulePassManager MPM(DebugLogging);
+
+  if (Level == O0) {
+    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+    // metadata and intrinsics.
+    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+    return MPM;
+  }
+
+  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
+    // Load sample profile before running the LTO optimization pipeline.
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+                                        PGOOpt->ProfileRemappingFile,
+                                        false /* ThinLTOPhase::PreLink */));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+  }
+
+  // Remove unused virtual tables to improve the quality of code generated by
+  // whole-program devirtualization and bitset lowering.
+  MPM.addPass(GlobalDCEPass());
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Do basic inference of function attributes from known properties of system
+  // libraries and other oracles.
+  MPM.addPass(InferFunctionAttrsPass());
+
+  if (Level > 1) {
+    FunctionPassManager EarlyFPM(DebugLogging);
+    EarlyFPM.addPass(CallSiteSplittingPass());
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+    // Indirect call promotion. This should promote all the targets that are
+    // left by the earlier promotion pass that promotes intra-module targets.
+    // This two-step promotion is intended to save compile time. For LTO, it
+    // should produce the same result as if we only did promotion here.
+    MPM.addPass(PGOIndirectCallPromotion(
+        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
+    // Propagate constants at call sites into the functions they call.  This
+    // opens opportunities for globalopt (and inlining) by substituting function
+    // pointers passed as arguments to direct uses of functions.
+    MPM.addPass(IPSCCPPass());
+
+    // Attach metadata to indirect call sites indicating the set of functions
+    // they may target at run-time. This should follow IPSCCP.
+    MPM.addPass(CalledValuePropagationPass());
+  }
+
+  // Now deduce any function attributes based on the current code.
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+              PostOrderFunctionAttrsPass()));
+
+  // Do RPO function attribute inference across the module to forward-propagate
+  // attributes where applicable.
+  // FIXME: Is this really an optimization rather than a canonicalization?
+  MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+  // Use in-range annotations on GEP indices to split globals where beneficial.
+  MPM.addPass(GlobalSplitPass());
+
+  // Run whole-program optimization of virtual calls when the list of callees
+  // is fixed.
+  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+
+  // Stop here at -O1.
+  if (Level == 1) {
+    // The LowerTypeTestsPass needs to run to lower type metadata and the
+    // type.test intrinsics. The pass does nothing if CFI is disabled.
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+    return MPM;
+  }
+
+  // Optimize globals to try and fold them into constants.
+  MPM.addPass(GlobalOptPass());
+
+  // Promote any localized globals to SSA registers.
+  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+  // Linking modules together can lead to duplicate global constants; only
+  // keep one copy of each constant.
+  MPM.addPass(ConstantMergePass());
+
+  // Remove unused arguments from functions.
+  MPM.addPass(DeadArgumentEliminationPass());
+
+  // Reduce the code after globalopt and ipsccp.  Both can open up significant
+  // simplification opportunities, and both can propagate functions through
+  // function pointers.  When this happens, we often have to resolve varargs
+  // calls, etc, so let instcombine do this.
+  FunctionPassManager PeepholeFPM(DebugLogging);
+  if (Level == O3)
+    PeepholeFPM.addPass(AggressiveInstCombinePass());
+  PeepholeFPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(PeepholeFPM, Level);
+
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
+
+  // Note: historically, the PruneEH pass was run first to deduce nounwind and
+  // generally clean up exception handling overhead. It isn't clear this is
+  // valuable as the inliner doesn't currently care whether it is inlining an
+  // invoke or a call.
+  // Run the inliner now.
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+      InlinerPass(getInlineParamsFromOptLevel(Level))));
+
+  // Optimize globals again after we ran the inliner.
+  MPM.addPass(GlobalOptPass());
+
+  // Garbage collect dead functions.
+  // FIXME: Add ArgumentPromotion pass after once it's ported.
+  MPM.addPass(GlobalDCEPass());
+
+  FunctionPassManager FPM(DebugLogging);
+  // The IPO Passes may leave cruft around. Clean up after them.
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  FPM.addPass(JumpThreadingPass());
+
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass.
+  if (PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
+  // Break up allocas
+  FPM.addPass(SROA());
+
+  // LTO provides additional opportunities for tailcall elimination due to
+  // link-time inlining, and visibility of nocapture attribute.
+  FPM.addPass(TailCallElimPass());
+
+  // Run a few AA-driven optimizations here and now to clean up the code.
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+              PostOrderFunctionAttrsPass()));
+  // FIXME: here we run IP alias analysis in the legacy PM.
+
+  FunctionPassManager MainFPM;
+
+  // FIXME: once we fix LoopPass Manager, add LICM here.
+  // FIXME: once we provide support for enabling MLSM, add it here.
+  if (RunNewGVN)
+    MainFPM.addPass(NewGVNPass());
+  else
+    MainFPM.addPass(GVN());
+
+  // Remove dead memcpy()'s.
+  MainFPM.addPass(MemCpyOptPass());
+
+  // Nuke dead stores.
+  MainFPM.addPass(DSEPass());
+
+  // FIXME: at this point, we run a bunch of loop passes:
+  // indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
+  // loopVectorize. Enable them once the remaining issues with LPM
+  // are sorted out.
+
+  MainFPM.addPass(InstCombinePass());
+  MainFPM.addPass(SimplifyCFGPass());
+  MainFPM.addPass(SCCPPass());
+  MainFPM.addPass(InstCombinePass());
+  MainFPM.addPass(BDCEPass());
+
+  // FIXME: We may want to run SLPVectorizer here.
+  // After vectorization, assume intrinsics may tell us more
+  // about pointer alignments.
+#if 0
+  MainFPM.addPass(AlignmentFromAssumptionsPass());
+#endif
+
+  // FIXME: Conditionally run LoadCombine here, after it's ported
+  // (in case we still have this pass, given its questionable usefulness).
+
+  MainFPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(MainFPM, Level);
+  MainFPM.addPass(JumpThreadingPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
+
+  // Create a function that performs CFI checks for cross-DSO calls with
+  // targets in the current module.
+  MPM.addPass(CrossDSOCFIPass());
+
+  // Lower type metadata and the type.test intrinsic. This pass supports
+  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
+  // to be run at link time if CFI is enabled. This pass does nothing if
+  // CFI is disabled.
+  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+
+  // Enable splitting late in the FullLTO post-link pipeline. This is done in
+  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+  if (EnableHotColdSplit)
+    MPM.addPass(HotColdSplittingPass());
+
+  // Add late LTO optimization passes.
+  // Delete basic blocks, which optimization passes may have killed.
+  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
+
+  // Drop bodies of available externally objects to improve GlobalDCE.
+  MPM.addPass(EliminateAvailableExternallyPass());
+
+  // Now that we have optimized the program, discard unreachable functions.
+  MPM.addPass(GlobalDCEPass());
+
+  // FIXME: Maybe enable MergeFuncs conditionally after it's ported.
+  return MPM;
+}
+
+AAManager PassBuilder::buildDefaultAAPipeline() {
+  AAManager AA;
+
+  // The order in which these are registered determines their priority when
+  // being queried.
+
+  // First we register the basic alias analysis that provides the majority of
+  // per-function local AA logic. This is a stateless, on-demand local set of
+  // AA techniques.
+  AA.registerFunctionAnalysis<BasicAA>();
+
+  // Next we query fast, specialized alias analyses that wrap IR-embedded
+  // information about aliasing.
+  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+  AA.registerFunctionAnalysis<TypeBasedAA>();
+
+  // Add support for querying global aliasing information when available.
+  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
+  // analysis, all that the `AAManager` can do is query for any *cached*
+  // results from `GlobalsAA` through a readonly proxy.
+  AA.registerModuleAnalysis<GlobalsAA>();
+
+  return AA;
+}
+
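+/// Parses a name of the form "repeat<N>" (e.g. "repeat<3>") and returns N;
+/// returns None for any other name or a non-positive count.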
+static Optional<int> parseRepeatPassName(StringRef Name) {
+  if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
+    return None;
+  int Count;
+  if (Name.getAsInteger(0, Count) || Count <= 0)
+    return None;
+  return Count;
+}
+
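+/// Parses a name of the form "devirt<N>" (e.g. "devirt<4>") and returns N;
+/// returns None for any other name or a non-positive count.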
+static Optional<int> parseDevirtPassName(StringRef Name) {
+  if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
+    return None;
+  int Count;
+  if (Name.getAsInteger(0, Count) || Count <= 0)
+    return None;
+  return Count;
+}
+
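+/// Returns true if \p Name is exactly \p PassName or \p PassName followed by
+/// a "<...>" parameter list, e.g. "simplify-cfg" or
+/// "simplify-cfg<keep-loops>".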
+static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
+  if (!Name.consume_front(PassName))
+    return false;
+  // normal pass name w/o parameters == default parameters
+  if (Name.empty())
+    return true;
+  return Name.startswith("<") && Name.endswith(">");
+}
+
+namespace {
+
+/// This performs customized parsing of pass name with parameters.
+///
+/// We do not need parametrization of passes in a textual pipeline very often,
+/// yet on a rare occasion the ability to specify parameters right there can
+/// be useful.
+///
+/// \p Name - a parameterized specification of a pass from a textual pipeline;
+/// it is a string of the form:
+///      PassName '<' parameter-list '>'
+///
+/// The parameter list is parsed by the \p Parser callable argument. It takes
+/// a StringRef of parameters and returns either a StringError or a parameter
+/// list in the form of a custom parameters type, all wrapped into the
+/// Expected<> template class.
+///
+template <typename ParametersParseCallableT>
+auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
+                         StringRef PassName) -> decltype(Parser(StringRef{})) {
+  using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
+
+  StringRef Params = Name;
+  if (!Params.consume_front(PassName)) {
+    assert(false &&
+           "unable to strip pass name from parametrized pass specification");
+  }
+  if (Params.empty())
+    return ParametersT{};
+  if (!Params.consume_front("<") || !Params.consume_back(">")) {
+    assert(false && "invalid format for parametrized pass name");
+  }
+
+  Expected<ParametersT> Result = Parser(Params);
+  assert((Result || Result.template errorIsA<StringError>()) &&
+         "Pass parameter parser can only return StringErrors.");
+  return std::move(Result);
+}
+
+/// Parser of parameters for LoopUnroll pass.
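+/// Accepts a semicolon-separated list such as "O2;partial;no-runtime": an
+/// O<n> token sets the optimization level, while the partial, peeling,
+/// runtime, and upperbound tokens (optionally prefixed with "no-") toggle the
+/// corresponding unrolling features.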
+Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
+  LoopUnrollOptions UnrollOpts;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+    int OptLevel = StringSwitch<int>(ParamName)
+                       .Case("O0", 0)
+                       .Case("O1", 1)
+                       .Case("O2", 2)
+                       .Case("O3", 3)
+                       .Default(-1);
+    if (OptLevel >= 0) {
+      UnrollOpts.setOptLevel(OptLevel);
+      continue;
+    }
+
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "partial") {
+      UnrollOpts.setPartial(Enable);
+    } else if (ParamName == "peeling") {
+      UnrollOpts.setPeeling(Enable);
+    } else if (ParamName == "runtime") {
+      UnrollOpts.setRuntime(Enable);
+    } else if (ParamName == "upperbound") {
+      UnrollOpts.setUpperBound(Enable);
+    } else {
+      return make_error<StringError>(
+          formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return UnrollOpts;
+}
+
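+/// Parser of parameters for MemorySanitizer pass; accepts a
+/// semicolon-separated list such as "recover;kernel;track-origins=2".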
+Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) {
+  MemorySanitizerOptions Result;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    if (ParamName == "recover") {
+      Result.Recover = true;
+    } else if (ParamName == "kernel") {
+      Result.Kernel = true;
+    } else if (ParamName.consume_front("track-origins=")) {
+      if (ParamName.getAsInteger(0, Result.TrackOrigins))
+        return make_error<StringError>(
+            formatv("invalid argument to MemorySanitizer pass track-origins "
+                    "parameter: '{0}' ",
+                    ParamName)
+                .str(),
+            inconvertibleErrorCode());
+    } else {
+      return make_error<StringError>(
+          formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
+
+/// Parser of parameters for SimplifyCFG pass.
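+/// Accepts a semicolon-separated list such as
+/// "forward-switch-cond;no-keep-loops;bonus-inst-threshold=4".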
+Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) {
+  SimplifyCFGOptions Result;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "forward-switch-cond") {
+      Result.forwardSwitchCondToPhi(Enable);
+    } else if (ParamName == "switch-to-lookup") {
+      Result.convertSwitchToLookupTable(Enable);
+    } else if (ParamName == "keep-loops") {
+      Result.needCanonicalLoops(Enable);
+    } else if (ParamName == "sink-common-insts") {
+      Result.sinkCommonInsts(Enable);
+    } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) {
+      APInt BonusInstThreshold;
+      if (ParamName.getAsInteger(0, BonusInstThreshold))
+        return make_error<StringError>(
+            formatv("invalid argument to SimplifyCFG pass bonus-threshold "
+                    "parameter: '{0}' ",
+                    ParamName).str(),
+            inconvertibleErrorCode());
+      Result.bonusInstThreshold(BonusInstThreshold.getSExtValue());
+    } else {
+      return make_error<StringError>(
+          formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
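+
+// For example (illustrative, not part of the upstream patch),
+// "no-keep-loops;bonus-inst-threshold=4" disables canonical-loop
+// preservation and sets the bonus instruction threshold to 4:
+//
+//   auto Opts = parseSimplifyCFGOptions("no-keep-loops;bonus-inst-threshold=4");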
+
+/// Parser of parameters for LoopVectorize pass.
+Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
+  LoopVectorizeOptions Opts;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "interleave-forced-only") {
+      Opts.setInterleaveOnlyWhenForced(Enable);
+    } else if (ParamName == "vectorize-forced-only") {
+      Opts.setVectorizeOnlyWhenForced(Enable);
+    } else {
+      return make_error<StringError>(
+          formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Opts;
+}
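+
+// For example (illustrative, not part of the upstream patch),
+// "interleave-forced-only;no-vectorize-forced-only" limits interleaving to
+// loops where it is forced while leaving vectorization unrestricted:
+//
+//   auto Opts = parseLoopVectorizeOptions(
+//       "interleave-forced-only;no-vectorize-forced-only");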
+
+Expected<bool> parseLoopUnswitchOptions(StringRef Params) {
+  bool Result = false;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "nontrivial") {
+      Result = Enable;
+    } else {
+      return make_error<StringError>(
+          formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
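+
+// For example (illustrative, not part of the upstream patch), "nontrivial"
+// parses to true and "no-nontrivial" to false, so a pipeline entry
+// "unswitch<nontrivial>" enables non-trivial loop unswitching:
+//
+//   auto NonTrivial = parseLoopUnswitchOptions("nontrivial"); // true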
+} // namespace
+
+/// Tests whether a pass name starts with a valid prefix for a default pipeline
+/// alias.
+static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
+  return Name.startswith("default") || Name.startswith("thinlto") ||
+         Name.startswith("lto");
+}
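+
+// The aliases this prefix check guards are matched in full by
+// DefaultAliasRegex below; they look like "default<O2>",
+// "thinlto-pre-link<O3>", or "lto<Os>" (editor's examples).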
+
+/// Tests whether registered callbacks will accept a given pass name.
+///
+/// When parsing a pipeline text, the type of the outermost pipeline may be
+/// omitted, in which case the type is automatically determined from the first
+/// pass name in the text. This may be a name that is handled through one of the
+/// callbacks. We check this through the ordinary parsing callbacks by setting
+/// up a dummy PassManager in order to not force the client to also handle this
+/// type of query.
+template <typename PassManagerT, typename CallbacksT>
+static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) {
+  if (!Callbacks.empty()) {
+    PassManagerT DummyPM;
+    for (auto &CB : Callbacks)
+      if (CB(Name, DummyPM, {}))
+        return true;
+  }
+  return false;
+}
+
+template <typename CallbacksT>
+static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
+  // Manually handle aliases for pre-configured pipeline fragments.
+  if (startsWithDefaultPipelineAliasPrefix(Name))
+    return DefaultAliasRegex.match(Name);
+
+  // Explicitly handle pass manager names.
+  if (Name == "module")
+    return true;
+  if (Name == "cgscc")
+    return true;
+  if (Name == "function")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
+  if (parseRepeatPassName(Name))
+    return true;
+
+#define MODULE_PASS(NAME, CREATE_PASS)                                         \
+  if (Name == NAME)                                                            \
+    return true;
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<ModulePassManager>(Name, Callbacks);
+}
+
+template <typename CallbacksT>
+static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
+  // Explicitly handle pass manager names.
+  if (Name == "cgscc")
+    return true;
+  if (Name == "function")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
+  if (parseRepeatPassName(Name))
+    return true;
+  if (parseDevirtPassName(Name))
+    return true;
+
+#define CGSCC_PASS(NAME, CREATE_PASS)                                          \
+  if (Name == NAME)                                                            \
+    return true;
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)                                      \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<CGSCCPassManager>(Name, Callbacks);
+}
+
+template <typename CallbacksT>
+static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
+  // Explicitly handle pass manager names.
+  if (Name == "function")
+    return true;
+  if (Name == "loop")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
+  if (parseRepeatPassName(Name))
+    return true;
+
+#define FUNCTION_PASS(NAME, CREATE_PASS)                                       \
+  if (Name == NAME)                                                            \
+    return true;
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                   \
+  if (checkParametrizedPassName(Name, NAME))                                   \
+    return true;
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)                                   \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<FunctionPassManager>(Name, Callbacks);
+}
+
+template <typename CallbacksT>
+static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
+  // Explicitly handle pass manager names.
+  if (Name == "loop")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
+  if (parseRepeatPassName(Name))
+    return true;
+
+#define LOOP_PASS(NAME, CREATE_PASS)                                           \
+  if (Name == NAME)                                                            \
+    return true;
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
+  if (checkParametrizedPassName(Name, NAME))                                   \
+    return true;
+#define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks);
+}
+
+Optional<std::vector<PassBuilder::PipelineElement>>
+PassBuilder::parsePipelineText(StringRef Text) {
+  std::vector<PipelineElement> ResultPipeline;
+
+  SmallVector<std::vector<PipelineElement> *, 4> PipelineStack = {
+      &ResultPipeline};
+  for (;;) {
+    std::vector<PipelineElement> &Pipeline = *PipelineStack.back();
+    size_t Pos = Text.find_first_of(",()");
+    Pipeline.push_back({Text.substr(0, Pos), {}});
+
+    // If we have a single terminating name, we're done.
+    if (Pos == Text.npos)
+      break;
+
+    char Sep = Text[Pos];
+    Text = Text.substr(Pos + 1);
+    if (Sep == ',')
+      // Just a name ending in a comma, continue.
+      continue;
+
+    if (Sep == '(') {
+      // Push the inner pipeline onto the stack to continue processing.
+      PipelineStack.push_back(&Pipeline.back().InnerPipeline);
+      continue;
+    }
+
+    assert(Sep == ')' && "Bogus separator!");
+    // When handling the close parenthesis, we greedily consume them to avoid
+    // empty strings in the pipeline.
+    do {
+      // If we try to pop the outer pipeline we have unbalanced parentheses.
+      if (PipelineStack.size() == 1)
+        return None;
+
+      PipelineStack.pop_back();
+    } while (Text.consume_front(")"));
+
+    // Check if we've finished parsing.
+    if (Text.empty())
+      break;
+
+    // Otherwise, the end of an inner pipeline always has to be followed by
+    // a comma, and then we can continue.
+    if (!Text.consume_front(","))
+      return None;
+  }
+
+  if (PipelineStack.size() > 1)
+    // Unbalanced parentheses.
+    return None;
+
+  assert(PipelineStack.back() == &ResultPipeline &&
+         "Wrong pipeline at the bottom of the stack!");
+  return {std::move(ResultPipeline)};
+}
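+
+// For example (illustrative, not part of the upstream patch), the text
+// "module(function(mem2reg),globaldce)" parses into one top-level element
+// "module" whose inner pipeline holds "function" (itself wrapping "mem2reg")
+// followed by "globaldce"; unbalanced input such as "function(mem2reg"
+// returns None.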
+
+Error PassBuilder::parseModulePass(ModulePassManager &MPM,
+                                   const PipelineElement &E,
+                                   bool VerifyEachPass, bool DebugLogging) {
+  auto &Name = E.Name;
+  auto &InnerPipeline = E.InnerPipeline;
+
+  // First handle complex passes like the pass managers which carry pipelines.
+  if (!InnerPipeline.empty()) {
+    if (Name == "module") {
+      ModulePassManager NestedMPM(DebugLogging);
+      if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+                                             VerifyEachPass, DebugLogging))
+        return Err;
+      MPM.addPass(std::move(NestedMPM));
+      return Error::success();
+    }
+    if (Name == "cgscc") {
+      CGSCCPassManager CGPM(DebugLogging);
+      if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
+                                            DebugLogging))
+        return Err;
+      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+      return Error::success();
+    }
+    if (Name == "function") {
+      FunctionPassManager FPM(DebugLogging);
+      if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
+                                               VerifyEachPass, DebugLogging))
+        return Err;
+      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+      return Error::success();
+    }
+    if (auto Count = parseRepeatPassName(Name)) {
+      ModulePassManager NestedMPM(DebugLogging);
+      if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+                                             VerifyEachPass, DebugLogging))
+        return Err;
+      MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
+      return Error::success();
+    }
+
+    for (auto &C : ModulePipelineParsingCallbacks)
+      if (C(Name, MPM, InnerPipeline))
+        return Error::success();
+
+    // Normal passes can't have pipelines.
+    return make_error<StringError>(
+        formatv("invalid use of '{0}' pass as module pipeline", Name).str(),
+        inconvertibleErrorCode());
+  }
+
+  // Manually handle aliases for pre-configured pipeline fragments.
+  if (startsWithDefaultPipelineAliasPrefix(Name)) {
+    SmallVector<StringRef, 3> Matches;
+    if (!DefaultAliasRegex.match(Name, &Matches))
+      return make_error<StringError>(
+          formatv("unknown default pipeline alias '{0}'", Name).str(),
+          inconvertibleErrorCode());
+
+    assert(Matches.size() == 3 && "Must capture two matched strings!");
+
+    OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
+                              .Case("O0", O0)
+                              .Case("O1", O1)
+                              .Case("O2", O2)
+                              .Case("O3", O3)
+                              .Case("Os", Os)
+                              .Case("Oz", Oz);
+    if (L == O0)
+      // At O0 we do nothing at all!
+      return Error::success();
+
+    if (Matches[1] == "default") {
+      MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
+    } else if (Matches[1] == "thinlto-pre-link") {
+      MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging));
+    } else if (Matches[1] == "thinlto") {
+      MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging, nullptr));
+    } else if (Matches[1] == "lto-pre-link") {
+      MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
+    } else {
+      assert(Matches[1] == "lto" && "Not one of the matched options!");
+      MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
+    }
+    return Error::success();
+  }
+
+  // Finally expand the basic registered passes from the .inc file.
+#define MODULE_PASS(NAME, CREATE_PASS)                                         \
+  if (Name == NAME) {                                                          \
+    MPM.addPass(CREATE_PASS);                                                  \
+    return Error::success();                                                   \
+  }
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
+  if (Name == "require<" NAME ">") {                                           \
+    MPM.addPass(                                                               \
+        RequireAnalysisPass<                                                   \
+            std::remove_reference<decltype(CREATE_PASS)>::type, Module>());    \
+    return Error::success();                                                   \
+  }                                                                            \
+  if (Name == "invalidate<" NAME ">") {                                        \
+    MPM.addPass(InvalidateAnalysisPass<                                        \
+                std::remove_reference<decltype(CREATE_PASS)>::type>());        \
+    return Error::success();                                                   \
+  }
+#include "PassRegistry.def"
+
+  for (auto &C : ModulePipelineParsingCallbacks)
+    if (C(Name, MPM, InnerPipeline))
+      return Error::success();
+  return make_error<StringError>(
+      formatv("unknown module pass '{0}'", Name).str(),
+      inconvertibleErrorCode());
+}
+
+Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM,
+                                  const PipelineElement &E, bool VerifyEachPass,
+                                  bool DebugLogging) {
+  auto &Name = E.Name;
+  auto &InnerPipeline = E.InnerPipeline;
+
+  // First handle complex passes like the pass managers which carry pipelines.
+  if (!InnerPipeline.empty()) {
+    if (Name == "cgscc") {
+      CGSCCPassManager NestedCGPM(DebugLogging);
+      if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+                                            VerifyEachPass, DebugLogging))
+        return Err;
+      // Add the nested pass manager with the appropriate adaptor.
+      CGPM.addPass(std::move(NestedCGPM));
+      return Error::success();
+    }
+    if (Name == "function") {
+      FunctionPassManager FPM(DebugLogging);
+      if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
+                                               VerifyEachPass, DebugLogging))
+        return Err;
+      // Add the nested pass manager with the appropriate adaptor.
+      CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+      return Error::success();
+    }
+    if (auto Count = parseRepeatPassName(Name)) {
+      CGSCCPassManager NestedCGPM(DebugLogging);
+      if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+                                            VerifyEachPass, DebugLogging))
+        return Err;
+      CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM)));
+      return Error::success();
+    }
+    if (auto MaxRepetitions = parseDevirtPassName(Name)) {
+      CGSCCPassManager NestedCGPM(DebugLogging);
+      if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline,
+                                            VerifyEachPass, DebugLogging))
+        return Err;
+      CGPM.addPass(
+          createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions));
+      return Error::success();
+    }
+
+    for (auto &C : CGSCCPipelineParsingCallbacks)
+      if (C(Name, CGPM, InnerPipeline))
+        return Error::success();
+
+    // Normal passes can't have pipelines.
+    return make_error<StringError>(
+        formatv("invalid use of '{0}' pass as cgscc pipeline", Name).str(),
+        inconvertibleErrorCode());
+  }
+
+// Now expand the basic registered passes from the .inc file.
+#define CGSCC_PASS(NAME, CREATE_PASS)                                          \
+  if (Name == NAME) {                                                          \
+    CGPM.addPass(CREATE_PASS);                                                 \
+    return Error::success();                                                   \
+  }
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)                                      \
+  if (Name == "require<" NAME ">") {                                           \
+    CGPM.addPass(RequireAnalysisPass<                                          \
+                 std::remove_reference<decltype(CREATE_PASS)>::type,           \
+                 LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &,    \
+                 CGSCCUpdateResult &>());                                      \
+    return Error::success();                                                   \
+  }                                                                            \
+  if (Name == "invalidate<" NAME ">") {                                        \
+    CGPM.addPass(InvalidateAnalysisPass<                                       \
+                 std::remove_reference<decltype(CREATE_PASS)>::type>());       \
+    return Error::success();                                                   \
+  }
+#include "PassRegistry.def"
+
+  for (auto &C : CGSCCPipelineParsingCallbacks)
+    if (C(Name, CGPM, InnerPipeline))
+      return Error::success();
+  return make_error<StringError>(
+      formatv("unknown cgscc pass '{0}'", Name).str(),
+      inconvertibleErrorCode());
+}
+
+Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM,
+                                     const PipelineElement &E,
+                                     bool VerifyEachPass, bool DebugLogging) {
+  auto &Name = E.Name;
+  auto &InnerPipeline = E.InnerPipeline;
+
+  // First handle complex passes like the pass managers which carry pipelines.
+  if (!InnerPipeline.empty()) {
+    if (Name == "function") {
+      FunctionPassManager NestedFPM(DebugLogging);
+      if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
+                                               VerifyEachPass, DebugLogging))
+        return Err;
+      // Add the nested pass manager with the appropriate adaptor.
+      FPM.addPass(std::move(NestedFPM));
+      return Error::success();
+    }
+    if (Name == "loop") {
+      LoopPassManager LPM(DebugLogging);
+      if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass,
+                                           DebugLogging))
+        return Err;
+      // Add the nested pass manager with the appropriate adaptor.
+      FPM.addPass(
+          createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging));
+      return Error::success();
+    }
+    if (auto Count = parseRepeatPassName(Name)) {
+      FunctionPassManager NestedFPM(DebugLogging);
+      if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline,
+                                               VerifyEachPass, DebugLogging))
+        return Err;
+      FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM)));
+      return Error::success();
+    }
+
+    for (auto &C : FunctionPipelineParsingCallbacks)
+      if (C(Name, FPM, InnerPipeline))
+        return Error::success();
+
+    // Normal passes can't have pipelines.
+    return make_error<StringError>(
+        formatv("invalid use of '{0}' pass as function pipeline", Name).str(),
+        inconvertibleErrorCode());
+  }
+
+// Now expand the basic registered passes from the .inc file.
+#define FUNCTION_PASS(NAME, CREATE_PASS)                                       \
+  if (Name == NAME) {                                                          \
+    FPM.addPass(CREATE_PASS);                                                  \
+    return Error::success();                                                   \
+  }
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                   \
+  if (checkParametrizedPassName(Name, NAME)) {                                 \
+    auto Params = parsePassParameters(PARSER, Name, NAME);                     \
+    if (!Params)                                                               \
+      return Params.takeError();                                               \
+    FPM.addPass(CREATE_PASS(Params.get()));                                    \
+    return Error::success();                                                   \
+  }
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)                                   \
+  if (Name == "require<" NAME ">") {                                           \
+    FPM.addPass(                                                               \
+        RequireAnalysisPass<                                                   \
+            std::remove_reference<decltype(CREATE_PASS)>::type, Function>());  \
+    return Error::success();                                                   \
+  }                                                                            \
+  if (Name == "invalidate<" NAME ">") {                                        \
+    FPM.addPass(InvalidateAnalysisPass<                                        \
+                std::remove_reference<decltype(CREATE_PASS)>::type>());        \
+    return Error::success();                                                   \
+  }
+#include "PassRegistry.def"
+
+  for (auto &C : FunctionPipelineParsingCallbacks)
+    if (C(Name, FPM, InnerPipeline))
+      return Error::success();
+  return make_error<StringError>(
+      formatv("unknown function pass '{0}'", Name).str(),
+      inconvertibleErrorCode());
+}
+
+Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E,
+                                 bool VerifyEachPass, bool DebugLogging) {
+  StringRef Name = E.Name;
+  auto &InnerPipeline = E.InnerPipeline;
+
+  // First handle complex passes like the pass managers which carry pipelines.
+  if (!InnerPipeline.empty()) {
+    if (Name == "loop") {
+      LoopPassManager NestedLPM(DebugLogging);
+      if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
+                                           VerifyEachPass, DebugLogging))
+        return Err;
+      // Add the nested pass manager with the appropriate adaptor.
+      LPM.addPass(std::move(NestedLPM));
+      return Error::success();
+    }
+    if (auto Count = parseRepeatPassName(Name)) {
+      LoopPassManager NestedLPM(DebugLogging);
+      if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline,
+                                           VerifyEachPass, DebugLogging))
+        return Err;
+      LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM)));
+      return Error::success();
+    }
+
+    for (auto &C : LoopPipelineParsingCallbacks)
+      if (C(Name, LPM, InnerPipeline))
+        return Error::success();
+
+    // Normal passes can't have pipelines.
+    return make_error<StringError>(
+        formatv("invalid use of '{0}' pass as loop pipeline", Name).str(),
+        inconvertibleErrorCode());
+  }
+
+// Now expand the basic registered passes from the .inc file.
+#define LOOP_PASS(NAME, CREATE_PASS)                                           \
+  if (Name == NAME) {                                                          \
+    LPM.addPass(CREATE_PASS);                                                  \
+    return Error::success();                                                   \
+  }
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
+  if (checkParametrizedPassName(Name, NAME)) {                                 \
+    auto Params = parsePassParameters(PARSER, Name, NAME);                     \
+    if (!Params)                                                               \
+      return Params.takeError();                                               \
+    LPM.addPass(CREATE_PASS(Params.get()));                                    \
+    return Error::success();                                                   \
+  }
+#define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
+  if (Name == "require<" NAME ">") {                                           \
+    LPM.addPass(RequireAnalysisPass<                                           \
+                std::remove_reference<decltype(CREATE_PASS)>::type, Loop,      \
+                LoopAnalysisManager, LoopStandardAnalysisResults &,            \
+                LPMUpdater &>());                                              \
+    return Error::success();                                                   \
+  }                                                                            \
+  if (Name == "invalidate<" NAME ">") {                                        \
+    LPM.addPass(InvalidateAnalysisPass<                                        \
+                std::remove_reference<decltype(CREATE_PASS)>::type>());        \
+    return Error::success();                                                   \
+  }
+#include "PassRegistry.def"
+
+  for (auto &C : LoopPipelineParsingCallbacks)
+    if (C(Name, LPM, InnerPipeline))
+      return Error::success();
+  return make_error<StringError>(formatv("unknown loop pass '{0}'", Name).str(),
+                                 inconvertibleErrorCode());
+}
+
+bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) {
+#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS)                               \
+  if (Name == NAME) {                                                          \
+    AA.registerModuleAnalysis<                                                 \
+        std::remove_reference<decltype(CREATE_PASS)>::type>();                 \
+    return true;                                                               \
+  }
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS)                             \
+  if (Name == NAME) {                                                          \
+    AA.registerFunctionAnalysis<                                               \
+        std::remove_reference<decltype(CREATE_PASS)>::type>();                 \
+    return true;                                                               \
+  }
+#include "PassRegistry.def"
+
+  for (auto &C : AAParsingCallbacks)
+    if (C(Name, AA))
+      return true;
+  return false;
+}
+
+Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM,
+                                         ArrayRef<PipelineElement> Pipeline,
+                                         bool VerifyEachPass,
+                                         bool DebugLogging) {
+  for (const auto &Element : Pipeline) {
+    if (auto Err = parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging))
+      return Err;
+    // FIXME: No verifier support for Loop passes!
+  }
+  return Error::success();
+}
+
+Error PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM,
+                                             ArrayRef<PipelineElement> Pipeline,
+                                             bool VerifyEachPass,
+                                             bool DebugLogging) {
+  for (const auto &Element : Pipeline) {
+    if (auto Err =
+            parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging))
+      return Err;
+    if (VerifyEachPass)
+      FPM.addPass(VerifierPass());
+  }
+  return Error::success();
+}
+
+Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM,
+                                          ArrayRef<PipelineElement> Pipeline,
+                                          bool VerifyEachPass,
+                                          bool DebugLogging) {
+  for (const auto &Element : Pipeline) {
+    if (auto Err = parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging))
+      return Err;
+    // FIXME: No verifier support for CGSCC passes!
+  }
+  return Error::success();
+}
+
+void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM,
+                                       FunctionAnalysisManager &FAM,
+                                       CGSCCAnalysisManager &CGAM,
+                                       ModuleAnalysisManager &MAM) {
+  MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); });
+  MAM.registerPass([&] { return CGSCCAnalysisManagerModuleProxy(CGAM); });
+  CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); });
+  FAM.registerPass([&] { return CGSCCAnalysisManagerFunctionProxy(CGAM); });
+  FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); });
+  FAM.registerPass([&] { return LoopAnalysisManagerFunctionProxy(LAM); });
+  LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); });
+}
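+
+// Typical client setup (editor's sketch of the usual new-pass-manager
+// boilerplate; not part of the upstream patch):
+//
+//   LoopAnalysisManager LAM(DebugLogging);
+//   FunctionAnalysisManager FAM(DebugLogging);
+//   CGSCCAnalysisManager CGAM(DebugLogging);
+//   ModuleAnalysisManager MAM(DebugLogging);
+//   PassBuilder PB;
+//   PB.registerModuleAnalyses(MAM);
+//   PB.registerCGSCCAnalyses(CGAM);
+//   PB.registerFunctionAnalyses(FAM);
+//   PB.registerLoopAnalyses(LAM);
+//   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);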
+
+Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM,
+                                           ArrayRef<PipelineElement> Pipeline,
+                                           bool VerifyEachPass,
+                                           bool DebugLogging) {
+  for (const auto &Element : Pipeline) {
+    if (auto Err = parseModulePass(MPM, Element, VerifyEachPass, DebugLogging))
+      return Err;
+    if (VerifyEachPass)
+      MPM.addPass(VerifierPass());
+  }
+  return Error::success();
+}
+
+// Primary pass pipeline description parsing routine for a \c ModulePassManager
+// FIXME: Should this routine accept a TargetMachine or require the caller to
+// pre-populate the analysis managers with target-specific stuff?
+Error PassBuilder::parsePassPipeline(ModulePassManager &MPM,
+                                     StringRef PipelineText,
+                                     bool VerifyEachPass, bool DebugLogging) {
+  auto Pipeline = parsePipelineText(PipelineText);
+  if (!Pipeline || Pipeline->empty())
+    return make_error<StringError>(
+        formatv("invalid pipeline '{0}'", PipelineText).str(),
+        inconvertibleErrorCode());
+
+  // If the first name isn't at the module layer, wrap the pipeline up
+  // automatically.
+  StringRef FirstName = Pipeline->front().Name;
+
+  if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) {
+    if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) {
+      Pipeline = {{"cgscc", std::move(*Pipeline)}};
+    } else if (isFunctionPassName(FirstName,
+                                  FunctionPipelineParsingCallbacks)) {
+      Pipeline = {{"function", std::move(*Pipeline)}};
+    } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) {
+      Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}};
+    } else {
+      for (auto &C : TopLevelPipelineParsingCallbacks)
+        if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging))
+          return Error::success();
+
+      // Unknown pass or pipeline name!
+      auto &InnerPipeline = Pipeline->front().InnerPipeline;
+      return make_error<StringError>(
+          formatv("unknown {0} name '{1}'",
+                  (InnerPipeline.empty() ? "pass" : "pipeline"), FirstName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+
+  if (auto Err =
+          parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging))
+    return Err;
+  return Error::success();
+}
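+
+// Consequently (editor's note), a bare function-level name is wrapped
+// automatically: "instcombine" is parsed as "function(instcombine)", and a
+// loop pass such as "licm" is parsed as "function(loop(licm))".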
+
+// Primary pass pipeline description parsing routine for a \c CGSCCPassManager
+Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM,
+                                     StringRef PipelineText,
+                                     bool VerifyEachPass, bool DebugLogging) {
+  auto Pipeline = parsePipelineText(PipelineText);
+  if (!Pipeline || Pipeline->empty())
+    return make_error<StringError>(
+        formatv("invalid pipeline '{0}'", PipelineText).str(),
+        inconvertibleErrorCode());
+
+  StringRef FirstName = Pipeline->front().Name;
+  if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks))
+    return make_error<StringError>(
+        formatv("unknown cgscc pass '{0}' in pipeline '{1}'", FirstName,
+                PipelineText)
+            .str(),
+        inconvertibleErrorCode());
+
+  if (auto Err =
+          parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging))
+    return Err;
+  return Error::success();
+}
+
+// Primary pass pipeline description parsing routine for a \c
+// FunctionPassManager
+Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM,
+                                     StringRef PipelineText,
+                                     bool VerifyEachPass, bool DebugLogging) {
+  auto Pipeline = parsePipelineText(PipelineText);
+  if (!Pipeline || Pipeline->empty())
+    return make_error<StringError>(
+        formatv("invalid pipeline '{0}'", PipelineText).str(),
+        inconvertibleErrorCode());
+
+  StringRef FirstName = Pipeline->front().Name;
+  if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks))
+    return make_error<StringError>(
+        formatv("unknown function pass '{0}' in pipeline '{1}'", FirstName,
+                PipelineText)
+            .str(),
+        inconvertibleErrorCode());
+
+  if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass,
+                                           DebugLogging))
+    return Err;
+  return Error::success();
+}
+
+// Primary pass pipeline description parsing routine for a \c LoopPassManager
+Error PassBuilder::parsePassPipeline(LoopPassManager &LPM,
+                                     StringRef PipelineText,
+                                     bool VerifyEachPass, bool DebugLogging) {
+  auto Pipeline = parsePipelineText(PipelineText);
+  if (!Pipeline || Pipeline->empty())
+    return make_error<StringError>(
+        formatv("invalid pipeline '{0}'", PipelineText).str(),
+        inconvertibleErrorCode());
+
+  if (auto Err =
+          parseLoopPassPipeline(LPM, *Pipeline, VerifyEachPass, DebugLogging))
+    return Err;
+
+  return Error::success();
+}
+
+Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) {
+  // If the pipeline just consists of the word 'default' just replace the AA
+  // manager with our default one.
+  if (PipelineText == "default") {
+    AA = buildDefaultAAPipeline();
+    return Error::success();
+  }
+
+  while (!PipelineText.empty()) {
+    StringRef Name;
+    std::tie(Name, PipelineText) = PipelineText.split(',');
+    if (!parseAAPassName(AA, Name))
+      return make_error<StringError>(
+          formatv("unknown alias analysis name '{0}'", Name).str(),
+          inconvertibleErrorCode());
+  }
+
+  return Error::success();
+}
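+
+// For example (illustrative, not part of the upstream patch):
+//
+//   AAManager AA;
+//   if (auto Err = PB.parseAAPipeline(AA, "basic-aa,scoped-noalias-aa"))
+//     ... // handle the error
+//
+// whereas the single word "default" swaps in buildDefaultAAPipeline().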
diff --git a/hpvm/llvm_patches/lib/Passes/PassRegistry.def b/hpvm/llvm_patches/lib/Passes/PassRegistry.def
new file mode 100644
index 0000000000000000000000000000000000000000..eab4026fcd4a9d2bf1b15ec24b4925958c0a31f0
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Passes/PassRegistry.def
@@ -0,0 +1,317 @@
+//===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the core LLVM
+// libraries. This file describes both transformation passes and analyses.
+// Analyses are registered, while transformation passes have names registered
+// that can be used when providing a textual pass pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ANALYSIS("callgraph", CallGraphAnalysis())
+MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
+MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis())
+MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
+MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
+MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
+MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+MODULE_ANALYSIS("verify", VerifierAnalysis())
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
+
+#ifndef MODULE_ALIAS_ANALYSIS
+#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS)                               \
+  MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
+#undef MODULE_ALIAS_ANALYSIS
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, CREATE_PASS)
+#endif
+MODULE_PASS("always-inline", AlwaysInlinerPass())
+MODULE_PASS("attributor", AttributorPass())
+MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
+MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
+MODULE_PASS("cg-profile", CGProfilePass())
+MODULE_PASS("constmerge", ConstantMergePass())
+MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
+MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
+MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
+MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
+MODULE_PASS("function-import", FunctionImportPass())
+MODULE_PASS("globaldce", GlobalDCEPass())
+MODULE_PASS("globalopt", GlobalOptPass())
+MODULE_PASS("globalsplit", GlobalSplitPass())
+MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
+MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
+MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
+MODULE_PASS("inferattrs", InferFunctionAttrsPass())
+MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
+MODULE_PASS("instrorderfile", InstrOrderFilePass())
+MODULE_PASS("instrprof", InstrProfiling())
+MODULE_PASS("internalize", InternalizePass())
+MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+MODULE_PASS("ipsccp", IPSCCPPass())
+MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr))
+MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
+MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("partial-inliner", PartialInlinerPass())
+MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
+MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
+MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())
+MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass())
+MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs()))
+MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
+MODULE_PASS("print", PrintModulePass(dbgs()))
+MODULE_PASS("print-lcg", LazyCallGraphPrinterPass(dbgs()))
+MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs()))
+MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs()))
+MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC())
+MODULE_PASS("rewrite-symbols", RewriteSymbolPass())
+MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass())
+MODULE_PASS("sample-profile", SampleProfileLoaderPass())
+MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass())
+MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation())
+MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr))
+MODULE_PASS("verify", VerifierPass())
+MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false))
+MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false))
+MODULE_PASS("poison-checking", PoisonCheckingPass())
+#undef MODULE_PASS
+
+#ifndef CGSCC_ANALYSIS
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)
+#endif
+CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis())
+CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy())
+CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+#undef CGSCC_ANALYSIS
+
+#ifndef CGSCC_PASS
+#define CGSCC_PASS(NAME, CREATE_PASS)
+#endif
+CGSCC_PASS("argpromotion", ArgumentPromotionPass())
+CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
+CGSCC_PASS("inline", InlinerPass())
+CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
+#undef CGSCC_PASS
+
+#ifndef FUNCTION_ANALYSIS
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)
+#endif
+FUNCTION_ANALYSIS("aa", AAManager())
+FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis())
+FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis())
+FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis())
+FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis())
+FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis())
+FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis())
+FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis())
+FUNCTION_ANALYSIS("loops", LoopAnalysis())
+FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
+FUNCTION_ANALYSIS("da", DependenceAnalysis())
+FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
+FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis())
+FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis())
+FUNCTION_ANALYSIS("regions", RegionInfoAnalysis())
+FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis())
+FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis())
+FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis())
+FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis())
+FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+FUNCTION_ANALYSIS("targetir",
+                  TM ? TM->getTargetIRAnalysis() : TargetIRAnalysis())
+FUNCTION_ANALYSIS("verify", VerifierAnalysis())
+FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+
+#ifndef FUNCTION_ALIAS_ANALYSIS
+#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS)                             \
+  FUNCTION_ANALYSIS(NAME, CREATE_PASS)
+#endif
+FUNCTION_ALIAS_ANALYSIS("basic-aa", BasicAA())
+FUNCTION_ALIAS_ANALYSIS("cfl-anders-aa", CFLAndersAA())
+FUNCTION_ALIAS_ANALYSIS("cfl-steens-aa", CFLSteensAA())
+FUNCTION_ALIAS_ANALYSIS("scev-aa", SCEVAA())
+FUNCTION_ALIAS_ANALYSIS("scoped-noalias-aa", ScopedNoAliasAA())
+FUNCTION_ALIAS_ANALYSIS("type-based-aa", TypeBasedAA())
+#undef FUNCTION_ALIAS_ANALYSIS
+#undef FUNCTION_ANALYSIS
+
+#ifndef FUNCTION_PASS
+#define FUNCTION_PASS(NAME, CREATE_PASS)
+#endif
+FUNCTION_PASS("aa-eval", AAEvaluator())
+FUNCTION_PASS("adce", ADCEPass())
+FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass())
+FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass())
+FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass())
+FUNCTION_PASS("bdce", BDCEPass())
+FUNCTION_PASS("bounds-checking", BoundsCheckingPass())
+FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass())
+FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass())
+FUNCTION_PASS("consthoist", ConstantHoistingPass())
+FUNCTION_PASS("chr", ControlHeightReductionPass())
+FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
+FUNCTION_PASS("dce", DCEPass())
+FUNCTION_PASS("div-rem-pairs", DivRemPairsPass())
+FUNCTION_PASS("dse", DSEPass())
+FUNCTION_PASS("dot-cfg", CFGPrinterPass())
+FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass())
+FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
+FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
+FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false))
+FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass())
+FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true))
+FUNCTION_PASS("gvn-hoist", GVNHoistPass())
+FUNCTION_PASS("instcombine", InstCombinePass())
+FUNCTION_PASS("instsimplify", InstSimplifyPass())
+FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+FUNCTION_PASS("float2int", Float2IntPass())
+FUNCTION_PASS("no-op-function", NoOpFunctionPass())
+FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass())
+FUNCTION_PASS("loweratomic", LowerAtomicPass())
+FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass())
+FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass())
+FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass())
+FUNCTION_PASS("guard-widening", GuardWideningPass())
+FUNCTION_PASS("gvn", GVN())
+FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass())
+FUNCTION_PASS("loop-simplify", LoopSimplifyPass())
+FUNCTION_PASS("loop-sink", LoopSinkPass())
+FUNCTION_PASS("lowerinvoke", LowerInvokePass())
+FUNCTION_PASS("mem2reg", PromotePass())
+FUNCTION_PASS("memcpyopt", MemCpyOptPass())
+FUNCTION_PASS("mergeicmps", MergeICmpsPass())
+FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass())
+FUNCTION_PASS("nary-reassociate", NaryReassociatePass())
+FUNCTION_PASS("newgvn", NewGVNPass())
+FUNCTION_PASS("jump-threading", JumpThreadingPass())
+FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass())
+FUNCTION_PASS("lcssa", LCSSAPass())
+FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass())
+FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass())
+FUNCTION_PASS("loop-fuse", LoopFusePass())
+FUNCTION_PASS("loop-distribute", LoopDistributePass())
+FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt())
+FUNCTION_PASS("print", PrintFunctionPass(dbgs()))
+FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs()))
+FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs()))
+FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs()))
+FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs()))
+FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs()))
+FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs()))
+FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs()))
+FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs()))
+FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs()))
+FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs()))
+FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs()))
+FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs()))
+FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs()))
+FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs()))
+FUNCTION_PASS("reassociate", ReassociatePass())
+FUNCTION_PASS("scalarizer", ScalarizerPass())
+FUNCTION_PASS("sccp", SCCPPass())
+FUNCTION_PASS("sink", SinkingPass())
+FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass())
+FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass())
+FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass())
+FUNCTION_PASS("sroa", SROA())
+FUNCTION_PASS("tailcallelim", TailCallElimPass())
+FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
+FUNCTION_PASS("verify", VerifierPass())
+FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
+FUNCTION_PASS("verify<loops>", LoopVerifierPass())
+FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass())
+FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass())
+FUNCTION_PASS("verify<safepoint-ir>", SafepointIRVerifierPass())
+FUNCTION_PASS("view-cfg", CFGViewerPass())
+FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass())
+FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass())
+FUNCTION_PASS("asan", AddressSanitizerPass(false, false, false))
+FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false))
+FUNCTION_PASS("msan", MemorySanitizerPass({}))
+FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true}))
+FUNCTION_PASS("tsan", ThreadSanitizerPass())
+#undef FUNCTION_PASS
+
+#ifndef FUNCTION_PASS_WITH_PARAMS
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#endif
+FUNCTION_PASS_WITH_PARAMS("unroll",
+                           [](LoopUnrollOptions Opts) {
+                             return LoopUnrollPass(Opts);
+                           },
+                           parseLoopUnrollOptions)
+FUNCTION_PASS_WITH_PARAMS("msan",
+                           [](MemorySanitizerOptions Opts) {
+                             return MemorySanitizerPass(Opts);
+                           },
+                           parseMSanPassOptions)
+FUNCTION_PASS_WITH_PARAMS("simplify-cfg",
+                           [](SimplifyCFGOptions Opts) {
+                             return SimplifyCFGPass(Opts);
+                           },
+                           parseSimplifyCFGOptions)
+FUNCTION_PASS_WITH_PARAMS("loop-vectorize",
+                           [](LoopVectorizeOptions Opts) {
+                             return LoopVectorizePass(Opts);
+                           },
+                           parseLoopVectorizeOptions)
+#undef FUNCTION_PASS_WITH_PARAMS
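+
+// With these registrations (editor's note), a new-pass-manager-enabled opt
+// accepts parametrized pipeline entries on the command line, e.g.
+// -passes='unroll<O2;no-partial>' or -passes='simplify-cfg<sink-common-insts>'.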
+
+#ifndef LOOP_ANALYSIS
+#define LOOP_ANALYSIS(NAME, CREATE_PASS)
+#endif
+LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis())
+LOOP_ANALYSIS("access-info", LoopAccessAnalysis())
+LOOP_ANALYSIS("ddg", DDGAnalysis())
+LOOP_ANALYSIS("ivusers", IVUsersAnalysis())
+LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+#undef LOOP_ANALYSIS
+
+#ifndef LOOP_PASS
+#define LOOP_PASS(NAME, CREATE_PASS)
+#endif
+LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+LOOP_PASS("licm", LICMPass())
+LOOP_PASS("loop-idiom", LoopIdiomRecognizePass())
+LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass())
+LOOP_PASS("rotate", LoopRotatePass())
+LOOP_PASS("no-op-loop", NoOpLoopPass())
+LOOP_PASS("print", PrintLoopPass(dbgs()))
+LOOP_PASS("loop-deletion", LoopDeletionPass())
+LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass())
+LOOP_PASS("strength-reduce", LoopStrengthReducePass())
+LOOP_PASS("indvars", IndVarSimplifyPass())
+LOOP_PASS("irce", IRCEPass())
+LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass())
+LOOP_PASS("unroll-full", LoopFullUnrollPass())
+LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs()))
+LOOP_PASS("print<DDG>", DDGAnalysisPrinterPass(dbgs()))
+LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs()))
+LOOP_PASS("loop-predication", LoopPredicationPass())
+LOOP_PASS("guard-widening", GuardWideningPass())
+#undef LOOP_PASS
+
+#ifndef LOOP_PASS_WITH_PARAMS
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)
+#endif
+LOOP_PASS_WITH_PARAMS("unswitch",
+                           [](bool NonTrivial) {
+                             return SimpleLoopUnswitchPass(NonTrivial);
+                           },
+                           parseLoopUnswitchOptions)
+#undef LOOP_PASS_WITH_PARAMS
diff --git a/hpvm/llvm_patches/lib/Transforms/Scalar/ADCE.cpp b/hpvm/llvm_patches/lib/Transforms/Scalar/ADCE.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4a3b257ac957fdd3f7cb90da13650f1c5f2a0669
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Transforms/Scalar/ADCE.cpp
@@ -0,0 +1,608 @@
+//===- ADCE.cpp - Code to perform dead code elimination -------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the Aggressive Dead Code Elimination pass.  This pass
+// optimistically assumes that all instructions are dead until proven otherwise,
+// allowing it to eliminate dead computations that other DCE passes do not
+// catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "adce"
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+STATISTIC(NumBranchesRemoved, "Number of branch instructions removed");
+
+// This is a temporary option until we change the interface to this pass based
+// on optimization level.
+static cl::opt<bool> RemoveControlFlowFlag("adce-remove-control-flow",
+                                           cl::init(true), cl::Hidden);
+
+// This option enables removal of may-be-infinite loops that have no other
+// effect.
+static cl::opt<bool> RemoveLoops("adce-remove-loops", cl::init(false),
+                                 cl::Hidden);
+
+bool AggressiveDeadCodeElimination::performDeadCodeElimination() {
+  initialize();
+  markLiveInstructions();
+  return removeDeadInstructions();
+}
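+
+// The pass runs in three phases (editor's summary): initialize() seeds the
+// worklist with always-live roots, markLiveInstructions() propagates liveness
+// backwards through operands and control dependences, and
+// removeDeadInstructions() erases whatever was never marked live.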
+
+static bool isUnconditionalBranch(Instruction *Term) {
+  auto *BR = dyn_cast<BranchInst>(Term);
+  return BR && BR->isUnconditional();
+}
+
+void AggressiveDeadCodeElimination::initialize() {
+  auto NumBlocks = F.size();
+
+  // We will have an entry in the map for each block so we grow the
+  // structure to twice that size to keep the load factor low in the hash table.
+  BlockInfo.reserve(NumBlocks);
+  size_t NumInsts = 0;
+
+  // Iterate over blocks to initialize BlockInfo entries and count
+  // instructions to size the InstInfo hash table.
+  for (auto &BB : F) {
+    NumInsts += BB.size();
+    auto &Info = BlockInfo[&BB];
+    Info.BB = &BB;
+    Info.Terminator = BB.getTerminator();
+    Info.UnconditionalBranch = isUnconditionalBranch(Info.Terminator);
+  }
+
+  // Initialize instruction map and set pointers to block info.
+  InstInfo.reserve(NumInsts);
+  for (auto &BBInfo : BlockInfo)
+    for (Instruction &I : *BBInfo.second.BB)
+      InstInfo[&I].Block = &BBInfo.second;
+
+  // Since BlockInfo holds pointers into InstInfo and vice-versa, we may not
+  // add any more elements to either after this point.
+  for (auto &BBInfo : BlockInfo)
+    BBInfo.second.TerminatorLiveInfo = &InstInfo[BBInfo.second.Terminator];
+
+  // Collect the set of "root" instructions that are known live.
+  for (Instruction &I : instructions(F))
+    if (isAlwaysLive(I))
+      markLive(&I);
+
+  if (!RemoveControlFlowFlag)
+    return;
+
+  if (!RemoveLoops) {
+    // This stores state for the depth-first iterator. In addition
+    // to recording which nodes have been visited we also record whether
+    // a node is currently on the "stack" of active ancestors of the current
+    // node.
+    using StatusMap = DenseMap<BasicBlock *, bool>;
+
+    class DFState : public StatusMap {
+    public:
+      std::pair<StatusMap::iterator, bool> insert(BasicBlock *BB) {
+        return StatusMap::insert(std::make_pair(BB, true));
+      }
+
+      // Invoked after we have visited all children of a node.
+      void completed(BasicBlock *BB) { (*this)[BB] = false; }
+
+      // Return true if \p BB is currently on the active stack
+      // of ancestors.
+      bool onStack(BasicBlock *BB) {
+        auto Iter = find(BB);
+        return Iter != end() && Iter->second;
+      }
+    } State;
+
+    State.reserve(F.size());
+    // Iterate over blocks in depth-first pre-order and
+    // treat all edges to a block already seen as loop back edges,
+    // marking the branch live if there is a back edge.
+    for (auto *BB : depth_first_ext(&F.getEntryBlock(), State)) {
+      Instruction *Term = BB->getTerminator();
+      if (isLive(Term))
+        continue;
+
+      for (auto *Succ : successors(BB))
+        if (State.onStack(Succ)) {
+          // Back edge: mark the branch live to preserve the loop.
+          markLive(Term);
+          break;
+        }
+    }
+  }
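+
+  // Illustrative example (sketch, not part of the pass logic): for a loop
+  //
+  //   entry:  br label %header
+  //   header: br i1 %c, label %body, label %exit
+  //   body:   br label %header
+  //
+  // the DFS reaches 'body' while 'header' is still on the active stack, so
+  // the edge body->header is treated as a back edge and body's terminator is
+  // marked live, preserving the potentially infinite loop.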
+
+  // Mark terminators live in blocks from which there is no path to a
+  // return of the function.
+  // We do this by seeing which of the postdomtree root children exit the
+  // program, and for all others, marking the subtree live.
+  for (auto &PDTChild : children<DomTreeNode *>(PDT.getRootNode())) {
+    auto *BB = PDTChild->getBlock();
+    auto &Info = BlockInfo[BB];
+    // Real function return
+    if (isa<ReturnInst>(Info.Terminator)) {
+      LLVM_DEBUG(dbgs() << "post-dom root child is a return: " << BB->getName()
+                        << '\n';);
+      continue;
+    }
+
+    // This child is something else, like an infinite loop.
+    for (auto DFNode : depth_first(PDTChild))
+      markLive(BlockInfo[DFNode->getBlock()].Terminator);
+  }
+
+  // Treat the entry block as always live
+  auto *BB = &F.getEntryBlock();
+  auto &EntryInfo = BlockInfo[BB];
+  EntryInfo.Live = true;
+  if (EntryInfo.UnconditionalBranch)
+    markLive(EntryInfo.Terminator);
+
+  // Build initial collection of blocks with dead terminators
+  for (auto &BBInfo : BlockInfo)
+    if (!BBInfo.second.terminatorIsLive())
+      BlocksWithDeadTerminators.insert(BBInfo.second.BB);
+}
+
+bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
+  // TODO -- use llvm::isInstructionTriviallyDead
+  if (I.isEHPad() || I.mayHaveSideEffects()) {
+    // Skip any value profile instrumentation calls if they are
+    // instrumenting constants.
+    if (isInstrumentsConstant(I))
+      return false;
+    return true;
+  }
+  if (!I.isTerminator())
+    return false;
+  if (RemoveControlFlowFlag && (isa<BranchInst>(I) || isa<SwitchInst>(I)))
+    return false;
+  return true;
+}
+
+// Check if this instruction is a runtime call for value profiling and
+// if it's instrumenting a constant.
+bool AggressiveDeadCodeElimination::isInstrumentsConstant(Instruction &I) {
+  // TODO -- move this test into llvm::isInstructionTriviallyDead
+  if (CallInst *CI = dyn_cast<CallInst>(&I))
+    if (Function *Callee = CI->getCalledFunction())
+      if (Callee->getName().equals(getInstrProfValueProfFuncName()))
+        if (isa<Constant>(CI->getArgOperand(0)))
+          return true;
+  return false;
+}
+
+void AggressiveDeadCodeElimination::markLiveInstructions() {
+  // Propagate liveness backwards to operands.
+  do {
+    // Worklist holds newly discovered live instructions
+    // where we need to mark the inputs as live.
+    while (!Worklist.empty()) {
+      Instruction *LiveInst = Worklist.pop_back_val();
+      LLVM_DEBUG(dbgs() << "work live: "; LiveInst->dump(););
+
+      for (Use &OI : LiveInst->operands())
+        if (Instruction *Inst = dyn_cast<Instruction>(OI))
+          markLive(Inst);
+
+      if (auto *PN = dyn_cast<PHINode>(LiveInst))
+        markPhiLive(PN);
+    }
+
+    // After data-flow liveness has been identified, examine which branch
+    // decisions are required to ensure that live instructions are executed.
+    markLiveBranchesFromControlDependences();
+
+  } while (!Worklist.empty());
+}
+
+void AggressiveDeadCodeElimination::markLive(Instruction *I) {
+  auto &Info = InstInfo[I];
+  if (Info.Live)
+    return;
+
+  LLVM_DEBUG(dbgs() << "mark live: "; I->dump());
+  Info.Live = true;
+  Worklist.push_back(I);
+
+  // Collect the live debug info scopes attached to this instruction.
+  if (const DILocation *DL = I->getDebugLoc())
+    collectLiveScopes(*DL);
+
+  // Mark the containing block live
+  auto &BBInfo = *Info.Block;
+  if (BBInfo.Terminator == I) {
+    BlocksWithDeadTerminators.remove(BBInfo.BB);
+    // For live terminators, mark destination blocks
+    // live to preserve these control-flow edges.
+    if (!BBInfo.UnconditionalBranch)
+      for (auto *BB : successors(I->getParent()))
+        markLive(BB);
+  }
+  markLive(BBInfo);
+}
+
+void AggressiveDeadCodeElimination::markLive(BlockInfoType &BBInfo) {
+  if (BBInfo.Live)
+    return;
+  LLVM_DEBUG(dbgs() << "mark block live: " << BBInfo.BB->getName() << '\n');
+  BBInfo.Live = true;
+  if (!BBInfo.CFLive) {
+    BBInfo.CFLive = true;
+    NewLiveBlocks.insert(BBInfo.BB);
+  }
+
+  // Mark unconditional branches at the end of live
+  // blocks as live since there is no work to do for them later
+  if (BBInfo.UnconditionalBranch)
+    markLive(BBInfo.Terminator);
+}
+
+void AggressiveDeadCodeElimination::collectLiveScopes(const DILocalScope &LS) {
+  if (!AliveScopes.insert(&LS).second)
+    return;
+
+  if (isa<DISubprogram>(LS))
+    return;
+
+  // Tail-recurse through the scope chain.
+  collectLiveScopes(cast<DILocalScope>(*LS.getScope()));
+}
+
+void AggressiveDeadCodeElimination::collectLiveScopes(const DILocation &DL) {
+  // Even though DILocations are not scopes, shove them into AliveScopes so we
+  // don't revisit them.
+  if (!AliveScopes.insert(&DL).second)
+    return;
+
+  // Collect live scopes from the scope chain.
+  collectLiveScopes(*DL.getScope());
+
+  // Tail-recurse through the inlined-at chain.
+  if (const DILocation *IA = DL.getInlinedAt())
+    collectLiveScopes(*IA);
+}
+
+void AggressiveDeadCodeElimination::markPhiLive(PHINode *PN) {
+  auto &Info = BlockInfo[PN->getParent()];
+  // Only need to check this once per block.
+  if (Info.HasLivePhiNodes)
+    return;
+  Info.HasLivePhiNodes = true;
+
+  // If a predecessor block is not live, mark it as control-flow live
+  // which will trigger marking live branches upon which
+  // that block is control dependent.
+  for (auto *PredBB : predecessors(Info.BB)) {
+    auto &Info = BlockInfo[PredBB];
+    if (!Info.CFLive) {
+      Info.CFLive = true;
+      NewLiveBlocks.insert(PredBB);
+    }
+  }
+}
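+
+// For example (illustrative): given
+//
+//   %p = phi i32 [ %v1, %bb1 ], [ %v2, %bb2 ]
+//
+// the value of a live %p depends on which predecessor was taken, so %bb1 and
+// %bb2 become control-flow live, which in turn forces the branches they are
+// control dependent upon to be marked live as well.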
+
+void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() {
+  if (BlocksWithDeadTerminators.empty())
+    return;
+
+  LLVM_DEBUG({
+    dbgs() << "new live blocks:\n";
+    for (auto *BB : NewLiveBlocks)
+      dbgs() << "\t" << BB->getName() << '\n';
+    dbgs() << "dead terminator blocks:\n";
+    for (auto *BB : BlocksWithDeadTerminators)
+      dbgs() << "\t" << BB->getName() << '\n';
+  });
+
+  // The dominance frontier of a live block X in the reverse
+  // control graph is the set of blocks upon which X is control
+  // dependent. The following sequence computes the set of blocks
+  // which currently have dead terminators that are control
+  // dependence sources of a block which is in NewLiveBlocks.
+
+  const SmallPtrSet<BasicBlock *, 16> BWDT{
+      BlocksWithDeadTerminators.begin(),
+      BlocksWithDeadTerminators.end()
+  };
+  SmallVector<BasicBlock *, 32> IDFBlocks;
+  ReverseIDFCalculator IDFs(PDT);
+  IDFs.setDefiningBlocks(NewLiveBlocks);
+  IDFs.setLiveInBlocks(BWDT);
+  IDFs.calculate(IDFBlocks);
+  NewLiveBlocks.clear();
+
+  // Dead terminators which control live blocks are now marked live.
+  for (auto *BB : IDFBlocks) {
+    LLVM_DEBUG(dbgs() << "live control in: " << BB->getName() << '\n');
+    markLive(BB->getTerminator());
+  }
+}
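+
+// For example (illustrative): if block A ends in 'br i1 %c, label %B,
+// label %C' and %B contains a newly live instruction, then B is control
+// dependent on A; A shows up in the reverse-graph iterated dominance
+// frontier of {B}, so A's conditional branch is marked live here even when
+// %c feeds no live data flow.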
+
+//===----------------------------------------------------------------------===//
+//
+//  Routines to update the CFG and SSA information before removing dead code.
+//
+//===----------------------------------------------------------------------===//
+bool AggressiveDeadCodeElimination::removeDeadInstructions() {
+  // Updates control and dataflow around dead blocks
+  updateDeadRegions();
+
+  LLVM_DEBUG({
+    for (Instruction &I : instructions(F)) {
+      // Check if the instruction is alive.
+      if (isLive(&I))
+        continue;
+
+      if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) {
+        // Check if the scope of this variable location is alive.
+        if (AliveScopes.count(DII->getDebugLoc()->getScope()))
+          continue;
+
+        // If the intrinsic is pointing at a live SSA value, there may be an
+        // earlier optimization bug: if we know the location of the variable,
+        // why isn't the scope of the location alive?
+        if (Value *V = DII->getVariableLocation())
+          if (Instruction *II = dyn_cast<Instruction>(V))
+            if (isLive(II))
+              dbgs() << "Dropping debug info for " << *DII << "\n";
+      }
+    }
+  });
+
+  // The inverse of the live set is the dead set.  These are those instructions
+  // that have no side effects and do not influence the control flow or return
+  // value of the function, and may therefore be deleted safely.
+  // NOTE: We reuse the Worklist vector here for memory efficiency.
+  for (Instruction &I : instructions(F)) {
+    // Check if the instruction is alive.
+    if (isLive(&I))
+      continue;
+
+    if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) {
+      // Check if the scope of this variable location is alive.
+      if (AliveScopes.count(DII->getDebugLoc()->getScope()))
+        continue;
+
+      // Fallthrough and drop the intrinsic.
+    }
+
+    // Prepare to delete.
+    Worklist.push_back(&I);
+    I.dropAllReferences();
+  }
+
+  for (Instruction *&I : Worklist) {
+    ++NumRemoved;
+    I->eraseFromParent();
+  }
+
+  return !Worklist.empty();
+}
+
+// A dead region is the set of dead blocks with a common live post-dominator.
+void AggressiveDeadCodeElimination::updateDeadRegions() {
+  LLVM_DEBUG({
+    dbgs() << "final dead terminator blocks: " << '\n';
+    for (auto *BB : BlocksWithDeadTerminators)
+      dbgs() << '\t' << BB->getName()
+             << (BlockInfo[BB].Live ? " LIVE\n" : "\n");
+  });
+
+  // Don't compute the post ordering unless we need it.
+  bool HavePostOrder = false;
+
+  for (auto *BB : BlocksWithDeadTerminators) {
+    auto &Info = BlockInfo[BB];
+    if (Info.UnconditionalBranch) {
+      InstInfo[Info.Terminator].Live = true;
+      continue;
+    }
+
+    if (!HavePostOrder) {
+      computeReversePostOrder();
+      HavePostOrder = true;
+    }
+
+    // Add an unconditional branch to the successor closest to the
+    // end of the function, which ensures a path to the exit for each
+    // live edge.
+    BlockInfoType *PreferredSucc = nullptr;
+    for (auto *Succ : successors(BB)) {
+      auto *Info = &BlockInfo[Succ];
+      if (!PreferredSucc || PreferredSucc->PostOrder < Info->PostOrder)
+        PreferredSucc = Info;
+    }
+    assert((PreferredSucc && PreferredSucc->PostOrder > 0) &&
+           "Failed to find safe successor for dead branch");
+
+    // Collect removed successors to update the (Post)DominatorTrees.
+    SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
+    bool First = true;
+    for (auto *Succ : successors(BB)) {
+      if (!First || Succ != PreferredSucc->BB) {
+        Succ->removePredecessor(BB);
+        RemovedSuccessors.insert(Succ);
+      } else
+        First = false;
+    }
+    makeUnconditional(BB, PreferredSucc->BB);
+
+    // Inform the dominators about the deleted CFG edges.
+    SmallVector<DominatorTree::UpdateType, 4> DeletedEdges;
+    for (auto *Succ : RemovedSuccessors) {
+      // It might have happened that the same successor appeared multiple times
+      // and the CFG edge wasn't really removed.
+      if (Succ != PreferredSucc->BB) {
+        LLVM_DEBUG(dbgs() << "ADCE: (Post)DomTree edge enqueued for deletion"
+                          << BB->getName() << " -> " << Succ->getName()
+                          << "\n");
+        DeletedEdges.push_back({DominatorTree::Delete, BB, Succ});
+      }
+    }
+
+    DomTreeUpdater(DT, &PDT, DomTreeUpdater::UpdateStrategy::Eager)
+        .applyUpdates(DeletedEdges);
+
+    NumBranchesRemoved += 1;
+  }
+}
+
+// Reverse top-sort order.
+void AggressiveDeadCodeElimination::computeReversePostOrder() {
+  // This provides a post-order numbering of the reverse control flow graph.
+  // Note that it is incomplete in the presence of infinite loops, but we
+  // don't need to number blocks which don't reach the end of the function,
+  // since all branches in those blocks are forced live.
+
+  // For each block without successors, extend the DFS from the block
+  // backward through the graph
+  SmallPtrSet<BasicBlock*, 16> Visited;
+  unsigned PostOrder = 0;
+  for (auto &BB : F) {
+    if (succ_begin(&BB) != succ_end(&BB))
+      continue;
+    for (BasicBlock *Block : inverse_post_order_ext(&BB, Visited))
+      BlockInfo[Block].PostOrder = PostOrder++;
+  }
+}
+
+void AggressiveDeadCodeElimination::makeUnconditional(BasicBlock *BB,
+                                                      BasicBlock *Target) {
+  Instruction *PredTerm = BB->getTerminator();
+  // Collect the live debug info scopes attached to this instruction.
+  if (const DILocation *DL = PredTerm->getDebugLoc())
+    collectLiveScopes(*DL);
+
+  // If the terminator is already an unconditional branch, just retarget it
+  // and mark it live.
+  if (isUnconditionalBranch(PredTerm)) {
+    PredTerm->setSuccessor(0, Target);
+    InstInfo[PredTerm].Live = true;
+    return;
+  }
+  LLVM_DEBUG(dbgs() << "making unconditional " << BB->getName() << '\n');
+  NumBranchesRemoved += 1;
+  IRBuilder<> Builder(PredTerm);
+  auto *NewTerm = Builder.CreateBr(Target);
+  InstInfo[NewTerm].Live = true;
+  if (const DILocation *DL = PredTerm->getDebugLoc())
+    NewTerm->setDebugLoc(DL);
+
+  InstInfo.erase(PredTerm);
+  PredTerm->eraseFromParent();
+}
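+
+// For example (illustrative sketch): a dead conditional terminator
+//
+//   br i1 %c, label %then, label %else
+//
+// is rewritten to 'br label %T', where %T is the preferred successor chosen
+// in updateDeadRegions; the other successor has already had this block
+// removed from its predecessor (and PHI) lists.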
+
+//===----------------------------------------------------------------------===//
+//
+// Pass Manager integration code
+//
+//===----------------------------------------------------------------------===//
+PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
+  // ADCE does not need the DominatorTree, but we request it here so that
+  // the analysis can be updated if it is already available.
+  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+  auto &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
+  if (!AggressiveDeadCodeElimination(F, DT, PDT).performDeadCodeElimination())
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<GlobalsAA>();
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<PostDominatorTreeAnalysis>();
+  return PA;
+}
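+
+// Usage note (illustrative): the pass can be exercised with the new pass
+// manager via 'opt -passes=adce file.ll -S', or with the legacy pass
+// manager via 'opt -adce file.ll -S'.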
+
+namespace {
+
+struct ADCELegacyPass : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid
+
+  ADCELegacyPass() : FunctionPass(ID) {
+    initializeADCELegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+
+    // ADCE does not need the DominatorTree, but we request it here so that
+    // the analysis can be updated if it is already available.
+    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+    auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    return AggressiveDeadCodeElimination(F, DT, PDT)
+        .performDeadCodeElimination();
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    if (!RemoveControlFlowFlag)
+      AU.setPreservesCFG();
+    else {
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addPreserved<PostDominatorTreeWrapperPass>();
+    }
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+
+} // end anonymous namespace
+
+char ADCELegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ADCELegacyPass, "adce",
+                      "Aggressive Dead Code Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
+                    false, false)
+
+FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); }
diff --git a/hpvm/llvm_patches/lib/Transforms/Scalar/EarlyCSE.cpp b/hpvm/llvm_patches/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c2bdd6f4a93caaeb7cc4d7d2c0cf63bbc34ccac
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,1388 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs a simple dominator tree walk that eliminates trivially
+// redundant instructions.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/EarlyCSE.h"
+#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/Hashing.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/GuardUtils.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/ValueTracking.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Intrinsics.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/PatternMatch.h"
+#include "llvm/IR/Type.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/Allocator.h"
+#include "llvm/Support/AtomicOrdering.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/DebugCounter.h"
+#include "llvm/Support/RecyclingAllocator.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Utils/GuardUtils.h"
+#include <cassert>
+#include <deque>
+#include <memory>
+#include <utility>
+
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+#define DEBUG_TYPE "early-cse"
+
+STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd");
+STATISTIC(NumCSE,      "Number of instructions CSE'd");
+STATISTIC(NumCSECVP,   "Number of compare instructions CVP'd");
+STATISTIC(NumCSELoad,  "Number of load instructions CSE'd");
+STATISTIC(NumCSECall,  "Number of call instructions CSE'd");
+STATISTIC(NumDSE,      "Number of trivial dead stores removed");
+
+DEBUG_COUNTER(CSECounter, "early-cse",
+              "Controls which instructions are removed");
+
+static cl::opt<unsigned> EarlyCSEMssaOptCap(
+    "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden,
+    cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange "
+             "for faster compile. Caps the MemorySSA clobbering calls."));
+
+static cl::opt<bool> EarlyCSEDebugHash(
+    "earlycse-debug-hash", cl::init(false), cl::Hidden,
+    cl::desc("Perform extra assertion checking to verify that SimpleValue's hash "
+             "function is well-behaved w.r.t. its isEqual predicate"));
+
+//===----------------------------------------------------------------------===//
+// SimpleValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Struct representing the available values in the scoped hash table.
+struct SimpleValue {
+  Instruction *Inst;
+
+  SimpleValue(Instruction *I) : Inst(I) {
+    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+  }
+
+  bool isSentinel() const {
+    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static bool canHandle(Instruction *Inst) {
+    // This can only handle non-void readnone functions.
+    if (CallInst *CI = dyn_cast<CallInst>(Inst))
+      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+  }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<SimpleValue> {
+  static inline SimpleValue getEmptyKey() {
+    return DenseMapInfo<Instruction *>::getEmptyKey();
+  }
+
+  static inline SimpleValue getTombstoneKey() {
+    return DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static unsigned getHashValue(SimpleValue Val);
+  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+
+} // end namespace llvm
+
+/// Match a 'select', including an optional 'not' of the condition.
+static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
+                                           Value *&B,
+                                           SelectPatternFlavor &Flavor) {
+  // Return false if V is not even a select.
+  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
+    return false;
+
+  // Look through a 'not' of the condition operand by swapping A/B.
+  Value *CondNot;
+  if (match(Cond, m_Not(m_Value(CondNot)))) {
+    Cond = CondNot;
+    std::swap(A, B);
+  }
+
+  // Set flavor if we find a match, or set it to unknown otherwise; in
+  // either case, return true to indicate that this is a select we can
+  // process.
+  if (auto *CmpI = dyn_cast<ICmpInst>(Cond))
+    Flavor = matchDecomposedSelectPattern(CmpI, A, B, A, B).Flavor;
+  else
+    Flavor = SPF_UNKNOWN;
+
+  return true;
+}
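+
+// For example (illustrative): both of the following bind the same Cond/A/B,
+// since the 'not' (an 'xor .. true') is looked through by swapping A and B:
+//
+//   %s1 = select i1 %c, i32 %a, i32 %b
+//   %n  = xor i1 %c, true
+//   %s2 = select i1 %n, i32 %b, i32 %a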
+
+static unsigned getHashValueImpl(SimpleValue Val) {
+  Instruction *Inst = Val.Inst;
+  // Hash in all of the operands as pointers.
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
+    Value *LHS = BinOp->getOperand(0);
+    Value *RHS = BinOp->getOperand(1);
+    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
+      std::swap(LHS, RHS);
+
+    return hash_combine(BinOp->getOpcode(), LHS, RHS);
+  }
+
+  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+    // Compares can be commuted by swapping the comparands and
+    // updating the predicate.  Choose the form that has the
+    // comparands in sorted order, or in the case of a tie, the
+    // one with the lower predicate.
+    Value *LHS = CI->getOperand(0);
+    Value *RHS = CI->getOperand(1);
+    CmpInst::Predicate Pred = CI->getPredicate();
+    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
+    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
+      std::swap(LHS, RHS);
+      Pred = SwappedPred;
+    }
+    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
+  }
+
+  // Hash general selects to allow matching commuted true/false operands.
+  SelectPatternFlavor SPF;
+  Value *Cond, *A, *B;
+  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
+    // Hash min/max/abs (cmp + select) to allow for commuted operands.
+    // Min/max may also have a non-canonical compare predicate (e.g., the
+    // compare for smin may use 'sgt' rather than 'slt'), and non-canonical
+    // operands in the compare.
+    // TODO: We should also detect FP min/max.
+    if (SPF == SPF_SMIN || SPF == SPF_SMAX ||
+        SPF == SPF_UMIN || SPF == SPF_UMAX) {
+      if (A > B)
+        std::swap(A, B);
+      return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+    if (SPF == SPF_ABS || SPF == SPF_NABS) {
+      // ABS/NABS always puts the input in A and its negation in B.
+      return hash_combine(Inst->getOpcode(), SPF, A, B);
+    }
+
+    // Hash general selects to allow matching commuted true/false operands.
+
+    // If we do not have a compare as the condition, just hash in the condition.
+    CmpInst::Predicate Pred;
+    Value *X, *Y;
+    if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y))))
+      return hash_combine(Inst->getOpcode(), Cond, A, B);
+
+    // Similar to cmp normalization (above) - canonicalize the predicate value:
+    // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A
+    if (CmpInst::getInversePredicate(Pred) < Pred) {
+      Pred = CmpInst::getInversePredicate(Pred);
+      std::swap(A, B);
+    }
+    return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B);
+  }
+
+  if (CastInst *CI = dyn_cast<CastInst>(Inst))
+    return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0));
+
+  if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst))
+    return hash_combine(EVI->getOpcode(), EVI->getOperand(0),
+                        hash_combine_range(EVI->idx_begin(), EVI->idx_end()));
+
+  if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst))
+    return hash_combine(IVI->getOpcode(), IVI->getOperand(0),
+                        IVI->getOperand(1),
+                        hash_combine_range(IVI->idx_begin(), IVI->idx_end()));
+
+  assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) ||
+          isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) ||
+          isa<ShuffleVectorInst>(Inst)) &&
+         "Invalid/unknown instruction");
+
+  // Mix in the opcode.
+  return hash_combine(
+      Inst->getOpcode(),
+      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
+}
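+
+// For example (illustrative): '%x = add i32 %a, %b' and '%y = add i32 %b, %a'
+// hash identically, because commutative operands are put into pointer order
+// before being combined with the opcode; isEqual then recognizes the
+// commuted pair.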
+
+unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) {
+#ifndef NDEBUG
+  // If -earlycse-debug-hash was specified, return a constant -- this
+  // will force all hashing to collide, so we'll exhaustively search
+  // the table for a match, and the assertion in isEqual will fire if
+  // there's a bug causing equal keys to hash differently.
+  if (EarlyCSEDebugHash)
+    return 0;
+#endif
+  return getHashValueImpl(Val);
+}
+
+static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) {
+  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+
+  if (LHS.isSentinel() || RHS.isSentinel())
+    return LHSI == RHSI;
+
+  if (LHSI->getOpcode() != RHSI->getOpcode())
+    return false;
+  if (LHSI->isIdenticalToWhenDefined(RHSI))
+    return true;
+
+  // If we're not strictly identical, we still might be a commutable instruction.
+  if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) {
+    if (!LHSBinOp->isCommutative())
+      return false;
+
+    assert(isa<BinaryOperator>(RHSI) &&
+           "same opcode, but different instruction type?");
+    BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI);
+
+    // Commuted equality
+    return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) &&
+           LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0);
+  }
+  if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) {
+    assert(isa<CmpInst>(RHSI) &&
+           "same opcode, but different instruction type?");
+    CmpInst *RHSCmp = cast<CmpInst>(RHSI);
+    // Commuted equality
+    return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) &&
+           LHSCmp->getOperand(1) == RHSCmp->getOperand(0) &&
+           LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate();
+  }
+
+  // Min/max/abs can occur with commuted operands, non-canonical predicates,
+  // and/or non-canonical operands.
+  // Selects can be non-trivially equivalent via inverted conditions and swaps.
+  SelectPatternFlavor LSPF, RSPF;
+  Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB;
+  if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) &&
+      matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) {
+    if (LSPF == RSPF) {
+      // TODO: We should also detect FP min/max.
+      if (LSPF == SPF_SMIN || LSPF == SPF_SMAX ||
+          LSPF == SPF_UMIN || LSPF == SPF_UMAX)
+        return ((LHSA == RHSA && LHSB == RHSB) ||
+                (LHSA == RHSB && LHSB == RHSA));
+
+      if (LSPF == SPF_ABS || LSPF == SPF_NABS) {
+        // Abs results are placed in a defined order by matchSelectPattern.
+        return LHSA == RHSA && LHSB == RHSB;
+      }
+
+      // select Cond, A, B <--> select not(Cond), B, A
+      if (CondL == CondR && LHSA == RHSA && LHSB == RHSB)
+        return true;
+    }
+
+    // If the true/false operands are swapped and the conditions are compares
+    // with inverted predicates, the selects are equal:
+    // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A
+    //
+    // This also handles patterns with a double-negation in the sense of not +
+    // inverse, because we looked through a 'not' in the matching function and
+    // swapped A/B:
+    // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A
+    //
+    // This intentionally does NOT handle patterns with a double-negation in
+    // the sense of not + not, because doing so could result in values
+    // comparing as equal that hash differently in the min/max/abs cases like:
+    // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y
+    //   ^ hashes as min                  ^ would not hash as min
+    // In the context of the EarlyCSE pass, however, such cases never reach
+    // this code, as we simplify the double-negation before hashing the second
+    // select (and so still succeed at CSEing them).
+    if (LHSA == RHSB && LHSB == RHSA) {
+      CmpInst::Predicate PredL, PredR;
+      Value *X, *Y;
+      if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) &&
+          match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) &&
+          CmpInst::getInversePredicate(PredL) == PredR)
+        return true;
+    }
+  }
+
+  return false;
+}
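+
+// For example (illustrative): these selects compare equal under isEqualImpl:
+//
+//   %c1 = icmp slt i32 %x, %y
+//   %s1 = select i1 %c1, i32 %a, i32 %b
+//   %c2 = icmp sge i32 %x, %y
+//   %s2 = select i1 %c2, i32 %b, i32 %a
+//
+// because 'sge' is the inverse predicate of 'slt' and the arms are swapped.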
+
+bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) {
+  // These comparisons are nontrivial, so assert that equality implies
+  // hash equality (DenseMap demands this as an invariant).
+  bool Result = isEqualImpl(LHS, RHS);
+  assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) ||
+         getHashValueImpl(LHS) == getHashValueImpl(RHS));
+  return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// CallValue
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// Struct representing the available call values in the scoped hash
+/// table.
+struct CallValue {
+  Instruction *Inst;
+
+  CallValue(Instruction *I) : Inst(I) {
+    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+  }
+
+  bool isSentinel() const {
+    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static bool canHandle(Instruction *Inst) {
+    // Don't value number anything that returns void.
+    if (Inst->getType()->isVoidTy())
+      return false;
+
+    CallInst *CI = dyn_cast<CallInst>(Inst);
+    if (!CI || !CI->onlyReadsMemory())
+      return false;
+    return true;
+  }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<CallValue> {
+  static inline CallValue getEmptyKey() {
+    return DenseMapInfo<Instruction *>::getEmptyKey();
+  }
+
+  static inline CallValue getTombstoneKey() {
+    return DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static unsigned getHashValue(CallValue Val);
+  static bool isEqual(CallValue LHS, CallValue RHS);
+};
+
+} // end namespace llvm
+
+unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) {
+  Instruction *Inst = Val.Inst;
+  // Hash all of the operands as pointers and mix in the opcode.
+  return hash_combine(
+      Inst->getOpcode(),
+      hash_combine_range(Inst->value_op_begin(), Inst->value_op_end()));
+}
+
+bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) {
+  Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst;
+  if (LHS.isSentinel() || RHS.isSentinel())
+    return LHSI == RHSI;
+  return LHSI->isIdenticalTo(RHSI);
+}
+
+//===----------------------------------------------------------------------===//
+// EarlyCSE implementation
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+/// A simple and fast domtree-based CSE pass.
+///
+/// This pass does a simple depth-first walk over the dominator tree,
+/// eliminating trivially redundant instructions and using instsimplify to
+/// canonicalize things as it goes. It is intended to be fast and catch obvious
+/// cases so that instcombine and other passes are more effective. It is
+/// expected that a later pass of GVN will catch the interesting/hard cases.
+class EarlyCSE {
+public:
+  const TargetLibraryInfo &TLI;
+  const TargetTransformInfo &TTI;
+  DominatorTree &DT;
+  AssumptionCache &AC;
+  const SimplifyQuery SQ;
+  MemorySSA *MSSA;
+  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
+
+  using AllocatorTy =
+      RecyclingAllocator<BumpPtrAllocator,
+                         ScopedHashTableVal<SimpleValue, Value *>>;
+  using ScopedHTType =
+      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
+                      AllocatorTy>;
+
+  /// A scoped hash table of the current values of all of our simple
+  /// scalar expressions.
+  ///
+  /// As we walk down the domtree, we look to see if instructions are in this:
+  /// if so, we replace them with what we find, otherwise we insert them so
+  /// that dominated values can succeed in their lookup.
+  ScopedHTType AvailableValues;
+
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
+  ///
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load.  In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count.  The current generation count is incremented
+  /// after every possibly writing memory operation, which ensures that we only
+  /// CSE loads with other loads that have no intervening store.  Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations.  Note that atomic and/or volatile loads and stores can be
+  /// present the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
+  struct LoadValue {
+    Instruction *DefInst = nullptr;
+    unsigned Generation = 0;
+    int MatchingId = -1;
+    bool IsAtomic = false;
+
+    LoadValue() = default;
+    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
+  };
+
+  using LoadMapAllocator =
+      RecyclingAllocator<BumpPtrAllocator,
+                         ScopedHashTableVal<Value *, LoadValue>>;
+  using LoadHTType =
+      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+                      LoadMapAllocator>;
+
+  LoadHTType AvailableLoads;
+
+  // A scoped hash table mapping memory locations (represented as typed
+  // addresses) to generation numbers at which that memory location became
+  // (henceforth indefinitely) invariant.
+  using InvariantMapAllocator =
+      RecyclingAllocator<BumpPtrAllocator,
+                         ScopedHashTableVal<MemoryLocation, unsigned>>;
+  using InvariantHTType =
+      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
+                      InvariantMapAllocator>;
+  InvariantHTType AvailableInvariants;
+
+  /// A scoped hash table of the current values of read-only call
+  /// values.
+  ///
+  /// It uses the same generation count as loads.
+  using CallHTType =
+      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
+  CallHTType AvailableCalls;
+
+  /// This is the current generation of the memory value.
+  unsigned CurrentGeneration = 0;
+
+  /// Set up the EarlyCSE runner for a particular function.
+  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
+           const TargetTransformInfo &TTI, DominatorTree &DT,
+           AssumptionCache &AC, MemorySSA *MSSA)
+      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
+        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}
+
+  bool run();
+
+private:
+  unsigned ClobberCounter = 0;
+  // Almost a POD, but needs to call the constructors for the scoped hash
+  // tables so that a new scope gets pushed on. These are RAII so that the
+  // scope gets popped when the NodeScope is destroyed.
+  class NodeScope {
+  public:
+    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
+      : Scope(AvailableValues), LoadScope(AvailableLoads),
+        InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
+    NodeScope(const NodeScope &) = delete;
+    NodeScope &operator=(const NodeScope &) = delete;
+
+  private:
+    ScopedHTType::ScopeTy Scope;
+    LoadHTType::ScopeTy LoadScope;
+    InvariantHTType::ScopeTy InvariantScope;
+    CallHTType::ScopeTy CallScope;
+  };
+
+  // Contains all the needed information to create a stack for doing a depth
+  // first traversal of the tree. This includes scopes for values, loads, and
+  // calls as well as the generation. There is a child iterator so that the
+  // children do not need to be stored separately.
+  class StackNode {
+  public:
+    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
+              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
+              DomTreeNode::iterator end)
+        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
+          EndIter(end),
+          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
+                 AvailableCalls)
+          {}
+    StackNode(const StackNode &) = delete;
+    StackNode &operator=(const StackNode &) = delete;
+
+    // Accessors.
+    unsigned currentGeneration() { return CurrentGeneration; }
+    unsigned childGeneration() { return ChildGeneration; }
+    void childGeneration(unsigned generation) { ChildGeneration = generation; }
+    DomTreeNode *node() { return Node; }
+    DomTreeNode::iterator childIter() { return ChildIter; }
+
+    DomTreeNode *nextChild() {
+      DomTreeNode *child = *ChildIter;
+      ++ChildIter;
+      return child;
+    }
+
+    DomTreeNode::iterator end() { return EndIter; }
+    bool isProcessed() { return Processed; }
+    void process() { Processed = true; }
+
+  private:
+    unsigned CurrentGeneration;
+    unsigned ChildGeneration;
+    DomTreeNode *Node;
+    DomTreeNode::iterator ChildIter;
+    DomTreeNode::iterator EndIter;
+    NodeScope Scopes;
+    bool Processed = false;
+  };
+
+  /// Wrapper class to handle memory instructions, including loads,
+  /// stores and intrinsic loads and stores defined by the target.
+  class ParseMemoryInst {
+  public:
+    ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI)
+      : Inst(Inst) {
+      if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst))
+        if (TTI.getTgtMemIntrinsic(II, Info))
+          IsTargetMemInst = true;
+    }
+
+    bool isLoad() const {
+      if (IsTargetMemInst) return Info.ReadMem;
+      return isa<LoadInst>(Inst);
+    }
+
+    bool isStore() const {
+      if (IsTargetMemInst) return Info.WriteMem;
+      return isa<StoreInst>(Inst);
+    }
+
+    bool isAtomic() const {
+      if (IsTargetMemInst)
+        return Info.Ordering != AtomicOrdering::NotAtomic;
+      return Inst->isAtomic();
+    }
+
+    bool isUnordered() const {
+      if (IsTargetMemInst)
+        return Info.isUnordered();
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isUnordered();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isUnordered();
+      }
+      // Conservative answer
+      return !Inst->isAtomic();
+    }
+
+    bool isVolatile() const {
+      if (IsTargetMemInst)
+        return Info.IsVolatile;
+
+      if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
+        return LI->isVolatile();
+      } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+        return SI->isVolatile();
+      }
+      // Conservative answer
+      return true;
+    }
+
+    bool isInvariantLoad() const {
+      if (auto *LI = dyn_cast<LoadInst>(Inst))
+        return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr;
+      return false;
+    }
+
+    bool isMatchingMemLoc(const ParseMemoryInst &Inst) const {
+      return (getPointerOperand() == Inst.getPointerOperand() &&
+              getMatchingId() == Inst.getMatchingId());
+    }
+
+    bool isValid() const { return getPointerOperand() != nullptr; }
+
+    // For regular (non-intrinsic) loads/stores, this is set to -1. For
+    // intrinsic loads/stores, the id is retrieved from the corresponding
+    // field in the MemIntrinsicInfo structure.  That field contains
+    // non-negative values only.
+    int getMatchingId() const {
+      if (IsTargetMemInst) return Info.MatchingId;
+      return -1;
+    }
+
+    Value *getPointerOperand() const {
+      if (IsTargetMemInst) return Info.PtrVal;
+      return getLoadStorePointerOperand(Inst);
+    }
+
+    bool mayReadFromMemory() const {
+      if (IsTargetMemInst) return Info.ReadMem;
+      return Inst->mayReadFromMemory();
+    }
+
+    bool mayWriteToMemory() const {
+      if (IsTargetMemInst) return Info.WriteMem;
+      return Inst->mayWriteToMemory();
+    }
+
+  private:
+    bool IsTargetMemInst = false;
+    MemIntrinsicInfo Info;
+    Instruction *Inst;
+  };
+
+  bool processNode(DomTreeNode *Node);
+
+  bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI,
+                             const BasicBlock *BB, const BasicBlock *Pred);
+
+  Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const {
+    if (auto *LI = dyn_cast<LoadInst>(Inst))
+      return LI;
+    if (auto *SI = dyn_cast<StoreInst>(Inst))
+      return SI->getValueOperand();
+    assert(isa<IntrinsicInst>(Inst) && "Instruction not supported");
+    return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
+                                                 ExpectedType);
+  }
+
+  /// Return true if the instruction is known to only operate on memory
+  /// provably invariant in the given "generation".
+  bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt);
+
+  bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
+                           Instruction *EarlierInst, Instruction *LaterInst);
+
+  void removeMSSA(Instruction *Inst) {
+    if (!MSSA)
+      return;
+    if (VerifyMemorySSA)
+      MSSA->verifyMemorySSA();
+    // Removing a store here can leave MemorySSA in an unoptimized state by
+    // creating MemoryPhis that have identical arguments and by creating
+    // MemoryUses whose defining access is not an actual clobber. The phi case
+    // is handled by MemorySSA when passing OptimizePhis = true to
+    // removeMemoryAccess.  The non-optimized MemoryUse case is lazily updated
+    // by MemorySSA's getClobberingMemoryAccess.
+    MSSAUpdater->removeMemoryAccess(Inst, true);
+  }
+};
+
+} // end anonymous namespace
+
+/// Determine if the memory referenced by LaterInst is from the same heap
+/// version as EarlierInst.
+/// This is currently called in two scenarios:
+///
+///   load p
+///   ...
+///   load p
+///
+/// and
+///
+///   x = load p
+///   ...
+///   store x, p
+///
+/// in both cases we want to verify that there are no possible writes to the
+/// memory referenced by p between the earlier and later instruction.
+bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
+                                   unsigned LaterGeneration,
+                                   Instruction *EarlierInst,
+                                   Instruction *LaterInst) {
+  // Check the simple memory generation tracking first.
+  if (EarlierGeneration == LaterGeneration)
+    return true;
+
+  if (!MSSA)
+    return false;
+
+  // If MemorySSA has determined that one of EarlierInst or LaterInst does not
+  // read/write memory, then we can safely return true here.
+  // FIXME: We could be more aggressive when checking doesNotAccessMemory(),
+  // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass
+  // by also checking the MemorySSA MemoryAccess on the instruction.  Initial
+  // experiments suggest this isn't worthwhile, at least for C/C++ code compiled
+  // with the default optimization pipeline.
+  auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst);
+  if (!EarlierMA)
+    return true;
+  auto *LaterMA = MSSA->getMemoryAccess(LaterInst);
+  if (!LaterMA)
+    return true;
+
+  // Since we know LaterDef dominates LaterInst and EarlierInst dominates
+  // LaterInst, if LaterDef dominates EarlierInst then it can't occur between
+  // EarlierInst and LaterInst and neither can any other write that potentially
+  // clobbers LaterInst.
+  MemoryAccess *LaterDef;
+  if (ClobberCounter < EarlyCSEMssaOptCap) {
+    LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
+    ClobberCounter++;
+  } else
+    LaterDef = LaterMA->getDefiningAccess();
+
+  return MSSA->dominates(LaterDef, EarlierMA);
+}
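+
+// For example (illustrative):
+//
+//   %v1 = load i32, i32* %p          ; generation 3
+//   call void @f()                   ; may write memory; generation -> 4
+//   %v2 = load i32, i32* %p          ; generation 4
+//
+// The cheap generation check fails (3 != 4), but if MemorySSA can show that
+// @f does not clobber %p, the clobbering access for %v2 dominates %v1's
+// access and the two loads are still in the same heap version.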
+
+bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) {
+  // A location loaded from with an invariant_load is assumed to *never* change
+  // within the visible scope of the compilation.
+  if (auto *LI = dyn_cast<LoadInst>(I))
+    if (LI->getMetadata(LLVMContext::MD_invariant_load))
+      return true;
+
+  auto MemLocOpt = MemoryLocation::getOrNone(I);
+  if (!MemLocOpt)
+    // "target" intrinsic forms of loads aren't currently known to
+    // MemoryLocation::get.  TODO
+    return false;
+  MemoryLocation MemLoc = *MemLocOpt;
+  if (!AvailableInvariants.count(MemLoc))
+    return false;
+
+  // Is the generation at which this became invariant older than the
+  // current one?
+  return AvailableInvariants.lookup(MemLoc) <= GenAt;
+}
+
+bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
+                                     const BranchInst *BI, const BasicBlock *BB,
+                                     const BasicBlock *Pred) {
+  assert(BI->isConditional() && "Should be a conditional branch!");
+  assert(BI->getCondition() == CondInst && "Wrong condition?");
+  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
+  auto *TorF = (BI->getSuccessor(0) == BB)
+                   ? ConstantInt::getTrue(BB->getContext())
+                   : ConstantInt::getFalse(BB->getContext());
+  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
+    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
+      return BOp->getOpcode() == Opcode;
+    return false;
+  };
+  // If the condition is an AND operation, we can propagate its operands into
+  // the true branch. If it is an OR operation, we can propagate them into the
+  // false branch.
+  unsigned PropagateOpcode =
+      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
+
+  bool MadeChanges = false;
+  SmallVector<Instruction *, 4> WorkList;
+  SmallPtrSet<Instruction *, 4> Visited;
+  WorkList.push_back(CondInst);
+  while (!WorkList.empty()) {
+    Instruction *Curr = WorkList.pop_back_val();
+
+    AvailableValues.insert(Curr, TorF);
+    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
+                      << Curr->getName() << "' as " << *TorF << " in "
+                      << BB->getName() << "\n");
+    if (!DebugCounter::shouldExecute(CSECounter)) {
+      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+    } else {
+      // Replace all dominated uses with the known value.
+      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
+                                                    BasicBlockEdge(Pred, BB))) {
+        NumCSECVP += Count;
+        MadeChanges = true;
+      }
+    }
+
+    if (MatchBinOp(Curr, PropagateOpcode))
+      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
+        if (Instruction *OPI = dyn_cast<Instruction>(Op))
+          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
+            WorkList.push_back(OPI);
+  }
+
+  return MadeChanges;
+}
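+
+// For example (illustrative): given
+//
+//   %and = and i1 %a, %b
+//   br i1 %and, label %taken, label %other
+//
+// along the edge into %taken the values %and, %a, and %b are all known to be
+// true, so their uses dominated by that edge are replaced with 'true'.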
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+  bool Changed = false;
+  BasicBlock *BB = Node->getBlock();
+
+  // If this block has a single predecessor, then the predecessor is the parent
+  // of the domtree node and all of the live out memory values are still current
+  // in this block.  If this block has multiple predecessors, then they could
+  // have invalidated the live-out memory values of our parent block.  For now,
+  // just be conservative and invalidate memory if this block has multiple
+  // predecessors.
+  if (!BB->getSinglePredecessor())
+    ++CurrentGeneration;
+
+  // If this node has a single predecessor which ends in a conditional branch,
+  // we can infer the value of the branch condition given that we took this
+  // path.  We need the single predecessor to ensure there's not another path
+  // which reaches this block where the condition might hold a different
+  // value.  Since we're adding this to the scoped hash table (like any other
+  // def), it will have been popped if we encounter a future merge block.
+  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
+    if (BI && BI->isConditional()) {
+      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
+      if (CondInst && SimpleValue::canHandle(CondInst))
+        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
+    }
+  }
+
+  /// LastStore - Keep track of the last non-volatile store that we saw... for
+  /// as long as there is no instruction that reads memory.  If we see a store
+  /// to the same location, we delete the dead store.  This zaps trivial dead
+  /// stores which can occur in bitfield code among other things.
+  Instruction *LastStore = nullptr;
+
+  // See if any instructions in the block can be eliminated.  If so, do it.  If
+  // not, add them to AvailableValues.
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+    Instruction *Inst = &*I++;
+
+    // Dead instructions should just be removed.
+    if (isInstructionTriviallyDead(Inst, &TLI)) {
+      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+      if (!DebugCounter::shouldExecute(CSECounter)) {
+        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+        continue;
+      }
+      if (!salvageDebugInfo(*Inst))
+        replaceDbgUsesWithUndef(Inst);
+      removeMSSA(Inst);
+      Inst->eraseFromParent();
+      Changed = true;
+      ++NumSimplify;
+      continue;
+    }
+
+    // Skip assume intrinsics; they don't really have side effects (although
+    // they're marked as such to ensure preservation of control dependencies),
+    // and this pass will not bother with their removal. However, we should
+    // mark their condition as true for all dominated blocks.
+    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
+      auto *CondI =
+          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
+      if (CondI && SimpleValue::canHandle(CondI)) {
+        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
+                          << '\n');
+        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+      } else
+        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
+      continue;
+    }
+
+    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
+    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
+      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
+      continue;
+    }
+
+    // We can skip all invariant.start intrinsics since they only read memory,
+    // and we can forward values across them. For invariant starts without
+    // invariant ends, we can use the fact that the invariantness never ends
+    // to start a scope in the current generation which is true for all
+    // future generations.  Also, we don't need to consume the last store
+    // since the semantics of invariant.start allow us to perform DSE of the
+    // last store, if there was a store following invariant.start. Consider:
+    //
+    // store 30, i8* p
+    // invariant.start(p)
+    // store 40, i8* p
+    // We can DSE the store to 30, since the store 40 to invariant location p
+    // causes undefined behaviour.
+    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
+      // If there are any uses, the scope might end.
+      if (!Inst->use_empty())
+        continue;
+      auto *CI = cast<CallInst>(Inst);
+      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
+      // Don't start a scope if we already have a better one pushed
+      if (!AvailableInvariants.count(MemLoc))
+        AvailableInvariants.insert(MemLoc, CurrentGeneration);
+      continue;
+    }
+
+    if (isGuard(Inst)) {
+      if (auto *CondI =
+              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
+        if (SimpleValue::canHandle(CondI)) {
+          // Do we already know the actual value of this condition?
+          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
+            // Is the condition known to be true?
+            if (isa<ConstantInt>(KnownCond) &&
+                cast<ConstantInt>(KnownCond)->isOne()) {
+              LLVM_DEBUG(dbgs()
+                         << "EarlyCSE removing guard: " << *Inst << '\n');
+              removeMSSA(Inst);
+              Inst->eraseFromParent();
+              Changed = true;
+              continue;
+            } else
+              // Use the known value if it wasn't true.
+              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
+          }
+          // The condition we're guarding on here is true for all dominated
+          // locations.
+          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+        }
+      }
+
+      // Guard intrinsics read all memory, but don't write any memory.
+      // Accordingly, don't update the generation but consume the last store (to
+      // avoid an incorrect DSE).
+      LastStore = nullptr;
+      continue;
+    }
+
+    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+    // its simpler value.
+    if (Value *V = SimplifyInstruction(Inst, SQ)) {
+      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << "  to: " << *V
+                        << '\n');
+      if (!DebugCounter::shouldExecute(CSECounter)) {
+        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+      } else {
+        bool Killed = false;
+        if (!Inst->use_empty()) {
+          Inst->replaceAllUsesWith(V);
+          Changed = true;
+        }
+        if (isInstructionTriviallyDead(Inst, &TLI)) {
+          removeMSSA(Inst);
+          Inst->eraseFromParent();
+          Changed = true;
+          Killed = true;
+        }
+        if (Changed)
+          ++NumSimplify;
+        if (Killed)
+          continue;
+      }
+    }
+
+    // If this is a simple instruction that we can value number, process it.
+    if (SimpleValue::canHandle(Inst)) {
+      // See if the instruction has an available value.  If so, use it.
+      if (Value *V = AvailableValues.lookup(Inst)) {
+        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << "  to: " << *V
+                          << '\n');
+        if (!DebugCounter::shouldExecute(CSECounter)) {
+          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+          continue;
+        }
+        if (auto *I = dyn_cast<Instruction>(V))
+          I->andIRFlags(Inst);
+        Inst->replaceAllUsesWith(V);
+        removeMSSA(Inst);
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumCSE;
+        continue;
+      }
+
+      // Otherwise, just remember that this value is available.
+      AvailableValues.insert(Inst, Inst);
+      continue;
+    }
+
+    ParseMemoryInst MemInst(Inst, TTI);
+    // If this is a non-volatile load, process it.
+    if (MemInst.isValid() && MemInst.isLoad()) {
+      // (conservatively) we can't peek past the ordering implied by this
+      // operation, but we can add this load to our set of available values.
+      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
+        LastStore = nullptr;
+        ++CurrentGeneration;
+      }
+
+      if (MemInst.isInvariantLoad()) {
+        // If we pass an invariant load, we know that memory location is
+        // indefinitely constant from the moment of first dereferenceability.
+        // We conservatively treat the invariant_load as that moment.  If we
+        // pass an invariant load after already establishing a scope, don't
+        // restart it since we want to preserve the earliest point seen.
+        auto MemLoc = MemoryLocation::get(Inst);
+        if (!AvailableInvariants.count(MemLoc))
+          AvailableInvariants.insert(MemLoc, CurrentGeneration);
+      }
+
+      // If we have an available version of this load, and if it is the right
+      // generation or the load is known to be from an invariant location,
+      // replace this instruction.
+      //
+      // If either the dominating load or the current load are invariant, then
+      // we can assume the current load loads the same value as the dominating
+      // load.
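+      //
+      // For example (illustrative IR, names hypothetical):
+      //   %v1 = load i32, i32* %p
+      //   ...                      ; no clobber of %p in between
+      //   %v2 = load i32, i32* %p  ; replaced by %v1 below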
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.DefInst != nullptr &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing loads with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          // We can't replace an atomic load with one which isn't also atomic.
+          InVal.IsAtomic >= MemInst.isAtomic() &&
+          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
+           isSameMemGeneration(InVal.Generation, CurrentGeneration,
+                               InVal.DefInst, Inst))) {
+        Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
+        if (Op != nullptr) {
+          LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
+                            << "  to: " << *InVal.DefInst << '\n');
+          if (!DebugCounter::shouldExecute(CSECounter)) {
+            LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+            continue;
+          }
+          if (!Inst->use_empty())
+            Inst->replaceAllUsesWith(Op);
+          removeMSSA(Inst);
+          Inst->eraseFromParent();
+          Changed = true;
+          ++NumCSELoad;
+          continue;
+        }
+      }
+
+      // Otherwise, remember that we have this instruction.
+      AvailableLoads.insert(
+          MemInst.getPointerOperand(),
+          LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                    MemInst.isAtomic()));
+      LastStore = nullptr;
+      continue;
+    }
+
+    // If this instruction may read from memory or throw (and potentially read
+    // from memory in the exception handler), forget LastStore.  Load/store
+    // intrinsics will indicate both a read and a write to memory.  The target
+    // may override this (e.g. so that a store intrinsic does not read from
+    // memory, and thus will be treated the same as a regular store for
+    // commoning purposes).
+    if ((Inst->mayReadFromMemory() || Inst->mayThrow()) &&
+        !(MemInst.isValid() && !MemInst.mayReadFromMemory()))
+      LastStore = nullptr;
+
+    // If this is a read-only call, process it.
+    if (CallValue::canHandle(Inst)) {
+      // If we have an available version of this call, and if it is the right
+      // generation, replace this instruction.
+      std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
+      if (InVal.first != nullptr &&
+          isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
+                              Inst)) {
+        LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
+                          << "  to: " << *InVal.first << '\n');
+        if (!DebugCounter::shouldExecute(CSECounter)) {
+          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+          continue;
+        }
+        if (!Inst->use_empty())
+          Inst->replaceAllUsesWith(InVal.first);
+        removeMSSA(Inst);
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumCSECall;
+        continue;
+      }
+
+      // Otherwise, remember that we have this instruction.
+      AvailableCalls.insert(
+          Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration));
+      continue;
+    }
+
+    // A release fence requires that all stores complete before it, but does
+    // not prevent the reordering of following loads 'before' the fence.  As a
+    // result, we don't need to consider it as writing to memory and don't need
+    // to advance the generation.  We do need to prevent DSE across the fence,
+    // but that's handled above.
+    if (FenceInst *FI = dyn_cast<FenceInst>(Inst))
+      if (FI->getOrdering() == AtomicOrdering::Release) {
+        assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above");
+        continue;
+      }
+
+    // write back DSE - If we write back the same value we just loaded from
+    // the same location and haven't passed any intervening writes or ordering
+    // operations, we can remove the write.  The primary benefit is in allowing
+    // the available load table to remain valid and value forward past where
+    // the store originally was.
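+    //
+    // For example (illustrative IR, names hypothetical):
+    //   %v = load i32, i32* %p
+    //   ...                      ; no intervening writes or ordering ops
+    //   store i32 %v, i32* %p    ; removable: writes back the loaded value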
+    if (MemInst.isValid() && MemInst.isStore()) {
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.DefInst &&
+          InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing stores with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          (isOperatingOnInvariantMemAt(Inst, InVal.Generation) ||
+           isSameMemGeneration(InVal.Generation, CurrentGeneration,
+                               InVal.DefInst, Inst))) {
+        // It is okay to have a LastStore to a different pointer here if MemorySSA
+        // tells us that the load and store are from the same memory generation.
+        // In that case, LastStore should keep its present value since we're
+        // removing the current store.
+        assert((!LastStore ||
+                ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
+                    MemInst.getPointerOperand() ||
+                MSSA) &&
+               "can't have an intervening store if not using MemorySSA!");
+        LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
+        if (!DebugCounter::shouldExecute(CSECounter)) {
+          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+          continue;
+        }
+        removeMSSA(Inst);
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumDSE;
+        // We can avoid incrementing the generation count since we were able
+        // to eliminate this store.
+        continue;
+      }
+    }
+
+    // Okay, this isn't something we can CSE at all.  Check to see if it is
+    // something that could modify memory.  If so, our available memory values
+    // cannot be used so bump the generation count.
+    if (Inst->mayWriteToMemory()) {
+      ++CurrentGeneration;
+
+      if (MemInst.isValid() && MemInst.isStore()) {
+        // We do a trivial form of DSE if there are two stores to the same
+        // location with no intervening loads.  Delete the earlier store.
+        // At the moment, we don't remove ordered stores, but do remove
+        // unordered atomic stores.  There's no special requirement (for
+        // unordered atomics) about removing atomic stores only in favor of
+        // other atomic stores since we were going to execute the non-atomic
+        // one anyway and the atomic one might never have become visible.
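+        //
+        // For example (illustrative IR, names hypothetical):
+        //   store i32 1, i32* %p   ; dead, removed below
+        //   store i32 2, i32* %p   ; no intervening load of %p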
+        if (LastStore) {
+          ParseMemoryInst LastStoreMemInst(LastStore, TTI);
+          assert(LastStoreMemInst.isUnordered() &&
+                 !LastStoreMemInst.isVolatile() &&
+                 "Violated invariant");
+          if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
+            LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
+                              << "  due to: " << *Inst << '\n');
+            if (!DebugCounter::shouldExecute(CSECounter)) {
+              LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+            } else {
+              removeMSSA(LastStore);
+              LastStore->eraseFromParent();
+              Changed = true;
+              ++NumDSE;
+              LastStore = nullptr;
+            }
+          }
+          // fallthrough - we can exploit information about this store
+        }
+
+        // Okay, we just invalidated anything we knew about loaded values.  Try
+        // to salvage *something* by remembering that the stored value is a live
+        // version of the pointer.  It is safe to forward from volatile stores
+        // to non-volatile loads, so we don't have to check for volatility of
+        // the store.
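+        //
+        // E.g. after 'store i32 %v, i32* %p' (hypothetical IR), a later load
+        // of %p in the same generation can be value-forwarded to %v.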
+        AvailableLoads.insert(
+            MemInst.getPointerOperand(),
+            LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(),
+                      MemInst.isAtomic()));
+
+        // Remember that this was the last unordered store we saw for DSE. We
+        // don't yet handle DSE on ordered or volatile stores since we don't
+        // have a good way to model the ordering requirement for following
+        // passes once the store is removed.  We could insert a fence, but
+        // since fences are slightly stronger than stores in their ordering,
+        // it's not clear this is a profitable transform. Another option would
+        // be to merge the ordering with that of the post dominating store.
+        if (MemInst.isUnordered() && !MemInst.isVolatile())
+          LastStore = Inst;
+        else
+          LastStore = nullptr;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+bool EarlyCSE::run() {
+  // Note: deque is used here because it gives significant performance gains
+  // over vector when the container becomes very large, due to the
+  // specific access patterns. For more information see the mailing list
+  // discussion on this:
+  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
+  std::deque<StackNode *> nodesToProcess;
+
+  bool Changed = false;
+
+  // Process the root node.
+  nodesToProcess.push_back(new StackNode(
+      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
+      CurrentGeneration, DT.getRootNode(),
+      DT.getRootNode()->begin(), DT.getRootNode()->end()));
+
+  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
+
+  // Process the stack.
+  while (!nodesToProcess.empty()) {
+    // Grab the first item off the stack. Set the current generation, remove
+    // the node from the stack, and process it.
+    StackNode *NodeToProcess = nodesToProcess.back();
+
+    // Initialize class members.
+    CurrentGeneration = NodeToProcess->currentGeneration();
+
+    // Check if the node needs to be processed.
+    if (!NodeToProcess->isProcessed()) {
+      // Process the node.
+      Changed |= processNode(NodeToProcess->node());
+      NodeToProcess->childGeneration(CurrentGeneration);
+      NodeToProcess->process();
+    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
+      // Push the next child onto the stack.
+      DomTreeNode *child = NodeToProcess->nextChild();
+      nodesToProcess.push_back(
+          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
+                        AvailableCalls, NodeToProcess->childGeneration(),
+                        child, child->begin(), child->end()));
+    } else {
+      // It has been processed, and there are no more children to process,
+      // so delete it and pop it off the stack.
+      delete NodeToProcess;
+      nodesToProcess.pop_back();
+    }
+  } // while (!nodes...)
+
+  return Changed;
+}
+
+PreservedAnalyses EarlyCSEPass::run(Function &F,
+                                    FunctionAnalysisManager &AM) {
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &AC = AM.getResult<AssumptionAnalysis>(F);
+  auto *MSSA =
+      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
+
+  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
+
+  if (!CSE.run())
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<GlobalsAA>();
+  if (UseMemorySSA)
+    PA.preserve<MemorySSAAnalysis>();
+  return PA;
+}
+
+template<bool UseMemorySSA>
+bool EarlyCSELegacyCommonPass<UseMemorySSA>::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  auto *MSSA =
+      UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
+
+  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
+
+  return CSE.run();
+}
+
+using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
+
+template<>
+char EarlyCSELegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
+
+using EarlyCSEMemSSALegacyPass =
+    EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
+
+template<>
+char EarlyCSEMemSSALegacyPass::ID = 0;
+
+FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
+  if (UseMemorySSA)
+    return new EarlyCSEMemSSALegacyPass();
+  else
+    return new EarlyCSELegacyPass();
+}
+
+INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
+                      "Early CSE w/ MemorySSA", false, false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
+INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
+                    "Early CSE w/ MemorySSA", false, false)
diff --git a/hpvm/llvm_patches/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/hpvm/llvm_patches/lib/Transforms/Scalar/SimplifyCFGPass.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..d7004dbbfb445b261711e84e2815176b8cd1d383
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Transforms/Scalar/SimplifyCFGPass.cpp
@@ -0,0 +1,272 @@
+//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements dead code elimination and basic block merging, along
+// with a collection of other peephole control flow optimizations.  For example:
+//
+//   * Removes basic blocks with no predecessors.
+//   * Merges a basic block into its predecessor if there is only one and the
+//     predecessor only has one successor.
+//   * Eliminates PHI nodes for basic blocks with a single predecessor.
+//   * Eliminates a basic block that only contains an unconditional branch.
+//   * Changes invoke instructions to nounwind functions to be calls.
+//   * Changes things like "if (x) if (y)" into "if (x&y)".
+//   * etc..
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "simplifycfg"
+
+static cl::opt<unsigned> UserBonusInstThreshold(
+    "bonus-inst-threshold", cl::Hidden, cl::init(1),
+    cl::desc("Control the number of bonus instructions (default = 1)"));
+
+static cl::opt<bool> UserKeepLoops(
+    "keep-loops", cl::Hidden, cl::init(true),
+    cl::desc("Preserve canonical loop structure (default = true)"));
+
+static cl::opt<bool> UserSwitchToLookup(
+    "switch-to-lookup", cl::Hidden, cl::init(false),
+    cl::desc("Convert switches to lookup tables (default = false)"));
+
+static cl::opt<bool> UserForwardSwitchCond(
+    "forward-switch-cond", cl::Hidden, cl::init(false),
+    cl::desc("Forward switch condition to phi ops (default = false)"));
+
+static cl::opt<bool> UserSinkCommonInsts(
+    "sink-common-insts", cl::Hidden, cl::init(false),
+    cl::desc("Sink common instructions (default = false)"));
+
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+/// If we have more than one empty (other than phi node) return block,
+/// merge them together to promote recursive block merging.
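+///
+/// For example (illustrative IR, names hypothetical): two blocks ending in
+/// 'ret i32 %a' and 'ret i32 %b' are merged into a single block returning
+/// 'phi i32 [ %a, ... ], [ %b, ... ]'.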
+static bool mergeEmptyReturnBlocks(Function &F) {
+  bool Changed = false;
+
+  BasicBlock *RetBlock = nullptr;
+
+  // Scan all the blocks in the function, looking for empty return blocks.
+  for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+    BasicBlock &BB = *BBI++;
+
+    // Only look at return blocks.
+    ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!Ret) continue;
+
+    // Only look at the block if it is empty or the only other thing in it is a
+    // single PHI node that is the operand to the return.
+    if (Ret != &BB.front()) {
+      // Check for something else in the block.
+      BasicBlock::iterator I(Ret);
+      --I;
+      // Skip over debug info.
+      while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
+        --I;
+      if (!isa<DbgInfoIntrinsic>(I) &&
+          (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 ||
+           Ret->getOperand(0) != &*I))
+        continue;
+    }
+
+    // If this is the first returning block, remember it and keep going.
+    if (!RetBlock) {
+      RetBlock = &BB;
+      continue;
+    }
+
+    // Otherwise, we found a duplicate return block.  Merge the two.
+    Changed = true;
+
+    // The case when there is no input to the return, or when the returned
+    // values agree, is trivial.  Note that they can't agree if there are phis
+    // in the blocks.
+    if (Ret->getNumOperands() == 0 ||
+        Ret->getOperand(0) ==
+          cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+      BB.replaceAllUsesWith(RetBlock);
+      BB.eraseFromParent();
+      continue;
+    }
+
+    // If the canonical return block has no PHI node, create one now.
+    PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin());
+    if (!RetBlockPHI) {
+      Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0);
+      pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock);
+      RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(),
+                                    std::distance(PB, PE), "merge",
+                                    &RetBlock->front());
+
+      for (pred_iterator PI = PB; PI != PE; ++PI)
+        RetBlockPHI->addIncoming(InVal, *PI);
+      RetBlock->getTerminator()->setOperand(0, RetBlockPHI);
+    }
+
+    // Turn BB into a block that just unconditionally branches to the return
+    // block.  This handles the case when the two return blocks have a common
+    // predecessor but return different things.
+    RetBlockPHI->addIncoming(Ret->getOperand(0), &BB);
+    BB.getTerminator()->eraseFromParent();
+    BranchInst::Create(RetBlock, &BB);
+  }
+
+  return Changed;
+}
+
+/// Call SimplifyCFG on all the blocks in the function,
+/// iterating until no more changes are made.
+static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI,
+                                   const SimplifyCFGOptions &Options) {
+  bool Changed = false;
+  bool LocalChange = true;
+
+  SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges;
+  FindFunctionBackedges(F, Edges);
+  SmallPtrSet<BasicBlock *, 16> LoopHeaders;
+  for (unsigned i = 0, e = Edges.size(); i != e; ++i)
+    LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second));
+
+  while (LocalChange) {
+    LocalChange = false;
+
+    // Loop over all of the basic blocks and remove them if they are unneeded.
+    for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) {
+      if (simplifyCFG(&*BBIt++, TTI, Options, &LoopHeaders)) {
+        LocalChange = true;
+        ++NumSimpl;
+      }
+    }
+    Changed |= LocalChange;
+  }
+  return Changed;
+}
+
+bool llvm::simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI,
+                                const SimplifyCFGOptions &Options) {
+  bool EverChanged = removeUnreachableBlocks(F);
+  EverChanged |= mergeEmptyReturnBlocks(F);
+  EverChanged |= iterativelySimplifyCFG(F, TTI, Options);
+
+  // If neither pass changed anything, we're done.
+  if (!EverChanged) return false;
+
+  // iterativelySimplifyCFG can (rarely) make some loops dead.  If this happens,
+  // removeUnreachableBlocks is needed to nuke them, which means we should
+  // iterate between the two optimizations.  We structure the code like this to
+  // avoid rerunning iterativelySimplifyCFG if the second pass of
+  // removeUnreachableBlocks doesn't do anything.
+  if (!removeUnreachableBlocks(F))
+    return true;
+
+  do {
+    EverChanged = iterativelySimplifyCFG(F, TTI, Options);
+    EverChanged |= removeUnreachableBlocks(F);
+  } while (EverChanged);
+
+  return true;
+}
+
+// Command-line settings override compile-time settings.
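+// E.g. an invocation like 'opt -simplifycfg -bonus-inst-threshold=2 ...'
+// (illustrative) takes precedence over Opts.BonusInstThreshold.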
+SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) {
+  Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences()
+                                   ? UserBonusInstThreshold
+                                   : Opts.BonusInstThreshold;
+  Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences()
+                                       ? UserForwardSwitchCond
+                                       : Opts.ForwardSwitchCondToPhi;
+  Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences()
+                                           ? UserSwitchToLookup
+                                           : Opts.ConvertSwitchToLookupTable;
+  Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
+                                  ? UserKeepLoops
+                                  : Opts.NeedCanonicalLoop;
+  Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
+                                ? UserSinkCommonInsts
+                                : Opts.SinkCommonInsts;
+}
+
+PreservedAnalyses SimplifyCFGPass::run(Function &F,
+                                       FunctionAnalysisManager &AM) {
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  Options.AC = &AM.getResult<AssumptionAnalysis>(F);
+  if (!simplifyFunctionCFG(F, TTI, Options))
+    return PreservedAnalyses::all();
+  PreservedAnalyses PA;
+  PA.preserve<GlobalsAA>();
+  return PA;
+}
+
+CFGSimplifyPass::CFGSimplifyPass(unsigned Threshold, bool ForwardSwitchCond,
+                                 bool ConvertSwitch, bool KeepLoops,
+                                 bool SinkCommon,
+                                 std::function<bool(const Function &)> Ftor)
+    : FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
+  initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry());
+
+  // Check for command-line overrides of options for debug/customization.
+  Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences()
+                                   ? UserBonusInstThreshold
+                                   : Threshold;
+
+  Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences()
+                                       ? UserForwardSwitchCond
+                                       : ForwardSwitchCond;
+
+  Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences()
+                                           ? UserSwitchToLookup
+                                           : ConvertSwitch;
+
+  Options.NeedCanonicalLoop =
+      UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
+
+  Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
+                                ? UserSinkCommonInsts
+                                : SinkCommon;
+}
+char CFGSimplifyPass::ID = 0;
+INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+                      false)
+INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
+                    false)
+
+// Public interface to the CFGSimplification pass
+FunctionPass *
+llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
+                                  bool ConvertSwitch, bool KeepLoops,
+                                  bool SinkCommon,
+                                  std::function<bool(const Function &)> Ftor) {
+  return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
+                             KeepLoops, SinkCommon, std::move(Ftor));
+}
diff --git a/hpvm/llvm_patches/lib/Transforms/Utils/LoopSimplify.cpp b/hpvm/llvm_patches/lib/Transforms/Utils/LoopSimplify.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..614d38b58cb2742ef9b2e1ef576f78cc9f8f09c7
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Transforms/Utils/LoopSimplify.cpp
@@ -0,0 +1,882 @@
+//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs several transformations to transform natural loops into a
+// simpler form, which makes subsequent analyses and transformations simpler and
+// more effective.
+//
+// Loop pre-header insertion guarantees that there is a single, non-critical
+// entry edge from outside of the loop to the loop header.  This simplifies a
+// number of analyses and transformations, such as LICM.
+//
+// Loop exit-block insertion guarantees that all exit blocks from the loop
+// (blocks which are outside of the loop that have predecessors inside of the
+// loop) only have predecessors from inside of the loop (and are thus dominated
+// by the loop header).  This simplifies transformations such as store-sinking
+// that are built into LICM.
+//
+// This pass also guarantees that loops will have exactly one backedge.
+//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
+// Similar complications arise from callbr instructions, particularly in
+// asm-goto where blockaddress expressions are used.
+//
+// Note that the simplifycfg pass will clean up blocks which are split out but
+// end up being unnecessary, so usage of this pass should not pessimize
+// generated code.
+//
+// This pass obviously modifies the CFG, but updates loop information and
+// dominator information.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Utils/LoopSimplify.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/SetOperations.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/MemorySSA.h"
+#include "llvm/Analysis/MemorySSAUpdater.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/IR/Type.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils.h"
+#include "llvm/Transforms/Utils/BasicBlockUtils.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Transforms/Utils/LoopUtils.h"
+using namespace llvm;
+
+#define DEBUG_TYPE "loop-simplify"
+
+STATISTIC(NumNested, "Number of nested loops split out");
+
+// If the new block isn't already placed right after one of the 'outside
+// blocks', move it there.  This prevents the preheader from being placed
+// inside the loop body, e.g. when the loop hasn't been rotated.
+static void placeSplitBlockCarefully(BasicBlock *NewBB,
+                                     SmallVectorImpl<BasicBlock *> &SplitPreds,
+                                     Loop *L) {
+  // Check to see if NewBB is already well placed.
+  Function::iterator BBI = --NewBB->getIterator();
+  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+    if (&*BBI == SplitPreds[i])
+      return;
+  }
+
+  // If it isn't already after an outside block, move it after one.  This is
+  // always good as it makes the uncond branch from the outside block into a
+  // fall-through.
+
+  // Figure out *which* outside block to put this after.  Prefer an outside
+  // block that neighbors a BB actually in the loop.
+  BasicBlock *FoundBB = nullptr;
+  for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) {
+    Function::iterator BBI = SplitPreds[i]->getIterator();
+    if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) {
+      FoundBB = SplitPreds[i];
+      break;
+    }
+  }
+
+  // If our heuristic for a *good* bb to place this after doesn't find
+  // anything, just pick something.  It's likely better than leaving it within
+  // the loop.
+  if (!FoundBB)
+    FoundBB = SplitPreds[0];
+  NewBB->moveAfter(FoundBB);
+}
+
+/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a
+/// preheader, this method is called to insert one.  This method has two phases:
+/// preheader insertion and analysis updating.
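+///
+/// For example (illustrative CFG): if blocks %entry and %other both branch to
+/// the loop header, both edges are redirected to a new '<header>.preheader'
+/// block that branches unconditionally to the header.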
+///
+BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT,
+                                         LoopInfo *LI, MemorySSAUpdater *MSSAU,
+                                         bool PreserveLCSSA) {
+  BasicBlock *Header = L->getHeader();
+
+  // Compute the set of predecessors of the loop that are not in the loop.
+  SmallVector<BasicBlock*, 8> OutsideBlocks;
+  for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
+       PI != PE; ++PI) {
+    BasicBlock *P = *PI;
+    if (!L->contains(P)) {         // Coming in from outside the loop?
+      // If the loop is branched to from an indirect terminator, we won't
+      // be able to fully transform the loop, because it prohibits
+      // edge splitting.
+      if (P->getTerminator()->isIndirectTerminator())
+        return nullptr;
+
+      // Keep track of it.
+      OutsideBlocks.push_back(P);
+    }
+  }
+
+  // Split out the loop pre-header.
+  BasicBlock *PreheaderBB;
+  PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT,
+                                       LI, MSSAU, PreserveLCSSA);
+  if (!PreheaderBB)
+    return nullptr;
+
+  LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header "
+                    << PreheaderBB->getName() << "\n");
+
+  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+  // code layout too horribly.
+  placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L);
+
+  return PreheaderBB;
+}
+
+/// Add the specified block, and all of its predecessors, to the specified set,
+/// if it's not already in there.  Stop predecessor traversal when we reach
+/// StopBlock.
+static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock,
+                                  std::set<BasicBlock*> &Blocks) {
+  SmallVector<BasicBlock *, 8> Worklist;
+  Worklist.push_back(InputBB);
+  do {
+    BasicBlock *BB = Worklist.pop_back_val();
+    if (Blocks.insert(BB).second && BB != StopBlock)
+      // If BB has not been processed yet and is not the stop block, then
+      // insert its predecessors into the worklist.
+      for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) {
+        BasicBlock *WBB = *I;
+        Worklist.push_back(WBB);
+      }
+  } while (!Worklist.empty());
+}
+
+/// The first part of loop-nestification is to find a PHI node that tells
+/// us how to partition the loops.
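+///
+/// For example (illustrative IR, names hypothetical):
+///   %x = phi [ %a, %preheader ], [ %x, %be1 ], [ %x.next, %be2 ]
+/// %x is unchanged around backedge %be1, so %be1 can stay the inner loop's
+/// backedge while %preheader and %be2 are split out to form the outer loop.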
+static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT,
+                                        AssumptionCache *AC) {
+  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+  for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) {
+    PHINode *PN = cast<PHINode>(I);
+    ++I;
+    if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+      // This is a degenerate PHI already, don't modify it!
+      PN->replaceAllUsesWith(V);
+      PN->eraseFromParent();
+      continue;
+    }
+
+    // Scan this PHI node looking for a use of the PHI node by itself.
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+      if (PN->getIncomingValue(i) == PN &&
+          L->contains(PN->getIncomingBlock(i)))
+        // We found something tasty to remove.
+        return PN;
+  }
+  return nullptr;
+}
+
+/// If this loop has multiple backedges, try to pull one of them out into
+/// a nested loop.
+///
+/// This is important for code that looks like
+/// this:
+///
+///  Loop:
+///     ...
+///     br cond, Loop, Next
+///     ...
+///     br cond2, Loop, Out
+///
+/// To identify this common case, we look at the PHI nodes in the header of the
+/// loop.  PHI nodes with unchanging values on one backedge correspond to values
+/// that change in the "outer" loop, but not in the "inner" loop.
+///
+/// If we are able to separate out a loop, return the new outer loop that was
+/// created.
+///
+static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader,
+                                DominatorTree *DT, LoopInfo *LI,
+                                ScalarEvolution *SE, bool PreserveLCSSA,
+                                AssumptionCache *AC, MemorySSAUpdater *MSSAU) {
+  // Don't try to separate loops without a preheader.
+  if (!Preheader)
+    return nullptr;
+
+  // The header is not a landing pad; preheader insertion should ensure this.
+  BasicBlock *Header = L->getHeader();
+  assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
+
+  PHINode *PN = findPHIToPartitionLoops(L, DT, AC);
+  if (!PN) return nullptr;  // No known way to partition.
+
+  // Pull out all predecessors that have varying values in the loop.  This
+  // handles the case when a PHI node has multiple instances of itself as
+  // arguments.
+  SmallVector<BasicBlock*, 8> OuterLoopPreds;
+  for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+    if (PN->getIncomingValue(i) != PN ||
+        !L->contains(PN->getIncomingBlock(i))) {
+      // We can't split indirect control flow edges.
+      if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator())
+        return nullptr;
+      OuterLoopPreds.push_back(PN->getIncomingBlock(i));
+    }
+  }
+  LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
+  // If ScalarEvolution is around and knows anything about values in
+  // this loop, tell it to forget them, because we're about to
+  // substantially change it.
+  if (SE)
+    SE->forgetLoop(L);
+
+  BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer",
+                                             DT, LI, MSSAU, PreserveLCSSA);
+
+  // Make sure that NewBB is put someplace intelligent, which doesn't mess up
+  // code layout too horribly.
+  placeSplitBlockCarefully(NewBB, OuterLoopPreds, L);
+
+  // Create the new outer loop.
+  Loop *NewOuter = LI->AllocateLoop();
+
+  // Change the parent loop to use the outer loop as its child now.
+  if (Loop *Parent = L->getParentLoop())
+    Parent->replaceChildLoopWith(L, NewOuter);
+  else
+    LI->changeTopLevelLoop(L, NewOuter);
+
+  // L is now a subloop of our outer loop.
+  NewOuter->addChildLoop(L);
+
+  for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
+       I != E; ++I)
+    NewOuter->addBlockEntry(*I);
+
+  // Now reset the header in L, which had been moved by
+  // SplitBlockPredecessors for the outer loop.
+  L->moveToHeader(Header);
+
+  // Determine which blocks should stay in L and which should be moved out to
+  // the Outer loop now.
+  std::set<BasicBlock*> BlocksInL;
+  for (pred_iterator PI = pred_begin(Header), E = pred_end(Header); PI != E;
+       ++PI) {
+    BasicBlock *P = *PI;
+    if (DT->dominates(Header, P))
+      addBlockAndPredsToSet(P, Header, BlocksInL);
+  }
+
+  // Scan all of the loop children of L, moving them to OuterLoop if they are
+  // not part of the inner loop.
+  const std::vector<Loop*> &SubLoops = L->getSubLoops();
+  for (size_t I = 0; I != SubLoops.size(); )
+    if (BlocksInL.count(SubLoops[I]->getHeader()))
+      ++I;   // Loop remains in L
+    else
+      NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I));
+
+  SmallVector<BasicBlock *, 8> OuterLoopBlocks;
+  OuterLoopBlocks.push_back(NewBB);
+  // Now that we know which blocks are in L and which need to be moved to
+  // OuterLoop, move any blocks that need it.
+  for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+    BasicBlock *BB = L->getBlocks()[i];
+    if (!BlocksInL.count(BB)) {
+      // Move this block to the parent, updating the exit blocks sets
+      L->removeBlockFromLoop(BB);
+      if ((*LI)[BB] == L) {
+        LI->changeLoopFor(BB, NewOuter);
+        OuterLoopBlocks.push_back(BB);
+      }
+      --i;
+    }
+  }
+
+  // Split edges to exit blocks from the inner loop, if they emerged in the
+  // process of separating the outer one.
+  formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
+
+  if (PreserveLCSSA) {
+    // Fix LCSSA form for L. Some values, which previously were only used inside
+    // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
+    // in corresponding exit blocks.
+    // We don't need to form LCSSA recursively, because there cannot be uses
+    // inside a newly created loop of defs from inner loops as those would
+    // already be a use of an LCSSA phi node.
+    formLCSSA(*L, *DT, LI, SE);
+
+    assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
+           "LCSSA is broken after separating nested loops!");
+  }
+
+  return NewOuter;
+}
+
+/// This method is called when the specified loop has more than one
+/// backedge in it.
+///
+/// If this occurs, revector all of these backedges to target a new basic block
+/// and have that block branch to the loop header.  This ensures that loops
+/// have exactly one backedge.
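+///
+/// For example (illustrative CFG): backedges from %be1 and %be2 to %header
+/// are redirected to a new '<header>.backedge' block, which branches
+/// unconditionally to %header.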
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
+                                             DominatorTree *DT, LoopInfo *LI,
+                                             MemorySSAUpdater *MSSAU) {
+  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+  // Get information about the loop
+  BasicBlock *Header = L->getHeader();
+  Function *F = Header->getParent();
+
+  // Unique backedge insertion currently depends on having a preheader.
+  if (!Preheader)
+    return nullptr;
+
+  // The header is not an EH pad; preheader insertion should ensure this.
+  assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
+
+  // Figure out which basic blocks contain back-edges to the loop header.
+  std::vector<BasicBlock*> BackedgeBlocks;
+  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E;
+       ++I) {
+    BasicBlock *P = *I;
+
+    // Indirect edges cannot be split, so we must fail if we find one.
+    if (P->getTerminator()->isIndirectTerminator())
+      return nullptr;
+
+    if (P != Preheader) BackedgeBlocks.push_back(P);
+  }
+
+  // Create and insert the new backedge block...
+  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+                                           Header->getName() + ".backedge", F);
+  BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+  BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
+
+  LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+                    << BEBlock->getName() << "\n");
+
+  // Move the new backedge block to right after the last backedge block.
+  Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
+  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+  // Now that the block has been inserted into the function, create PHI nodes in
+  // the backedge block which correspond to any PHI nodes in the header block.
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+                                     PN->getName()+".be", BETerminator);
+
+    // Loop over the PHI node, moving all entries except the one for the
+    // preheader over to the new PHI node.
+    unsigned PreheaderIdx = ~0U;
+    bool HasUniqueIncomingValue = true;
+    Value *UniqueValue = nullptr;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *IBB = PN->getIncomingBlock(i);
+      Value *IV = PN->getIncomingValue(i);
+      if (IBB == Preheader) {
+        PreheaderIdx = i;
+      } else {
+        NewPN->addIncoming(IV, IBB);
+        if (HasUniqueIncomingValue) {
+          if (!UniqueValue)
+            UniqueValue = IV;
+          else if (UniqueValue != IV)
+            HasUniqueIncomingValue = false;
+        }
+      }
+    }
+
+    // Delete all of the incoming values from the old PN except the preheader's
+    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+    if (PreheaderIdx != 0) {
+      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+    }
+    // Nuke all entries except the zero'th.
+    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+      PN->removeIncomingValue(e-i, false);
+
+    // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+    PN->addIncoming(NewPN, BEBlock);
+
+    // As an optimization, if all incoming values in the new PhiNode (which is a
+    // subset of the incoming values of the old PHI node) have the same value,
+    // eliminate the PHI Node.
+    if (HasUniqueIncomingValue) {
+      NewPN->replaceAllUsesWith(UniqueValue);
+      BEBlock->getInstList().erase(NewPN);
+    }
+  }
+
+  // Now that all of the PHI nodes have been inserted and adjusted, modify the
+  // backedge blocks to jump to the BEBlock instead of the header.
+  // If one of the backedges has llvm.loop metadata attached, we remove
+  // it from the backedge and add it to BEBlock.
+  unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
+  MDNode *LoopMD = nullptr;
+  for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+    Instruction *TI = BackedgeBlocks[i]->getTerminator();
+    if (!LoopMD)
+      LoopMD = TI->getMetadata(LoopMDKind);
+    TI->setMetadata(LoopMDKind, nullptr);
+    TI->replaceSuccessorWith(Header, BEBlock);
+  }
+  BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+
+  //===--- Update all analyses which we must preserve now -----------------===//
+
+  // Update Loop Information - we know that this block is now in the current
+  // loop and all parent loops.
+  L->addBasicBlockToLoop(BEBlock, *LI);
+
+  // Update dominator information
+  DT->splitBlock(BEBlock);
+
+  if (MSSAU)
+    MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
+                                                      BEBlock);
+
+  return BEBlock;
+}
+
+/// Simplify one loop and queue further loops for simplification.
+static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
+                            DominatorTree *DT, LoopInfo *LI,
+                            ScalarEvolution *SE, AssumptionCache *AC,
+                            MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+  bool Changed = false;
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ReprocessLoop:
+
+  // Check to see that no blocks (other than the header) in this loop have
+  // predecessors that are not in the loop.  This is not valid for natural
+  // loops, but can occur if the blocks are unreachable.  Since they are
+  // unreachable we can just shamelessly delete those CFG edges!
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+       BB != E; ++BB) {
+    if (*BB == L->getHeader()) continue;
+
+    SmallPtrSet<BasicBlock*, 4> BadPreds;
+    for (pred_iterator PI = pred_begin(*BB),
+         PE = pred_end(*BB); PI != PE; ++PI) {
+      BasicBlock *P = *PI;
+      if (!L->contains(P))
+        BadPreds.insert(P);
+    }
+
+    // Delete each unique out-of-loop (and thus dead) predecessor.
+    for (BasicBlock *P : BadPreds) {
+
+      LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+                        << P->getName() << "\n");
+
+      // Zap the dead pred's terminator and replace it with unreachable.
+      Instruction *TI = P->getTerminator();
+      changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
+                          /*DTU=*/nullptr, MSSAU);
+      Changed = true;
+    }
+  }
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  // If there are exiting blocks with branches on undef, resolve the undef in
+  // the direction which will exit the loop. This will help simplify loop
+  // trip count computations.
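+  //
+  // E.g. (illustrative IR) 'br i1 undef, label %in.loop, label %exit' becomes
+  // 'br i1 false, label %in.loop, label %exit' when %exit leaves the loop.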
+  SmallVector<BasicBlock*, 8> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  for (BasicBlock *ExitingBlock : ExitingBlocks)
+    if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+      if (BI->isConditional()) {
+        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+          LLVM_DEBUG(dbgs()
+                     << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+                     << ExitingBlock->getName() << "\n");
+
+          BI->setCondition(ConstantInt::get(Cond->getType(),
+                                            !L->contains(BI->getSuccessor(0))));
+
+          Changed = true;
+        }
+      }
+
+  // Does the loop already have a preheader?  If so, don't insert one.
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
+    if (Preheader)
+      Changed = true;
+  }
+
+  // Next, check to make sure that all exit nodes of the loop only have
+  // predecessors that are inside of the loop.  This check guarantees that the
+  // loop preheader/header will dominate the exit blocks.  If the exit block has
+  // predecessors from outside of the loop, split the edge now.
+  if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
+    Changed = true;
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  // If the header has more than two predecessors at this point (from the
+  // preheader and from multiple backedges), we must adjust the loop.
+  BasicBlock *LoopLatch = L->getLoopLatch();
+  if (!LoopLatch) {
+    // If this is really a nested loop, rip it out into a child loop.  Don't do
+    // this for loops with a giant number of backedges, just factor them into a
+    // common backedge instead.
+    if (L->getNumBackEdges() < 8) {
+      if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
+                                            PreserveLCSSA, AC, MSSAU)) {
+        ++NumNested;
+        // Enqueue the outer loop as it should be processed next in our
+        // depth-first nest walk.
+        Worklist.push_back(OuterL);
+
+        // This is a big restructuring change, reprocess the whole loop.
+        Changed = true;
+        // GCC doesn't eliminate this tail recursion.
+        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
+        goto ReprocessLoop;
+      }
+    }
+
+    // If we either couldn't, or didn't want to, identify nesting of the loops,
+    // insert a new block that all backedges target, then make it jump to the
+    // loop header.
+    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
+    if (LoopLatch)
+      Changed = true;
+  }
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
+  // Scan over the PHI nodes in the loop header.  Since they now have only two
+  // incoming values (the loop is canonicalized), we may have simplified the PHI
+  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
+  PHINode *PN;
+  for (BasicBlock::iterator I = L->getHeader()->begin();
+       (PN = dyn_cast<PHINode>(I++)); )
+    if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+      if (SE) SE->forgetValue(PN);
+      if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
+        PN->replaceAllUsesWith(V);
+        PN->eraseFromParent();
+      }
+    }
+
+  // If this loop has multiple exits and the exits all go to the same
+  // block, attempt to merge the exits. This helps several passes, such
+  // as LoopRotation, which do not support loops with multiple exits.
+  // SimplifyCFG also does this (and this code uses the same utility
+  // function), however this code is loop-aware, where SimplifyCFG is
+  // not. That gives it the advantage of being able to hoist
+  // loop-invariant instructions out of the way to open up more
+  // opportunities, and the disadvantage of having the responsibility
+  // to preserve dominator information.
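+  //
+  // E.g. (illustrative) two exiting blocks that both branch to %exit can be
+  // folded into their common predecessor once everything but the comparison
+  // and branch has been hoisted out as loop-invariant.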
+  auto HasUniqueExitBlock = [&]() {
+    BasicBlock *UniqueExit = nullptr;
+    for (auto *ExitingBB : ExitingBlocks)
+      for (auto *SuccBB : successors(ExitingBB)) {
+        if (L->contains(SuccBB))
+          continue;
+
+        if (!UniqueExit)
+          UniqueExit = SuccBB;
+        else if (UniqueExit != SuccBB)
+          return false;
+      }
+
+    return true;
+  };
+  if (HasUniqueExitBlock()) {
+    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+      BasicBlock *ExitingBlock = ExitingBlocks[i];
+      if (!ExitingBlock->getSinglePredecessor()) continue;
+      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+      if (!BI || !BI->isConditional()) continue;
+      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+      if (!CI || CI->getParent() != ExitingBlock) continue;
+
+      // Attempt to hoist out all instructions except for the
+      // comparison and the branch.
+      bool AllInvariant = true;
+      bool AnyInvariant = false;
+      for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
+        Instruction *Inst = &*I++;
+        if (Inst == CI)
+          continue;
+        if (!L->makeLoopInvariant(
+                Inst, AnyInvariant,
+                Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
+          AllInvariant = false;
+          break;
+        }
+      }
+      if (AnyInvariant) {
+        Changed = true;
+        // The loop disposition of all SCEV expressions that depend on any
+        // hoisted values have also changed.
+        if (SE)
+          SE->forgetLoopDispositions(L);
+      }
+      if (!AllInvariant) continue;
+
+      // The block has now been cleared of all instructions except for
+      // a comparison and a conditional branch. SimplifyCFG may be able
+      // to fold it now.
+      if (!FoldBranchToCommonDest(BI, MSSAU))
+        continue;
+
+      // Success. The block is now dead, so remove it from the loop,
+      // update the dominator tree and delete it.
+      LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+                        << ExitingBlock->getName() << "\n");
+
+      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+      Changed = true;
+      LI->removeBlock(ExitingBlock);
+
+      DomTreeNode *Node = DT->getNode(ExitingBlock);
+      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+        Node->getChildren();
+      while (!Children.empty()) {
+        DomTreeNode *Child = Children.front();
+        DT->changeImmediateDominator(Child, Node->getIDom());
+      }
+      DT->eraseNode(ExitingBlock);
+      if (MSSAU) {
+        SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+        ExitBlockSet.insert(ExitingBlock);
+        MSSAU->removeBlocks(ExitBlockSet);
+      }
+
+      BI->getSuccessor(0)->removePredecessor(
+          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+      BI->getSuccessor(1)->removePredecessor(
+          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+      ExitingBlock->eraseFromParent();
+    }
+  }
+
+  // Changing exit conditions for blocks may affect exit counts of this loop
+  // and any of its parents, so we must invalidate the entire subtree if we've
+  // made any changes.
+  if (Changed && SE)
+    SE->forgetTopmostLoop(L);
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  return Changed;
+}
+
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
+                        ScalarEvolution *SE, AssumptionCache *AC,
+                        MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+  bool Changed = false;
+
+#ifndef NDEBUG
+  // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
+  // form.
+  if (PreserveLCSSA) {
+    assert(DT && "DT not available.");
+    assert(LI && "LI not available.");
+    assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+           "Requested to preserve LCSSA, but it's already broken.");
+  }
+#endif
+
+  // Worklist maintains our depth-first queue of loops in this nest to process.
+  SmallVector<Loop *, 4> Worklist;
+  Worklist.push_back(L);
+
+  // Walk the worklist from front to back, pushing newly found sub loops onto
+  // the back. This will let us process loops from back to front in depth-first
+  // order. We can use this simple process because loops form a tree.
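+  //
+  // E.g. a nest L1 > L2 > L3 yields the worklist [L1, L2, L3], which is then
+  // popped and processed innermost-first: L3, L2, L1.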
+  for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+    Loop *L2 = Worklist[Idx];
+    Worklist.append(L2->begin(), L2->end());
+  }
+
+  while (!Worklist.empty())
+    Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
+                               AC, MSSAU, PreserveLCSSA);
+
+  return Changed;
+}
+
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+                "Canonicalize natural loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+                "Canonicalize natural loops", false, false)
+
+// Publicly exposed interface to the pass.
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+  bool Changed = false;
+  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+  ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
+  AssumptionCache *AC =
+      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  MemorySSA *MSSA = nullptr;
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  if (EnableMSSALoopDependency) {
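+    // If an earlier pass already computed MemorySSA, keep it up to date;
+    // otherwise MSSAU stays null and all MemorySSA updates are skipped.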
+    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+    if (MSSAAnalysis) {
+      MSSA = &MSSAAnalysis->getMSSA();
+      MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+    }
+  }
+
+  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+  // Simplify each loop nest in the function.
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
+
+#ifndef NDEBUG
+  if (PreserveLCSSA) {
+    bool InLCSSA = all_of(
+        *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
+    assert(InLCSSA && "LCSSA is broken after loop-simplify.");
+  }
+#endif
+  return Changed;
+}
+
+PreservedAnalyses LoopSimplifyPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+  bool Changed = false;
+  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
+  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+  AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+
+  // Note that we don't preserve LCSSA in the new PM; if you need it, run
+  // LCSSA after simplifying the loops. MemorySSA is not preserved either.
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    Changed |=
+        simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
+
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<LoopAnalysis>();
+  PA.preserve<BasicAA>();
+  PA.preserve<GlobalsAA>();
+  PA.preserve<SCEVAA>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<DependenceAnalysis>();
+  // BPI maps conditional terminators to probabilities. LoopSimplify can
+  // insert blocks, but it does so only by splitting existing blocks and
+  // edges. This results in the interesting property that all new terminators
+  // inserted are unconditional branches which do not appear in BPI. All
+  // deletions are handled via ValueHandle callbacks within BPI.
+  PA.preserve<BranchProbabilityAnalysis>();
+  return PA;
+}
+
+// FIXME: Restore this code when we re-enable verification in verifyAnalysis
+// below.
+#if 0
+static void verifyLoop(Loop *L) {
+  // Verify subloops.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    verifyLoop(*I);
+
+  // It used to be possible to just assert L->isLoopSimplifyForm(); however,
+  // with the introduction of indirectbr, there are now cases where it's
+  // not possible to transform a loop as necessary. We can at least check
+  // that an indirectbr is nearby whenever there's trouble.
+
+  // Indirectbr can interfere with preheader and unique backedge insertion.
+  if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+    bool HasIndBrPred = false;
+    for (pred_iterator PI = pred_begin(L->getHeader()),
+         PE = pred_end(L->getHeader()); PI != PE; ++PI)
+      if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+        HasIndBrPred = true;
+        break;
+      }
+    assert(HasIndBrPred &&
+           "LoopSimplify has no excuse for missing loop header info!");
+    (void)HasIndBrPred;
+  }
+
+  // Indirectbr can interfere with exit block canonicalization.
+  if (!L->hasDedicatedExits()) {
+    bool HasIndBrExiting = false;
+    SmallVector<BasicBlock*, 8> ExitingBlocks;
+    L->getExitingBlocks(ExitingBlocks);
+    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+      if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+        HasIndBrExiting = true;
+        break;
+      }
+    }
+
+    assert(HasIndBrExiting &&
+           "LoopSimplify has no excuse for missing exit block info!");
+    (void)HasIndBrExiting;
+  }
+}
+#endif
+
+void LoopSimplify::verifyAnalysis() const {
+  // FIXME: This routine is being called mid-way through the loop pass manager
+  // as loop passes destroy this analysis. That's actually fine, but we have no
+  // way of expressing that here. Once all of the passes that destroy this are
+  // hoisted out of the loop pass manager we can add back verification here.
+#if 0
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    verifyLoop(*I);
+#endif
+}
diff --git a/hpvm/llvm_patches/patch_llvm.sh b/hpvm/llvm_patches/patch_llvm.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fb1ff433847667e3b44ceb8b897015111f9d4bb2
--- /dev/null
+++ b/hpvm/llvm_patches/patch_llvm.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+
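+# Detect the shell actually running this script (via /proc, so Linux-only)
+# so that the script's own directory resolves under both bash and zsh.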
+SH="$(readlink -f /proc/$$/exe)"
+if [[ "$SH" == "/bin/zsh" ]]; then
+  DIR="${0:A:h}"
+else
+  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+fi
+
+cd "$DIR" || exit 1
+shopt -s globstar
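+# Visit every file under this directory ('**' needs globstar in bash).
+# For each shipped source (.cpp/.h/.td/.txt/.def), record a unified diff
+# against the pristine LLVM tree, then apply it to LLVM in place.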
+for f in ./**/*
+do
+  if [ -f "$f" ]; then
+    if [[ ( $f == *.cpp ) || ( $f == *.h ) || ( $f == *.td ) || ( $f == *.txt ) || ( $f == *.def ) ]]; then
+      diff -Nu "$LLVM_SRC_ROOT/$f" "$f" > "$f.patch"
+      patch "$LLVM_SRC_ROOT/$f" < "$f.patch"
+    fi
+  fi
+done
diff --git a/hpvm/scripts/hpvm_installer.py b/hpvm/scripts/hpvm_installer.py
index ae06ca12f6914f748da061d5ed64a1106c5123aa..950c73515e3ff8474e6de17368514fb8123aa8ae 100755
--- a/hpvm/scripts/hpvm_installer.py
+++ b/hpvm/scripts/hpvm_installer.py
@@ -157,7 +157,7 @@ Example: "DCMAKE_BUILD_TYPE=Release DCMAKE_INSTALL_PREFIX=install".
 Arguments: """
     )
     args.cmake_args = input()
-    if args.cmake_args.strip() != "":    
+    if args.cmake_args.strip() != "":
       args.cmake_args = [f"-{arg}" for arg in args.cmake_args.split(" ")]
 
     args.no_pypkg = not input_with_check(
@@ -187,7 +187,7 @@ def print_args(args):
 
 def check_python_version():
     from sys import version_info, version, executable
-    
+
     lowest, highest = PYTHON_REQ
     if not (lowest <= version_info < highest):
         lowest_str = ".".join([str(n) for n in lowest])
@@ -276,8 +276,7 @@ def link_and_patch():
             symlink(ROOT_DIR / link, hpvm / link)
     print("Applying HPVM patches...")
     chdir(ROOT_DIR / "llvm_patches")
-    check_call(["bash", "./construct_patch.sh"])
-    check_call(["bash", "./apply_patch.sh"])
+    check_call(["bash", "./patch_llvm.sh"])
     print("Patches applied.")
     chdir(cwd)
 
@@ -370,7 +369,7 @@ def main():
             """
 HPVM not installed.
 To complete installation, follow these instructions:
-- Create and navigate to a folder "./build" 
+- Create and navigate to a folder "./build"
 - Run "cmake ../llvm [options]". Find potential options in README.md.
 - Run "make -j<number of threads> hpvm-clang" and then "make install"
 For more details refer to README.md.