diff --git a/hpvm/llvm_patches/apply_patch.sh b/hpvm/llvm_patches/apply_patch.sh deleted file mode 100644 index 289e5c11e319aa16262952d2d079f986c2e987b8..0000000000000000000000000000000000000000 --- a/hpvm/llvm_patches/apply_patch.sh +++ /dev/null @@ -1,34 +0,0 @@ -#!/bin/sh - -### File Copies -cp include/IR/IntrinsicsHPVM.td ${LLVM_SRC_ROOT}/include/llvm/IR/IntrinsicsHPVM.td - - -## Header File Patches -patch ${LLVM_SRC_ROOT}/include/llvm/IR/Attributes.td < ./include/IR/Attributes.td.patch - -patch ${LLVM_SRC_ROOT}/include/llvm/IR/Intrinsics.td < ./include/IR/Intrinsics.td.patch - -patch ${LLVM_SRC_ROOT}/include/llvm/Bitcode/LLVMBitCodes.h < ./include/Bitcode/LLVMBitCodes.h.patch - -patch ${LLVM_SRC_ROOT}/include/llvm/Support/Debug.h < ./include/Support/Debug.h.patch - - -#### Patching Sources - - -patch ${LLVM_SRC_ROOT}/lib/AsmParser/LLLexer.cpp < ./lib/AsmParser/LLLexer.cpp.patch - -patch ${LLVM_SRC_ROOT}/lib/AsmParser/LLLexer.h < ./lib/AsmParser/LLLexer.h.patch - -patch ${LLVM_SRC_ROOT}/lib/AsmParser/LLParser.cpp < ./lib/AsmParser/LLParser.cpp.patch - -patch ${LLVM_SRC_ROOT}/lib/AsmParser/LLParser.h < ./lib/AsmParser/LLParser.h.patch - -patch ${LLVM_SRC_ROOT}/lib/AsmParser/LLToken.h < ./lib/AsmParser/LLToken.h.patch - -patch ${LLVM_SRC_ROOT}/lib/IR/Attributes.cpp < ./lib/IR/Attributes.cpp.patch - -patch ${LLVM_SRC_ROOT}/lib/Bitcode/Reader/BitcodeReader.cpp < ./lib/Bitcode/Reader/BitcodeReader.cpp.patch - -patch ${LLVM_SRC_ROOT}/lib/Bitcode/Writer/BitcodeWriter.cpp < ./lib/Bitcode/Writer/BitcodeWriter.cpp.patch diff --git a/hpvm/llvm_patches/construct_patch.sh b/hpvm/llvm_patches/construct_patch.sh deleted file mode 100644 index b957c853e71f59bc17e7def6d544c86eefd382b6..0000000000000000000000000000000000000000 --- a/hpvm/llvm_patches/construct_patch.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/sh - -#### Computing Header Diff -for file in Bitcode/LLVMBitCodes.h IR/Attributes.td IR/Intrinsics.td Support/Debug.h; do - diff -u $LLVM_SRC_ROOT/include/llvm/$file include/$file > include/$file.patch || true -done -#### Computing Source File Diff -for file in AsmParser/LLLexer.cpp AsmParser/LLLexer.h AsmParser/LLParser.cpp \ - AsmParser/LLParser.h AsmParser/LLToken.h IR/Attributes.cpp \ - Bitcode/Reader/BitcodeReader.cpp Bitcode/Writer/BitcodeWriter.cpp; do - diff -u $LLVM_SRC_ROOT/lib/$file lib/$file > lib/$file.patch || true -done diff --git a/hpvm/llvm_patches/include/llvm/ADT/DirectedGraph.h b/hpvm/llvm_patches/include/llvm/ADT/DirectedGraph.h new file mode 100644 index 0000000000000000000000000000000000000000..cfe98e178a91dfc413b13106d6ff41568562c71d --- /dev/null +++ b/hpvm/llvm_patches/include/llvm/ADT/DirectedGraph.h @@ -0,0 +1,273 @@ +//===- llvm/ADT/DirectedGraph.h - Directed Graph ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the interface and a base class implementation for a +// directed graph. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_DIRECTEDGRAPH_H +#define LLVM_ADT_DIRECTEDGRAPH_H + +#include "llvm/ADT/GraphTraits.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" + +namespace llvm { + +/// Represent an edge in the directed graph. +/// The edge contains the target node it connects to. +template <class NodeType, class EdgeType> class DGEdge { +public: + DGEdge() = delete; + /// Create an edge pointing to the given node \p N. + explicit DGEdge(NodeType &N) : TargetNode(N) {} + explicit DGEdge(const DGEdge<NodeType, EdgeType> &E) + : TargetNode(E.TargetNode) {} + DGEdge<NodeType, EdgeType> &operator=(const DGEdge<NodeType, EdgeType> &E) { + TargetNode = E.TargetNode; + return *this; + } + + /// Static polymorphism: delegate implementation (via isEqualTo) to the + /// derived class. + bool operator==(const EdgeType &E) const { return getDerived().isEqualTo(E); } + bool operator!=(const EdgeType &E) const { return !operator==(E); } + + /// Retrieve the target node this edge connects to. + const NodeType &getTargetNode() const { return TargetNode; } + NodeType &getTargetNode() { + return const_cast<NodeType &>( + static_cast<const DGEdge<NodeType, EdgeType> &>(*this).getTargetNode()); + } + + /// Set the target node this edge connects to. + void setTargetNode(const NodeType &N) { TargetNode = N; } + +protected: + // As the default implementation use address comparison for equality. + bool isEqualTo(const EdgeType &E) const { return this == &E; } + + // Cast the 'this' pointer to the derived type and return a reference. + EdgeType &getDerived() { return *static_cast<EdgeType *>(this); } + const EdgeType &getDerived() const { + return *static_cast<const EdgeType *>(this); + } + + // The target node this edge connects to. + NodeType &TargetNode; +}; + +/// Represent a node in the directed graph. +/// The node has a (possibly empty) list of outgoing edges. +template <class NodeType, class EdgeType> class DGNode { +public: + using EdgeListTy = SetVector<EdgeType *>; + using iterator = typename EdgeListTy::iterator; + using const_iterator = typename EdgeListTy::const_iterator; + + /// Create a node with a single outgoing edge \p E. + explicit DGNode(EdgeType &E) : Edges() { Edges.insert(&E); } + DGNode() = default; + + explicit DGNode(const DGNode<NodeType, EdgeType> &N) : Edges(N.Edges) {} + DGNode(DGNode<NodeType, EdgeType> &&N) : Edges(std::move(N.Edges)) {} + + DGNode<NodeType, EdgeType> &operator=(const DGNode<NodeType, EdgeType> &N) { + Edges = N.Edges; + return *this; + } + DGNode<NodeType, EdgeType> &operator=(const DGNode<NodeType, EdgeType> &&N) { + Edges = std::move(N.Edges); + return *this; + } + + /// Static polymorphism: delegate implementation (via isEqualTo) to the + /// derived class. + bool operator==(const NodeType &N) const { return getDerived().isEqualTo(N); } + bool operator!=(const NodeType &N) const { return !operator==(N); } + + const_iterator begin() const { return Edges.begin(); } + const_iterator end() const { return Edges.end(); } + iterator begin() { return Edges.begin(); } + iterator end() { return Edges.end(); } + const EdgeType &front() const { return *Edges.front(); } + EdgeType &front() { return *Edges.front(); } + const EdgeType &back() const { return *Edges.back(); } + EdgeType &back() { return *Edges.back(); } + + /// Collect in \p EL, all the edges from this node to \p N. 
+ /// Return true if at least one edge was found, and false otherwise. + /// Note that this implementation allows more than one edge to connect + /// a given pair of nodes. + bool findEdgesTo(const NodeType &N, SmallVectorImpl<EdgeType *> &EL) const { + assert(EL.empty() && "Expected the list of edges to be empty."); + for (auto *E : Edges) + if (E->getTargetNode() == N) + EL.push_back(E); + return !EL.empty(); + } + + /// Add the given edge \p E to this node, if it doesn't exist already. Returns + /// true if the edge is added and false otherwise. + bool addEdge(EdgeType &E) { return Edges.insert(&E); } + + /// Remove the given edge \p E from this node, if it exists. + void removeEdge(EdgeType &E) { Edges.remove(&E); } + + /// Test whether there is an edge that goes from this node to \p N. + bool hasEdgeTo(const NodeType &N) const { + return (findEdgeTo(N) != Edges.end()); + } + + /// Retrieve the outgoing edges for the node. + const EdgeListTy &getEdges() const { return Edges; } + EdgeListTy &getEdges() { + return const_cast<EdgeListTy &>( + static_cast<const DGNode<NodeType, EdgeType> &>(*this).Edges); + } + + /// Clear the outgoing edges. + void clear() { Edges.clear(); } + +protected: + // As the default implementation use address comparison for equality. + bool isEqualTo(const NodeType &N) const { return this == &N; } + + // Cast the 'this' pointer to the derived type and return a reference. + NodeType &getDerived() { return *static_cast<NodeType *>(this); } + const NodeType &getDerived() const { + return *static_cast<const NodeType *>(this); + } + + /// Find an edge to \p N. If more than one edge exists, this will return + /// the first one in the list of edges. + const_iterator findEdgeTo(const NodeType &N) const { + return llvm::find_if( + Edges, [&N](const EdgeType *E) { return E->getTargetNode() == N; }); + } + + // The list of outgoing edges. + EdgeListTy Edges; +}; + +/// Directed graph +/// +/// The graph is represented by a table of nodes. +/// Each node contains a (possibly empty) list of outgoing edges. +/// Each edge contains the target node it connects to. +template <class NodeType, class EdgeType> class DirectedGraph { +protected: + using NodeListTy = SmallVector<NodeType *, 10>; + using EdgeListTy = SmallVector<EdgeType *, 10>; +public: + using iterator = typename NodeListTy::iterator; + using const_iterator = typename NodeListTy::const_iterator; + using DGraphType = DirectedGraph<NodeType, EdgeType>; + + DirectedGraph() = default; + explicit DirectedGraph(NodeType &N) : Nodes() { addNode(N); } + DirectedGraph(const DGraphType &G) : Nodes(G.Nodes) {} + DirectedGraph(DGraphType &&RHS) : Nodes(std::move(RHS.Nodes)) {} + DGraphType &operator=(const DGraphType &G) { + Nodes = G.Nodes; + return *this; + } + DGraphType &operator=(const DGraphType &&G) { + Nodes = std::move(G.Nodes); + return *this; + } + + const_iterator begin() const { return Nodes.begin(); } + const_iterator end() const { return Nodes.end(); } + iterator begin() { return Nodes.begin(); } + iterator end() { return Nodes.end(); } + const NodeType &front() const { return *Nodes.front(); } + NodeType &front() { return *Nodes.front(); } + const NodeType &back() const { return *Nodes.back(); } + NodeType &back() { return *Nodes.back(); } + + size_t size() const { return Nodes.size(); } + + /// Find the given node \p N in the table. 
+ const_iterator findNode(const NodeType &N) const { + return llvm::find_if(Nodes, + [&N](const NodeType *Node) { return *Node == N; }); + } + iterator findNode(const NodeType &N) { + return const_cast<iterator>( + static_cast<const DGraphType &>(*this).findNode(N)); + } + + /// Add the given node \p N to the graph if it is not already present. + bool addNode(NodeType &N) { + if (findNode(N) != Nodes.end()) + return false; + Nodes.push_back(&N); + return true; + } + + /// Collect in \p EL all edges that are coming into node \p N. Return true + /// if at least one edge was found, and false otherwise. + bool findIncomingEdgesToNode(const NodeType &N, SmallVectorImpl<EdgeType*> &EL) const { + assert(EL.empty() && "Expected the list of edges to be empty."); + EdgeListTy TempList; + for (auto *Node : Nodes) { + if (*Node == N) + continue; + Node->findEdgesTo(N, TempList); + EL.insert(EL.end(), TempList.begin(), TempList.end()); + TempList.clear(); + } + return !EL.empty(); + } + + /// Remove the given node \p N from the graph. If the node has incoming or + /// outgoing edges, they are also removed. Return true if the node was found + /// and then removed, and false if the node was not found in the graph to + /// begin with. + bool removeNode(NodeType &N) { + iterator IT = findNode(N); + if (IT == Nodes.end()) + return false; + // Remove incoming edges. + EdgeListTy EL; + for (auto *Node : Nodes) { + if (*Node == N) + continue; + Node->findEdgesTo(N, EL); + for (auto *E : EL) + Node->removeEdge(*E); + EL.clear(); + } + N.clear(); + Nodes.erase(IT); + return true; + } + + /// Assuming nodes \p Src and \p Dst are already in the graph, connect node \p + /// Src to node \p Dst using the provided edge \p E. Return true if \p Src is + /// not already connected to \p Dst via \p E, and false otherwise. + bool connect(NodeType &Src, NodeType &Dst, EdgeType &E) { + assert(findNode(Src) != Nodes.end() && "Src node should be present."); + assert(findNode(Dst) != Nodes.end() && "Dst node should be present."); + assert((E.getTargetNode() == Dst) && + "Target of the given edge does not match Dst."); + return Src.addEdge(E); + } + +protected: + // The list of nodes in the graph. + NodeListTy Nodes; +}; + +} // namespace llvm + +#endif // LLVM_ADT_DIRECTEDGRAPH_H diff --git a/hpvm/llvm_patches/include/llvm/ADT/EnumeratedArray.h b/hpvm/llvm_patches/include/llvm/ADT/EnumeratedArray.h new file mode 100644 index 0000000000000000000000000000000000000000..a9528115618cf62440a40fd68d79a20706105b3a --- /dev/null +++ b/hpvm/llvm_patches/include/llvm/ADT/EnumeratedArray.h @@ -0,0 +1,48 @@ +//===- llvm/ADT/EnumeratedArray.h - Enumerated Array-------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines an array type that can be indexed using scoped enum values. 
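// Usage sketch for the DirectedGraph/DGNode/DGEdge CRTP bases above
// (illustrative, not part of this patch): concrete graphs derive their node
// and edge types from the bases and instantiate DirectedGraph with them, as
// the DDG headers later in this patch do. MyNode/MyEdge are hypothetical.
#include "llvm/ADT/DirectedGraph.h"
#include <cassert>

struct MyNode;
struct MyEdge : llvm::DGEdge<MyNode, MyEdge> {
  explicit MyEdge(MyNode &Target) : DGEdge(Target) {}
};
struct MyNode : llvm::DGNode<MyNode, MyEdge> {};
using MyGraph = llvm::DirectedGraph<MyNode, MyEdge>;

static void buildTinyGraph() {
  MyNode A, B;
  MyEdge AToB(B); // an edge stores only its target; the source is implicit
  MyGraph G;
  G.addNode(A);
  G.addNode(B);
  G.connect(A, B, AToB); // records the edge in A's outgoing-edge list
  assert(A.hasEdgeTo(B));
}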
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_ENUMERATEDARRAY_H +#define LLVM_ADT_ENUMERATEDARRAY_H + +#include <cassert> + +namespace llvm { + +template <typename ValueType, typename Enumeration, + Enumeration LargestEnum = Enumeration::Last, typename IndexType = int, + IndexType Size = 1 + static_cast<IndexType>(LargestEnum)> +class EnumeratedArray { +public: + EnumeratedArray() = default; + EnumeratedArray(ValueType V) { + for (IndexType IX = 0; IX < Size; ++IX) { + Underlying[IX] = V; + } + } + inline const ValueType &operator[](const Enumeration Index) const { + auto IX = static_cast<const IndexType>(Index); + assert(IX >= 0 && IX < Size && "Index is out of bounds."); + return Underlying[IX]; + } + inline ValueType &operator[](const Enumeration Index) { + return const_cast<ValueType &>( + static_cast<const EnumeratedArray<ValueType, Enumeration, LargestEnum, + IndexType, Size> &>(*this)[Index]); + } + +private: + ValueType Underlying[Size]; +}; + +} // namespace llvm + +#endif // LLVM_ADT_ENUMERATEDARRAY_H diff --git a/hpvm/llvm_patches/include/llvm/Analysis/DDG.h b/hpvm/llvm_patches/include/llvm/Analysis/DDG.h new file mode 100644 index 0000000000000000000000000000000000000000..165efc97a480e06b8ea5e8031e97da469d1f6418 --- /dev/null +++ b/hpvm/llvm_patches/include/llvm/Analysis/DDG.h @@ -0,0 +1,623 @@ +//===- llvm/Analysis/DDG.h --------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the Data-Dependence Graph (DDG). +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DDG_H +#define LLVM_ANALYSIS_DDG_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DirectedGraph.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/Analysis/LoopAnalysisManager.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/GraphWriter.h" +#include <sstream> + + +namespace llvm { +class DDGNode; +class DDGEdge; +using DDGNodeBase = DGNode<DDGNode, DDGEdge>; +using DDGEdgeBase = DGEdge<DDGNode, DDGEdge>; +using DDGBase = DirectedGraph<DDGNode, DDGEdge>; +class LPMUpdater; + +/// Data Dependence Graph Node +/// The graph can represent the following types of nodes: +/// 1. Single instruction node containing just one instruction. +/// 2. Multiple instruction node where two or more instructions from +/// the same basic block are merged into one node. +/// 3. Pi-block node which is a group of other DDG nodes that are part of a +/// strongly-connected component of the graph. +/// A pi-block node contains more than one single or multiple instruction +/// nodes. The root node cannot be part of a pi-block. +/// 4. Root node is a special node that connects to all components such that +/// there is always a path from it to any node in the graph. 
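// Usage sketch for the EnumeratedArray template above (illustrative, not from
// the patch): the scoped enum must expose a 'Last' enumerator, or LargestEnum
// must be passed explicitly; 'Kind' is a hypothetical enum.
#include "llvm/ADT/EnumeratedArray.h"
#include <cassert>

enum class Kind { Scalar, Vector, Memory, Last = Memory };

static void countKinds() {
  llvm::EnumeratedArray<unsigned, Kind> Counts(0); // fill all three slots with 0
  Counts[Kind::Vector] += 1;                       // indexed by the enum itself
  assert(Counts[Kind::Vector] == 1);
}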
+class DDGNode : public DDGNodeBase { +public: + using InstructionListType = SmallVectorImpl<Instruction *>; + + enum class NodeKind { + Unknown, + SingleInstruction, + MultiInstruction, + PiBlock, + Root, + }; + + DDGNode() = delete; + DDGNode(const NodeKind K) : DDGNodeBase(), Kind(K) {} + DDGNode(const DDGNode &N) : DDGNodeBase(N), Kind(N.Kind) {} + DDGNode(DDGNode &&N) : DDGNodeBase(std::move(N)), Kind(N.Kind) {} + virtual ~DDGNode() = 0; + + DDGNode &operator=(const DDGNode &N) { + DGNode::operator=(N); + Kind = N.Kind; + return *this; + } + + DDGNode &operator=(DDGNode &&N) { + DGNode::operator=(std::move(N)); + Kind = N.Kind; + return *this; + } + + /// Getter for the kind of this node. + NodeKind getKind() const { return Kind; } + + /// Collect a list of instructions, in \p IList, for which predicate \p Pred + /// evaluates to true when iterating over instructions of this node. Return + /// true if at least one instruction was collected, and false otherwise. + bool collectInstructions(llvm::function_ref<bool(Instruction *)> const &Pred, + InstructionListType &IList) const; + +protected: + /// Setter for the kind of this node. + void setKind(NodeKind K) { Kind = K; } + +private: + NodeKind Kind; +}; + +/// Subclass of DDGNode representing the root node of the graph. +/// There should only be one such node in a given graph. +class RootDDGNode : public DDGNode { +public: + RootDDGNode() : DDGNode(NodeKind::Root) {} + RootDDGNode(const RootDDGNode &N) = delete; + RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {} + ~RootDDGNode() {} + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::Root; + } + static bool classof(const RootDDGNode *N) { return true; } +}; + +/// Subclass of DDGNode representing single or multi-instruction nodes. +class SimpleDDGNode : public DDGNode { + friend class DDGBuilder; + +public: + SimpleDDGNode() = delete; + SimpleDDGNode(Instruction &I); + SimpleDDGNode(const SimpleDDGNode &N); + SimpleDDGNode(SimpleDDGNode &&N); + ~SimpleDDGNode(); + + SimpleDDGNode &operator=(const SimpleDDGNode &N) { + DDGNode::operator=(N); + InstList = N.InstList; + return *this; + } + + SimpleDDGNode &operator=(SimpleDDGNode &&N) { + DDGNode::operator=(std::move(N)); + InstList = std::move(N.InstList); + return *this; + } + + /// Get the list of instructions in this node. + const InstructionListType &getInstructions() const { + assert(!InstList.empty() && "Instruction List is empty."); + return InstList; + } + InstructionListType &getInstructions() { + return const_cast<InstructionListType &>( + static_cast<const SimpleDDGNode *>(this)->getInstructions()); + } + + /// Get the first/last instruction in the node. + Instruction *getFirstInstruction() const { return getInstructions().front(); } + Instruction *getLastInstruction() const { return getInstructions().back(); } + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::SingleInstruction || + N->getKind() == NodeKind::MultiInstruction; + } + static bool classof(const SimpleDDGNode *N) { return true; } + +private: + /// Append the list of instructions in \p Input to this node. + void appendInstructions(const InstructionListType &Input) { + setKind((InstList.size() == 0 && Input.size() == 1) + ? 
NodeKind::SingleInstruction + : NodeKind::MultiInstruction); + InstList.insert(InstList.end(), Input.begin(), Input.end()); + } + void appendInstructions(const SimpleDDGNode &Input) { + appendInstructions(Input.getInstructions()); + } + + /// List of instructions associated with a single or multi-instruction node. + SmallVector<Instruction *, 2> InstList; +}; + +/// Subclass of DDGNode representing a pi-block. A pi-block represents a group +/// of DDG nodes that are part of a strongly-connected component of the graph. +/// Replacing all the SCCs with pi-blocks results in an acyclic representation +/// of the DDG. For example if we have: +/// {a -> b}, {b -> c, d}, {c -> a} +/// the cycle a -> b -> c -> a is abstracted into a pi-block "p" as follows: +/// {p -> d} with "p" containing: {a -> b}, {b -> c}, {c -> a} +class PiBlockDDGNode : public DDGNode { +public: + using PiNodeList = SmallVector<DDGNode *, 4>; + + PiBlockDDGNode() = delete; + PiBlockDDGNode(const PiNodeList &List); + PiBlockDDGNode(const PiBlockDDGNode &N); + PiBlockDDGNode(PiBlockDDGNode &&N); + ~PiBlockDDGNode(); + + PiBlockDDGNode &operator=(const PiBlockDDGNode &N) { + DDGNode::operator=(N); + NodeList = N.NodeList; + return *this; + } + + PiBlockDDGNode &operator=(PiBlockDDGNode &&N) { + DDGNode::operator=(std::move(N)); + NodeList = std::move(N.NodeList); + return *this; + } + + /// Get the list of nodes in this pi-block. + const PiNodeList &getNodes() const { + assert(!NodeList.empty() && "Node list is empty."); + return NodeList; + } + PiNodeList &getNodes() { + return const_cast<PiNodeList &>( + static_cast<const PiBlockDDGNode *>(this)->getNodes()); + } + + /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc. + static bool classof(const DDGNode *N) { + return N->getKind() == NodeKind::PiBlock; + } + +private: + /// List of nodes in this pi-block. + PiNodeList NodeList; +}; + +/// Data Dependency Graph Edge. +/// An edge in the DDG can represent a def-use relationship or +/// a memory dependence based on the result of DependenceAnalysis. +/// A rooted edge connects the root node to one of the components +/// of the graph. +class DDGEdge : public DDGEdgeBase { +public: + /// The kind of edge in the DDG + enum class EdgeKind { + Unknown, + RegisterDefUse, + MemoryDependence, + Rooted, + Last = Rooted // Must be equal to the largest enum value. + }; + + explicit DDGEdge(DDGNode &N) = delete; + DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {} + DDGEdge(const DDGEdge &E) : DDGEdgeBase(E), Kind(E.getKind()) {} + DDGEdge(DDGEdge &&E) : DDGEdgeBase(std::move(E)), Kind(E.Kind) {} + DDGEdge &operator=(const DDGEdge &E) { + DDGEdgeBase::operator=(E); + Kind = E.Kind; + return *this; + } + + DDGEdge &operator=(DDGEdge &&E) { + DDGEdgeBase::operator=(std::move(E)); + Kind = E.Kind; + return *this; + } + + /// Get the edge kind + EdgeKind getKind() const { return Kind; }; + + /// Return true if this is a def-use edge, and false otherwise. + bool isDefUse() const { return Kind == EdgeKind::RegisterDefUse; } + + /// Return true if this is a memory dependence edge, and false otherwise. + bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; } + + /// Return true if this is an edge stemming from the root node, and false + /// otherwise. + bool isRooted() const { return Kind == EdgeKind::Rooted; } + +private: + EdgeKind Kind; +}; + +/// Encapsulate some common data and functionality needed for different +/// variations of data dependence graphs. 
+template <typename NodeType> class DependenceGraphInfo { +public: + using DependenceList = SmallVector<std::unique_ptr<Dependence>, 1>; + + DependenceGraphInfo() = delete; + DependenceGraphInfo(const DependenceGraphInfo &G) = delete; + DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo) + : Name(N), DI(DepInfo), Root(nullptr) {} + DependenceGraphInfo(DependenceGraphInfo &&G) + : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {} + virtual ~DependenceGraphInfo() {} + + /// Return the label that is used to name this graph. + const StringRef getName() const { return Name; } + + /// Return the root node of the graph. + NodeType &getRoot() const { + assert(Root && "Root node is not available yet. Graph construction may " + "still be in progress\n"); + return *Root; + } + + /// Collect all the data dependency infos coming from any pair of memory + /// accesses from \p Src to \p Dst, and store them into \p Deps. Return true + /// if a dependence exists, and false otherwise. + bool getDependencies(const NodeType &Src, const NodeType &Dst, + DependenceList &Deps) const; + +protected: + // Name of the graph. + std::string Name; + + // Store a copy of DependenceInfo in the graph, so that individual memory + // dependencies don't need to be stored. Instead when the dependence is + // queried it is recomputed using @DI. + const DependenceInfo DI; + + // A special node in the graph that has an edge to every connected component of + // the graph, to ensure all nodes are reachable in a graph walk. + NodeType *Root = nullptr; +}; + +//===--------------------------------------------------------------------===// +// DependenceGraphInfo Implementation +//===--------------------------------------------------------------------===// + +template <typename NodeType> +bool DependenceGraphInfo<NodeType>::getDependencies( + const NodeType &Src, const NodeType &Dst, DependenceList &Deps) const { + assert(Deps.empty() && "Expected empty output list at the start."); + + // List of memory access instructions from src and dst nodes. + SmallVector<Instruction *, 8> SrcIList, DstIList; + auto isMemoryAccess = [](const Instruction *I) { + return I->mayReadOrWriteMemory(); + }; + Src.collectInstructions(isMemoryAccess, SrcIList); + Dst.collectInstructions(isMemoryAccess, DstIList); + + for (auto *SrcI : SrcIList) + for (auto *DstI : DstIList) + if (auto Dep = + const_cast<DependenceInfo *>(&DI)->depends(SrcI, DstI, true)) + Deps.push_back(std::move(Dep)); + + return !Deps.empty(); +} + +using DDGInfo = DependenceGraphInfo<DDGNode>; + +/// Data Dependency Graph +class DataDependenceGraph : public DDGBase, public DDGInfo { + friend AbstractDependenceGraphBuilder<DataDependenceGraph>; + friend class DDGBuilder; + +public: + using NodeType = DDGNode; + using EdgeType = DDGEdge; + + DataDependenceGraph() = delete; + DataDependenceGraph(const DataDependenceGraph &G) = delete; + DataDependenceGraph(DataDependenceGraph &&G) + : DDGBase(std::move(G)), DDGInfo(std::move(G)) {} + DataDependenceGraph(Function &F, DependenceInfo &DI); + DataDependenceGraph(Loop &L, LoopInfo &LI, DependenceInfo &DI); + ~DataDependenceGraph(); + + /// If node \p N belongs to a pi-block return a pointer to the pi-block, + /// otherwise return null. + const PiBlockDDGNode *getPiBlock(const NodeType &N) const; + +protected: + /// Add node \p N to the graph, if it's not added yet, and keep track of the + /// root node as well as pi-blocks and their members. Return true if node is + /// successfully added. 
+ bool addNode(NodeType &N); + +private: + using PiBlockMapType = DenseMap<const NodeType *, const PiBlockDDGNode *>; + + /// Mapping from graph nodes to their containing pi-blocks. If a node is not + /// part of a pi-block, it will not appear in this map. + PiBlockMapType PiBlockMap; +}; + +/// Concrete implementation of a pure data dependence graph builder. This class +/// provides custom implementation for the pure-virtual functions used in the +/// generic dependence graph build algorithm. +/// +/// For information about time complexity of the build algorithm see the +/// comments near the declaration of AbstractDependenceGraphBuilder. +class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> { +public: + DDGBuilder(DataDependenceGraph &G, DependenceInfo &D, + const BasicBlockListType &BBs) + : AbstractDependenceGraphBuilder(G, D, BBs) {} + DDGNode &createRootNode() final override { + auto *RN = new RootDDGNode(); + assert(RN && "Failed to allocate memory for DDG root node."); + Graph.addNode(*RN); + return *RN; + } + DDGNode &createFineGrainedNode(Instruction &I) final override { + auto *SN = new SimpleDDGNode(I); + assert(SN && "Failed to allocate memory for simple DDG node."); + Graph.addNode(*SN); + return *SN; + } + DDGNode &createPiBlock(const NodeListType &L) final override { + auto *Pi = new PiBlockDDGNode(L); + assert(Pi && "Failed to allocate memory for pi-block node."); + Graph.addNode(*Pi); + return *Pi; + } + DDGEdge &createDefUseEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::RegisterDefUse); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createMemoryEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::MemoryDependence); + assert(E && "Failed to allocate memory for edge"); + Graph.connect(Src, Tgt, *E); + return *E; + } + DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override { + auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted); + assert(E && "Failed to allocate memory for edge"); + assert(isa<RootDDGNode>(Src) && "Expected root node"); + Graph.connect(Src, Tgt, *E); + return *E; + } + + const NodeListType &getNodesInPiBlock(const DDGNode &N) final override { + auto *PiNode = dyn_cast<const PiBlockDDGNode>(&N); + assert(PiNode && "Expected a pi-block node."); + return PiNode->getNodes(); + } + + /// Return true if the two nodes \p Src and \p Tgt are both simple nodes and + /// the consecutive instructions after merging belong to the same basic block. + bool areNodesMergeable(const DDGNode &Src, + const DDGNode &Tgt) const final override; + void mergeNodes(DDGNode &Src, DDGNode &Tgt) final override; + bool shouldSimplify() const final override; + bool shouldCreatePiBlocks() const final override; +}; + +raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N); +raw_ostream &operator<<(raw_ostream &OS, const DDGNode::NodeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge &E); +raw_ostream &operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K); +raw_ostream &operator<<(raw_ostream &OS, const DataDependenceGraph &G); + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// Analysis pass that builds the DDG for a loop.
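// Sketch of client code for the DataDependenceGraph/DDGBuilder pair above
// (assumes the caller already has DependenceInfo): constructing the graph
// runs DDGBuilder internally, and getPiBlock exposes the SCC grouping.
#include "llvm/Analysis/DDG.h"
#include "llvm/Support/raw_ostream.h"

static void inspectFunctionDDG(llvm::Function &F, llvm::DependenceInfo &DI) {
  llvm::DataDependenceGraph G(F, DI);
  for (llvm::DDGNode *N : G)
    if (const llvm::PiBlockDDGNode *PB = G.getPiBlock(*N))
      llvm::errs() << "node in a pi-block of " << PB->getNodes().size()
                   << " members\n";
}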
+class DDGAnalysis : public AnalysisInfoMixin<DDGAnalysis> { +public: + using Result = std::unique_ptr<DataDependenceGraph>; + Result run(Loop &L, LoopAnalysisManager &AM, LoopStandardAnalysisResults &AR); + +private: + friend AnalysisInfoMixin<DDGAnalysis>; + static AnalysisKey Key; +}; + +/// Textual printer pass for the DDG of a loop. +class DDGAnalysisPrinterPass : public PassInfoMixin<DDGAnalysisPrinterPass> { +public: + explicit DDGAnalysisPrinterPass(raw_ostream &OS) : OS(OS) {} + PreservedAnalyses run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, LPMUpdater &U); + +private: + raw_ostream &OS; +}; + + +//===--------------------------------------------------------------------===// +// GraphTraits specializations for the DDG +//===--------------------------------------------------------------------===// + + + +/// non-const versions of the graph trait specializations for DDG +template <> struct GraphTraits<DDGNode *> { + using NodeRef = DDGNode *; + + static DDGNode *DDGGetTargetNode(DGEdge<DDGNode, DDGEdge> *P) { + return &P->getTargetNode(); + } + + // Provide a mapped iterator so that the GraphTrait-based implementations can + // find the target nodes without having to explicitly go through the edges. + using ChildIteratorType = + mapped_iterator<DDGNode::iterator, decltype(&DDGGetTargetNode)>; + using ChildEdgeIteratorType = DDGNode::iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &DDGGetTargetNode); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &DDGGetTargetNode); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } +}; + +template <> +struct GraphTraits<DataDependenceGraph *> : public GraphTraits<DDGNode *> { + using nodes_iterator = DataDependenceGraph::iterator; + static NodeRef getEntryNode(DataDependenceGraph *DG) { + return &DG->getRoot(); + } + static nodes_iterator nodes_begin(DataDependenceGraph *DG) { + return DG->begin(); + } + static nodes_iterator nodes_end(DataDependenceGraph *DG) { return DG->end(); } +}; + +template <> struct DOTGraphTraits<DataDependenceGraph*> : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple = false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(DataDependenceGraph *Graph) { return "DDG";} + + static std::string getGraphProperties(DataDependenceGraph *Graph) { + return "\tcompound=true;"; + } + + std::string getNodeLabel(DDGNode *Node, DataDependenceGraph *Graph) { + std::string Str; + raw_string_ostream ss(Str); + ss << Node << " : "; + switch(Node->getKind()) { + case DDGNode::NodeKind::Root : + ss << "Root Node\n"; + break; + case DDGNode::NodeKind::SingleInstruction : + case DDGNode::NodeKind::MultiInstruction : + { + ss << "Simple Node\n"; + SimpleDDGNode *SN = dyn_cast<SimpleDDGNode>(Node); + DDGNode::InstructionListType &instructions = SN->getInstructions(); + for (auto i : instructions) { + ss << *i << "\n"; + } + break; + } + case DDGNode::NodeKind::PiBlock : + { + ss << "Pi Block\n"; + PiBlockDDGNode *PBN = dyn_cast<PiBlockDDGNode>(Node); + PiBlockDDGNode::PiNodeList &nodes = PBN->getNodes(); + for (auto n : nodes) { + ss << n << "\n"; + } + break; + } + default : + ss << "Unknown\n"; + } + return ss.str(); + } + + std::string getEdgeLabel(DDGEdge *Edge, DataDependenceGraph *Graph) { + std::string Str;
+ raw_string_ostream ss(Str); + switch(Edge->getKind()) { + case DDGEdge::EdgeKind::RegisterDefUse : + ss << "def-use"; + break; + case DDGEdge::EdgeKind::MemoryDependence : + ss << "memory"; + break; + default : + ss << ""; + } + + return ss.str(); + } +}; + +/// const versions of the graph trait specializations for DDG +template <> struct GraphTraits<const DDGNode *> { + using NodeRef = const DDGNode *; + + static const DDGNode *DDGGetTargetNode(const DGEdge<DDGNode, DDGEdge> *P) { + return &P->getTargetNode(); + } + + // Provide a mapped iterator so that the GraphTrait-based implementations can + // find the target nodes without having to explicitly go through the edges. + using ChildIteratorType = + mapped_iterator<DDGNode::const_iterator, decltype(&DDGGetTargetNode)>; + using ChildEdgeIteratorType = DDGNode::const_iterator; + + static NodeRef getEntryNode(NodeRef N) { return N; } + static ChildIteratorType child_begin(NodeRef N) { + return ChildIteratorType(N->begin(), &DDGGetTargetNode); + } + static ChildIteratorType child_end(NodeRef N) { + return ChildIteratorType(N->end(), &DDGGetTargetNode); + } + + static ChildEdgeIteratorType child_edge_begin(NodeRef N) { + return N->begin(); + } + static ChildEdgeIteratorType child_edge_end(NodeRef N) { return N->end(); } +}; + +template <> +struct GraphTraits<const DataDependenceGraph *> + : public GraphTraits<const DDGNode *> { + using nodes_iterator = DataDependenceGraph::const_iterator; + static NodeRef getEntryNode(const DataDependenceGraph *DG) { + return &DG->getRoot(); + } + static nodes_iterator nodes_begin(const DataDependenceGraph *DG) { + return DG->begin(); + } + static nodes_iterator nodes_end(const DataDependenceGraph *DG) { + return DG->end(); + } +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DDG_H diff --git a/hpvm/llvm_patches/include/llvm/Analysis/DependenceGraphBuilder.h b/hpvm/llvm_patches/include/llvm/Analysis/DependenceGraphBuilder.h new file mode 100644 index 0000000000000000000000000000000000000000..6f4e1be94164f797e4fc053dac754f8e28839e9e --- /dev/null +++ b/hpvm/llvm_patches/include/llvm/Analysis/DependenceGraphBuilder.h @@ -0,0 +1,203 @@ +//===- llvm/Analysis/DependenceGraphBuilder.h -------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines a builder interface that can be used to populate dependence +// graphs such as DDG and PDG. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H +#define LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/EquivalenceClasses.h" +#include "llvm/ADT/SmallVector.h" + +namespace llvm { + +class BasicBlock; +class DependenceInfo; +class Instruction; + +/// This abstract builder class defines a set of high-level steps for creating +/// DDG-like graphs. The client code is expected to inherit from this class and +/// define concrete implementation for each of the pure virtual functions used +/// in the high-level algorithm.
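// With the GraphTraits/DOTGraphTraits specializations above in place, the
// generic GraphWriter machinery can render a DDG to Graphviz; a minimal
// sketch, with the graph and output stream assumed to come from the caller:
#include "llvm/Analysis/DDG.h"
#include "llvm/Support/GraphWriter.h"

static void writeDDGAsDot(llvm::DataDependenceGraph &G, llvm::raw_ostream &OS) {
  llvm::WriteGraph(OS, &G, /*ShortNames=*/false, "DDG");
}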
+template <class GraphType> class AbstractDependenceGraphBuilder { +protected: + using BasicBlockListType = SmallVectorImpl<BasicBlock *>; + +private: + using NodeType = typename GraphType::NodeType; + using EdgeType = typename GraphType::EdgeType; + +public: + using ClassesType = EquivalenceClasses<BasicBlock *>; + using NodeListType = SmallVector<NodeType *, 4>; + + AbstractDependenceGraphBuilder(GraphType &G, DependenceInfo &D, + const BasicBlockListType &BBs) + : Graph(G), DI(D), BBList(BBs) {} + virtual ~AbstractDependenceGraphBuilder() {} + + /// The main entry to the graph construction algorithm. It starts by + /// creating nodes in increasing order of granularity and then + /// adds def-use and memory edges. As one of the final stages, it + /// also creates pi-block nodes to facilitate codegen in transformations + /// that use dependence graphs. + /// + /// The algorithmic complexity of this implementation is O(V^2 * I^2), where V + /// is the number of vertices (nodes) and I is the number of instructions in + /// each node. The total number of instructions, N, is equal to V * I, + /// therefore the worst-case time complexity is O(N^2). The average time + /// complexity is O((N^2)/2). + void populate() { + computeInstructionOrdinals(); + createFineGrainedNodes(); + createDefUseEdges(); + createMemoryDependencyEdges(); + simplify(); + createAndConnectRootNode(); + createPiBlocks(); + sortNodesTopologically(); + } + + /// Compute ordinal numbers for each instruction and store them in a map for + /// future look up. These ordinals are used to compute node ordinals which are + /// in turn used to order nodes that are part of a cycle. + /// Instruction ordinals are assigned based on lexical program order. + void computeInstructionOrdinals(); + + /// Create fine grained nodes. These are typically atomic nodes that + /// consist of a single instruction. + void createFineGrainedNodes(); + + /// Analyze the def-use chains and create edges from the nodes containing + /// definitions to the nodes containing the uses. + void createDefUseEdges(); + + /// Analyze data dependencies that exist between memory loads or stores, + /// in the graph nodes and create edges between them. + void createMemoryDependencyEdges(); + + /// Create a root node and add edges such that each node in the graph is + /// reachable from the root. + void createAndConnectRootNode(); + + /// Apply graph abstraction to groups of nodes that belong to a strongly + /// connected component of the graph to create larger compound nodes + /// called pi-blocks. The purpose of this abstraction is to isolate sets of + /// program elements that need to stay together during codegen and turn + /// the dependence graph into an acyclic graph. + void createPiBlocks(); + + /// Go through all the nodes in the graph and collapse any two nodes + /// 'a' and 'b' if all of the following are true: + /// - the only edge from 'a' is a def-use edge to 'b' and + /// - the only edge to 'b' is a def-use edge from 'a' and + /// - there is no cyclic edge from 'b' to 'a' and + /// - all instructions in 'a' and 'b' belong to the same basic block and + /// - both 'a' and 'b' are simple (single or multi instruction) nodes. + void simplify(); + + /// Topologically sort the graph nodes. + void sortNodesTopologically(); + +protected: + /// Create the root node of the graph. + virtual NodeType &createRootNode() = 0; + + /// Create an atomic node in the graph given a single instruction.
+ virtual NodeType &createFineGrainedNode(Instruction &I) = 0; + + /// Create a pi-block node in the graph representing a group of nodes in an + /// SCC of the graph. + virtual NodeType &createPiBlock(const NodeListType &L) = 0; + + /// Create a def-use edge going from \p Src to \p Tgt. + virtual EdgeType &createDefUseEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Create a memory dependence edge going from \p Src to \p Tgt. + virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Create a rooted edge going from \p Src to \p Tgt. + virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0; + + /// Given a pi-block node, return a vector of all the nodes contained within + /// it. + virtual const NodeListType &getNodesInPiBlock(const NodeType &N) = 0; + + /// Deallocate memory of edge \p E. + virtual void destroyEdge(EdgeType &E) { delete &E; } + + /// Deallocate memory of node \p N. + virtual void destroyNode(NodeType &N) { delete &N; } + + /// Return true if creation of pi-blocks is supported and desired, + /// and false otherwise. + virtual bool shouldCreatePiBlocks() const { return true; } + + /// Return true if the graph simplification step is requested, and false + /// otherwise. + virtual bool shouldSimplify() const { return true; } + + /// Return true if it's safe to merge the two nodes. + virtual bool areNodesMergeable(const NodeType &A, + const NodeType &B) const = 0; + + /// Append the content of node \p B into node \p A and remove \p B and + /// the edge between \p A and \p B from the graph. + virtual void mergeNodes(NodeType &A, NodeType &B) = 0; + + /// Given an instruction \p I return its associated ordinal number. + size_t getOrdinal(Instruction &I) { + assert(InstOrdinalMap.find(&I) != InstOrdinalMap.end() && + "No ordinal computed for this instruction."); + return InstOrdinalMap[&I]; + } + + /// Given a node \p N return its associated ordinal number. + size_t getOrdinal(NodeType &N) { + assert(NodeOrdinalMap.find(&N) != NodeOrdinalMap.end() && + "No ordinal computed for this node."); + return NodeOrdinalMap[&N]; + } + + /// Map types to map instructions to nodes used when populating the graph. + using InstToNodeMap = DenseMap<Instruction *, NodeType *>; + + /// Map types to map instructions/nodes to an ordinal number. + using InstToOrdinalMap = DenseMap<Instruction *, size_t>; + using NodeToOrdinalMap = DenseMap<NodeType *, size_t>; + + /// Reference to the graph that gets built by a concrete implementation of + /// this builder. + GraphType &Graph; + + /// Dependence information used to create memory dependence edges in the + /// graph. + DependenceInfo &DI; + + /// The list of basic blocks to consider when building the graph. + const BasicBlockListType &BBList; + + /// A mapping from instructions to the corresponding nodes in the graph. + InstToNodeMap IMap; + + /// A mapping from each instruction to an ordinal number. This map is used to + /// populate the \p NodeOrdinalMap. + InstToOrdinalMap InstOrdinalMap; + + /// A mapping from nodes to an ordinal number. This map is used to sort nodes + /// in a pi-block based on program order.
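// Sketch of how a concrete graph type drives this builder (this mirrors what
// the DataDependenceGraph constructors in DDG.h do internally); the wrapper
// function is illustrative only, not part of the patch.
#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/LoopInfo.h"
#include <memory>

static std::unique_ptr<llvm::DataDependenceGraph>
buildLoopDDG(llvm::Loop &L, llvm::LoopInfo &LI, llvm::DependenceInfo &DI) {
  // The constructor collects the loop's blocks, then runs the populate()
  // pipeline: ordinals, fine-grained nodes, def-use and memory edges,
  // simplification, root node, pi-blocks, topological sort.
  return std::make_unique<llvm::DataDependenceGraph>(L, LI, DI);
}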
+ NodeToOrdinalMap NodeOrdinalMap; +}; + +} // namespace llvm + +#endif // LLVM_ANALYSIS_DEPENDENCE_GRAPH_BUILDER_H diff --git a/hpvm/llvm_patches/include/Bitcode/LLVMBitCodes.h b/hpvm/llvm_patches/include/llvm/Bitcode/LLVMBitCodes.h similarity index 99% rename from hpvm/llvm_patches/include/Bitcode/LLVMBitCodes.h rename to hpvm/llvm_patches/include/llvm/Bitcode/LLVMBitCodes.h index 5e59ba96f2331663289a040326ebd4e453bd1e86..90ba564bd985079a8fa6589ac01b16cd8824e8c8 100644 --- a/hpvm/llvm_patches/include/Bitcode/LLVMBitCodes.h +++ b/hpvm/llvm_patches/include/llvm/Bitcode/LLVMBitCodes.h @@ -633,6 +633,11 @@ enum AttributeKindCodes { ATTR_KIND_IN = 65, ATTR_KIND_OUT = 66, ATTR_KIND_INOUT = 67, + ATTR_KIND_PRIV = 68, + ATTR_KIND_BUFFERIN = 69, + ATTR_KIND_BUFFEROUT = 70, + ATTR_KIND_CHANNEL = 71, + }; enum ComdatSelectionKindCodes { diff --git a/hpvm/llvm_patches/include/llvm/IR/Attributes.h b/hpvm/llvm_patches/include/llvm/IR/Attributes.h new file mode 100644 index 0000000000000000000000000000000000000000..63767f66922f9f67d97caff8e0ab0f49c02ad420 --- /dev/null +++ b/hpvm/llvm_patches/include/llvm/IR/Attributes.h @@ -0,0 +1,895 @@ +//===- llvm/Attributes.h - Container for Attributes -------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +/// \file +/// This file contains the simple types necessary to represent the +/// attributes associated with functions and their calls. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_ATTRIBUTES_H +#define LLVM_IR_ATTRIBUTES_H + +#include "llvm-c/Types.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Optional.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Config/llvm-config.h" +#include "llvm/Support/PointerLikeTypeTraits.h" +#include <bitset> +#include <cassert> +#include <cstdint> +#include <map> +#include <string> +#include <utility> + +namespace llvm { + +class AttrBuilder; +class AttributeImpl; +class AttributeListImpl; +class AttributeSetNode; +template<typename T> struct DenseMapInfo; +class Function; +class LLVMContext; +class Type; + +//===----------------------------------------------------------------------===// +/// \class +/// Functions, function parameters, and return types can have attributes +/// to indicate how they should be treated by optimizations and code +/// generation. This class represents one of those attributes. It's light-weight +/// and should be passed around by-value. +class Attribute { +public: + /// This enumeration lists the attributes that can be associated with + /// parameters, function results, or the function itself. + /// + /// Note: The `uwtable' attribute is about the ABI or the user mandating an + /// entry in the unwind table. The `nounwind' attribute is about an exception + /// passing by the function. + /// + /// In a theoretical system that uses tables for profiling and SjLj for + /// exceptions, they would be fully independent. In a normal system that uses + /// tables for both, the semantics are: + /// + /// nil = Needs an entry because an exception might pass by. + /// nounwind = No need for an entry + /// uwtable = Needs an entry because the ABI says so and because + /// an exception might pass by. 
+ /// uwtable + nounwind = Needs an entry because the ABI says so. + + enum AttrKind { + // IR-Level Attributes + None, ///< No attributes have been set + #define GET_ATTR_ENUM + #include "llvm/IR/Attributes.inc" + EndAttrKinds ///< Sentinel value useful for loops + }; + +private: + AttributeImpl *pImpl = nullptr; + + Attribute(AttributeImpl *A) : pImpl(A) {} + +public: + Attribute() = default; + + //===--------------------------------------------------------------------===// + // Attribute Construction + //===--------------------------------------------------------------------===// + + /// Return a uniquified Attribute object. + static Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val = 0); + static Attribute get(LLVMContext &Context, StringRef Kind, + StringRef Val = StringRef()); + static Attribute get(LLVMContext &Context, AttrKind Kind, Type *Ty); + + /// Return a uniquified Attribute object that has the specific + /// alignment set. + static Attribute getWithAlignment(LLVMContext &Context, uint64_t Align); + static Attribute getWithStackAlignment(LLVMContext &Context, uint64_t Align); + static Attribute getWithDereferenceableBytes(LLVMContext &Context, + uint64_t Bytes); + static Attribute getWithDereferenceableOrNullBytes(LLVMContext &Context, + uint64_t Bytes); + static Attribute getWithAllocSizeArgs(LLVMContext &Context, + unsigned ElemSizeArg, + const Optional<unsigned> &NumElemsArg); + static Attribute getWithByValType(LLVMContext &Context, Type *Ty); + + //===--------------------------------------------------------------------===// + // Attribute Accessors + //===--------------------------------------------------------------------===// + + /// Return true if the attribute is an Attribute::AttrKind type. + bool isEnumAttribute() const; + + /// Return true if the attribute is an integer attribute. + bool isIntAttribute() const; + + /// Return true if the attribute is a string (target-dependent) + /// attribute. + bool isStringAttribute() const; + + /// Return true if the attribute is a type attribute. + bool isTypeAttribute() const; + + /// Return true if the attribute is present. + bool hasAttribute(AttrKind Val) const; + + /// Return true if the target-dependent attribute is present. + bool hasAttribute(StringRef Val) const; + + /// Return the attribute's kind as an enum (Attribute::AttrKind). This + /// requires the attribute to be an enum or integer attribute. + Attribute::AttrKind getKindAsEnum() const; + + /// Return the attribute's value as an integer. This requires that the + /// attribute be an integer attribute. + uint64_t getValueAsInt() const; + + /// Return the attribute's kind as a string. This requires the + /// attribute to be a string attribute. + StringRef getKindAsString() const; + + /// Return the attribute's value as a string. This requires the + /// attribute to be a string attribute. + StringRef getValueAsString() const; + + /// Return the attribute's value as a Type. This requires the attribute to be + /// a type attribute. + Type *getValueAsType() const; + + /// Returns the alignment field of an attribute as a byte alignment + /// value. + unsigned getAlignment() const; + + /// \brief Returns the BuffSize field of an attribute. + unsigned getBuffSize() const; + + /// \brief Returns the ChannelDepth field of an attribute. + unsigned getChannelDepth() const; + + /// Returns the stack alignment field of an attribute as a byte + /// alignment value.
+ unsigned getStackAlignment() const; + + /// Returns the number of dereferenceable bytes from the + /// dereferenceable attribute. + uint64_t getDereferenceableBytes() const; + + /// Returns the number of dereferenceable_or_null bytes from the + /// dereferenceable_or_null attribute. + uint64_t getDereferenceableOrNullBytes() const; + + /// Returns the argument numbers for the allocsize attribute (or pair(0, 0) + /// if not known). + std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const; + + /// The Attribute is converted to a string of equivalent mnemonic. This + /// is, presumably, for writing out the mnemonics for the assembly writer. + std::string getAsString(bool InAttrGrp = false) const; + + /// Equality and non-equality operators. + bool operator==(Attribute A) const { return pImpl == A.pImpl; } + bool operator!=(Attribute A) const { return pImpl != A.pImpl; } + + /// Less-than operator. Useful for sorting the attributes list. + bool operator<(Attribute A) const; + + void Profile(FoldingSetNodeID &ID) const { + ID.AddPointer(pImpl); + } + + /// Return a raw pointer that uniquely identifies this attribute. + void *getRawPointer() const { + return pImpl; + } + + /// Get an attribute from a raw pointer created by getRawPointer. + static Attribute fromRawPointer(void *RawPtr) { + return Attribute(reinterpret_cast<AttributeImpl*>(RawPtr)); + } +}; + +// Specialized opaque value conversions. +inline LLVMAttributeRef wrap(Attribute Attr) { + return reinterpret_cast<LLVMAttributeRef>(Attr.getRawPointer()); +} + +// Specialized opaque value conversions. +inline Attribute unwrap(LLVMAttributeRef Attr) { + return Attribute::fromRawPointer(Attr); +} + +//===----------------------------------------------------------------------===// +/// \class +/// This class holds the attributes for a particular argument, parameter, +/// function, or return value. It is an immutable value type that is cheap to +/// copy. Adding and removing enum attributes is intended to be fast, but adding +/// and removing string or integer attributes involves a FoldingSet lookup. +class AttributeSet { + friend AttributeListImpl; + template <typename Ty> friend struct DenseMapInfo; + + // TODO: Extract AvailableAttrs from AttributeSetNode and store them here. + // This will allow an efficient implementation of addAttribute and + // removeAttribute for enum attrs. + + /// Private implementation pointer. + AttributeSetNode *SetNode = nullptr; + +private: + explicit AttributeSet(AttributeSetNode *ASN) : SetNode(ASN) {} + +public: + /// AttributeSet is a trivially copyable value type. + AttributeSet() = default; + AttributeSet(const AttributeSet &) = default; + ~AttributeSet() = default; + + static AttributeSet get(LLVMContext &C, const AttrBuilder &B); + static AttributeSet get(LLVMContext &C, ArrayRef<Attribute> Attrs); + + bool operator==(const AttributeSet &O) const { return SetNode == O.SetNode; } + bool operator!=(const AttributeSet &O) const { return !(*this == O); } + + /// Add an argument attribute. Returns a new set because attribute sets are + /// immutable. + LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C, + Attribute::AttrKind Kind) const; + + /// Add a target-dependent attribute. Returns a new set because attribute sets + /// are immutable. + LLVM_NODISCARD AttributeSet addAttribute(LLVMContext &C, StringRef Kind, + StringRef Value = StringRef()) const; + + /// Add attributes to the attribute set. Returns a new set because attribute + /// sets are immutable. 
+ LLVM_NODISCARD AttributeSet addAttributes(LLVMContext &C, + AttributeSet AS) const; + + /// Remove the specified attribute from this set. Returns a new set because + /// attribute sets are immutable. + LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C, + Attribute::AttrKind Kind) const; + + /// Remove the specified attribute from this set. Returns a new set because + /// attribute sets are immutable. + LLVM_NODISCARD AttributeSet removeAttribute(LLVMContext &C, + StringRef Kind) const; + + /// Remove the specified attributes from this set. Returns a new set because + /// attribute sets are immutable. + LLVM_NODISCARD AttributeSet + removeAttributes(LLVMContext &C, const AttrBuilder &AttrsToRemove) const; + + /// Return the number of attributes in this set. + unsigned getNumAttributes() const; + + /// Return true if attributes exist in this set. + bool hasAttributes() const { return SetNode != nullptr; } + + /// Return true if the attribute exists in this set. + bool hasAttribute(Attribute::AttrKind Kind) const; + + /// Return true if the attribute exists in this set. + bool hasAttribute(StringRef Kind) const; + + /// Return the attribute object. + Attribute getAttribute(Attribute::AttrKind Kind) const; + + /// Return the target-dependent attribute object. + Attribute getAttribute(StringRef Kind) const; + + unsigned getAlignment() const; + unsigned getStackAlignment() const; + uint64_t getDereferenceableBytes() const; + uint64_t getDereferenceableOrNullBytes() const; + Type *getByValType() const; + std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const; + std::string getAsString(bool InAttrGrp = false) const; + + using iterator = const Attribute *; + + iterator begin() const; + iterator end() const; +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + void dump() const; +#endif +}; + +//===----------------------------------------------------------------------===// +/// \class +/// Provide DenseMapInfo for AttributeSet. +template <> struct DenseMapInfo<AttributeSet> { + static AttributeSet getEmptyKey() { + auto Val = static_cast<uintptr_t>(-1); + Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable; + return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val)); + } + + static AttributeSet getTombstoneKey() { + auto Val = static_cast<uintptr_t>(-2); + Val <<= PointerLikeTypeTraits<void *>::NumLowBitsAvailable; + return AttributeSet(reinterpret_cast<AttributeSetNode *>(Val)); + } + + static unsigned getHashValue(AttributeSet AS) { + return (unsigned((uintptr_t)AS.SetNode) >> 4) ^ + (unsigned((uintptr_t)AS.SetNode) >> 9); + } + + static bool isEqual(AttributeSet LHS, AttributeSet RHS) { return LHS == RHS; } +}; + +//===----------------------------------------------------------------------===// +/// \class +/// This class holds the attributes for a function, its return value, and +/// its parameters. You access the attributes for each of them via an index into +/// the AttributeList object. The function attributes are at index +/// `AttributeList::FunctionIndex', the return value is at index +/// `AttributeList::ReturnIndex', and the attributes for the parameters start at +/// index `AttributeList::FirstArgIndex'.
+class AttributeList { +public: + enum AttrIndex : unsigned { + ReturnIndex = 0U, + FunctionIndex = ~0U, + FirstArgIndex = 1, + }; + +private: + friend class AttrBuilder; + friend class AttributeListImpl; + friend class AttributeSet; + friend class AttributeSetNode; + template <typename Ty> friend struct DenseMapInfo; + + /// The attributes that we are managing. This can be null to represent + /// the empty attributes list. + AttributeListImpl *pImpl = nullptr; + +public: + /// Create an AttributeList with the specified parameters in it. + static AttributeList get(LLVMContext &C, + ArrayRef<std::pair<unsigned, Attribute>> Attrs); + static AttributeList get(LLVMContext &C, + ArrayRef<std::pair<unsigned, AttributeSet>> Attrs); + + /// Create an AttributeList from attribute sets for a function, its + /// return value, and all of its arguments. + static AttributeList get(LLVMContext &C, AttributeSet FnAttrs, + AttributeSet RetAttrs, + ArrayRef<AttributeSet> ArgAttrs); + +private: + explicit AttributeList(AttributeListImpl *LI) : pImpl(LI) {} + + static AttributeList getImpl(LLVMContext &C, ArrayRef<AttributeSet> AttrSets); + +public: + AttributeList() = default; + + //===--------------------------------------------------------------------===// + // AttributeList Construction and Mutation + //===--------------------------------------------------------------------===// + + /// Return an AttributeList with the specified parameters in it. + static AttributeList get(LLVMContext &C, ArrayRef<AttributeList> Attrs); + static AttributeList get(LLVMContext &C, unsigned Index, + ArrayRef<Attribute::AttrKind> Kinds); + static AttributeList get(LLVMContext &C, unsigned Index, + ArrayRef<StringRef> Kind); + static AttributeList get(LLVMContext &C, unsigned Index, + const AttrBuilder &B); + + /// Add an attribute to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index, + Attribute::AttrKind Kind) const; + + /// Add an attribute to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList + addAttribute(LLVMContext &C, unsigned Index, StringRef Kind, + StringRef Value = StringRef()) const; + + /// Add an attribute to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList addAttribute(LLVMContext &C, unsigned Index, + Attribute A) const; + + /// Add attributes to the attribute set at the given index. + /// Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList addAttributes(LLVMContext &C, unsigned Index, + const AttrBuilder &B) const; + + /// Add an argument attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addParamAttribute( + LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const { + return addAttribute(C, ArgNo + FirstArgIndex, Kind); + } + + /// Add an argument attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList + addParamAttribute(LLVMContext &C, unsigned ArgNo, StringRef Kind, + StringRef Value = StringRef()) const { + return addAttribute(C, ArgNo + FirstArgIndex, Kind, Value); + } + + /// Add an attribute to the attribute list at the given arg indices. Returns a + /// new list because attribute lists are immutable. 
+ LLVM_NODISCARD AttributeList addParamAttribute(LLVMContext &C, + ArrayRef<unsigned> ArgNos, + Attribute A) const; + + /// Add an argument attribute to the list. Returns a new list because + /// attribute lists are immutable. + LLVM_NODISCARD AttributeList addParamAttributes(LLVMContext &C, + unsigned ArgNo, + const AttrBuilder &B) const { + return addAttributes(C, ArgNo + FirstArgIndex, B); + } + + /// Remove the specified attribute at the specified index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index, + Attribute::AttrKind Kind) const; + + /// Remove the specified attribute at the specified index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeAttribute(LLVMContext &C, unsigned Index, + StringRef Kind) const; + + /// Remove the specified attributes at the specified index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeAttributes( + LLVMContext &C, unsigned Index, const AttrBuilder &AttrsToRemove) const; + + /// Remove all attributes at the specified index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeAttributes(LLVMContext &C, + unsigned Index) const; + + /// Remove the specified attribute at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeParamAttribute( + LLVMContext &C, unsigned ArgNo, Attribute::AttrKind Kind) const { + return removeAttribute(C, ArgNo + FirstArgIndex, Kind); + } + + /// Remove the specified attribute at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeParamAttribute(LLVMContext &C, + unsigned ArgNo, + StringRef Kind) const { + return removeAttribute(C, ArgNo + FirstArgIndex, Kind); + } + + /// Remove the specified attribute at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeParamAttributes( + LLVMContext &C, unsigned ArgNo, const AttrBuilder &AttrsToRemove) const { + return removeAttributes(C, ArgNo + FirstArgIndex, AttrsToRemove); + } + + /// Remove all attributes at the specified arg index from this + /// attribute list. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList removeParamAttributes(LLVMContext &C, + unsigned ArgNo) const { + return removeAttributes(C, ArgNo + FirstArgIndex); + } + + /// \brief Add the dereferenceable attribute to the attribute set at the given + /// index. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList addDereferenceableAttr(LLVMContext &C, + unsigned Index, + uint64_t Bytes) const; + + /// \brief Add the dereferenceable attribute to the attribute set at the given + /// arg index. Returns a new list because attribute lists are immutable. + LLVM_NODISCARD AttributeList addDereferenceableParamAttr( + LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const { + return addDereferenceableAttr(C, ArgNo + FirstArgIndex, Bytes); + } + + /// Add the dereferenceable_or_null attribute to the attribute set at + /// the given index. Returns a new list because attribute lists are immutable. 
+  LLVM_NODISCARD AttributeList addDereferenceableOrNullAttr(
+      LLVMContext &C, unsigned Index, uint64_t Bytes) const;
+
+  /// Add the dereferenceable_or_null attribute to the attribute set at
+  /// the given arg index. Returns a new list because attribute lists are
+  /// immutable.
+  LLVM_NODISCARD AttributeList addDereferenceableOrNullParamAttr(
+      LLVMContext &C, unsigned ArgNo, uint64_t Bytes) const {
+    return addDereferenceableOrNullAttr(C, ArgNo + FirstArgIndex, Bytes);
+  }
+
+  /// Add the allocsize attribute to the attribute set at the given index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList
+  addAllocSizeAttr(LLVMContext &C, unsigned Index, unsigned ElemSizeArg,
+                   const Optional<unsigned> &NumElemsArg);
+
+  /// Add the allocsize attribute to the attribute set at the given arg index.
+  /// Returns a new list because attribute lists are immutable.
+  LLVM_NODISCARD AttributeList
+  addAllocSizeParamAttr(LLVMContext &C, unsigned ArgNo, unsigned ElemSizeArg,
+                        const Optional<unsigned> &NumElemsArg) {
+    return addAllocSizeAttr(C, ArgNo + FirstArgIndex, ElemSizeArg, NumElemsArg);
+  }
+
+  //===--------------------------------------------------------------------===//
+  // AttributeList Accessors
+  //===--------------------------------------------------------------------===//
+
+  /// Retrieve the LLVM context.
+  LLVMContext &getContext() const;
+
+  /// The attributes for the specified index are returned.
+  AttributeSet getAttributes(unsigned Index) const;
+
+  /// The attributes for the argument or parameter at the given index are
+  /// returned.
+  AttributeSet getParamAttributes(unsigned ArgNo) const;
+
+  /// The attributes for the ret value are returned.
+  AttributeSet getRetAttributes() const;
+
+  /// The function attributes are returned.
+  AttributeSet getFnAttributes() const;
+
+  /// Return true if the attribute exists at the given index.
+  bool hasAttribute(unsigned Index, Attribute::AttrKind Kind) const;
+
+  /// Return true if the attribute exists at the given index.
+  bool hasAttribute(unsigned Index, StringRef Kind) const;
+
+  /// Return true if any attributes exist at the given index.
+  bool hasAttributes(unsigned Index) const;
+
+  /// Return true if the attribute exists for the given argument.
+  bool hasParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const {
+    return hasAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Return true if the attribute exists for the given argument.
+  bool hasParamAttr(unsigned ArgNo, StringRef Kind) const {
+    return hasAttribute(ArgNo + FirstArgIndex, Kind);
+  }
+
+  /// Return true if any attributes exist for the given argument.
+  bool hasParamAttrs(unsigned ArgNo) const {
+    return hasAttributes(ArgNo + FirstArgIndex);
+  }
+
+  /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
+  /// may be faster.
+  bool hasFnAttribute(Attribute::AttrKind Kind) const;
+
+  /// Equivalent to hasAttribute(AttributeList::FunctionIndex, Kind) but
+  /// may be faster.
+  bool hasFnAttribute(StringRef Kind) const;
+
+  /// Equivalent to hasAttribute(ArgNo + FirstArgIndex, Kind).
+  bool hasParamAttribute(unsigned ArgNo, Attribute::AttrKind Kind) const;
+
+  /// Return true if the specified attribute is set for at least one
+  /// parameter or for the return value. If Index is not nullptr, the index
+  /// of a parameter with the specified attribute is provided.
+  bool hasAttrSomewhere(Attribute::AttrKind Kind,
+                        unsigned *Index = nullptr) const;
+
+  /// Return the attribute object that exists at the given index.
+ Attribute getAttribute(unsigned Index, Attribute::AttrKind Kind) const; + + /// Return the attribute object that exists at the given index. + Attribute getAttribute(unsigned Index, StringRef Kind) const; + + /// Return the attribute object that exists at the arg index. + Attribute getParamAttr(unsigned ArgNo, Attribute::AttrKind Kind) const { + return getAttribute(ArgNo + FirstArgIndex, Kind); + } + + /// Return the attribute object that exists at the given index. + Attribute getParamAttr(unsigned ArgNo, StringRef Kind) const { + return getAttribute(ArgNo + FirstArgIndex, Kind); + } + + /// Return the alignment of the return value. + unsigned getRetAlignment() const; + + /// Return the alignment for the specified function parameter. + unsigned getParamAlignment(unsigned ArgNo) const; + + /// Return the byval type for the specified function parameter. + Type *getParamByValType(unsigned ArgNo) const; + + /// Get the stack alignment. + unsigned getStackAlignment(unsigned Index) const; + + /// Get the number of dereferenceable bytes (or zero if unknown). + uint64_t getDereferenceableBytes(unsigned Index) const; + + /// Get the number of dereferenceable bytes (or zero if unknown) of an + /// arg. + uint64_t getParamDereferenceableBytes(unsigned ArgNo) const { + return getDereferenceableBytes(ArgNo + FirstArgIndex); + } + + /// Get the number of dereferenceable_or_null bytes (or zero if + /// unknown). + uint64_t getDereferenceableOrNullBytes(unsigned Index) const; + + /// Get the number of dereferenceable_or_null bytes (or zero if + /// unknown) of an arg. + uint64_t getParamDereferenceableOrNullBytes(unsigned ArgNo) const { + return getDereferenceableOrNullBytes(ArgNo + FirstArgIndex); + } + + /// Get the allocsize argument numbers (or pair(0, 0) if unknown). + std::pair<unsigned, Optional<unsigned>> + getAllocSizeArgs(unsigned Index) const; + + /// Return the attributes at the index as a string. + std::string getAsString(unsigned Index, bool InAttrGrp = false) const; + + //===--------------------------------------------------------------------===// + // AttributeList Introspection + //===--------------------------------------------------------------------===// + + using iterator = const AttributeSet *; + + iterator begin() const; + iterator end() const; + + unsigned getNumAttrSets() const; + + /// Use these to iterate over the valid attribute indices. + unsigned index_begin() const { return AttributeList::FunctionIndex; } + unsigned index_end() const { return getNumAttrSets() - 1; } + + /// operator==/!= - Provide equality predicates. + bool operator==(const AttributeList &RHS) const { return pImpl == RHS.pImpl; } + bool operator!=(const AttributeList &RHS) const { return pImpl != RHS.pImpl; } + + /// Return a raw pointer that uniquely identifies this attribute list. + void *getRawPointer() const { + return pImpl; + } + + /// Return true if there are no attributes. + bool isEmpty() const { return pImpl == nullptr; } + + void dump() const; +}; + +//===----------------------------------------------------------------------===// +/// \class +/// Provide DenseMapInfo for AttributeList. 
+template <> struct DenseMapInfo<AttributeList> {
+  static AttributeList getEmptyKey() {
+    auto Val = static_cast<uintptr_t>(-1);
+    Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
+    return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
+  }
+
+  static AttributeList getTombstoneKey() {
+    auto Val = static_cast<uintptr_t>(-2);
+    Val <<= PointerLikeTypeTraits<void*>::NumLowBitsAvailable;
+    return AttributeList(reinterpret_cast<AttributeListImpl *>(Val));
+  }
+
+  static unsigned getHashValue(AttributeList AS) {
+    return (unsigned((uintptr_t)AS.pImpl) >> 4) ^
+           (unsigned((uintptr_t)AS.pImpl) >> 9);
+  }
+
+  static bool isEqual(AttributeList LHS, AttributeList RHS) {
+    return LHS == RHS;
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class is used in conjunction with the Attribute::get method to
+/// create an Attribute object. The object itself is uniquified. The Builder's
+/// value, however, is not. So this can be used as a quick way to test for
+/// equality, presence of attributes, etc.
+class AttrBuilder {
+  std::bitset<Attribute::EndAttrKinds> Attrs;
+  std::map<std::string, std::string> TargetDepAttrs;
+  uint64_t Alignment = 0;
+  uint64_t StackAlignment = 0;
+  uint64_t DerefBytes = 0;
+  uint64_t DerefOrNullBytes = 0;
+  uint64_t AllocSizeArgs = 0;
+  // BuffSize stores the size of the buffer to be used in FPGA transformations.
+  uint64_t BuffSize = 0;
+  // ChannelDepth stores the depth of the channel for FPGA transformations.
+  uint64_t ChannelDepth = 0;
+
+  Type *ByValType = nullptr;
+
+public:
+  AttrBuilder() = default;
+
+  AttrBuilder(const Attribute &A) {
+    addAttribute(A);
+  }
+
+  AttrBuilder(AttributeList AS, unsigned Idx);
+  AttrBuilder(AttributeSet AS);
+
+  void clear();
+
+  /// Add an attribute to the builder.
+  AttrBuilder &addAttribute(Attribute::AttrKind Val);
+
+  /// Add the Attribute object to the builder.
+  AttrBuilder &addAttribute(Attribute A);
+
+  /// Add the target-dependent attribute to the builder.
+  AttrBuilder &addAttribute(StringRef A, StringRef V = StringRef());
+
+  /// Remove an attribute from the builder.
+  AttrBuilder &removeAttribute(Attribute::AttrKind Val);
+
+  /// Remove the attributes from the builder.
+  AttrBuilder &removeAttributes(AttributeList A, uint64_t WithoutIndex);
+
+  /// Remove the target-dependent attribute from the builder.
+  AttrBuilder &removeAttribute(StringRef A);
+
+  /// Add the attributes from the given builder.
+  AttrBuilder &merge(const AttrBuilder &B);
+
+  /// Remove the attributes from the given builder.
+  AttrBuilder &remove(const AttrBuilder &B);
+
+  /// Return true if the builder has any attribute that's in the
+  /// specified builder.
+  bool overlaps(const AttrBuilder &B) const;
+
+  /// Return true if the builder has the specified attribute.
+  bool contains(Attribute::AttrKind A) const {
+    assert((unsigned)A < Attribute::EndAttrKinds && "Attribute out of range!");
+    return Attrs[A];
+  }
+
+  /// Return true if the builder has the specified target-dependent
+  /// attribute.
+  bool contains(StringRef A) const;
+
+  /// Return true if the builder has IR-level attributes.
+  bool hasAttributes() const;
+
+  /// Return true if the builder has any attribute that's in the
+  /// specified attribute.
+  bool hasAttributes(AttributeList A, uint64_t Index) const;
+
+  /// Return true if the builder has an alignment attribute.
+  bool hasAlignmentAttr() const;
+
+  /// Retrieve the alignment attribute, if it exists.
+ uint64_t getAlignment() const { return Alignment; } + + /// Retrieve the stack alignment attribute, if it exists. + uint64_t getStackAlignment() const { return StackAlignment; } + + /// Retrieve the number of dereferenceable bytes, if the + /// dereferenceable attribute exists (zero is returned otherwise). + uint64_t getDereferenceableBytes() const { return DerefBytes; } + + /// Retrieve the number of dereferenceable_or_null bytes, if the + /// dereferenceable_or_null attribute exists (zero is returned otherwise). + uint64_t getDereferenceableOrNullBytes() const { return DerefOrNullBytes; } + + /// Retrieve the maximum buffer size BuffSize, if the buffer or private + /// attribute exists (zero is returned otherwise). + uint64_t getBuffSize() const { return BuffSize; } + + /// Retrieve the channel depth ChannelDepth, if the channel + /// attribute exists (zero is returned otherwise). + uint64_t getChannelDepth() const { return ChannelDepth; } + + /// Retrieve the byval type. + Type *getByValType() const { return ByValType; } + + /// Retrieve the allocsize args, if the allocsize attribute exists. If it + /// doesn't exist, pair(0, 0) is returned. + std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const; + + /// This turns an int buffer size into the form used internally in Attribute + AttrBuilder &addBufferOrPrivAttr(Attribute::AttrKind kind, unsigned size); + + /// This turns an int channel depth into the form used internally in Attribute + AttrBuilder &addChannelAttr(Attribute::AttrKind kind, unsigned depth); + + /// This turns an int alignment (which must be a power of 2) into the + /// form used internally in Attribute. + AttrBuilder &addAlignmentAttr(unsigned Align); + + /// This turns an int stack alignment (which must be a power of 2) into + /// the form used internally in Attribute. + AttrBuilder &addStackAlignmentAttr(unsigned Align); + + /// This turns the number of dereferenceable bytes into the form used + /// internally in Attribute. + AttrBuilder &addDereferenceableAttr(uint64_t Bytes); + + /// This turns the number of dereferenceable_or_null bytes into the + /// form used internally in Attribute. + AttrBuilder &addDereferenceableOrNullAttr(uint64_t Bytes); + + /// This turns one (or two) ints into the form used internally in Attribute. + AttrBuilder &addAllocSizeAttr(unsigned ElemSizeArg, + const Optional<unsigned> &NumElemsArg); + + /// This turns a byval type into the form used internally in Attribute. + AttrBuilder &addByValAttr(Type *Ty); + + /// Add an allocsize attribute, using the representation returned by + /// Attribute.getIntValue(). + AttrBuilder &addAllocSizeAttrFromRawRepr(uint64_t RawAllocSizeRepr); + + /// Return true if the builder contains no target-independent + /// attributes. + bool empty() const { return Attrs.none(); } + + // Iterators for target-dependent attributes. 
+  using td_type = std::pair<std::string, std::string>;
+  using td_iterator = std::map<std::string, std::string>::iterator;
+  using td_const_iterator = std::map<std::string, std::string>::const_iterator;
+  using td_range = iterator_range<td_iterator>;
+  using td_const_range = iterator_range<td_const_iterator>;
+
+  td_iterator td_begin() { return TargetDepAttrs.begin(); }
+  td_iterator td_end() { return TargetDepAttrs.end(); }
+
+  td_const_iterator td_begin() const { return TargetDepAttrs.begin(); }
+  td_const_iterator td_end() const { return TargetDepAttrs.end(); }
+
+  td_range td_attrs() { return td_range(td_begin(), td_end()); }
+
+  td_const_range td_attrs() const {
+    return td_const_range(td_begin(), td_end());
+  }
+
+  bool td_empty() const { return TargetDepAttrs.empty(); }
+
+  bool operator==(const AttrBuilder &B);
+  bool operator!=(const AttrBuilder &B) {
+    return !(*this == B);
+  }
+};
+
+namespace AttributeFuncs {
+
+/// Which attributes cannot be applied to a type.
+AttrBuilder typeIncompatible(Type *Ty);
+
+/// \returns True if the two functions have compatible target-independent
+/// attributes for inlining purposes.
+bool areInlineCompatible(const Function &Caller, const Function &Callee);
+
+/// Merge caller's and callee's attributes.
+void mergeAttributesForInlining(Function &Caller, const Function &Callee);
+
+} // end namespace AttributeFuncs
+
+} // end namespace llvm
+
+#endif // LLVM_IR_ATTRIBUTES_H
diff --git a/hpvm/llvm_patches/include/IR/Attributes.td b/hpvm/llvm_patches/include/llvm/IR/Attributes.td
similarity index 97%
rename from hpvm/llvm_patches/include/IR/Attributes.td
rename to hpvm/llvm_patches/include/llvm/IR/Attributes.td
index c6ff8ef3c6c962f5444d718ff5a7e16ce392a522..8b7bf371cd904f410937c28d93be740fb46c01f9 100644
--- a/hpvm/llvm_patches/include/IR/Attributes.td
+++ b/hpvm/llvm_patches/include/llvm/IR/Attributes.td
@@ -161,6 +161,15 @@ def Out : EnumAttr<"out">;
 /// Pointer to read/write memory
 def InOut : EnumAttr<"inout">;
 
+/// Pointer to private memory
+def Priv : EnumAttr<"priv">;
+
+/// Pointer to bufferable memory
+def BufferIn : EnumAttr<"bufferin">;
+def BufferOut : EnumAttr<"bufferout">;
+
+/// Pointer to potential channel
+def Channel : EnumAttr<"channel">;
 
 /// Alignment of stack for function (3 bits) stored as log2 of alignment with
 /// +1 bias 0 means unaligned (different from alignstack=(1)).
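The new `priv`, `bufferin`, `bufferout`, and `channel` enum attributes are the TableGen side of the `BuffSize`/`ChannelDepth` machinery added to `AttrBuilder` above. As a rough sketch of how a front end might attach them to pointer parameters (the function `F`, the argument numbers, and the size/depth values below are hypothetical, and the integer encoding is defined by the patched Attributes.cpp, which is not shown here):

    // Mark parameter 0 of F as a bufferable input with a 256-element buffer,
    // and parameter 1 as a channel of depth 4 (illustrative values only).
    AttrBuilder B0, B1;
    B0.addBufferOrPrivAttr(Attribute::BufferIn, 256);
    B1.addChannelAttr(Attribute::Channel, 4);
    F->addParamAttrs(0, B0);
    F->addParamAttrs(1, B1);
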
diff --git a/hpvm/llvm_patches/include/IR/Intrinsics.td b/hpvm/llvm_patches/include/llvm/IR/Intrinsics.td
similarity index 100%
rename from hpvm/llvm_patches/include/IR/Intrinsics.td
rename to hpvm/llvm_patches/include/llvm/IR/Intrinsics.td
diff --git a/hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td b/hpvm/llvm_patches/include/llvm/IR/IntrinsicsHPVM.td
similarity index 90%
rename from hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td
rename to hpvm/llvm_patches/include/llvm/IR/IntrinsicsHPVM.td
index 0287f083849d9385faa00cc2943834b5b58439b0..e792c98e8ccf1c10b79b75318d258c0788bdfb8a 100644
--- a/hpvm/llvm_patches/include/IR/IntrinsicsHPVM.td
+++ b/hpvm/llvm_patches/include/llvm/IR/IntrinsicsHPVM.td
@@ -64,27 +64,27 @@ let TargetPrefix = "hpvm" in {
   def int_hpvm_requestMemory : Intrinsic<[], [llvm_ptr_ty, llvm_i64_ty], []>;
 
   /* Create Node intrinsic -
-   * i8* llvm.hpvm.createNode(function*);
+   * i8* llvm.hpvm.createNode(function*, i32);
    */
-  def int_hpvm_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty], []>;
+  def int_hpvm_createNode : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty, llvm_i32_ty], []>;
 
   /* Create Node 1D array intrinsic -
-   * i8* llvm.hpvm.createNode1D(function*, i64);
+   * i8* llvm.hpvm.createNode1D(function*, i64, i32);
    */
   def int_hpvm_createNode1D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
-                              llvm_i64_ty], []>;
+                              llvm_i64_ty, llvm_i32_ty], []>;
 
   /* Create Node 2D array intrinsic -
-   * i8* llvm.hpvm.createNode2D(function*, i64, i64);
+   * i8* llvm.hpvm.createNode2D(function*, i64, i64, i32);
    */
   def int_hpvm_createNode2D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
-                              llvm_i64_ty, llvm_i64_ty], []>;
+                              llvm_i64_ty, llvm_i64_ty, llvm_i32_ty], []>;
 
   /* Create Node 3D array intrinsic -
-   * i8* llvm.hpvm.createNode2D(function*, i64, i64, i64);
+   * i8* llvm.hpvm.createNode3D(function*, i64, i64, i64, i32);
    */
   def int_hpvm_createNode3D : Intrinsic<[llvm_ptr_ty], [llvm_ptr_ty,
-                              llvm_i64_ty, llvm_i64_ty, llvm_i64_ty],
+                              llvm_i64_ty, llvm_i64_ty, llvm_i64_ty, llvm_i32_ty],
                               []>;
 
   /* Create dataflow edge intrinsic -
@@ -204,9 +204,28 @@ let TargetPrefix = "hpvm" in {
   /* i32 llvm.hpvm.atomic.xor(i32*, i32)*/
   def int_hpvm_atomic_xor: Intrinsic<[llvm_i32_ty], [llvm_ptr_ty,
                                       llvm_i32_ty], []>;
 
+  /* ============ HPVM-EPOCH Intrinsics ============= */
+  // Intrinsics for hpvm-EPOCH
+
+  /* void llvm.hpvm.task(i32) */
+  def int_hpvm_task: Intrinsic<[], [llvm_i32_ty], []>;
+
+  /* ============ HPVM-FPGA Intrinsics ============= */
+  // Intrinsics for HPVM-FPGA
+
+  /* void llvm.hpvm.nz.loop(i64, i32) */
+  def int_hpvm_nz_loop: Intrinsic<[], [llvm_i64_ty, llvm_i32_ty], []>;
+
+  /* int/float llvm.hpvm.ch.read(i32) */
+  def int_hpvm_ch_read: Intrinsic<[llvm_any_ty], [llvm_i32_ty], []>;
+
+  /* void llvm.hpvm.ch.write.x(int/float, i32) */
+  def int_hpvm_ch_write: Intrinsic<[], [llvm_any_ty, llvm_i32_ty], []>;
+  /* void llvm.hpvm.sequentialized.loop(i32 depth, i32 dimension, i64 indvar, i64 bound) */
+  def int_hpvm_sequentialized_loop: Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i64_ty, llvm_i64_ty], []>;
 
-  /***************************************************************************/
+  /***************************************************************************/
   /* ApproxHPVM intrinsics */
   /***************************************************************************/
diff --git a/hpvm/llvm_patches/include/Support/Debug.h b/hpvm/llvm_patches/include/llvm/Support/Debug.h
similarity index 100%
rename from hpvm/llvm_patches/include/Support/Debug.h
rename to hpvm/llvm_patches/include/llvm/Support/Debug.h
diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Scalar/ADCE.h b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/ADCE.h
new file mode 100644
index 0000000000000000000000000000000000000000..535049bd8b87250de76f36754b525b5cd220a8a3
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/ADCE.h
@@ -0,0 +1,167 @@
+//===- ADCE.h - Aggressive dead code elimination ----------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file provides the interface for the Aggressive Dead Code Elimination
+// pass. This pass optimistically assumes that all instructions are dead until
+// proven otherwise, allowing it to eliminate dead computations that other DCE
+// passes do not catch, particularly involving loop computations.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_SCALAR_ADCE_H
+#define LLVM_TRANSFORMS_SCALAR_ADCE_H
+
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+class Function;
+
+/// A DCE pass that assumes instructions are dead until proven otherwise.
+///
+/// This pass eliminates dead code by optimistically assuming that all
+/// instructions are dead until proven otherwise. This allows it to eliminate
+/// dead computations that other DCE passes do not catch, particularly involving
+/// loop computations.
+struct ADCEPass : PassInfoMixin<ADCEPass> {
+  PreservedAnalyses run(Function &F, FunctionAnalysisManager &);
+};
+
+
+/// Information about Instructions.
+struct InstInfoType {
+  /// True if the associated instruction is live.
+  bool Live = false;
+
+  /// Quick access to information for block containing associated Instruction.
+  struct BlockInfoType *Block = nullptr;
+};
+
+/// Information about basic blocks relevant to dead code elimination.
+struct BlockInfoType {
+  /// True when this block contains a live instruction.
+  bool Live = false;
+
+  /// True when this block ends in an unconditional branch.
+  bool UnconditionalBranch = false;
+
+  /// True when this block is known to have live PHI nodes.
+  bool HasLivePhiNodes = false;
+
+  /// Control dependence sources need to be live for this block.
+  bool CFLive = false;
+
+  /// Quick access to the LiveInfo for the terminator;
+  /// holds the value &InstInfo[Terminator].
+  InstInfoType *TerminatorLiveInfo = nullptr;
+
+  /// Corresponding BasicBlock.
+  BasicBlock *BB = nullptr;
+
+  /// Cache of BB->getTerminator().
+  Instruction *Terminator = nullptr;
+
+  /// Post-order numbering of the reverse control flow graph.
+  unsigned PostOrder;
+
+  bool terminatorIsLive() const { return TerminatorLiveInfo->Live; }
+};
+
+class AggressiveDeadCodeElimination {
+  Function &F;
+
+  // ADCE does not use DominatorTree per se, but it updates it to preserve the
+  // analysis.
+  DominatorTree *DT;
+  PostDominatorTree &PDT;
+
+  /// Mapping of blocks to associated information, an element in BlockInfoVec.
+  /// Use MapVector to get deterministic iteration order.
+  MapVector<BasicBlock *, BlockInfoType> BlockInfo;
+  bool isLive(BasicBlock *BB) { return BlockInfo[BB].Live; }
+
+  /// Mapping of instructions to associated information.
+  DenseMap<Instruction *, InstInfoType> InstInfo;
+  bool isLive(Instruction *I) { return InstInfo[I].Live; }
+
+  /// Instructions known to be live where we need to mark
+  /// reaching definitions as live.
+  SmallVector<Instruction *, 128> Worklist;
+
+  /// Debug info scopes around a live instruction.
+  SmallPtrSet<const Metadata *, 32> AliveScopes;
+
+  /// Set of blocks not known to have live terminators.
+  SmallSetVector<BasicBlock *, 16> BlocksWithDeadTerminators;
+
+  /// The set of blocks whose control dependence sources must
+  /// be live, and which have not yet had
+  /// those dependences analyzed.
+  SmallPtrSet<BasicBlock *, 16> NewLiveBlocks;
+
+  /// Set up auxiliary data structures for Instructions and BasicBlocks and
+  /// initialize the Worklist to the set of must-be-live Instructions.
+  void initialize();
+
+  /// Return true for operations which are always treated as live.
+  bool isAlwaysLive(Instruction &I);
+
+  /// Return true for instrumentation instructions for value profiling.
+  bool isInstrumentsConstant(Instruction &I);
+
+  /// Propagate liveness to reaching definitions.
+  void markLiveInstructions();
+
+  /// Mark an instruction as live.
+  void markLive(Instruction *I);
+
+  /// Mark a block as live.
+  void markLive(BlockInfoType &BB);
+  void markLive(BasicBlock *BB) { markLive(BlockInfo[BB]); }
+
+  /// Mark terminators of control predecessors of a PHI node live.
+  void markPhiLive(PHINode *PN);
+
+  /// Record the Debug Scopes which surround live debug information.
+  void collectLiveScopes(const DILocalScope &LS);
+  void collectLiveScopes(const DILocation &DL);
+
+  /// Analyze dead branches to find those whose branches are the sources
+  /// of control dependences impacting a live block. Those branches are
+  /// marked live.
+  void markLiveBranchesFromControlDependences();
+
+  /// Remove instructions not marked live; returns true if any instruction
+  /// was removed.
+  bool removeDeadInstructions();
+
+  /// Identify connected sections of the control flow graph which have
+  /// dead terminators and rewrite the control flow graph to remove them.
+  void updateDeadRegions();
+
+  /// Set the BlockInfo::PostOrder field based on a post-order
+  /// numbering of the reverse control flow graph.
+  void computeReversePostOrder();
+
+  /// Make the terminator of this block an unconditional branch to \p Target.
+  void makeUnconditional(BasicBlock *BB, BasicBlock *Target);
+
+public:
+  AggressiveDeadCodeElimination(Function &F, DominatorTree *DT,
+                                PostDominatorTree &PDT)
+      : F(F), DT(DT), PDT(PDT) {}
+
+  bool performDeadCodeElimination();
+};
+} // end namespace llvm
+
+#endif // LLVM_TRANSFORMS_SCALAR_ADCE_H
diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Scalar/EarlyCSE.h b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/EarlyCSE.h
new file mode 100644
index 0000000000000000000000000000000000000000..67094d7547ca43fa82276b981b12b02bec139eed
--- /dev/null
+++ b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/EarlyCSE.h
@@ -0,0 +1,81 @@
+//===- EarlyCSE.h - Simple and fast CSE pass --------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides the interface for a simple, fast CSE pass.
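+///
+/// A hedged sketch of typical use with the new pass manager (the pass-manager
+/// setup here is illustrative, not part of this patch):
+/// \code
+///   FunctionPassManager FPM;
+///   FPM.addPass(EarlyCSEPass(/*UseMemorySSA=*/true));
+/// \endcode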
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_EARLYCSE_H +#define LLVM_TRANSFORMS_SCALAR_EARLYCSE_H + +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class Function; + +/// A simple and fast domtree-based CSE pass. +/// +/// This pass does a simple depth-first walk over the dominator tree, +/// eliminating trivially redundant instructions and using instsimplify to +/// canonicalize things as it goes. It is intended to be fast and catch obvious +/// cases so that instcombine and other passes are more effective. It is +/// expected that a later pass of GVN will catch the interesting/hard cases. +struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> { + EarlyCSEPass(bool UseMemorySSA = false) : UseMemorySSA(UseMemorySSA) {} + + /// Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + bool UseMemorySSA; +}; + +/// A simple and fast domtree-based CSE pass. +/// +/// This pass does a simple depth-first walk over the dominator tree, +/// eliminating trivially redundant instructions and using instsimplify to +/// canonicalize things as it goes. It is intended to be fast and catch obvious +/// cases so that instcombine and other passes are more effective. It is +/// expected that a later pass of GVN will catch the interesting/hard cases. +template <bool UseMemorySSA> +class EarlyCSELegacyCommonPass : public FunctionPass { +public: + static char ID; + + EarlyCSELegacyCommonPass() : FunctionPass(ID) { + if (UseMemorySSA) + initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry()); + else + initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<DominatorTreeWrapperPass>(); + AU.addRequired<TargetLibraryInfoWrapperPass>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + if (UseMemorySSA) { + AU.addRequired<MemorySSAWrapperPass>(); + AU.addPreserved<MemorySSAWrapperPass>(); + } + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.setPreservesCFG(); + } +}; + +} // namespace llvm +#endif // LLVM_TRANSFORMS_SCALAR_EARLYCSE_H diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Scalar/SimplifyCFG.h b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/SimplifyCFG.h new file mode 100644 index 0000000000000000000000000000000000000000..d40099ef0dc5568f1fbc81941f7b582a9b352fca --- /dev/null +++ b/hpvm/llvm_patches/include/llvm/Transforms/Scalar/SimplifyCFG.h @@ -0,0 +1,86 @@ +//===- SimplifyCFG.h - Simplify and canonicalize the CFG --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the interface for the pass responsible for both +/// simplifying and canonicalizing the CFG. 
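+///
+/// A small illustration (not part of the original header): callers can opt in
+/// to the non-canonical transforms through SimplifyCFGOptions, e.g.
+/// \code
+///   SimplifyCFGPass P(SimplifyCFGOptions().convertSwitchToLookupTable(true));
+/// \endcode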
+/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H +#define LLVM_TRANSFORMS_SCALAR_SIMPLIFYCFG_H + +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/TargetTransformInfo.h" + +using namespace llvm; + +namespace llvm { + +/// A pass to simplify and canonicalize the CFG of a function. +/// +/// This pass iteratively simplifies the entire CFG of a function. It may change +/// or remove control flow to put the CFG into a canonical form expected by +/// other passes of the mid-level optimizer. Depending on the specified options, +/// it may further optimize control-flow to create non-canonical forms. +class SimplifyCFGPass : public PassInfoMixin<SimplifyCFGPass> { + SimplifyCFGOptions Options; + +public: + /// The default constructor sets the pass options to create canonical IR, + /// rather than optimal IR. That is, by default we bypass transformations that + /// are likely to improve performance but make analysis for other passes more + /// difficult. + SimplifyCFGPass() + : SimplifyCFGPass(SimplifyCFGOptions() + .forwardSwitchCondToPhi(false) + .convertSwitchToLookupTable(false) + .needCanonicalLoops(true) + .sinkCommonInsts(false)) {} + + /// Construct a pass with optional optimizations. + SimplifyCFGPass(const SimplifyCFGOptions &PassOptions); + + /// Run the pass over the function. + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +bool simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, + const SimplifyCFGOptions &Options); + +struct CFGSimplifyPass : public FunctionPass { + static char ID; + SimplifyCFGOptions Options; + std::function<bool(const Function &)> PredicateFtor; + + CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false, + bool ConvertSwitch = false, bool KeepLoops = true, + bool SinkCommon = false, + std::function<bool(const Function &)> Ftor = nullptr); + + bool runOnFunction(Function &F) override { + if (skipFunction(F) || (PredicateFtor && !PredicateFtor(F))) + return false; + + Options.AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F); + auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); + return llvm::simplifyFunctionCFG(F, TTI, Options); + } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + AU.addRequired<TargetTransformInfoWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + } +}; +} // namespace llvm + +#endif diff --git a/hpvm/llvm_patches/include/llvm/Transforms/Utils/LoopSimplify.h b/hpvm/llvm_patches/include/llvm/Transforms/Utils/LoopSimplify.h new file mode 100644 index 0000000000000000000000000000000000000000..96a5e571f0f2d2ec45c9cea68f3fe897e9dedc16 --- /dev/null +++ b/hpvm/llvm_patches/include/llvm/Transforms/Utils/LoopSimplify.h @@ -0,0 +1,109 @@ +//===- LoopSimplify.h - Loop Canonicalization Pass --------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). This simplifies transformations such as store-sinking +// that are built into LICM. +// +// This pass also guarantees that loops will have exactly one backedge. +// +// Indirectbr instructions introduce several complications. If the loop +// contains or is entered by an indirectbr instruction, it may not be possible +// to transform the loop and make these guarantees. Client code should check +// that these conditions are true before relying on them. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H +#define LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H + +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/PassManager.h" +#include "llvm/Transforms/Utils.h" + +namespace llvm { + +class MemorySSAUpdater; + +/// This pass is responsible for loop canonicalization. +class LoopSimplifyPass : public PassInfoMixin<LoopSimplifyPass> { +public: + PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); +}; + +/// Simplify each loop in a loop nest recursively. +/// +/// This takes a potentially un-simplified loop L (and its children) and turns +/// it into a simplified loop nest with preheaders and single backedges. It will +/// update \c DominatorTree, \c LoopInfo, \c ScalarEvolution and \c MemorySSA +/// analyses if they're non-null, and LCSSA if \c PreserveLCSSA is true. +bool simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI, ScalarEvolution *SE, + AssumptionCache *AC, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA); + +struct LoopSimplify : public FunctionPass { + static char ID; // Pass identification, replacement for typeid + LoopSimplify() : FunctionPass(ID) { + initializeLoopSimplifyPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired<AssumptionCacheTracker>(); + + // We need loop information to identify the loops... 
+ AU.addRequired<DominatorTreeWrapperPass>(); + AU.addPreserved<DominatorTreeWrapperPass>(); + + AU.addRequired<LoopInfoWrapperPass>(); + AU.addPreserved<LoopInfoWrapperPass>(); + + AU.addPreserved<BasicAAWrapperPass>(); + AU.addPreserved<AAResultsWrapperPass>(); + AU.addPreserved<GlobalsAAWrapperPass>(); + AU.addPreserved<ScalarEvolutionWrapperPass>(); + AU.addPreserved<SCEVAAWrapperPass>(); + AU.addPreservedID(LCSSAID); + AU.addPreserved<DependenceAnalysisWrapperPass>(); + AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added. + AU.addPreserved<BranchProbabilityInfoWrapperPass>(); + if (EnableMSSALoopDependency) + AU.addPreserved<MemorySSAWrapperPass>(); + } + + /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees. + void verifyAnalysis() const override; +}; +} // namespace llvm + +#endif // LLVM_TRANSFORMS_UTILS_LOOPSIMPLIFY_H diff --git a/hpvm/llvm_patches/lib/Analysis/CMakeLists.txt b/hpvm/llvm_patches/lib/Analysis/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0f1c6014a329fab214630a8f102f51d47279750c --- /dev/null +++ b/hpvm/llvm_patches/lib/Analysis/CMakeLists.txt @@ -0,0 +1,105 @@ +add_llvm_library(LLVMAnalysis + AliasAnalysis.cpp + AliasAnalysisEvaluator.cpp + AliasAnalysisSummary.cpp + AliasSetTracker.cpp + Analysis.cpp + AssumptionCache.cpp + BasicAliasAnalysis.cpp + BlockFrequencyInfo.cpp + BlockFrequencyInfoImpl.cpp + BranchProbabilityInfo.cpp + CFG.cpp + CFGPrinter.cpp + CFLAndersAliasAnalysis.cpp + CFLSteensAliasAnalysis.cpp + CGSCCPassManager.cpp + CallGraph.cpp + CallGraphSCCPass.cpp + CallPrinter.cpp + CaptureTracking.cpp + CmpInstAnalysis.cpp + CostModel.cpp + CodeMetrics.cpp + ConstantFolding.cpp + DDG.cpp + Delinearization.cpp + DemandedBits.cpp + DependenceAnalysis.cpp + DependenceGraphBuilder.cpp + DivergenceAnalysis.cpp + DomPrinter.cpp + DomTreeUpdater.cpp + DominanceFrontier.cpp + EHPersonalities.cpp + GlobalsModRef.cpp + GuardUtils.cpp + IVDescriptors.cpp + IVUsers.cpp + IndirectCallPromotionAnalysis.cpp + InlineCost.cpp + InstCount.cpp + InstructionPrecedenceTracking.cpp + InstructionSimplify.cpp + Interval.cpp + IntervalPartition.cpp + LazyBranchProbabilityInfo.cpp + LazyBlockFrequencyInfo.cpp + LazyCallGraph.cpp + LazyValueInfo.cpp + LegacyDivergenceAnalysis.cpp + Lint.cpp + Loads.cpp + LoopAccessAnalysis.cpp + LoopAnalysisManager.cpp + LoopUnrollAnalyzer.cpp + LoopInfo.cpp + LoopPass.cpp + MemDepPrinter.cpp + MemDerefPrinter.cpp + MemoryBuiltins.cpp + MemoryDependenceAnalysis.cpp + MemoryLocation.cpp + MemorySSA.cpp + MemorySSAUpdater.cpp + ModuleDebugInfoPrinter.cpp + ModuleSummaryAnalysis.cpp + MustExecute.cpp + ObjCARCAliasAnalysis.cpp + ObjCARCAnalysisUtils.cpp + ObjCARCInstKind.cpp + OptimizationRemarkEmitter.cpp + OrderedBasicBlock.cpp + OrderedInstructions.cpp + PHITransAddr.cpp + PhiValues.cpp + PostDominators.cpp + ProfileSummaryInfo.cpp + PtrUseVisitor.cpp + RegionInfo.cpp + RegionPass.cpp + RegionPrinter.cpp + ScalarEvolution.cpp + ScalarEvolutionAliasAnalysis.cpp + ScalarEvolutionExpander.cpp + ScalarEvolutionNormalization.cpp + StackSafetyAnalysis.cpp + SyncDependenceAnalysis.cpp + SyntheticCountsUtils.cpp + TargetLibraryInfo.cpp + TargetTransformInfo.cpp + Trace.cpp + TypeBasedAliasAnalysis.cpp + TypeMetadataUtils.cpp + ScopedNoAliasAA.cpp + ValueLattice.cpp + ValueLatticeUtils.cpp + ValueTracking.cpp + VectorUtils.cpp + + ADDITIONAL_HEADER_DIRS + ${LLVM_MAIN_INCLUDE_DIR}/llvm/Analysis + + DEPENDS + intrinsics_gen + ) diff --git a/hpvm/llvm_patches/lib/Analysis/DDG.cpp 
b/hpvm/llvm_patches/lib/Analysis/DDG.cpp new file mode 100644 index 0000000000000000000000000000000000000000..7bd3044918492ee29aae21599c77f6e66f38804c --- /dev/null +++ b/hpvm/llvm_patches/lib/Analysis/DDG.cpp @@ -0,0 +1,326 @@ +//===- DDG.cpp - Data Dependence Graph -------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// The implementation for the data dependence graph. +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/DDG.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/LoopIterator.h" +#include "llvm/Support/CommandLine.h" +#include <memory> + +using namespace llvm; + +static cl::opt<bool> SimplifyDDG( + "ddg-simplify", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc( + "Simplify DDG by merging nodes that have less interesting edges.")); + +static cl::opt<bool> + CreatePiBlocks("ddg-pi-blocks", cl::init(true), cl::Hidden, cl::ZeroOrMore, + cl::desc("Create pi-block nodes.")); + +#define DEBUG_TYPE "ddg" + +template class llvm::DGEdge<DDGNode, DDGEdge>; +template class llvm::DGNode<DDGNode, DDGEdge>; +template class llvm::DirectedGraph<DDGNode, DDGEdge>; + +//===--------------------------------------------------------------------===// +// DDGNode implementation +//===--------------------------------------------------------------------===// +DDGNode::~DDGNode() {} + +bool DDGNode::collectInstructions( + llvm::function_ref<bool(Instruction *)> const &Pred, + InstructionListType &IList) const { + assert(IList.empty() && "Expected the IList to be empty on entry."); + if (isa<SimpleDDGNode>(this)) { + for (Instruction *I : cast<const SimpleDDGNode>(this)->getInstructions()) + if (Pred(I)) + IList.push_back(I); + } else if (isa<PiBlockDDGNode>(this)) { + for (const DDGNode *PN : cast<const PiBlockDDGNode>(this)->getNodes()) { + assert(!isa<PiBlockDDGNode>(PN) && "Nested PiBlocks are not supported."); + SmallVector<Instruction *, 8> TmpIList; + PN->collectInstructions(Pred, TmpIList); + IList.insert(IList.end(), TmpIList.begin(), TmpIList.end()); + } + } else + llvm_unreachable("unimplemented type of node"); + return !IList.empty(); +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) { + const char *Out; + switch (K) { + case DDGNode::NodeKind::SingleInstruction: + Out = "single-instruction"; + break; + case DDGNode::NodeKind::MultiInstruction: + Out = "multi-instruction"; + break; + case DDGNode::NodeKind::PiBlock: + Out = "pi-block"; + break; + case DDGNode::NodeKind::Root: + Out = "root"; + break; + case DDGNode::NodeKind::Unknown: + Out = "?? (error)"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) { + OS << "Node Address:" << &N << ":" << N.getKind() << "\n"; + if (isa<SimpleDDGNode>(N)) { + OS << " Instructions:\n"; + for (const Instruction *I : cast<const SimpleDDGNode>(N).getInstructions()) + OS.indent(2) << *I << "\n"; + } else if (isa<PiBlockDDGNode>(&N)) { + OS << "--- start of nodes in pi-block ---\n"; + auto &Nodes = cast<const PiBlockDDGNode>(&N)->getNodes(); + unsigned Count = 0; + for (const DDGNode *N : Nodes) + OS << *N << (++Count == Nodes.size() ? 
"" : "\n"); + OS << "--- end of nodes in pi-block ---\n"; + } else if (!isa<RootDDGNode>(N)) + llvm_unreachable("unimplemented type of node"); + + OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n"); + for (auto &E : N.getEdges()) + OS.indent(2) << *E; + return OS; +} + +//===--------------------------------------------------------------------===// +// SimpleDDGNode implementation +//===--------------------------------------------------------------------===// + +SimpleDDGNode::SimpleDDGNode(Instruction &I) + : DDGNode(NodeKind::SingleInstruction), InstList() { + assert(InstList.empty() && "Expected empty list."); + InstList.push_back(&I); +} + +SimpleDDGNode::SimpleDDGNode(const SimpleDDGNode &N) + : DDGNode(N), InstList(N.InstList) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::SimpleDDGNode(SimpleDDGNode &&N) + : DDGNode(std::move(N)), InstList(std::move(N.InstList)) { + assert(((getKind() == NodeKind::SingleInstruction && InstList.size() == 1) || + (getKind() == NodeKind::MultiInstruction && InstList.size() > 1)) && + "constructing from invalid simple node."); +} + +SimpleDDGNode::~SimpleDDGNode() { InstList.clear(); } + +//===--------------------------------------------------------------------===// +// PiBlockDDGNode implementation +//===--------------------------------------------------------------------===// + +PiBlockDDGNode::PiBlockDDGNode(const PiNodeList &List) + : DDGNode(NodeKind::PiBlock), NodeList(List) { + assert(!NodeList.empty() && "pi-block node constructed with an empty list."); +} + +PiBlockDDGNode::PiBlockDDGNode(const PiBlockDDGNode &N) + : DDGNode(N), NodeList(N.NodeList) { + assert(getKind() == NodeKind::PiBlock && !NodeList.empty() && + "constructing from invalid pi-block node."); +} + +PiBlockDDGNode::PiBlockDDGNode(PiBlockDDGNode &&N) + : DDGNode(std::move(N)), NodeList(std::move(N.NodeList)) { + assert(getKind() == NodeKind::PiBlock && !NodeList.empty() && + "constructing from invalid pi-block node."); +} + +PiBlockDDGNode::~PiBlockDDGNode() { NodeList.clear(); } + +//===--------------------------------------------------------------------===// +// DDGEdge implementation +//===--------------------------------------------------------------------===// + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) { + const char *Out; + switch (K) { + case DDGEdge::EdgeKind::RegisterDefUse: + Out = "def-use"; + break; + case DDGEdge::EdgeKind::MemoryDependence: + Out = "memory"; + break; + case DDGEdge::EdgeKind::Rooted: + Out = "rooted"; + break; + case DDGEdge::EdgeKind::Unknown: + Out = "?? (error)"; + break; + } + OS << Out; + return OS; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge &E) { + OS << "[" << E.getKind() << "] to " << &E.getTargetNode() << "\n"; + return OS; +} + +//===--------------------------------------------------------------------===// +// DataDependenceGraph implementation +//===--------------------------------------------------------------------===// +using BasicBlockListType = SmallVector<BasicBlock *, 8>; + +DataDependenceGraph::DataDependenceGraph(Function &F, DependenceInfo &D) + : DependenceGraphInfo(F.getName().str(), D) { + // Put the basic blocks in program order for correct dependence + // directions. 
+ BasicBlockListType BBList; + for (auto &SCC : make_range(scc_begin(&F), scc_end(&F))) + for (BasicBlock * BB : SCC) + BBList.push_back(BB); + std::reverse(BBList.begin(), BBList.end()); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::DataDependenceGraph(Loop &L, LoopInfo &LI, + DependenceInfo &D) + : DependenceGraphInfo(Twine(L.getHeader()->getParent()->getName() + "." + + L.getHeader()->getName()) + .str(), + D) { + // Put the basic blocks in program order for correct dependence + // directions. + LoopBlocksDFS DFS(&L); + DFS.perform(&LI); + BasicBlockListType BBList; + for (BasicBlock *BB : make_range(DFS.beginRPO(), DFS.endRPO())) + BBList.push_back(BB); + DDGBuilder(*this, D, BBList).populate(); +} + +DataDependenceGraph::~DataDependenceGraph() { + for (auto *N : Nodes) { + for (auto *E : *N) + delete E; + delete N; + } +} + +bool DataDependenceGraph::addNode(DDGNode &N) { + if (!DDGBase::addNode(N)) + return false; + + // In general, if the root node is already created and linked, it is not safe + // to add new nodes since they may be unreachable by the root. However, + // pi-block nodes need to be added after the root node is linked, and they are + // always reachable by the root, because they represent components that are + // already reachable by root. + auto *Pi = dyn_cast<PiBlockDDGNode>(&N); + assert((!Root || Pi) && + "Root node is already added. No more nodes can be added."); + + if (isa<RootDDGNode>(N)) + Root = &N; + + if (Pi) + for (DDGNode *NI : Pi->getNodes()) + PiBlockMap.insert(std::make_pair(NI, Pi)); + + return true; +} + +const PiBlockDDGNode *DataDependenceGraph::getPiBlock(const NodeType &N) const { + if (PiBlockMap.find(&N) == PiBlockMap.end()) + return nullptr; + auto *Pi = PiBlockMap.find(&N)->second; + assert(PiBlockMap.find(Pi) == PiBlockMap.end() && + "Nested pi-blocks detected."); + return Pi; +} + +raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) { + for (DDGNode *Node : G) + // Avoid printing nodes that are part of a pi-block twice. They will get + // printed when the pi-block is printed. + if (!G.getPiBlock(*Node)) + OS << *Node << "\n"; + OS << "\n"; + return OS; +} + +//===--------------------------------------------------------------------===// +// DDGBuilder implementation +//===--------------------------------------------------------------------===// + +bool DDGBuilder::areNodesMergeable(const DDGNode &Src, + const DDGNode &Tgt) const { + // Only merge two nodes if they are both simple nodes and the consecutive + // instructions after merging belong to the same BB. + const auto *SimpleSrc = dyn_cast<const SimpleDDGNode>(&Src); + const auto *SimpleTgt = dyn_cast<const SimpleDDGNode>(&Tgt); + if (!SimpleSrc || !SimpleTgt) + return false; + + return SimpleSrc->getLastInstruction()->getParent() == + SimpleTgt->getFirstInstruction()->getParent(); +} + +void DDGBuilder::mergeNodes(DDGNode &A, DDGNode &B) { + DDGEdge &EdgeToFold = A.back(); + assert(A.getEdges().size() == 1 && EdgeToFold.getTargetNode() == B && + "Expected A to have a single edge to B."); + assert(isa<SimpleDDGNode>(&A) && isa<SimpleDDGNode>(&B) && + "Expected simple nodes"); + + // Copy instructions from B to the end of A. + cast<SimpleDDGNode>(&A)->appendInstructions(*cast<SimpleDDGNode>(&B)); + + // Move to A any outgoing edges from B. 
+ for (DDGEdge *BE : B) + Graph.connect(A, BE->getTargetNode(), *BE); + + A.removeEdge(EdgeToFold); + destroyEdge(EdgeToFold); + Graph.removeNode(B); + destroyNode(B); +} + +bool DDGBuilder::shouldSimplify() const { return SimplifyDDG; } + +bool DDGBuilder::shouldCreatePiBlocks() const { return CreatePiBlocks; } + +//===--------------------------------------------------------------------===// +// DDG Analysis Passes +//===--------------------------------------------------------------------===// + +/// DDG as a loop pass. +DDGAnalysis::Result DDGAnalysis::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR) { + Function *F = L.getHeader()->getParent(); + DependenceInfo DI(F, &AR.AA, &AR.SE, &AR.LI); + return std::make_unique<DataDependenceGraph>(L, AR.LI, DI); +} +AnalysisKey DDGAnalysis::Key; + +PreservedAnalyses DDGAnalysisPrinterPass::run(Loop &L, LoopAnalysisManager &AM, + LoopStandardAnalysisResults &AR, + LPMUpdater &U) { + OS << "'DDG' for loop '" << L.getHeader()->getName() << "':\n"; + OS << *AM.getResult<DDGAnalysis>(L, AR); + return PreservedAnalyses::all(); +} diff --git a/hpvm/llvm_patches/lib/Analysis/DependenceGraphBuilder.cpp b/hpvm/llvm_patches/lib/Analysis/DependenceGraphBuilder.cpp new file mode 100644 index 0000000000000000000000000000000000000000..95a39f984d6378af4d7a8042954a5913bbf1a971 --- /dev/null +++ b/hpvm/llvm_patches/lib/Analysis/DependenceGraphBuilder.cpp @@ -0,0 +1,535 @@ +//===- DependenceGraphBuilder.cpp ------------------------------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// This file implements common steps of the build algorithm for construction +// of dependence graphs such as DDG and PDG. +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/DependenceGraphBuilder.h" +#include "llvm/ADT/EnumeratedArray.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DDG.h" + +using namespace llvm; + +#define DEBUG_TYPE "dgb" + +STATISTIC(TotalGraphs, "Number of dependence graphs created."); +STATISTIC(TotalDefUseEdges, "Number of def-use edges created."); +STATISTIC(TotalMemoryEdges, "Number of memory dependence edges created."); +STATISTIC(TotalFineGrainedNodes, "Number of fine-grained nodes created."); +STATISTIC(TotalPiBlockNodes, "Number of pi-block nodes created."); +STATISTIC(TotalConfusedEdges, + "Number of confused memory dependencies between two nodes."); +STATISTIC(TotalEdgeReversals, + "Number of times the source and sink of dependence was reversed to " + "expose cycles in the graph."); + +using InstructionListType = SmallVector<Instruction *, 2>; + +//===--------------------------------------------------------------------===// +// AbstractDependenceGraphBuilder implementation +//===--------------------------------------------------------------------===// + +template <class G> +void AbstractDependenceGraphBuilder<G>::computeInstructionOrdinals() { + // The BBList is expected to be in program order. 
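+  // Ordinals are just increasing integers assigned in that order; e.g. with
+  // BBList = [BB1:{I1, I2}, BB2:{I3}] the resulting map is
+  // {I1 -> 1, I2 -> 2, I3 -> 3}.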
+  size_t NextOrdinal = 1;
+  for (auto *BB : BBList)
+    for (auto &I : *BB)
+      InstOrdinalMap.insert(std::make_pair(&I, NextOrdinal++));
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
+  ++TotalGraphs;
+  assert(IMap.empty() && "Expected empty instruction map at start");
+  for (BasicBlock *BB : BBList)
+    for (Instruction &I : *BB) {
+      auto &NewNode = createFineGrainedNode(I);
+      IMap.insert(std::make_pair(&I, &NewNode));
+      NodeOrdinalMap.insert(std::make_pair(&NewNode, getOrdinal(I)));
+      ++TotalFineGrainedNodes;
+    }
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
+  // Create a root node that connects to every connected component of the
+  // graph. This is done to allow graph iterators to visit all the disjoint
+  // components of the graph in a single walk.
+  //
+  // This algorithm works by going through each node of the graph and, for
+  // each node N, doing a DFS starting from N. A rooted edge is established
+  // between the root node and N (if N is not yet visited). All the nodes
+  // reachable from N are marked as visited and are skipped in the DFS of
+  // subsequent nodes.
+  //
+  // Note: This algorithm tries to limit the number of edges out of the root
+  // node to some extent, but there may be redundant edges created depending on
+  // the iteration order. For example for a graph {A -> B}, an edge from the
+  // root node is added to both nodes if B is visited before A. While it does
+  // not result in a minimal number of edges, this approach saves compile-time
+  // while keeping the number of edges in check.
+  auto &RootNode = createRootNode();
+  df_iterator_default_set<const NodeType *, 4> Visited;
+  for (auto *N : Graph) {
+    if (*N == RootNode)
+      continue;
+    for (auto I : depth_first_ext(N, Visited))
+      if (I == N)
+        createRootedEdge(RootNode, *N);
+  }
+}
+
+template <class G> void AbstractDependenceGraphBuilder<G>::createPiBlocks() {
+  if (!shouldCreatePiBlocks())
+    return;
+
+  LLVM_DEBUG(dbgs() << "==== Start of Creation of Pi-Blocks ===\n");
+
+  // The overall algorithm is as follows:
+  // 1. Identify SCCs and for each SCC create a pi-block node containing all
+  //    the nodes in that SCC.
+  // 2. Identify incoming edges incident to the nodes inside of the SCC and
+  //    reconnect them to the pi-block node.
+  // 3. Identify outgoing edges from the nodes inside of the SCC to nodes
+  //    outside of it and reconnect them so that the edges are coming out of
+  //    the SCC node instead.
+
+  // Adding nodes as we iterate through the SCCs causes the SCC
+  // iterators to get invalidated. To prevent this invalidation, we first
+  // collect a list of nodes that are part of an SCC, and then iterate over
+  // those lists to create the pi-block nodes. Each element of the list is a
+  // list of nodes in an SCC. Note: trivial SCCs containing a single node are
+  // ignored.
+  SmallVector<NodeListType, 4> ListOfSCCs;
+  for (auto &SCC : make_range(scc_begin(&Graph), scc_end(&Graph))) {
+    if (SCC.size() > 1)
+      ListOfSCCs.emplace_back(SCC.begin(), SCC.end());
+  }
+
+  for (NodeListType &NL : ListOfSCCs) {
+    LLVM_DEBUG(dbgs() << "Creating pi-block node with " << NL.size()
+                      << " nodes in it.\n");
+
+    // The SCC iterator may put the nodes in an order that's different from
+    // the program order. To preserve original program order, we sort the
+    // list of nodes based on ordinal numbers computed earlier.
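+    // For example, an SCC that the iterator reports as {C, A, B} is
+    // reordered to {A, B, C} when getOrdinal(A) < getOrdinal(B) <
+    // getOrdinal(C).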
+    llvm::sort(NL, [&](NodeType *LHS, NodeType *RHS) {
+      return getOrdinal(*LHS) < getOrdinal(*RHS);
+    });
+
+    NodeType &PiNode = createPiBlock(NL);
+    ++TotalPiBlockNodes;
+
+    // Build a set to speed up the lookup for edges whose targets
+    // are inside the SCC.
+    SmallPtrSet<NodeType *, 4> NodesInSCC(NL.begin(), NL.end());
+
+    // We have the set of nodes in the SCC. We go through the set of nodes
+    // that are outside of the SCC and look for edges that cross the two sets.
+    for (NodeType *N : Graph) {
+
+      // Skip the SCC node and all the nodes inside of it.
+      if (*N == PiNode || NodesInSCC.count(N))
+        continue;
+
+      for (NodeType *SCCNode : NL) {
+
+        enum Direction {
+          Incoming,      // Incoming edges to the SCC
+          Outgoing,      // Edges going out of the SCC
+          DirectionCount // To make the enum usable as an array index.
+        };
+
+        // Use these flags to help us avoid creating redundant edges. If there
+        // is more than one edge from an outside node to inside nodes, we only
+        // keep one edge from that node to the pi-block node. Similarly, if
+        // there is more than one edge from inside nodes to an outside node,
+        // we only keep one edge from the pi-block node to the outside node.
+        // There is a flag defined for each direction (incoming vs outgoing)
+        // and for each type of edge supported, using a two-dimensional
+        // boolean array.
+        using EdgeKind = typename EdgeType::EdgeKind;
+        EnumeratedArray<bool, EdgeKind> EdgeAlreadyCreated[DirectionCount]{
+            false, false};
+
+        auto createEdgeOfKind = [this](NodeType &Src, NodeType &Dst,
+                                       const EdgeKind K) {
+          switch (K) {
+          case EdgeKind::RegisterDefUse:
+            createDefUseEdge(Src, Dst);
+            break;
+          case EdgeKind::MemoryDependence:
+            createMemoryEdge(Src, Dst);
+            break;
+          case EdgeKind::Rooted:
+            createRootedEdge(Src, Dst);
+            break;
+          default:
+            llvm_unreachable("Unsupported type of edge.");
+          }
+        };
+
+        auto reconnectEdges = [&](NodeType *Src, NodeType *Dst, NodeType *New,
+                                  const Direction Dir) {
+          if (!Src->hasEdgeTo(*Dst))
+            return;
+          LLVM_DEBUG(dbgs()
+                     << "reconnecting("
+                     << (Dir == Direction::Incoming ? "incoming)" : "outgoing)")
+                     << ":\nSrc:" << *Src << "\nDst:" << *Dst
+                     << "\nNew:" << *New << "\n");
+          assert((Dir == Direction::Incoming || Dir == Direction::Outgoing) &&
+                 "Invalid direction.");
+
+          SmallVector<EdgeType *, 10> EL;
+          Src->findEdgesTo(*Dst, EL);
+          for (EdgeType *OldEdge : EL) {
+            EdgeKind Kind = OldEdge->getKind();
+            if (!EdgeAlreadyCreated[Dir][Kind]) {
+              if (Dir == Direction::Incoming) {
+                createEdgeOfKind(*Src, *New, Kind);
+                LLVM_DEBUG(dbgs() << "created edge from Src to New.\n");
+              } else if (Dir == Direction::Outgoing) {
+                createEdgeOfKind(*New, *Dst, Kind);
+                LLVM_DEBUG(dbgs() << "created edge from New to Dst.\n");
+              }
+              EdgeAlreadyCreated[Dir][Kind] = true;
+            }
+            Src->removeEdge(*OldEdge);
+            destroyEdge(*OldEdge);
+            LLVM_DEBUG(dbgs() << "removed old edge between Src and Dst.\n\n");
+          }
+        };
+
+        // Process incoming edges incident to the pi-block node.
+        reconnectEdges(N, SCCNode, &PiNode, Direction::Incoming);
+
+        // Process edges that are coming out of the pi-block node.
+        reconnectEdges(SCCNode, N, &PiNode, Direction::Outgoing);
+      }
+    }
+  }
+
+  // Ordinal maps are no longer needed.
+  InstOrdinalMap.clear();
+  NodeOrdinalMap.clear();
+
+  LLVM_DEBUG(dbgs() << "==== End of Creation of Pi-Blocks ===\n");
+}
+
+template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
+  for (NodeType *N : Graph) {
+    InstructionListType SrcIList;
+    N->collectInstructions([](const Instruction *I) { return true; }, SrcIList);
+
+    // Use a set to mark the targets that we link to N, so we don't add
+    // duplicate def-use edges when more than one instruction in a target node
+    // uses results of instructions that are contained in N.
+    SmallPtrSet<NodeType *, 4> VisitedTargets;
+
+    for (Instruction *II : SrcIList) {
+      for (User *U : II->users()) {
+        Instruction *UI = dyn_cast<Instruction>(U);
+        if (!UI)
+          continue;
+        NodeType *DstNode = nullptr;
+        if (IMap.find(UI) != IMap.end())
+          DstNode = IMap.find(UI)->second;
+
+        // In the case of loops, the scope of the subgraph is all the
+        // basic blocks (and instructions within them) belonging to the loop. We
+        // simply ignore all the edges coming from (or going into) instructions
+        // or basic blocks outside of this range.
+        if (!DstNode) {
+          LLVM_DEBUG(
+              dbgs()
+              << "skipped def-use edge since the sink " << *UI
+              << " is outside the range of instructions being considered.\n");
+          continue;
+        }
+
+        // Self dependencies are ignored because they are redundant and
+        // uninteresting.
+        if (DstNode == N) {
+          LLVM_DEBUG(dbgs()
+                     << "skipped def-use edge since the sink and the source ("
+                     << N << ") are the same.\n");
+          continue;
+        }
+
+        if (VisitedTargets.insert(DstNode).second) {
+          createDefUseEdge(*N, *DstNode);
+          ++TotalDefUseEdges;
+        }
+      }
+    }
+  }
+}
+
+template <class G>
+void AbstractDependenceGraphBuilder<G>::createMemoryDependencyEdges() {
+  using DGIterator = typename G::iterator;
+  auto isMemoryAccess = [](const Instruction *I) {
+    return I->mayReadOrWriteMemory();
+  };
+  for (DGIterator SrcIt = Graph.begin(), E = Graph.end(); SrcIt != E; ++SrcIt) {
+    LLVM_DEBUG(errs() << "Src Node: " << *SrcIt << "\n");
+    InstructionListType SrcIList;
+    (*SrcIt)->collectInstructions(isMemoryAccess, SrcIList);
+    if (SrcIList.empty())
+      continue;
+
+    for (DGIterator DstIt = SrcIt; DstIt != E; ++DstIt) {
+      if (**SrcIt == **DstIt)
+        continue;
+      InstructionListType DstIList;
+      (*DstIt)->collectInstructions(isMemoryAccess, DstIList);
+      if (DstIList.empty())
+        continue;
+      bool ForwardEdgeCreated = false;
+      bool BackwardEdgeCreated = false;
+      LLVM_DEBUG(errs() << "***********************************************\n");
+      for (Instruction *ISrc : SrcIList) {
+        LLVM_DEBUG(errs() << "Src: " << *ISrc << "\n");
+        for (Instruction *IDst : DstIList) {
+          LLVM_DEBUG(errs() << "Dst: " << *IDst << "\n");
+          auto D = DI.depends(ISrc, IDst, true);
+          if (!D) {
+            LLVM_DEBUG(errs() << "--> No Dependence, moving on!\n");
+            continue;
+          }
+          LLVM_DEBUG(D->dump(errs()));
+
+          // If we have a dependence with its left-most non-'=' direction
+          // being '>', we need to reverse the direction of the edge, because
+          // the source of the dependence cannot occur after the sink. For
+          // confused dependencies, we will create edges in both directions to
+          // represent the possibility of a cycle.
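+          // For example, a dependence with direction vector (=, >) has '>'
+          // as its left-most non-'=' entry, so the edge is created from
+          // IDst's node to ISrc's node (a backward edge), while (=, <)
+          // keeps the forward direction.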
+ + auto createConfusedEdges = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = BackwardEdgeCreated = true; + ++TotalConfusedEdges; + }; + + auto createForwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!ForwardEdgeCreated) { + createMemoryEdge(Src, Dst); + ++TotalMemoryEdges; + } + ForwardEdgeCreated = true; + }; + + auto createBackwardEdge = [&](NodeType &Src, NodeType &Dst) { + if (!BackwardEdgeCreated) { + createMemoryEdge(Dst, Src); + ++TotalMemoryEdges; + } + BackwardEdgeCreated = true; + }; + + if (D->isConfused()) { + LLVM_DEBUG(errs() << "--> Confused Dependence: creating Confused Edge\n"); + createConfusedEdges(**SrcIt, **DstIt); + } else if (D->isOrdered() && !D->isLoopIndependent()) { + LLVM_DEBUG(errs() << "--> Ordered, Loop-Dependent Dependence:\n"); + bool ReversedEdge = false; + for (unsigned Level = 1; Level <= D->getLevels(); ++Level) { + LLVM_DEBUG(errs() << "----> Lvl: " << Level << ": "); + if (D->getDirection(Level) == Dependence::DVEntry::EQ) { + LLVM_DEBUG(errs() << "EQ\n"); + continue; + } else if (D->getDirection(Level) == Dependence::DVEntry::GT) { + LLVM_DEBUG(errs() << "GT\n"); + LLVM_DEBUG(errs() << "------> Invalid Dependence. Creating Backward Edge!\n"); + createBackwardEdge(**SrcIt, **DstIt); + ReversedEdge = true; + ++TotalEdgeReversals; + break; + } else if (D->getDirection(Level) == Dependence::DVEntry::LT){ + LLVM_DEBUG(errs() << "LT\n"); + break; + } else { + LLVM_DEBUG(errs() << " Confused\n"); + createConfusedEdges(**SrcIt, **DstIt); + break; + } + } + if (!ReversedEdge) { + LLVM_DEBUG(errs() << "------> Creating Forward Edge!\n"); + createForwardEdge(**SrcIt, **DstIt); + } + } else { + LLVM_DEBUG(errs() << "--> Creating Forward Edge!\n"); + createForwardEdge(**SrcIt, **DstIt); + } + // Avoid creating duplicate edges. + if (ForwardEdgeCreated && BackwardEdgeCreated) { + LLVM_DEBUG(errs() << "--> Created all possible edges between Src and Dst!\n"); + break; + } + } + + // If we've created edges in both directions, there is no more + // unique edge that we can create between these two nodes, so we + // can exit early. + if (ForwardEdgeCreated && BackwardEdgeCreated) { + LLVM_DEBUG(errs() << "No more unique edges possible!\n"); + break; + } + } + } + } +} + +template <class G> void AbstractDependenceGraphBuilder<G>::simplify() { + if (!shouldSimplify()) + return; + LLVM_DEBUG(dbgs() << "==== Start of Graph Simplification ===\n"); + + // This algorithm works by first collecting a set of candidate nodes that have + // an out-degree of one (in terms of def-use edges), and then ignoring those + // whose targets have an in-degree more than one. Each node in the resulting + // set can then be merged with its corresponding target and put back into the + // worklist until no further merge candidates are available. + SmallPtrSet<NodeType *, 32> CandidateSourceNodes; + + // A mapping between nodes and their in-degree. To save space, this map + // only contains nodes that are targets of nodes in the CandidateSourceNodes. + DenseMap<NodeType *, unsigned> TargetInDegreeMap; + + for (NodeType *N : Graph) { + if (N->getEdges().size() != 1) + continue; + EdgeType &Edge = N->back(); + if (!Edge.isDefUse()) + continue; + CandidateSourceNodes.insert(N); + + // Insert an element into the in-degree map and initialize to zero. The + // count will get updated in the next step. 
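+    // For example, in the graph {a->b, b->c, d->c} each of a, b and d has a
+    // single def-use edge and becomes a candidate; c ends up in the
+    // in-degree map with a count of 2, which blocks the b->c and d->c
+    // merges below (assuming the nodes are otherwise mergeable).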
+ TargetInDegreeMap.insert({&Edge.getTargetNode(), 0}); + } + + LLVM_DEBUG({ + dbgs() << "Size of candidate src node list:" << CandidateSourceNodes.size() + << "\nNode with single outgoing def-use edge:\n"; + for (NodeType *N : CandidateSourceNodes) { + dbgs() << N << "\n"; + } + }); + + for (NodeType *N : Graph) { + for (EdgeType *E : *N) { + NodeType *Tgt = &E->getTargetNode(); + auto TgtIT = TargetInDegreeMap.find(Tgt); + if (TgtIT != TargetInDegreeMap.end()) + ++(TgtIT->second); + } + } + + LLVM_DEBUG({ + dbgs() << "Size of target in-degree map:" << TargetInDegreeMap.size() + << "\nContent of in-degree map:\n"; + for (auto &I : TargetInDegreeMap) { + dbgs() << I.first << " --> " << I.second << "\n"; + } + }); + + SmallVector<NodeType *, 32> Worklist(CandidateSourceNodes.begin(), + CandidateSourceNodes.end()); + while (!Worklist.empty()) { + NodeType &Src = *Worklist.pop_back_val(); + // As nodes get merged, we need to skip any node that has been removed from + // the candidate set (see below). + if (CandidateSourceNodes.find(&Src) == CandidateSourceNodes.end()) + continue; + CandidateSourceNodes.erase(&Src); + + assert(Src.getEdges().size() == 1 && + "Expected a single edge from the candidate src node."); + NodeType &Tgt = Src.back().getTargetNode(); + assert(TargetInDegreeMap.find(&Tgt) != TargetInDegreeMap.end() && + "Expected target to be in the in-degree map."); + + if (TargetInDegreeMap[&Tgt] != 1) + continue; + + if (!areNodesMergeable(Src, Tgt)) + continue; + + // Do not merge if there is also an edge from target to src (immediate + // cycle). + if (Tgt.hasEdgeTo(Src)) + continue; + + LLVM_DEBUG(dbgs() << "Merging:" << Src << "\nWith:" << Tgt << "\n"); + + mergeNodes(Src, Tgt); + + // If the target node is in the candidate set itself, we need to put the + // src node back into the worklist again so it gives the target a chance + // to get merged into it. For example if we have: + // {(a)->(b), (b)->(c), (c)->(d), ...} and the worklist is initially {b, a}, + // then after merging (a) and (b) together, we need to put (a,b) back in + // the worklist so that (c) can get merged in as well resulting in + // {(a,b,c) -> d} + // We also need to remove the old target (b), from the worklist. We first + // remove it from the candidate set here, and skip any item from the + // worklist that is not in the set. + if (CandidateSourceNodes.find(&Tgt) != CandidateSourceNodes.end()) { + Worklist.push_back(&Src); + CandidateSourceNodes.insert(&Src); + CandidateSourceNodes.erase(&Tgt); + LLVM_DEBUG(dbgs() << "Putting " << &Src << " back in the worklist.\n"); + } + } + LLVM_DEBUG(dbgs() << "=== End of Graph Simplification ===\n"); +} + +template <class G> +void AbstractDependenceGraphBuilder<G>::sortNodesTopologically() { + + // If we don't create pi-blocks, then we may not have a DAG. + if (!shouldCreatePiBlocks()) + return; + + SmallVector<NodeType *, 64> NodesInPO; + using NodeKind = typename NodeType::NodeKind; + for (NodeType *N : post_order(&Graph)) { + if (N->getKind() == NodeKind::PiBlock) { + // Put members of the pi-block right after the pi-block itself, for + // convenience. 
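+      // For example, if post_order visits [d, P, x] where P is a pi-block
+      // with members {a, b}, the list becomes [d, a, b, P, x] and, after the
+      // reversal below, the final node order is [x, P, b, a, d].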
+ const NodeListType &PiBlockMembers = getNodesInPiBlock(*N); + NodesInPO.insert(NodesInPO.end(), PiBlockMembers.begin(), + PiBlockMembers.end()); + } + NodesInPO.push_back(N); + } + + size_t OldSize = Graph.Nodes.size(); + Graph.Nodes.clear(); + for (NodeType *N : reverse(NodesInPO)) + Graph.Nodes.push_back(N); + if (Graph.Nodes.size() != OldSize) + assert(false && + "Expected the number of nodes to stay the same after the sort"); +} + +template class llvm::AbstractDependenceGraphBuilder<DataDependenceGraph>; +template class llvm::DependenceGraphInfo<DDGNode>; diff --git a/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp b/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp index 2c54392f8020ac7334117f1343214d085dbd6b84..211cefb8a17715ac77eca9d63d64a760a3fbd18a 100644 --- a/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp +++ b/hpvm/llvm_patches/lib/AsmParser/LLLexer.cpp @@ -859,6 +859,11 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(in); KEYWORD(out); KEYWORD(inout); + KEYWORD(priv); + KEYWORD(bufferin); + KEYWORD(bufferout); + KEYWORD(channel); + #undef KEYWORD diff --git a/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp b/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp index 7446ff1e32dd79a18fd678446af56e6d193468ad..ed64537935c5cf8401303715f5fe761073894ea6 100644 --- a/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp +++ b/hpvm/llvm_patches/lib/AsmParser/LLParser.cpp @@ -1474,6 +1474,10 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_in: case lltok::kw_out: case lltok::kw_inout: + case lltok::kw_priv: + case lltok::kw_bufferin: + case lltok::kw_bufferout: + case lltok::kw_channel: HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute on a function"); @@ -1818,6 +1822,41 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_inout: B.addAttribute(Attribute::InOut); break; + case lltok::kw_priv: { + unsigned BuffSize; + if (ParseOptionalBufferSize(lltok::kw_priv, BuffSize)) { + // TODO: This has to change if we want to have a default buffer size + return true; + } + B.addBufferOrPrivAttr(Attribute::Priv, BuffSize); + continue; + } + case lltok::kw_bufferin: { + unsigned BuffSize; + if (ParseOptionalBufferSize(lltok::kw_bufferin, BuffSize)) { + // TODO: This has to change if we want to have a default buffer size + return true; + } + B.addBufferOrPrivAttr(Attribute::BufferIn, BuffSize); + continue; + } + case lltok::kw_bufferout: { + unsigned BuffSize; + if (ParseOptionalBufferSize(lltok::kw_bufferout, BuffSize)) { + // TODO: This has to change if we want to have a default buffer size + return true; + } + B.addBufferOrPrivAttr(Attribute::BufferOut, BuffSize); + continue; + } + case lltok::kw_channel: { + unsigned ChannelDepth; + if (ParseOptionalChannelDepth(lltok::kw_channel, ChannelDepth)) { + return true; + } + B.addChannelAttr(Attribute::Channel, ChannelDepth); + continue; + } case lltok::kw_alignstack: case lltok::kw_alwaysinline: @@ -1931,6 +1970,11 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_in: case lltok::kw_out: case lltok::kw_inout: + case lltok::kw_bufferin: + case lltok::kw_bufferout: + case lltok::kw_priv: + case lltok::kw_channel: + HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; @@ -2354,6 +2398,30 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { return false; } +/// ParseOptionalBufferSize +/// ::= /* empty */ +/// ::= AttrKind 4 +bool LLParser::ParseOptionalBufferSize(lltok::Kind AttrKind, unsigned &BuffSize) { + BuffSize = 0; + if 
(!EatIfPresent(AttrKind)) + return false; + if (ParseUInt32(BuffSize)) + return true; + return false; +} + +/// ParseOptionalChannelDepth +/// ::= /* empty */ +/// ::= AttrKind 4 +bool LLParser::ParseOptionalChannelDepth(lltok::Kind AttrKind, unsigned &ChannelDepth) { + ChannelDepth = 0; + if (!EatIfPresent(AttrKind)) + return false; + if (ParseUInt32(ChannelDepth)) + return true; + return false; +} + /// ParseOptionalDerefAttrBytes /// ::= /* empty */ /// ::= AttrKind '(' 4 ')' @@ -2874,6 +2942,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, std::move(Name)); while (EatIfPresent(lltok::comma)) { + Attrs.clear(); // Handle ... at end of arg list. if (EatIfPresent(lltok::dotdotdot)) { isVarArg = true; diff --git a/hpvm/llvm_patches/lib/AsmParser/LLParser.h b/hpvm/llvm_patches/lib/AsmParser/LLParser.h index bc1983232f0570d816a23bc1f92ce490a44dee59..566360436ff0bec4a50add03bc195634446fb821 100644 --- a/hpvm/llvm_patches/lib/AsmParser/LLParser.h +++ b/hpvm/llvm_patches/lib/AsmParser/LLParser.h @@ -307,6 +307,8 @@ private: bool ParseOptionalCallingConv(unsigned &CC); bool ParseOptionalAlignment(unsigned &Alignment); bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes); + bool ParseOptionalBufferSize(lltok::Kind AttrKind, unsigned &Size); + bool ParseOptionalChannelDepth(lltok::Kind AttrKind, unsigned &Depth); bool ParseScopeAndOrdering(bool isAtomic, SyncScope::ID &SSID, AtomicOrdering &Ordering); bool ParseScope(SyncScope::ID &SSID); diff --git a/hpvm/llvm_patches/lib/AsmParser/LLToken.h b/hpvm/llvm_patches/lib/AsmParser/LLToken.h index cb0479b41c3b9e68d9697cd9d8adce4c80fa5c25..8eda688628cc54575b580d7cece7e98a2cec716b 100644 --- a/hpvm/llvm_patches/lib/AsmParser/LLToken.h +++ b/hpvm/llvm_patches/lib/AsmParser/LLToken.h @@ -355,6 +355,10 @@ enum Kind { kw_in, kw_out, kw_inout, + kw_priv, + kw_bufferin, + kw_bufferout, + kw_channel, // Metadata types. 
kw_distinct, diff --git a/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp b/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp index a1e64472850911013250976312a8dd7d8b879c98..557cae44c637af1414a96d8894f5c617d873149f 100644 --- a/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/hpvm/llvm_patches/lib/Bitcode/Reader/BitcodeReader.cpp @@ -1402,6 +1402,14 @@ static uint64_t getRawAttributeMask(Attribute::AttrKind Val) { return 3ULL << 1; case Attribute::InOut: return 3ULL << 2; + case Attribute::Priv: + return 3ULL << 3; + case Attribute::BufferIn: + return 3ULL << 4; + case Attribute::BufferOut: + return 3ULL << 5; + case Attribute::Channel: + return 3ULL << 6; case Attribute::NoSync: llvm_unreachable("nosync attribute not supported in raw format"); @@ -1441,7 +1449,11 @@ static void addRawAttributeValue(AttrBuilder &B, uint64_t Val) { B.addAlignmentAttr(1ULL << ((A >> 16) - 1)); else if (I == Attribute::StackAlignment) B.addStackAlignmentAttr(1ULL << ((A >> 26) - 1)); - else + else if (I == Attribute::BufferIn || I == Attribute::BufferOut || + I == Attribute::Priv || I == Attribute::Channel) { + errs() << "Bufferin/Bufferout/Priv/Channel record Val: " << Val << "\n"; + llvm_unreachable("Should not reach here!"); + } else B.addAttribute(I); } } diff --git a/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp b/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp index fd671c397583fad6ec8a9998635705417f59eed1..ea0460142cd7b40b2b882c12404d09d02abe2166 100644 --- a/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/hpvm/llvm_patches/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -780,6 +780,15 @@ static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { return bitc::ATTR_KIND_OUT; case Attribute::InOut: return bitc::ATTR_KIND_INOUT; + case Attribute::Priv: + return bitc::ATTR_KIND_PRIV; + case Attribute::BufferIn: + return bitc::ATTR_KIND_BUFFERIN; + case Attribute::BufferOut: + return bitc::ATTR_KIND_BUFFEROUT; + case Attribute::Channel: + return bitc::ATTR_KIND_CHANNEL; + case Attribute::EndAttrKinds: llvm_unreachable("Can not encode end-attribute kinds marker."); diff --git a/hpvm/llvm_patches/lib/IR/AttributeImpl.h b/hpvm/llvm_patches/lib/IR/AttributeImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..19f27ade2969ed1e5afc139e46c0c5db7b467864 --- /dev/null +++ b/hpvm/llvm_patches/lib/IR/AttributeImpl.h @@ -0,0 +1,290 @@ +//===- AttributeImpl.h - Attribute Internals --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines various helper methods and classes used by +/// LLVMContextImpl for creating and managing attributes. 
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_IR_ATTRIBUTEIMPL_H
+#define LLVM_LIB_IR_ATTRIBUTEIMPL_H
+
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/FoldingSet.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/Support/TrailingObjects.h"
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <string>
+#include <utility>
+
+namespace llvm {
+
+class LLVMContext;
+class Type;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class represents a single, uniqued attribute. That attribute
+/// could be a single enum, a tuple, or a string.
+class AttributeImpl : public FoldingSetNode {
+  unsigned char KindID; ///< Holds the AttrEntryKind of the attribute
+
+protected:
+  enum AttrEntryKind {
+    EnumAttrEntry,
+    IntAttrEntry,
+    StringAttrEntry,
+    TypeAttrEntry,
+  };
+
+  AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {}
+
+public:
+  // AttributeImpl is uniqued, these should not be available.
+  AttributeImpl(const AttributeImpl &) = delete;
+  AttributeImpl &operator=(const AttributeImpl &) = delete;
+
+  virtual ~AttributeImpl();
+
+  bool isEnumAttribute() const { return KindID == EnumAttrEntry; }
+  bool isIntAttribute() const { return KindID == IntAttrEntry; }
+  bool isStringAttribute() const { return KindID == StringAttrEntry; }
+  bool isTypeAttribute() const { return KindID == TypeAttrEntry; }
+
+  bool hasAttribute(Attribute::AttrKind A) const;
+  bool hasAttribute(StringRef Kind) const;
+
+  Attribute::AttrKind getKindAsEnum() const;
+  uint64_t getValueAsInt() const;
+
+  StringRef getKindAsString() const;
+  StringRef getValueAsString() const;
+
+  Type *getValueAsType() const;
+
+  /// Used when sorting the attributes.
+  bool operator<(const AttributeImpl &AI) const;
+
+  void Profile(FoldingSetNodeID &ID) const {
+    if (isEnumAttribute())
+      Profile(ID, getKindAsEnum(), static_cast<uint64_t>(0));
+    else if (isIntAttribute())
+      Profile(ID, getKindAsEnum(), getValueAsInt());
+    else if (isStringAttribute())
+      Profile(ID, getKindAsString(), getValueAsString());
+    else
+      Profile(ID, getKindAsEnum(), getValueAsType());
+  }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      uint64_t Val) {
+    ID.AddInteger(Kind);
+    if (Val) ID.AddInteger(Val);
+  }
+
+  static void Profile(FoldingSetNodeID &ID, StringRef Kind, StringRef Values) {
+    ID.AddString(Kind);
+    if (!Values.empty()) ID.AddString(Values);
+  }
+
+  static void Profile(FoldingSetNodeID &ID, Attribute::AttrKind Kind,
+                      Type *Ty) {
+    ID.AddInteger(Kind);
+    ID.AddPointer(Ty);
+  }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// A set of classes that contain the value of the
+/// attribute object. There are three main categories: enum attribute entries,
+/// represented by Attribute::AttrKind; alignment attribute entries; and string
+/// attribute entries, which are for target-dependent attributes.
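+/// For example, under this patch a parameter attribute such as `bufferin 16`
+/// is stored as an IntAttributeImpl whose kind is Attribute::BufferIn and
+/// whose value is 16, in the same way `align 16` is stored for alignment.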
+
+class EnumAttributeImpl : public AttributeImpl {
+  virtual void anchor();
+
+  Attribute::AttrKind Kind;
+
+protected:
+  EnumAttributeImpl(AttrEntryKind ID, Attribute::AttrKind Kind)
+      : AttributeImpl(ID), Kind(Kind) {}
+
+public:
+  EnumAttributeImpl(Attribute::AttrKind Kind)
+      : AttributeImpl(EnumAttrEntry), Kind(Kind) {}
+
+  Attribute::AttrKind getEnumKind() const { return Kind; }
+};
+
+class IntAttributeImpl : public EnumAttributeImpl {
+  uint64_t Val;
+
+  void anchor() override;
+
+public:
+  IntAttributeImpl(Attribute::AttrKind Kind, uint64_t Val)
+      : EnumAttributeImpl(IntAttrEntry, Kind), Val(Val) {
+    assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment ||
+            Kind == Attribute::Dereferenceable ||
+            Kind == Attribute::DereferenceableOrNull ||
+            Kind == Attribute::AllocSize ||
+            Kind == Attribute::BufferIn ||
+            Kind == Attribute::BufferOut ||
+            Kind == Attribute::Priv ||
+            Kind == Attribute::Channel) &&
+           "Wrong kind for int attribute!");
+  }
+
+  uint64_t getValue() const { return Val; }
+};
+
+class StringAttributeImpl : public AttributeImpl {
+  virtual void anchor();
+
+  std::string Kind;
+  std::string Val;
+
+public:
+  StringAttributeImpl(StringRef Kind, StringRef Val = StringRef())
+      : AttributeImpl(StringAttrEntry), Kind(Kind), Val(Val) {}
+
+  StringRef getStringKind() const { return Kind; }
+  StringRef getStringValue() const { return Val; }
+};
+
+class TypeAttributeImpl : public EnumAttributeImpl {
+  virtual void anchor();
+
+  Type *Ty;
+
+public:
+  TypeAttributeImpl(Attribute::AttrKind Kind, Type *Ty)
+      : EnumAttributeImpl(TypeAttrEntry, Kind), Ty(Ty) {}
+
+  Type *getTypeValue() const { return Ty; }
+};
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class represents a group of attributes that apply to one
+/// element: function, return type, or parameter.
+class AttributeSetNode final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeSetNode, Attribute> {
+  friend TrailingObjects;
+
+  unsigned NumAttrs; ///< Number of attributes in this node.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableAttrs[12] = {};
+
+  AttributeSetNode(ArrayRef<Attribute> Attrs);
+
+public:
+  // AttributeSetNode is uniqued, these should not be available.
+  AttributeSetNode(const AttributeSetNode &) = delete;
+  AttributeSetNode &operator=(const AttributeSetNode &) = delete;
+
+  void operator delete(void *p) { ::operator delete(p); }
+
+  static AttributeSetNode *get(LLVMContext &C, const AttrBuilder &B);
+
+  static AttributeSetNode *get(LLVMContext &C, ArrayRef<Attribute> Attrs);
+
+  /// Return the number of attributes this AttributeSetNode contains.
+  unsigned getNumAttributes() const { return NumAttrs; }
+
+  bool hasAttribute(Attribute::AttrKind Kind) const {
+    return AvailableAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
+  }
+  bool hasAttribute(StringRef Kind) const;
+  bool hasAttributes() const { return NumAttrs != 0; }
+
+  Attribute getAttribute(Attribute::AttrKind Kind) const;
+  Attribute getAttribute(StringRef Kind) const;
+
+  unsigned getAlignment() const;
+  unsigned getStackAlignment() const;
+  uint64_t getDereferenceableBytes() const;
+  uint64_t getDereferenceableOrNullBytes() const;
+  std::pair<unsigned, Optional<unsigned>> getAllocSizeArgs() const;
+  std::string getAsString(bool InAttrGrp) const;
+  Type *getByValType() const;
+
+  using iterator = const Attribute *;
+
+  iterator begin() const { return getTrailingObjects<Attribute>(); }
+  iterator end() const { return begin() + NumAttrs; }
+
+  void Profile(FoldingSetNodeID &ID) const {
+    Profile(ID, makeArrayRef(begin(), end()));
+  }
+
+  static void Profile(FoldingSetNodeID &ID, ArrayRef<Attribute> AttrList) {
+    for (const auto &Attr : AttrList)
+      Attr.Profile(ID);
+  }
+};
+
+using IndexAttrPair = std::pair<unsigned, AttributeSet>;
+
+//===----------------------------------------------------------------------===//
+/// \class
+/// This class represents a set of attributes that apply to the function,
+/// return type, and parameters.
+class AttributeListImpl final
+    : public FoldingSetNode,
+      private TrailingObjects<AttributeListImpl, AttributeSet> {
+  friend class AttributeList;
+  friend TrailingObjects;
+
+private:
+  LLVMContext &Context;
+  unsigned NumAttrSets; ///< Number of entries in this set.
+  /// Bitset with a bit for each available attribute Attribute::AttrKind.
+  uint8_t AvailableFunctionAttrs[12] = {};
+
+  // Helper fn for TrailingObjects class.
+  size_t numTrailingObjects(OverloadToken<AttributeSet>) { return NumAttrSets; }
+
+public:
+  AttributeListImpl(LLVMContext &C, ArrayRef<AttributeSet> Sets);
+
+  // AttributeListImpl is uniqued, these should not be available.
+  AttributeListImpl(const AttributeListImpl &) = delete;
+  AttributeListImpl &operator=(const AttributeListImpl &) = delete;
+
+  void operator delete(void *p) { ::operator delete(p); }
+
+  /// Get the context that created this AttributeListImpl.
+  LLVMContext &getContext() { return Context; }
+
+  /// Return true if the AttributeSet or the FunctionIndex has an
+  /// enum attribute of the given kind.
+  bool hasFnAttribute(Attribute::AttrKind Kind) const {
+    return AvailableFunctionAttrs[Kind / 8] & ((uint64_t)1) << (Kind % 8);
+  }
+
+  using iterator = const AttributeSet *;
+
+  iterator begin() const { return getTrailingObjects<AttributeSet>(); }
+  iterator end() const { return begin() + NumAttrSets; }
+
+  void Profile(FoldingSetNodeID &ID) const;
+  static void Profile(FoldingSetNodeID &ID, ArrayRef<AttributeSet> Nodes);
+
+  void dump() const;
+};
+
+} // end namespace llvm
+
+#endif // LLVM_LIB_IR_ATTRIBUTEIMPL_H
diff --git a/hpvm/llvm_patches/lib/IR/Attributes.cpp b/hpvm/llvm_patches/lib/IR/Attributes.cpp
index 29c47a9e1107524278dcc57c188b320821ba7d86..5c18b8d294920118b8e3ab74aff6df79c90311c2 100644
--- a/hpvm/llvm_patches/lib/IR/Attributes.cpp
+++ b/hpvm/llvm_patches/lib/IR/Attributes.cpp
@@ -251,6 +251,18 @@ bool Attribute::hasAttribute(StringRef Kind) const {
   return pImpl && pImpl->hasAttribute(Kind);
 }
 
+unsigned Attribute::getBuffSize() const {
+  assert((hasAttribute(Attribute::BufferIn) || hasAttribute(Attribute::BufferOut) ||
+          hasAttribute(Attribute::Priv)) && "Trying to get BuffSize from non-buffering attribute!");
+  return pImpl->getValueAsInt();
+}
+
+unsigned Attribute::getChannelDepth() const {
+  assert(hasAttribute(Attribute::Channel) &&
+         "Trying to get ChannelDepth from non-channel attribute!");
+  return pImpl->getValueAsInt();
+}
+
 unsigned Attribute::getAlignment() const {
   assert(hasAttribute(Attribute::Alignment) &&
          "Trying to get alignment from non-alignment attribute!");
@@ -411,6 +423,30 @@ std::string Attribute::getAsString(bool InAttrGrp) const {
     return "out";
   if (hasAttribute(Attribute::InOut))
     return "inout";
+  if (hasAttribute(Attribute::Priv)) {
+    std::string Result;
+    Result += "priv ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
+  if (hasAttribute(Attribute::BufferIn)) {
+    std::string Result;
+    Result += "bufferin ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
+  if (hasAttribute(Attribute::BufferOut)) {
+    std::string Result;
+    Result += "bufferout ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
+  if (hasAttribute(Attribute::Channel)) {
+    std::string Result;
+    Result += "channel ";
+    Result += utostr(getValueAsInt());
+    return Result;
+  }
 
   if (hasAttribute(Attribute::ByVal)) {
     std::string Result;
@@ -837,6 +873,14 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, const AttrBuilder &B) {
       Attr = Attribute::getWithAllocSizeArgs(C, A.first, A.second);
       break;
     }
+    case Attribute::BufferIn:
+    case Attribute::BufferOut:
+    case Attribute::Priv:
+      Attr = Attribute::get(C, Kind, B.getBuffSize());
+      break;
+    case Attribute::Channel:
+      Attr = Attribute::get(C, Kind, B.getChannelDepth());
+      break;
     default:
       Attr = Attribute::get(C, Kind);
     }
@@ -1483,12 +1527,16 @@ void AttrBuilder::clear() {
   Alignment = StackAlignment = DerefBytes = DerefOrNullBytes = 0;
   AllocSizeArgs = 0;
   ByValType = nullptr;
+  BuffSize = 0;
+  ChannelDepth = 0;
 }
 
 AttrBuilder &AttrBuilder::addAttribute(Attribute::AttrKind Val) {
   assert((unsigned)Val < Attribute::EndAttrKinds && "Attribute out of range!");
   assert(Val != Attribute::Alignment && Val != Attribute::StackAlignment &&
          Val != Attribute::Dereferenceable && Val != Attribute::AllocSize &&
+         Val != Attribute::BufferIn && Val != Attribute::BufferOut &&
+         Val != Attribute::Priv && Val != Attribute::Channel &&
          "Adding integer attribute without adding a value!");
   Attrs[Val] = true;
   return *this;
@@ -1515,6 +1563,10 @@ AttrBuilder &AttrBuilder::addAttribute(Attribute Attr) {
   DerefOrNullBytes =
Attr.getDereferenceableOrNullBytes();
   else if (Kind == Attribute::AllocSize)
     AllocSizeArgs = Attr.getValueAsInt();
+  else if (Kind == Attribute::BufferIn || Kind == Attribute::BufferOut || Kind == Attribute::Priv)
+    BuffSize = Attr.getValueAsInt();
+  else if (Kind == Attribute::Channel)
+    ChannelDepth = Attr.getValueAsInt();
 
   return *this;
 }
@@ -1539,6 +1591,10 @@ AttrBuilder &AttrBuilder::removeAttribute(Attribute::AttrKind Val) {
     DerefOrNullBytes = 0;
   else if (Val == Attribute::AllocSize)
     AllocSizeArgs = 0;
+  else if (Val == Attribute::BufferIn || Val == Attribute::BufferOut || Val == Attribute::Priv)
+    BuffSize = 0;
+  else if (Val == Attribute::Channel)
+    ChannelDepth = 0;
 
   return *this;
 }
@@ -1559,6 +1615,20 @@ std::pair<unsigned, Optional<unsigned>> AttrBuilder::getAllocSizeArgs() const {
   return unpackAllocSizeArgs(AllocSizeArgs);
 }
 
+AttrBuilder &AttrBuilder::addBufferOrPrivAttr(Attribute::AttrKind Kind, unsigned Size) {
+  assert(BuffSize == 0 && "Another mutually-exclusive buffering attribute has already been set!");
+  Attrs[Kind] = true;
+  BuffSize = Size;
+  return *this;
+}
+
+AttrBuilder &AttrBuilder::addChannelAttr(Attribute::AttrKind Kind, unsigned Depth) {
+  assert(ChannelDepth == 0 && "Another channel attribute has already been set!");
+  Attrs[Kind] = true;
+  ChannelDepth = Depth;
+  return *this;
+}
+
 AttrBuilder &AttrBuilder::addAlignmentAttr(unsigned Align) {
   if (Align == 0)
     return *this;
@@ -1644,6 +1714,12 @@ AttrBuilder &AttrBuilder::merge(const AttrBuilder &B) {
   if (!ByValType)
     ByValType = B.ByValType;
 
+  if (!BuffSize)
+    BuffSize = B.BuffSize;
+
+  if (!ChannelDepth)
+    ChannelDepth = B.ChannelDepth;
+
   Attrs |= B.Attrs;
 
   for (auto I : B.td_attrs())
@@ -1672,6 +1748,12 @@ AttrBuilder &AttrBuilder::remove(const AttrBuilder &B) {
   if (B.ByValType)
     ByValType = nullptr;
 
+  if (B.BuffSize)
+    BuffSize = 0;
+
+  if (B.ChannelDepth)
+    ChannelDepth = 0;
+
   Attrs &= ~B.Attrs;
 
   for (auto I : B.td_attrs())
diff --git a/hpvm/llvm_patches/lib/Passes/PassBuilder.cpp b/hpvm/llvm_patches/lib/Passes/PassBuilder.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..aac06593efd9fb0cc1c4f06ba3af6caa688da64e
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Passes/PassBuilder.cpp
@@ -0,0 +1,2301 @@
+//===- Parsing, selection, and construction of pass pipelines -------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// This file provides the implementation of the PassBuilder based on our
+/// static pass registry as well as related functionality. It also provides
+/// helpers to aid in analyzing, debugging, and testing passes and pass
+/// pipelines.
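/// A typical use of this interface is to register the analyses and then parse
/// a textual pipeline description, roughly as follows (exact signatures vary
/// between LLVM versions):
///
/// \code
///   PassBuilder PB;
///   LoopAnalysisManager LAM;
///   FunctionAnalysisManager FAM;
///   CGSCCAnalysisManager CGAM;
///   ModuleAnalysisManager MAM;
///   PB.registerModuleAnalyses(MAM);
///   PB.registerCGSCCAnalyses(CGAM);
///   PB.registerFunctionAnalyses(FAM);
///   PB.registerLoopAnalyses(LAM);
///   PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
///   ModulePassManager MPM;
///   if (auto Err = PB.parsePassPipeline(MPM, "default<O2>"))
///     report_fatal_error("failed to parse pipeline");
/// \endcode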
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Passes/PassBuilder.h" +#include "llvm/ADT/StringSwitch.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AliasAnalysisEvaluator.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFGPrinter.h" +#include "llvm/Analysis/CFLAndersAliasAnalysis.h" +#include "llvm/Analysis/CFLSteensAliasAnalysis.h" +#include "llvm/Analysis/CGSCCPassManager.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/DDG.h" +#include "llvm/Analysis/DemandedBits.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DominanceFrontier.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/IVUsers.h" +#include "llvm/Analysis/LazyCallGraph.h" +#include "llvm/Analysis/LazyValueInfo.h" +#include "llvm/Analysis/LoopAccessAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/ModuleSummaryAnalysis.h" +#include "llvm/Analysis/OptimizationRemarkEmitter.h" +#include "llvm/Analysis/PhiValues.h" +#include "llvm/Analysis/PostDominators.h" +#include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/RegionInfo.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/Analysis/ScopedNoAliasAA.h" +#include "llvm/Analysis/StackSafetyAnalysis.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Analysis/TypeBasedAliasAnalysis.h" +#include "llvm/CodeGen/PreISelIntrinsicLowering.h" +#include "llvm/CodeGen/UnreachableBlockElim.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/IRPrintingPasses.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/SafepointIRVerifier.h" +#include "llvm/IR/Verifier.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/FormatVariadic.h" +#include "llvm/Support/Regex.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h" +#include "llvm/Transforms/IPO/AlwaysInliner.h" +#include "llvm/Transforms/IPO/ArgumentPromotion.h" +#include "llvm/Transforms/IPO/Attributor.h" +#include "llvm/Transforms/IPO/CalledValuePropagation.h" +#include "llvm/Transforms/IPO/ConstantMerge.h" +#include "llvm/Transforms/IPO/CrossDSOCFI.h" +#include "llvm/Transforms/IPO/DeadArgumentElimination.h" +#include "llvm/Transforms/IPO/ElimAvailExtern.h" +#include "llvm/Transforms/IPO/ForceFunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" +#include "llvm/Transforms/IPO/FunctionImport.h" +#include "llvm/Transforms/IPO/GlobalDCE.h" +#include "llvm/Transforms/IPO/GlobalOpt.h" +#include "llvm/Transforms/IPO/GlobalSplit.h" +#include "llvm/Transforms/IPO/HotColdSplitting.h" +#include "llvm/Transforms/IPO/InferFunctionAttrs.h" +#include "llvm/Transforms/IPO/Inliner.h" +#include "llvm/Transforms/IPO/Internalize.h" +#include "llvm/Transforms/IPO/LowerTypeTests.h" +#include "llvm/Transforms/IPO/PartialInlining.h" +#include "llvm/Transforms/IPO/SCCP.h" +#include "llvm/Transforms/IPO/SampleProfile.h" +#include "llvm/Transforms/IPO/StripDeadPrototypes.h" +#include "llvm/Transforms/IPO/SyntheticCountsPropagation.h" +#include "llvm/Transforms/IPO/WholeProgramDevirt.h" +#include 
"llvm/Transforms/InstCombine/InstCombine.h" +#include "llvm/Transforms/Instrumentation.h" +#include "llvm/Transforms/Instrumentation/AddressSanitizer.h" +#include "llvm/Transforms/Instrumentation/BoundsChecking.h" +#include "llvm/Transforms/Instrumentation/CGProfile.h" +#include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" +#include "llvm/Transforms/Instrumentation/GCOVProfiler.h" +#include "llvm/Transforms/Instrumentation/HWAddressSanitizer.h" +#include "llvm/Transforms/Instrumentation/InstrOrderFile.h" +#include "llvm/Transforms/Instrumentation/InstrProfiling.h" +#include "llvm/Transforms/Instrumentation/MemorySanitizer.h" +#include "llvm/Transforms/Instrumentation/PGOInstrumentation.h" +#include "llvm/Transforms/Instrumentation/PoisonChecking.h" +#include "llvm/Transforms/Instrumentation/ThreadSanitizer.h" +#include "llvm/Transforms/Scalar/ADCE.h" +#include "llvm/Transforms/Scalar/AlignmentFromAssumptions.h" +#include "llvm/Transforms/Scalar/BDCE.h" +#include "llvm/Transforms/Scalar/CallSiteSplitting.h" +#include "llvm/Transforms/Scalar/ConstantHoisting.h" +#include "llvm/Transforms/Scalar/CorrelatedValuePropagation.h" +#include "llvm/Transforms/Scalar/DCE.h" +#include "llvm/Transforms/Scalar/DeadStoreElimination.h" +#include "llvm/Transforms/Scalar/DivRemPairs.h" +#include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/Transforms/Scalar/Float2Int.h" +#include "llvm/Transforms/Scalar/GVN.h" +#include "llvm/Transforms/Scalar/GuardWidening.h" +#include "llvm/Transforms/Scalar/IVUsersPrinter.h" +#include "llvm/Transforms/Scalar/IndVarSimplify.h" +#include "llvm/Transforms/Scalar/InductiveRangeCheckElimination.h" +#include "llvm/Transforms/Scalar/InstSimplifyPass.h" +#include "llvm/Transforms/Scalar/JumpThreading.h" +#include "llvm/Transforms/Scalar/LICM.h" +#include "llvm/Transforms/Scalar/LoopAccessAnalysisPrinter.h" +#include "llvm/Transforms/Scalar/LoopDataPrefetch.h" +#include "llvm/Transforms/Scalar/LoopDeletion.h" +#include "llvm/Transforms/Scalar/LoopDistribute.h" +#include "llvm/Transforms/Scalar/LoopFuse.h" +#include "llvm/Transforms/Scalar/LoopIdiomRecognize.h" +#include "llvm/Transforms/Scalar/LoopInstSimplify.h" +#include "llvm/Transforms/Scalar/LoopLoadElimination.h" +#include "llvm/Transforms/Scalar/LoopPassManager.h" +#include "llvm/Transforms/Scalar/LoopPredication.h" +#include "llvm/Transforms/Scalar/LoopRotation.h" +#include "llvm/Transforms/Scalar/LoopSimplifyCFG.h" +#include "llvm/Transforms/Scalar/LoopSink.h" +#include "llvm/Transforms/Scalar/LoopStrengthReduce.h" +#include "llvm/Transforms/Scalar/LoopUnrollAndJamPass.h" +#include "llvm/Transforms/Scalar/LoopUnrollPass.h" +#include "llvm/Transforms/Scalar/LowerAtomic.h" +#include "llvm/Transforms/Scalar/LowerExpectIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerGuardIntrinsic.h" +#include "llvm/Transforms/Scalar/LowerWidenableCondition.h" +#include "llvm/Transforms/Scalar/MakeGuardsExplicit.h" +#include "llvm/Transforms/Scalar/MemCpyOptimizer.h" +#include "llvm/Transforms/Scalar/MergedLoadStoreMotion.h" +#include "llvm/Transforms/Scalar/MergeICmps.h" +#include "llvm/Transforms/Scalar/NaryReassociate.h" +#include "llvm/Transforms/Scalar/NewGVN.h" +#include "llvm/Transforms/Scalar/PartiallyInlineLibCalls.h" +#include "llvm/Transforms/Scalar/Reassociate.h" +#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" +#include "llvm/Transforms/Scalar/SCCP.h" +#include "llvm/Transforms/Scalar/SROA.h" +#include "llvm/Transforms/Scalar/Scalarizer.h" +#include 
"llvm/Transforms/Scalar/SimpleLoopUnswitch.h" +#include "llvm/Transforms/Scalar/SimplifyCFG.h" +#include "llvm/Transforms/Scalar/Sink.h" +#include "llvm/Transforms/Scalar/SpeculateAroundPHIs.h" +#include "llvm/Transforms/Scalar/SpeculativeExecution.h" +#include "llvm/Transforms/Scalar/TailRecursionElimination.h" +#include "llvm/Transforms/Scalar/WarnMissedTransforms.h" +#include "llvm/Transforms/Utils/AddDiscriminators.h" +#include "llvm/Transforms/Utils/BreakCriticalEdges.h" +#include "llvm/Transforms/Utils/CanonicalizeAliases.h" +#include "llvm/Transforms/Utils/EntryExitInstrumenter.h" +#include "llvm/Transforms/Utils/LCSSA.h" +#include "llvm/Transforms/Utils/LibCallsShrinkWrap.h" +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/Transforms/Utils/LowerInvoke.h" +#include "llvm/Transforms/Utils/Mem2Reg.h" +#include "llvm/Transforms/Utils/NameAnonGlobals.h" +#include "llvm/Transforms/Utils/SymbolRewriter.h" +#include "llvm/Transforms/Vectorize/LoadStoreVectorizer.h" +#include "llvm/Transforms/Vectorize/LoopVectorize.h" +#include "llvm/Transforms/Vectorize/SLPVectorizer.h" + +using namespace llvm; + +static cl::opt<unsigned> MaxDevirtIterations("pm-max-devirt-iterations", + cl::ReallyHidden, cl::init(4)); +static cl::opt<bool> + RunPartialInlining("enable-npm-partial-inlining", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Run Partial inlinining pass")); + +static cl::opt<bool> + RunNewGVN("enable-npm-newgvn", cl::init(false), + cl::Hidden, cl::ZeroOrMore, + cl::desc("Run NewGVN instead of GVN")); + +static cl::opt<bool> EnableGVNHoist( + "enable-npm-gvn-hoist", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); + +static cl::opt<bool> EnableGVNSink( + "enable-npm-gvn-sink", cl::init(false), cl::Hidden, + cl::desc("Enable the GVN hoisting pass for the new PM (default = off)")); + +static cl::opt<bool> EnableUnrollAndJam( + "enable-npm-unroll-and-jam", cl::init(false), cl::Hidden, + cl::desc("Enable the Unroll and Jam pass for the new PM (default = off)")); + +static cl::opt<bool> EnableSyntheticCounts( + "enable-npm-synthetic-counts", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Run synthetic function entry count generation " + "pass")); + +static Regex DefaultAliasRegex( + "^(default|thinlto-pre-link|thinlto|lto-pre-link|lto)<(O[0123sz])>$"); + +// This option is used in simplifying testing SampleFDO optimizations for +// profile loading. +static cl::opt<bool> + EnableCHR("enable-chr-npm", cl::init(true), cl::Hidden, + cl::desc("Enable control height reduction optimization (CHR)")); + +PipelineTuningOptions::PipelineTuningOptions() { + LoopInterleaving = EnableLoopInterleaving; + LoopVectorization = EnableLoopVectorization; + SLPVectorization = RunSLPVectorization; + LoopUnrolling = true; + ForgetAllSCEVInLoopUnroll = ForgetSCEVInLoopUnroll; + LicmMssaOptCap = SetLicmMssaOptCap; + LicmMssaNoAccForPromotionCap = SetLicmMssaNoAccForPromotionCap; +} + +extern cl::opt<bool> EnableHotColdSplit; +extern cl::opt<bool> EnableOrderFileInstrumentation; + +extern cl::opt<bool> FlattenedProfileUsed; + +static bool isOptimizingForSize(PassBuilder::OptimizationLevel Level) { + switch (Level) { + case PassBuilder::O0: + case PassBuilder::O1: + case PassBuilder::O2: + case PassBuilder::O3: + return false; + + case PassBuilder::Os: + case PassBuilder::Oz: + return true; + } + llvm_unreachable("Invalid optimization level!"); +} + +namespace { + +/// No-op module pass which does nothing. 
+struct NoOpModulePass { + PreservedAnalyses run(Module &M, ModuleAnalysisManager &) { + return PreservedAnalyses::all(); + } + static StringRef name() { return "NoOpModulePass"; } +}; + +/// No-op module analysis. +class NoOpModuleAnalysis : public AnalysisInfoMixin<NoOpModuleAnalysis> { + friend AnalysisInfoMixin<NoOpModuleAnalysis>; + static AnalysisKey Key; + +public: + struct Result {}; + Result run(Module &, ModuleAnalysisManager &) { return Result(); } + static StringRef name() { return "NoOpModuleAnalysis"; } +}; + +/// No-op CGSCC pass which does nothing. +struct NoOpCGSCCPass { + PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &, + LazyCallGraph &, CGSCCUpdateResult &UR) { + return PreservedAnalyses::all(); + } + static StringRef name() { return "NoOpCGSCCPass"; } +}; + +/// No-op CGSCC analysis. +class NoOpCGSCCAnalysis : public AnalysisInfoMixin<NoOpCGSCCAnalysis> { + friend AnalysisInfoMixin<NoOpCGSCCAnalysis>; + static AnalysisKey Key; + +public: + struct Result {}; + Result run(LazyCallGraph::SCC &, CGSCCAnalysisManager &, LazyCallGraph &G) { + return Result(); + } + static StringRef name() { return "NoOpCGSCCAnalysis"; } +}; + +/// No-op function pass which does nothing. +struct NoOpFunctionPass { + PreservedAnalyses run(Function &F, FunctionAnalysisManager &) { + return PreservedAnalyses::all(); + } + static StringRef name() { return "NoOpFunctionPass"; } +}; + +/// No-op function analysis. +class NoOpFunctionAnalysis : public AnalysisInfoMixin<NoOpFunctionAnalysis> { + friend AnalysisInfoMixin<NoOpFunctionAnalysis>; + static AnalysisKey Key; + +public: + struct Result {}; + Result run(Function &, FunctionAnalysisManager &) { return Result(); } + static StringRef name() { return "NoOpFunctionAnalysis"; } +}; + +/// No-op loop pass which does nothing. +struct NoOpLoopPass { + PreservedAnalyses run(Loop &L, LoopAnalysisManager &, + LoopStandardAnalysisResults &, LPMUpdater &) { + return PreservedAnalyses::all(); + } + static StringRef name() { return "NoOpLoopPass"; } +}; + +/// No-op loop analysis. +class NoOpLoopAnalysis : public AnalysisInfoMixin<NoOpLoopAnalysis> { + friend AnalysisInfoMixin<NoOpLoopAnalysis>; + static AnalysisKey Key; + +public: + struct Result {}; + Result run(Loop &, LoopAnalysisManager &, LoopStandardAnalysisResults &) { + return Result(); + } + static StringRef name() { return "NoOpLoopAnalysis"; } +}; + +AnalysisKey NoOpModuleAnalysis::Key; +AnalysisKey NoOpCGSCCAnalysis::Key; +AnalysisKey NoOpFunctionAnalysis::Key; +AnalysisKey NoOpLoopAnalysis::Key; + +} // End anonymous namespace. 
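+// Note: these no-op passes and analyses are registered through
+// PassRegistry.def, so they can be requested by name in a textual pipeline
+// (for instance -passes=no-op-module) to exercise the pass manager without
+// changing the IR.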
+ +void PassBuilder::invokePeepholeEPCallbacks( + FunctionPassManager &FPM, PassBuilder::OptimizationLevel Level) { + for (auto &C : PeepholeEPCallbacks) + C(FPM, Level); +} + +void PassBuilder::registerModuleAnalyses(ModuleAnalysisManager &MAM) { +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ + MAM.registerPass([&] { return CREATE_PASS; }); +#include "PassRegistry.def" + + for (auto &C : ModuleAnalysisRegistrationCallbacks) + C(MAM); +} + +void PassBuilder::registerCGSCCAnalyses(CGSCCAnalysisManager &CGAM) { +#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ + CGAM.registerPass([&] { return CREATE_PASS; }); +#include "PassRegistry.def" + + for (auto &C : CGSCCAnalysisRegistrationCallbacks) + C(CGAM); +} + +void PassBuilder::registerFunctionAnalyses(FunctionAnalysisManager &FAM) { +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ + FAM.registerPass([&] { return CREATE_PASS; }); +#include "PassRegistry.def" + + for (auto &C : FunctionAnalysisRegistrationCallbacks) + C(FAM); +} + +void PassBuilder::registerLoopAnalyses(LoopAnalysisManager &LAM) { +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ + LAM.registerPass([&] { return CREATE_PASS; }); +#include "PassRegistry.def" + + for (auto &C : LoopAnalysisRegistrationCallbacks) + C(LAM); +} + +FunctionPassManager +PassBuilder::buildFunctionSimplificationPipeline(OptimizationLevel Level, + ThinLTOPhase Phase, + bool DebugLogging) { + assert(Level != O0 && "Must request optimizations!"); + FunctionPassManager FPM(DebugLogging); + + // Form SSA out of local memory accesses after breaking apart aggregates into + // scalars. + FPM.addPass(SROA()); + + // Catch trivial redundancies + FPM.addPass(EarlyCSEPass(true /* Enable mem-ssa. */)); + + // Hoisting of scalars and load expressions. + if (EnableGVNHoist) + FPM.addPass(GVNHoistPass()); + + // Global value numbering based sinking. + if (EnableGVNSink) { + FPM.addPass(GVNSinkPass()); + FPM.addPass(SimplifyCFGPass()); + } + + // Speculative execution if the target has divergent branches; otherwise nop. + FPM.addPass(SpeculativeExecutionPass()); + + // Optimize based on known information about branches, and cleanup afterward. + FPM.addPass(JumpThreadingPass()); + FPM.addPass(CorrelatedValuePropagationPass()); + FPM.addPass(SimplifyCFGPass()); + if (Level == O3) + FPM.addPass(AggressiveInstCombinePass()); + FPM.addPass(InstCombinePass()); + + if (!isOptimizingForSize(Level)) + FPM.addPass(LibCallsShrinkWrapPass()); + + invokePeepholeEPCallbacks(FPM, Level); + + // For PGO use pipeline, try to optimize memory intrinsics such as memcpy + // using the size value profile. Don't perform this when optimizing for size. + if (PGOOpt && PGOOpt->Action == PGOOptions::IRUse && + !isOptimizingForSize(Level)) + FPM.addPass(PGOMemOPSizeOpt()); + + FPM.addPass(TailCallElimPass()); + FPM.addPass(SimplifyCFGPass()); + + // Form canonically associated expression trees, and simplify the trees using + // basic mathematical properties. For example, this will form (nearly) + // minimal multiplication trees. + FPM.addPass(ReassociatePass()); + + // Add the primary loop simplification pipeline. + // FIXME: Currently this is split into two loop pass pipelines because we run + // some function passes in between them. These can and should be removed + // and/or replaced by scheduling the loop pass equivalents in the correct + // positions. But those equivalent passes aren't powerful enough yet. + // Specifically, `SimplifyCFGPass` and `InstCombinePass` are currently still + // used. 
We have `LoopSimplifyCFGPass` which isn't yet powerful enough to
+  // fully replace `SimplifyCFGPass`, and the closest to the other we have is
+  // `LoopInstSimplify`.
+  LoopPassManager LPM1(DebugLogging), LPM2(DebugLogging);
+
+  // Simplify the loop body. We do this initially to clean up after other loop
+  // passes run, either when iterating on a loop or on inner loops with
+  // implications on the outer loop.
+  LPM1.addPass(LoopInstSimplifyPass());
+  LPM1.addPass(LoopSimplifyCFGPass());
+
+  // Rotate Loop - disable header duplication at -Oz
+  LPM1.addPass(LoopRotatePass(Level != Oz));
+  LPM1.addPass(LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap));
+  LPM1.addPass(SimpleLoopUnswitchPass());
+  LPM2.addPass(IndVarSimplifyPass());
+  LPM2.addPass(LoopIdiomRecognizePass());
+
+  for (auto &C : LateLoopOptimizationsEPCallbacks)
+    C(LPM2, Level);
+
+  LPM2.addPass(LoopDeletionPass());
+  // Do not enable unrolling in the PreLinkThinLTO phase during sample PGO
+  // because it changes the IR in ways that make profile annotation in the
+  // backend compile inaccurate.
+  if ((Phase != ThinLTOPhase::PreLink || !PGOOpt ||
+       PGOOpt->Action != PGOOptions::SampleUse) &&
+      PTO.LoopUnrolling)
+    LPM2.addPass(
+        LoopFullUnrollPass(Level, false, PTO.ForgetAllSCEVInLoopUnroll));
+
+  for (auto &C : LoopOptimizerEndEPCallbacks)
+    C(LPM2, Level);
+
+  // We provide the opt remark emitter pass for LICM to use. We only need to do
+  // this once as it is immutable.
+  FPM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM1), DebugLogging));
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  FPM.addPass(createFunctionToLoopPassAdaptor(std::move(LPM2), DebugLogging));
+
+  // Eliminate redundancies.
+  if (Level != O1) {
+    // These passes add substantial compile time so skip them at O1.
+    FPM.addPass(MergedLoadStoreMotionPass());
+    if (RunNewGVN)
+      FPM.addPass(NewGVNPass());
+    else
+      FPM.addPass(GVN());
+  }
+
+  // Specially optimize memory movement as it doesn't look like dataflow in SSA.
+  FPM.addPass(MemCpyOptPass());
+
+  // Sparse conditional constant propagation.
+  // FIXME: It isn't clear why we do this *after* loop passes rather than
+  // before...
+  FPM.addPass(SCCPPass());
+
+  // Delete dead bit computations (instcombine runs after to fold away the dead
+  // computations, and then ADCE will run later to exploit any new DCE
+  // opportunities that creates).
+  FPM.addPass(BDCEPass());
+
+  // Run instcombine after redundancy and dead bit elimination to exploit
+  // opportunities opened up by them.
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  // Re-consider control flow based optimizations after redundancy elimination,
+  // redo DCE, etc.
+  FPM.addPass(JumpThreadingPass());
+  FPM.addPass(CorrelatedValuePropagationPass());
+  FPM.addPass(DSEPass());
+  FPM.addPass(createFunctionToLoopPassAdaptor(
+      LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap),
+      DebugLogging));
+
+  for (auto &C : ScalarOptimizerLateEPCallbacks)
+    C(FPM, Level);
+
+  // Finally, do an expensive DCE pass to catch all the dead code exposed by
+  // the simplifications and basic cleanup after all the simplifications.
+  FPM.addPass(ADCEPass());
+  FPM.addPass(SimplifyCFGPass());
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  if (EnableCHR && Level == O3 && PGOOpt &&
+      (PGOOpt->Action == PGOOptions::IRUse ||
+       PGOOpt->Action == PGOOptions::SampleUse))
+    FPM.addPass(ControlHeightReductionPass());
+
+  return FPM;
+}
+
+void PassBuilder::addPGOInstrPasses(ModulePassManager &MPM, bool DebugLogging,
+                                    PassBuilder::OptimizationLevel Level,
+                                    bool RunProfileGen, bool IsCS,
+                                    std::string ProfileFile,
+                                    std::string ProfileRemappingFile) {
+  // Generally, running simplification passes and the inliner with a high
+  // threshold results in smaller executables, but there may be cases where
+  // the size grows, so let's be conservative here and skip this simplification
+  // at -Os/Oz. We will not do this inlining for context sensitive PGO (when
+  // IsCS is true).
+  if (!isOptimizingForSize(Level) && !IsCS) {
+    InlineParams IP;
+
+    // In the old pass manager, this is a cl::opt. Should this still be one?
+    IP.DefaultThreshold = 75;
+
+    // FIXME: The hint threshold has the same value used by the regular inliner.
+    // This should probably be lowered after performance testing.
+    // FIXME: this comment is cargo culted from the old pass manager, revisit.
+    IP.HintThreshold = 325;
+
+    CGSCCPassManager CGPipeline(DebugLogging);
+
+    CGPipeline.addPass(InlinerPass(IP));
+
+    FunctionPassManager FPM;
+    FPM.addPass(SROA());
+    FPM.addPass(EarlyCSEPass());    // Catch trivial redundancies.
+    FPM.addPass(SimplifyCFGPass()); // Merge & remove basic blocks.
+    FPM.addPass(InstCombinePass()); // Combine silly sequences.
+    invokePeepholeEPCallbacks(FPM, Level);
+
+    CGPipeline.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
+
+    MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPipeline)));
+  }
+
+  // Delete anything that is now dead to make sure that we don't instrument
+  // dead code. Instrumentation can end up keeping dead code around and
+  // dramatically increase code size.
+  MPM.addPass(GlobalDCEPass());
+
+  if (RunProfileGen) {
+    MPM.addPass(PGOInstrumentationGen(IsCS));
+
+    FunctionPassManager FPM;
+    FPM.addPass(
+        createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+    // Add the profile lowering pass.
+    InstrProfOptions Options;
+    if (!ProfileFile.empty())
+      Options.InstrProfileOutput = ProfileFile;
+    Options.DoCounterPromotion = true;
+    Options.UseBFIInPromotion = IsCS;
+    MPM.addPass(InstrProfiling(Options, IsCS));
+  } else if (!ProfileFile.empty()) {
+    MPM.addPass(PGOInstrumentationUse(ProfileFile, ProfileRemappingFile, IsCS));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+  }
+}
+
+static InlineParams
+getInlineParamsFromOptLevel(PassBuilder::OptimizationLevel Level) {
+  auto O3 = PassBuilder::O3;
+  unsigned OptLevel = Level > O3 ? 2 : Level;
+  unsigned SizeLevel = Level > O3 ? Level - O3 : 0;
+  return getInlineParams(OptLevel, SizeLevel);
+}
+
+ModulePassManager
+PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
+                                               ThinLTOPhase Phase,
+                                               bool DebugLogging) {
+  ModulePassManager MPM(DebugLogging);
+
+  bool HasSampleProfile = PGOOpt && (PGOOpt->Action == PGOOptions::SampleUse);
+
+  // In ThinLTO mode, when a flattened profile is used, all the available
+  // profile information will be annotated in the PreLink phase, so there is
+  // no need to load the profile again in PostLink.
+  bool LoadSampleProfile =
+      HasSampleProfile &&
+      !(FlattenedProfileUsed && Phase == ThinLTOPhase::PostLink);
+
+  // During the ThinLTO backend phase we perform early indirect call promotion
+  // here, before globalopt. Otherwise imported available_externally functions
+  // look unreferenced and are removed. If we are going to load the sample
+  // profile then defer until later.
+  // TODO: See if we can move later and consolidate with the location where
+  // we perform ICP when we are loading a sample profile.
+  // TODO: We pass HasSampleProfile (whether there was a sample profile file
+  // passed to the compile) to the SamplePGO flag of ICP. This is used to
+  // determine whether the new direct calls are annotated with prof metadata.
+  // Ideally this should be determined from whether the IR is annotated with
+  // a sample profile, and not whether a sample profile was provided on the
+  // command line. E.g. for flattened profiles where we will not be reloading
+  // the sample profile in the ThinLTO backend, we ideally shouldn't have to
+  // provide the sample profile file.
+  if (Phase == ThinLTOPhase::PostLink && !LoadSampleProfile)
+    MPM.addPass(PGOIndirectCallPromotion(true /* InLTO */, HasSampleProfile));
+
+  // Do basic inference of function attributes from known properties of system
+  // libraries and other oracles.
+  MPM.addPass(InferFunctionAttrsPass());
+
+  // Create an early function pass manager to clean up the output of the
+  // frontend.
+  FunctionPassManager EarlyFPM(DebugLogging);
+  EarlyFPM.addPass(SimplifyCFGPass());
+  EarlyFPM.addPass(SROA());
+  EarlyFPM.addPass(EarlyCSEPass());
+  EarlyFPM.addPass(LowerExpectIntrinsicPass());
+  if (Level == O3)
+    EarlyFPM.addPass(CallSiteSplittingPass());
+
+  // In the SamplePGO ThinLTO backend, we need instcombine before profile
+  // annotation to convert bitcasts to direct calls so that they can be inlined
+  // during the profile annotation preparation step.
+  // More details about SamplePGO design can be found in:
+  // https://research.google.com/pubs/pub45290.html
+  // FIXME: revisit how SampleProfileLoad/Inliner/ICP is structured.
+  if (LoadSampleProfile)
+    EarlyFPM.addPass(InstCombinePass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+  if (LoadSampleProfile) {
+    // Annotate sample profile right after early FPM to ensure freshness of
+    // the debug info.
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+                                        PGOOpt->ProfileRemappingFile,
+                                        Phase == ThinLTOPhase::PreLink));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+    // Do not invoke ICP in the ThinLTO PreLink phase as it makes it hard
+    // for the profile annotation to be accurate in the ThinLTO backend.
+    if (Phase != ThinLTOPhase::PreLink)
+      // We perform early indirect call promotion here, before globalopt.
+      // This is important for the ThinLTO backend phase because otherwise
+      // imported available_externally functions look unreferenced and are
+      // removed.
+      MPM.addPass(PGOIndirectCallPromotion(Phase == ThinLTOPhase::PostLink,
+                                           true /* SamplePGO */));
+  }
+
+  // Interprocedural constant propagation now that basic cleanup has occurred
+  // and prior to optimizing globals.
+  // FIXME: This position in the pipeline hasn't been carefully considered in
+  // years, it should be re-analyzed.
+  MPM.addPass(IPSCCPPass());
+
+  // Attach metadata to indirect call sites indicating the set of functions
+  // they may target at run-time. This should follow IPSCCP.
+  MPM.addPass(CalledValuePropagationPass());
+
+  // Optimize globals to try and fold them into constants.
+  MPM.addPass(GlobalOptPass());
+
+  // Promote any localized globals to SSA registers.
+  // FIXME: Should this instead be a run of SROA?
+  // FIXME: We should probably run instcombine and simplify-cfg afterward to
+  // delete control flows that are dead once globals have been folded to
+  // constants.
+  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+  // Remove any dead arguments exposed by cleanups and constant folding of
+  // globals.
+  MPM.addPass(DeadArgumentEliminationPass());
+
+  // Create a small function pass pipeline to clean up after all the global
+  // optimizations.
+  FunctionPassManager GlobalCleanupPM(DebugLogging);
+  GlobalCleanupPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(GlobalCleanupPM, Level);
+
+  GlobalCleanupPM.addPass(SimplifyCFGPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(GlobalCleanupPM)));
+
+  // Add all the requested passes for instrumentation PGO, if requested.
+  if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+      (PGOOpt->Action == PGOOptions::IRInstr ||
+       PGOOpt->Action == PGOOptions::IRUse)) {
+    addPGOInstrPasses(MPM, DebugLogging, Level,
+                      /* RunProfileGen */ PGOOpt->Action == PGOOptions::IRInstr,
+                      /* IsCS */ false, PGOOpt->ProfileFile,
+                      PGOOpt->ProfileRemappingFile);
+    MPM.addPass(PGOIndirectCallPromotion(false, false));
+  }
+  if (PGOOpt && Phase != ThinLTOPhase::PostLink &&
+      PGOOpt->CSAction == PGOOptions::CSIRInstr)
+    MPM.addPass(PGOInstrumentationGenCreateVar(PGOOpt->CSProfileGenFile));
+
+  // Synthesize function entry counts for non-PGO compilation.
+  if (EnableSyntheticCounts && !PGOOpt)
+    MPM.addPass(SyntheticCountsPropagation());
+
+  // Require the GlobalsAA analysis for the module so we can query it within
+  // the CGSCC pipeline.
+  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+
+  // Require the ProfileSummaryAnalysis for the module so we can query it within
+  // the inliner pass.
+  MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+
+  // Now begin the main postorder CGSCC pipeline.
+  // FIXME: The current CGSCC pipeline has its origins in the legacy pass
+  // manager and trying to emulate its precise behavior. Much of this doesn't
+  // make a lot of sense and we should revisit the core CGSCC structure.
+  CGSCCPassManager MainCGPipeline(DebugLogging);
+
+  // Note: historically, the PruneEH pass was run first to deduce nounwind and
+  // generally clean up exception handling overhead. It isn't clear this is
+  // valuable as the inliner doesn't currently care whether it is inlining an
+  // invoke or a call.
+
+  // Run the inliner first.
The theory is that we are walking bottom-up and so
+  // the callees have already been fully optimized, and we want to inline them
+  // into the callers so that our optimizations can reflect that.
+  // For the PreLinkThinLTO phase, we disable the hot-caller heuristic for
+  // sample PGO because it makes profile annotation in the backend inaccurate.
+  InlineParams IP = getInlineParamsFromOptLevel(Level);
+  if (Phase == ThinLTOPhase::PreLink && PGOOpt &&
+      PGOOpt->Action == PGOOptions::SampleUse)
+    IP.HotCallSiteThreshold = 0;
+  MainCGPipeline.addPass(InlinerPass(IP));
+
+  // Now deduce any function attributes based on the current code.
+  MainCGPipeline.addPass(PostOrderFunctionAttrsPass());
+
+  // When at O3 add argument promotion to the pass pipeline.
+  // FIXME: It isn't at all clear why this should be limited to O3.
+  if (Level == O3)
+    MainCGPipeline.addPass(ArgumentPromotionPass());
+
+  // Lastly, add the core function simplification pipeline nested inside the
+  // CGSCC walk.
+  MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(
+      buildFunctionSimplificationPipeline(Level, Phase, DebugLogging)));
+
+  for (auto &C : CGSCCOptimizerLateEPCallbacks)
+    C(MainCGPipeline, Level);
+
+  // We wrap the CGSCC pipeline in a devirtualization repeater. This will try
+  // to detect when we devirtualize indirect calls and iterate the SCC passes
+  // in that case to try and catch knock-on inlining or function attrs
+  // opportunities. Then we add it to the module pipeline by walking the SCCs
+  // in postorder (or bottom-up).
+  MPM.addPass(
+      createModuleToPostOrderCGSCCPassAdaptor(createDevirtSCCRepeatedPass(
+          std::move(MainCGPipeline), MaxDevirtIterations)));
+
+  return MPM;
+}
+
+ModulePassManager PassBuilder::buildModuleOptimizationPipeline(
+    OptimizationLevel Level, bool DebugLogging, bool LTOPreLink) {
+  ModulePassManager MPM(DebugLogging);
+
+  // Optimize globals now that the module is fully simplified.
+  MPM.addPass(GlobalOptPass());
+  MPM.addPass(GlobalDCEPass());
+
+  // Run the partial inlining pass to partially inline functions that have
+  // large bodies.
+  if (RunPartialInlining)
+    MPM.addPass(PartialInlinerPass());
+
+  // Remove avail extern fns and globals definitions since we aren't compiling
+  // an object file for later LTO. For LTO we want to preserve these so they
+  // are eligible for inlining at link-time. Note if they are unreferenced they
+  // will be removed by GlobalDCE later, so this only impacts referenced
+  // available externally globals. Eventually they will be suppressed during
+  // codegen, but eliminating here enables more opportunity for GlobalDCE as it
+  // may make globals referenced by available external functions dead and saves
+  // running remaining passes on the eliminated functions. These should be
+  // preserved during prelinking for link-time inlining decisions.
+  if (!LTOPreLink)
+    MPM.addPass(EliminateAvailableExternallyPass());
+
+  if (EnableOrderFileInstrumentation)
+    MPM.addPass(InstrOrderFilePass());
+
+  // Do RPO function attribute inference across the module to forward-propagate
+  // attributes where applicable.
+  // FIXME: Is this really an optimization rather than a canonicalization?
+  MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass. We don't want to do this in the LTOPreLink phase as
+  // cross-module inlining has not been done yet. The context sensitive
+  // instrumentation is after all the inlines are done.
+  if (!LTOPreLink && PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
+  // Re-require GlobalsAA here prior to function passes. This is particularly
+  // useful as the above will have inlined, DCE'ed, and function-attr
+  // propagated everything. We should at this point have a reasonably minimal
+  // and richly annotated call graph. By computing aliasing and mod/ref
+  // information for all local globals here, the late loop passes and notably
+  // the vectorizer will be able to use them to help recognize vectorizable
+  // memory operations.
+  MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
+
+  FunctionPassManager OptimizePM(DebugLogging);
+  OptimizePM.addPass(Float2IntPass());
+  // FIXME: We need to run some loop optimizations to re-rotate loops after
+  // simplify-cfg and others undo their rotation.
+
+  // Optimize the loop execution. These passes operate on entire loop nests
+  // rather than on each loop in an inside-out manner, and so they are actually
+  // function passes.
+
+  for (auto &C : VectorizerStartEPCallbacks)
+    C(OptimizePM, Level);
+
+  // First rotate loops that may have been un-rotated by prior passes.
+  OptimizePM.addPass(
+      createFunctionToLoopPassAdaptor(LoopRotatePass(), DebugLogging));
+
+  // Distribute loops to allow partial vectorization. I.e. isolate dependences
+  // into separate loops that would otherwise inhibit vectorization. This is
+  // currently only performed for loops marked with the metadata
+  // llvm.loop.distribute=true or when -enable-loop-distribute is specified.
+  OptimizePM.addPass(LoopDistributePass());
+
+  // Now run the core loop vectorizer.
+  OptimizePM.addPass(LoopVectorizePass(
+      LoopVectorizeOptions(!PTO.LoopInterleaving, !PTO.LoopVectorization)));
+
+  // Eliminate loads by forwarding stores from the previous iteration to loads
+  // of the current iteration.
+  OptimizePM.addPass(LoopLoadEliminationPass());
+
+  // Cleanup after the loop optimization passes.
+  OptimizePM.addPass(InstCombinePass());
+
+  // Now that we've formed fast-to-execute loop structures, we do further
+  // optimizations. These are run afterward as they might block doing complex
+  // analyses and transforms such as what are needed for loop vectorization.
+
+  // Cleanup after loop vectorization, etc. Simplification passes like CVP and
+  // GVN, loop transforms, and others have already run, so it's now better to
+  // convert to more optimized IR using more aggressive simplify CFG options.
+  // The extra sinking transform can create larger basic blocks, so do this
+  // before SLP vectorization.
+  OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
+                                     forwardSwitchCondToPhi(true).
+                                     convertSwitchToLookupTable(true).
+                                     needCanonicalLoops(false).
+                                     sinkCommonInsts(true)));
+
+  // Optimize parallel scalar instruction chains into SIMD instructions.
+  if (PTO.SLPVectorization)
+    OptimizePM.addPass(SLPVectorizerPass());
+
+  OptimizePM.addPass(InstCombinePass());
+
+  // Unroll small loops to hide loop backedge latency and saturate any parallel
+  // execution resources of an out-of-order processor. We also then need to
+  // clean up redundancies and loop invariant code.
+ // FIXME: It would be really good to use a loop-integrated instruction + // combiner for cleanup here so that the unrolling and LICM can be pipelined + // across the loop nests. + // We do UnrollAndJam in a separate LPM to ensure it happens before unroll + if (EnableUnrollAndJam) { + OptimizePM.addPass( + createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level))); + } + if (PTO.LoopUnrolling) + OptimizePM.addPass(LoopUnrollPass( + LoopUnrollOptions(Level, false, PTO.ForgetAllSCEVInLoopUnroll))); + OptimizePM.addPass(WarnMissedTransformationsPass()); + OptimizePM.addPass(InstCombinePass()); + OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>()); + OptimizePM.addPass(createFunctionToLoopPassAdaptor( + LICMPass(PTO.LicmMssaOptCap, PTO.LicmMssaNoAccForPromotionCap), + DebugLogging)); + + // Now that we've vectorized and unrolled loops, we may have more refined + // alignment information, try to re-derive it here. + OptimizePM.addPass(AlignmentFromAssumptionsPass()); + + // Split out cold code. Splitting is done late to avoid hiding context from + // other optimizations and inadvertently regressing performance. The tradeoff + // is that this has a higher code size cost than splitting early. + if (EnableHotColdSplit && !LTOPreLink) + MPM.addPass(HotColdSplittingPass()); + + // LoopSink pass sinks instructions hoisted by LICM, which serves as a + // canonicalization pass that enables other optimizations. As a result, + // LoopSink pass needs to be a very late IR pass to avoid undoing LICM + // result too early. + OptimizePM.addPass(LoopSinkPass()); + + // And finally clean up LCSSA form before generating code. + OptimizePM.addPass(InstSimplifyPass()); + + // This hoists/decomposes div/rem ops. It should run after other sink/hoist + // passes to avoid re-sinking, but before SimplifyCFG because it can allow + // flattening of blocks. + OptimizePM.addPass(DivRemPairsPass()); + + // LoopSink (and other loop passes since the last simplifyCFG) might have + // resulted in single-entry-single-exit or empty blocks. Clean up the CFG. + OptimizePM.addPass(SimplifyCFGPass()); + + // Optimize PHIs by speculating around them when profitable. Note that this + // pass needs to be run after any PRE or similar pass as it is essentially + // inserting redundancies into the program. This even includes SimplifyCFG. + OptimizePM.addPass(SpeculateAroundPHIsPass()); + + for (auto &C : OptimizerLastEPCallbacks) + C(OptimizePM, Level); + + // Add the core optimizing pipeline. + MPM.addPass(createModuleToFunctionPassAdaptor(std::move(OptimizePM))); + + MPM.addPass(CGProfilePass()); + + // Now we need to do some global optimization transforms. + // FIXME: It would seem like these should come first in the optimization + // pipeline and maybe be the bottom of the canonicalization pipeline? Weird + // ordering here. + MPM.addPass(GlobalDCEPass()); + MPM.addPass(ConstantMergePass()); + + return MPM; +} + +ModulePassManager +PassBuilder::buildPerModuleDefaultPipeline(OptimizationLevel Level, + bool DebugLogging, bool LTOPreLink) { + assert(Level != O0 && "Must request optimizations for the default pipeline!"); + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // Apply module pipeline start EP callback. 
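+  // As an illustration (sketch only, not upstream code), a frontend would
+  // typically have registered such a callback as:
+  //
+  //   PB.registerPipelineStartEPCallback(
+  //       [](ModulePassManager &MPM) { MPM.addPass(GlobalDCEPass()); });
+  //
+  // with GlobalDCEPass standing in for an arbitrary module pass.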
+ for (auto &C : PipelineStartEPCallbacks) + C(MPM); + + if (PGOOpt && PGOOpt->SamplePGOSupport) + MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + + // Add the core simplification pipeline. + MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::None, + DebugLogging)); + + // Now add the optimization pipeline. + MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging, LTOPreLink)); + + return MPM; +} + +ModulePassManager +PassBuilder::buildThinLTOPreLinkDefaultPipeline(OptimizationLevel Level, + bool DebugLogging) { + assert(Level != O0 && "Must request optimizations for the default pipeline!"); + + ModulePassManager MPM(DebugLogging); + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + if (PGOOpt && PGOOpt->SamplePGOSupport) + MPM.addPass(createModuleToFunctionPassAdaptor(AddDiscriminatorsPass())); + + // Apply module pipeline start EP callback. + for (auto &C : PipelineStartEPCallbacks) + C(MPM); + + // If we are planning to perform ThinLTO later, we don't bloat the code with + // unrolling/vectorization/... now. Just simplify the module as much as we + // can. + MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PreLink, + DebugLogging)); + + // Run partial inlining pass to partially inline functions that have + // large bodies. + // FIXME: It isn't clear whether this is really the right place to run this + // in ThinLTO. Because there is another canonicalization and simplification + // phase that will run after the thin link, running this here ends up with + // less information than will be available later and it may grow functions in + // ways that aren't beneficial. + if (RunPartialInlining) + MPM.addPass(PartialInlinerPass()); + + // Reduce the size of the IR as much as possible. + MPM.addPass(GlobalOptPass()); + + return MPM; +} + +ModulePassManager PassBuilder::buildThinLTODefaultPipeline( + OptimizationLevel Level, bool DebugLogging, + const ModuleSummaryIndex *ImportSummary) { + ModulePassManager MPM(DebugLogging); + + if (ImportSummary) { + // These passes import type identifier resolutions for whole-program + // devirtualization and CFI. They must run early because other passes may + // disturb the specific instruction patterns that these passes look for, + // creating dependencies on resolutions that may not appear in the summary. + // + // For example, GVN may transform the pattern assume(type.test) appearing in + // two basic blocks into assume(phi(type.test, type.test)), which would + // transform a dependency on a WPD resolution into a dependency on a type + // identifier resolution for CFI. + // + // Also, WPD has access to more precise information than ICP and can + // devirtualize more effectively, so it should operate on the IR first. + // + // The WPD and LowerTypeTest passes need to run at -O0 to lower type + // metadata and intrinsics. + MPM.addPass(WholeProgramDevirtPass(nullptr, ImportSummary)); + MPM.addPass(LowerTypeTestsPass(nullptr, ImportSummary)); + } + + if (Level == O0) + return MPM; + + // Force any function attributes we want the rest of the pipeline to observe. + MPM.addPass(ForceFunctionAttrsPass()); + + // Add the core simplification pipeline. + MPM.addPass(buildModuleSimplificationPipeline(Level, ThinLTOPhase::PostLink, + DebugLogging)); + + // Now add the optimization pipeline. 
+  MPM.addPass(buildModuleOptimizationPipeline(Level, DebugLogging));
+
+  return MPM;
+}
+
+ModulePassManager
+PassBuilder::buildLTOPreLinkDefaultPipeline(OptimizationLevel Level,
+                                            bool DebugLogging) {
+  assert(Level != O0 && "Must request optimizations for the default pipeline!");
+  // FIXME: We should use a customized pre-link pipeline!
+  return buildPerModuleDefaultPipeline(Level, DebugLogging,
+                                       /* LTOPreLink */ true);
+}
+
+ModulePassManager
+PassBuilder::buildLTODefaultPipeline(OptimizationLevel Level, bool DebugLogging,
+                                     ModuleSummaryIndex *ExportSummary) {
+  ModulePassManager MPM(DebugLogging);
+
+  if (Level == O0) {
+    // The WPD and LowerTypeTest passes need to run at -O0 to lower type
+    // metadata and intrinsics.
+    MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+    return MPM;
+  }
+
+  if (PGOOpt && PGOOpt->Action == PGOOptions::SampleUse) {
+    // Load sample profile before running the LTO optimization pipeline.
+    MPM.addPass(SampleProfileLoaderPass(PGOOpt->ProfileFile,
+                                        PGOOpt->ProfileRemappingFile,
+                                        false /* ThinLTOPhase::PreLink */));
+    // Cache ProfileSummaryAnalysis once to avoid the potential need to insert
+    // RequireAnalysisPass for PSI before subsequent non-module passes.
+    MPM.addPass(RequireAnalysisPass<ProfileSummaryAnalysis, Module>());
+  }
+
+  // Remove unused virtual tables to improve the quality of code generated by
+  // whole-program devirtualization and bitset lowering.
+  MPM.addPass(GlobalDCEPass());
+
+  // Force any function attributes we want the rest of the pipeline to observe.
+  MPM.addPass(ForceFunctionAttrsPass());
+
+  // Do basic inference of function attributes from known properties of system
+  // libraries and other oracles.
+  MPM.addPass(InferFunctionAttrsPass());
+
+  if (Level > 1) {
+    FunctionPassManager EarlyFPM(DebugLogging);
+    EarlyFPM.addPass(CallSiteSplittingPass());
+    MPM.addPass(createModuleToFunctionPassAdaptor(std::move(EarlyFPM)));
+
+    // Indirect call promotion. This should promote all the targets that are
+    // left by the earlier promotion pass that promotes intra-module targets.
+    // This two-step promotion is to save compile time. For LTO, it should
+    // produce the same result as if we only do promotion here.
+    MPM.addPass(PGOIndirectCallPromotion(
+        true /* InLTO */, PGOOpt && PGOOpt->Action == PGOOptions::SampleUse));
+    // Propagate constants at call sites into the functions they call. This
+    // opens opportunities for globalopt (and inlining) by substituting function
+    // pointers passed as arguments to direct uses of functions.
+    MPM.addPass(IPSCCPPass());
+
+    // Attach metadata to indirect call sites indicating the set of functions
+    // they may target at run-time. This should follow IPSCCP.
+    MPM.addPass(CalledValuePropagationPass());
+  }
+
+  // Now deduce any function attributes based on the current code.
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+      PostOrderFunctionAttrsPass()));
+
+  // Do RPO function attribute inference across the module to forward-propagate
+  // attributes where applicable.
+  // FIXME: Is this really an optimization rather than a canonicalization?
+  MPM.addPass(ReversePostOrderFunctionAttrsPass());
+
+  // Use in-range annotations on GEP indices to split globals where beneficial.
+  MPM.addPass(GlobalSplitPass());
+
+  // Run whole program optimization of virtual calls when the list of callees
+  // is fixed.
+  MPM.addPass(WholeProgramDevirtPass(ExportSummary, nullptr));
+
+  // Stop here at -O1.
+  if (Level == 1) {
+    // The LowerTypeTestsPass needs to run to lower type metadata and the
+    // type.test intrinsics. The pass does nothing if CFI is disabled.
+    MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+    return MPM;
+  }
+
+  // Optimize globals to try and fold them into constants.
+  MPM.addPass(GlobalOptPass());
+
+  // Promote any localized globals to SSA registers.
+  MPM.addPass(createModuleToFunctionPassAdaptor(PromotePass()));
+
+  // Linking modules together can lead to duplicated global constants; keep
+  // only one copy of each constant.
+  MPM.addPass(ConstantMergePass());
+
+  // Remove unused arguments from functions.
+  MPM.addPass(DeadArgumentEliminationPass());
+
+  // Reduce the code after globalopt and ipsccp. Both can open up significant
+  // simplification opportunities, and both can propagate functions through
+  // function pointers. When this happens, we often have to resolve varargs
+  // calls, etc., so let instcombine do this.
+  FunctionPassManager PeepholeFPM(DebugLogging);
+  if (Level == O3)
+    PeepholeFPM.addPass(AggressiveInstCombinePass());
+  PeepholeFPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(PeepholeFPM, Level);
+
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(PeepholeFPM)));
+
+  // Note: historically, the PruneEH pass was run first to deduce nounwind and
+  // generally clean up exception handling overhead. It isn't clear this is
+  // valuable as the inliner doesn't currently care whether it is inlining an
+  // invoke or a call.
+  // Run the inliner now.
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+      InlinerPass(getInlineParamsFromOptLevel(Level))));
+
+  // Optimize globals again after we ran the inliner.
+  MPM.addPass(GlobalOptPass());
+
+  // Garbage collect dead functions.
+  // FIXME: Add the ArgumentPromotion pass once it's ported.
+  MPM.addPass(GlobalDCEPass());
+
+  FunctionPassManager FPM(DebugLogging);
+  // The IPO passes may leave cruft around. Clean up after them.
+  FPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(FPM, Level);
+
+  FPM.addPass(JumpThreadingPass());
+
+  // Do a post inline PGO instrumentation and use pass. This is a context
+  // sensitive PGO pass.
+  if (PGOOpt) {
+    if (PGOOpt->CSAction == PGOOptions::CSIRInstr)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ true,
+                        /* IsCS */ true, PGOOpt->CSProfileGenFile,
+                        PGOOpt->ProfileRemappingFile);
+    else if (PGOOpt->CSAction == PGOOptions::CSIRUse)
+      addPGOInstrPasses(MPM, DebugLogging, Level, /* RunProfileGen */ false,
+                        /* IsCS */ true, PGOOpt->ProfileFile,
+                        PGOOpt->ProfileRemappingFile);
+  }
+
+  // Break up allocas.
+  FPM.addPass(SROA());
+
+  // LTO provides additional opportunities for tailcall elimination due to
+  // link-time inlining, and visibility of nocapture attribute.
+  FPM.addPass(TailCallElimPass());
+
+  // Run a few AA driver optimizations here and now to clean up the code.
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+
+  MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(
+      PostOrderFunctionAttrsPass()));
+  // FIXME: here we run IP alias analysis in the legacy PM.
+
+  FunctionPassManager MainFPM;
+
+  // FIXME: once we fix LoopPass Manager, add LICM here.
+  // FIXME: once we provide support for enabling MLSM, add it here.
+  if (RunNewGVN)
+    MainFPM.addPass(NewGVNPass());
+  else
+    MainFPM.addPass(GVN());
+
+  // Remove dead memcpy()'s.
+  MainFPM.addPass(MemCpyOptPass());
+
+  // Nuke dead stores.
+  MainFPM.addPass(DSEPass());
+
+  // FIXME: at this point, we run a bunch of loop passes:
+  // indVarSimplify, loopDeletion, loopInterchange, loopUnroll,
+  // loopVectorize. Enable them once the remaining issues with the LPM
+  // are sorted out.
+
+  MainFPM.addPass(InstCombinePass());
+  MainFPM.addPass(SimplifyCFGPass());
+  MainFPM.addPass(SCCPPass());
+  MainFPM.addPass(InstCombinePass());
+  MainFPM.addPass(BDCEPass());
+
+  // FIXME: We may want to run SLPVectorizer here.
+  // After vectorization, assume intrinsics may tell us more
+  // about pointer alignments.
+#if 0
+  MainFPM.add(AlignmentFromAssumptionsPass());
+#endif
+
+  // FIXME: Conditionally run LoadCombine here, after it's ported
+  // (in case we still have this pass, given its questionable usefulness).
+
+  MainFPM.addPass(InstCombinePass());
+  invokePeepholeEPCallbacks(MainFPM, Level);
+  MainFPM.addPass(JumpThreadingPass());
+  MPM.addPass(createModuleToFunctionPassAdaptor(std::move(MainFPM)));
+
+  // Create a function that performs CFI checks for cross-DSO calls with
+  // targets in the current module.
+  MPM.addPass(CrossDSOCFIPass());
+
+  // Lower type metadata and the type.test intrinsic. This pass supports
+  // clang's control flow integrity mechanisms (-fsanitize=cfi*) and needs
+  // to be run at link time if CFI is enabled. This pass does nothing if
+  // CFI is disabled.
+  MPM.addPass(LowerTypeTestsPass(ExportSummary, nullptr));
+
+  // Enable splitting late in the FullLTO post-link pipeline. This is done in
+  // the same stage in the old pass manager (\ref addLateLTOOptimizationPasses).
+  if (EnableHotColdSplit)
+    MPM.addPass(HotColdSplittingPass());
+
+  // Add late LTO optimization passes.
+  // Delete basic blocks, which optimization passes may have killed.
+  MPM.addPass(createModuleToFunctionPassAdaptor(SimplifyCFGPass()));
+
+  // Drop bodies of available externally objects to improve GlobalDCE.
+  MPM.addPass(EliminateAvailableExternallyPass());
+
+  // Now that we have optimized the program, discard unreachable functions.
+  MPM.addPass(GlobalDCEPass());
+
+  // FIXME: Maybe enable MergeFuncs conditionally after it's ported.
+  return MPM;
+}
+
+AAManager PassBuilder::buildDefaultAAPipeline() {
+  AAManager AA;
+
+  // The order in which these are registered determines their priority when
+  // being queried.
+
+  // First we register the basic alias analysis that provides the majority of
+  // per-function local AA logic. This is a stateless, on-demand local set of
+  // AA techniques.
+  AA.registerFunctionAnalysis<BasicAA>();
+
+  // Next we query fast, specialized alias analyses that wrap IR-embedded
+  // information about aliasing.
+  AA.registerFunctionAnalysis<ScopedNoAliasAA>();
+  AA.registerFunctionAnalysis<TypeBasedAA>();
+
+  // Add support for querying global aliasing information when available.
+  // Because the `AAManager` is a function analysis and `GlobalsAA` is a module
+  // analysis, all that the `AAManager` can do is query for any *cached*
+  // results from `GlobalsAA` through a readonly proxy.
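+  // Concretely, this is why the module pipelines above schedule
+  // RequireAnalysisPass<GlobalsAA, Module>() before running function passes:
+  // it computes and caches the GlobalsAA result that this readonly proxy can
+  // later serve.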
+  AA.registerModuleAnalysis<GlobalsAA>();
+
+  return AA;
+}
+
+static Optional<int> parseRepeatPassName(StringRef Name) {
+  if (!Name.consume_front("repeat<") || !Name.consume_back(">"))
+    return None;
+  int Count;
+  if (Name.getAsInteger(0, Count) || Count <= 0)
+    return None;
+  return Count;
+}
+
+static Optional<int> parseDevirtPassName(StringRef Name) {
+  if (!Name.consume_front("devirt<") || !Name.consume_back(">"))
+    return None;
+  int Count;
+  if (Name.getAsInteger(0, Count) || Count <= 0)
+    return None;
+  return Count;
+}
+
+static bool checkParametrizedPassName(StringRef Name, StringRef PassName) {
+  if (!Name.consume_front(PassName))
+    return false;
+  // normal pass name w/o parameters == default parameters
+  if (Name.empty())
+    return true;
+  return Name.startswith("<") && Name.endswith(">");
+}
+
+namespace {
+
+/// This performs customized parsing of a pass name with parameters.
+///
+/// We do not need parametrization of passes in a textual pipeline very often,
+/// yet on a rare occasion the ability to specify parameters right there can
+/// be useful.
+///
+/// \p Name - parameterized specification of a pass from a textual pipeline
+/// is a string in the form:
+/// PassName '<' parameter-list '>'
+///
+/// The parameter list is parsed by the parser callable argument, \p Parser.
+/// It takes a string-ref of parameters and returns either a StringError or a
+/// parameter list in the form of a custom parameters type, all wrapped into
+/// an Expected<> template class.
+///
+template <typename ParametersParseCallableT>
+auto parsePassParameters(ParametersParseCallableT &&Parser, StringRef Name,
+                         StringRef PassName) -> decltype(Parser(StringRef{})) {
+  using ParametersT = typename decltype(Parser(StringRef{}))::value_type;
+
+  StringRef Params = Name;
+  if (!Params.consume_front(PassName)) {
+    assert(false &&
+           "unable to strip pass name from parametrized pass specification");
+  }
+  if (Params.empty())
+    return ParametersT{};
+  if (!Params.consume_front("<") || !Params.consume_back(">")) {
+    assert(false && "invalid format for parametrized pass name");
+  }
+
+  Expected<ParametersT> Result = Parser(Params);
+  assert((Result || Result.template errorIsA<StringError>()) &&
+         "Pass parameter parser can only return StringErrors.");
+  return std::move(Result);
+}
+
+/// Parser of parameters for LoopUnroll pass.
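+/// For example (illustrative; the exact name is whatever
+/// FUNCTION_PASS_WITH_PARAMS registers in PassRegistry.def, e.g. "unroll"),
+/// a pipeline text may contain:
+///   unroll<O2;partial;no-runtime>
+/// which selects optimization level 2, enables partial unrolling, and
+/// disables runtime unrolling, per the parameter keys parsed below.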
+Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) { + LoopUnrollOptions UnrollOpts; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + int OptLevel = StringSwitch<int>(ParamName) + .Case("O0", 0) + .Case("O1", 1) + .Case("O2", 2) + .Case("O3", 3) + .Default(-1); + if (OptLevel >= 0) { + UnrollOpts.setOptLevel(OptLevel); + continue; + } + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "partial") { + UnrollOpts.setPartial(Enable); + } else if (ParamName == "peeling") { + UnrollOpts.setPeeling(Enable); + } else if (ParamName == "runtime") { + UnrollOpts.setRuntime(Enable); + } else if (ParamName == "upperbound") { + UnrollOpts.setUpperBound(Enable); + } else { + return make_error<StringError>( + formatv("invalid LoopUnrollPass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return UnrollOpts; +} + +Expected<MemorySanitizerOptions> parseMSanPassOptions(StringRef Params) { + MemorySanitizerOptions Result; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + if (ParamName == "recover") { + Result.Recover = true; + } else if (ParamName == "kernel") { + Result.Kernel = true; + } else if (ParamName.consume_front("track-origins=")) { + if (ParamName.getAsInteger(0, Result.TrackOrigins)) + return make_error<StringError>( + formatv("invalid argument to MemorySanitizer pass track-origins " + "parameter: '{0}' ", + ParamName) + .str(), + inconvertibleErrorCode()); + } else { + return make_error<StringError>( + formatv("invalid MemorySanitizer pass parameter '{0}' ", ParamName) + .str(), + inconvertibleErrorCode()); + } + } + return Result; +} + +/// Parser of parameters for SimplifyCFG pass. +Expected<SimplifyCFGOptions> parseSimplifyCFGOptions(StringRef Params) { + SimplifyCFGOptions Result; + while (!Params.empty()) { + StringRef ParamName; + std::tie(ParamName, Params) = Params.split(';'); + + bool Enable = !ParamName.consume_front("no-"); + if (ParamName == "forward-switch-cond") { + Result.forwardSwitchCondToPhi(Enable); + } else if (ParamName == "switch-to-lookup") { + Result.convertSwitchToLookupTable(Enable); + } else if (ParamName == "keep-loops") { + Result.needCanonicalLoops(Enable); + } else if (ParamName == "sink-common-insts") { + Result.sinkCommonInsts(Enable); + } else if (Enable && ParamName.consume_front("bonus-inst-threshold=")) { + APInt BonusInstThreshold; + if (ParamName.getAsInteger(0, BonusInstThreshold)) + return make_error<StringError>( + formatv("invalid argument to SimplifyCFG pass bonus-threshold " + "parameter: '{0}' ", + ParamName).str(), + inconvertibleErrorCode()); + Result.bonusInstThreshold(BonusInstThreshold.getSExtValue()); + } else { + return make_error<StringError>( + formatv("invalid SimplifyCFG pass parameter '{0}' ", ParamName).str(), + inconvertibleErrorCode()); + } + } + return Result; +} + +/// Parser of parameters for LoopVectorize pass. 
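+/// For example (illustrative; assuming the "loop-vectorize" name registered
+/// in PassRegistry.def):
+///   loop-vectorize<no-interleave-forced-only;vectorize-forced-only>
+/// clears InterleaveOnlyWhenForced and sets VectorizeOnlyWhenForced, per the
+/// parameter keys parsed below.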
+Expected<LoopVectorizeOptions> parseLoopVectorizeOptions(StringRef Params) {
+  LoopVectorizeOptions Opts;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "interleave-forced-only") {
+      Opts.setInterleaveOnlyWhenForced(Enable);
+    } else if (ParamName == "vectorize-forced-only") {
+      Opts.setVectorizeOnlyWhenForced(Enable);
+    } else {
+      return make_error<StringError>(
+          formatv("invalid LoopVectorize parameter '{0}' ", ParamName).str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Opts;
+}
+
+Expected<bool> parseLoopUnswitchOptions(StringRef Params) {
+  bool Result = false;
+  while (!Params.empty()) {
+    StringRef ParamName;
+    std::tie(ParamName, Params) = Params.split(';');
+
+    bool Enable = !ParamName.consume_front("no-");
+    if (ParamName == "nontrivial") {
+      Result = Enable;
+    } else {
+      return make_error<StringError>(
+          formatv("invalid LoopUnswitch pass parameter '{0}' ", ParamName)
+              .str(),
+          inconvertibleErrorCode());
+    }
+  }
+  return Result;
+}
+} // namespace
+
+/// Tests whether a pass name starts with a valid prefix for a default pipeline
+/// alias.
+static bool startsWithDefaultPipelineAliasPrefix(StringRef Name) {
+  return Name.startswith("default") || Name.startswith("thinlto") ||
+         Name.startswith("lto");
+}
+
+/// Tests whether registered callbacks will accept a given pass name.
+///
+/// When parsing a pipeline text, the type of the outermost pipeline may be
+/// omitted, in which case the type is automatically determined from the first
+/// pass name in the text. This may be a name that is handled through one of the
+/// callbacks. We check this through the ordinary parsing callbacks by setting
+/// up a dummy PassManager in order to not force the client to also handle this
+/// type of query.
+template <typename PassManagerT, typename CallbacksT>
+static bool callbacksAcceptPassName(StringRef Name, CallbacksT &Callbacks) {
+  if (!Callbacks.empty()) {
+    PassManagerT DummyPM;
+    for (auto &CB : Callbacks)
+      if (CB(Name, DummyPM, {}))
+        return true;
+  }
+  return false;
+}
+
+template <typename CallbacksT>
+static bool isModulePassName(StringRef Name, CallbacksT &Callbacks) {
+  // Manually handle aliases for pre-configured pipeline fragments.
+  if (startsWithDefaultPipelineAliasPrefix(Name))
+    return DefaultAliasRegex.match(Name);
+
+  // Explicitly handle pass manager names.
+  if (Name == "module")
+    return true;
+  if (Name == "cgscc")
+    return true;
+  if (Name == "function")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
+  if (parseRepeatPassName(Name))
+    return true;
+
+#define MODULE_PASS(NAME, CREATE_PASS)                                         \
+  if (Name == NAME)                                                            \
+    return true;
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)                                     \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<ModulePassManager>(Name, Callbacks);
+}
+
+template <typename CallbacksT>
+static bool isCGSCCPassName(StringRef Name, CallbacksT &Callbacks) {
+  // Explicitly handle pass manager names.
+  if (Name == "cgscc")
+    return true;
+  if (Name == "function")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
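+  // For example, "repeat<2>(...)" and "devirt<4>(...)" wrappers are accepted
+  // here; they are recognized by parseRepeatPassName / parseDevirtPassName
+  // defined above.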
+  if (parseRepeatPassName(Name))
+    return true;
+  if (parseDevirtPassName(Name))
+    return true;
+
+#define CGSCC_PASS(NAME, CREATE_PASS)                                          \
+  if (Name == NAME)                                                            \
+    return true;
+#define CGSCC_ANALYSIS(NAME, CREATE_PASS)                                      \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<CGSCCPassManager>(Name, Callbacks);
+}
+
+template <typename CallbacksT>
+static bool isFunctionPassName(StringRef Name, CallbacksT &Callbacks) {
+  // Explicitly handle pass manager names.
+  if (Name == "function")
+    return true;
+  if (Name == "loop")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
+  if (parseRepeatPassName(Name))
+    return true;
+
+#define FUNCTION_PASS(NAME, CREATE_PASS)                                       \
+  if (Name == NAME)                                                            \
+    return true;
+#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                   \
+  if (checkParametrizedPassName(Name, NAME))                                   \
+    return true;
+#define FUNCTION_ANALYSIS(NAME, CREATE_PASS)                                   \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<FunctionPassManager>(Name, Callbacks);
+}
+
+template <typename CallbacksT>
+static bool isLoopPassName(StringRef Name, CallbacksT &Callbacks) {
+  // Explicitly handle pass manager names.
+  if (Name == "loop")
+    return true;
+
+  // Explicitly handle custom-parsed pass names.
+  if (parseRepeatPassName(Name))
+    return true;
+
+#define LOOP_PASS(NAME, CREATE_PASS)                                           \
+  if (Name == NAME)                                                            \
+    return true;
+#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER)                       \
+  if (checkParametrizedPassName(Name, NAME))                                   \
+    return true;
+#define LOOP_ANALYSIS(NAME, CREATE_PASS)                                       \
+  if (Name == "require<" NAME ">" || Name == "invalidate<" NAME ">")           \
+    return true;
+#include "PassRegistry.def"
+
+  return callbacksAcceptPassName<LoopPassManager>(Name, Callbacks);
+}
+
+Optional<std::vector<PassBuilder::PipelineElement>>
+PassBuilder::parsePipelineText(StringRef Text) {
+  std::vector<PipelineElement> ResultPipeline;
+
+  SmallVector<std::vector<PipelineElement> *, 4> PipelineStack = {
+      &ResultPipeline};
+  for (;;) {
+    std::vector<PipelineElement> &Pipeline = *PipelineStack.back();
+    size_t Pos = Text.find_first_of(",()");
+    Pipeline.push_back({Text.substr(0, Pos), {}});
+
+    // If we have a single terminating name, we're done.
+    if (Pos == Text.npos)
+      break;
+
+    char Sep = Text[Pos];
+    Text = Text.substr(Pos + 1);
+    if (Sep == ',')
+      // Just a name ending in a comma, continue.
+      continue;
+
+    if (Sep == '(') {
+      // Push the inner pipeline onto the stack to continue processing.
+      PipelineStack.push_back(&Pipeline.back().InnerPipeline);
+      continue;
+    }
+
+    assert(Sep == ')' && "Bogus separator!");
+    // When handling the close parenthesis, we greedily consume them to avoid
+    // empty strings in the pipeline.
+    do {
+      // If we try to pop the outer pipeline we have unbalanced parentheses.
+      if (PipelineStack.size() == 1)
+        return None;
+
+      PipelineStack.pop_back();
+    } while (Text.consume_front(")"));
+
+    // Check if we've finished parsing.
+    if (Text.empty())
+      break;
+
+    // Otherwise, the end of an inner pipeline always has to be followed by
+    // a comma, and then we can continue.
+    if (!Text.consume_front(","))
+      return None;
+  }
+
+  if (PipelineStack.size() > 1)
+    // Unbalanced parentheses.
+    return None;
+
+  assert(PipelineStack.back() == &ResultPipeline &&
+         "Wrong pipeline at the bottom of the stack!");
+  return {std::move(ResultPipeline)};
+}
+
+Error PassBuilder::parseModulePass(ModulePassManager &MPM,
+                                   const PipelineElement &E,
+                                   bool VerifyEachPass, bool DebugLogging) {
+  auto &Name = E.Name;
+  auto &InnerPipeline = E.InnerPipeline;
+
+  // First handle complex passes like the pass managers which carry pipelines.
+  if (!InnerPipeline.empty()) {
+    if (Name == "module") {
+      ModulePassManager NestedMPM(DebugLogging);
+      if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+                                             VerifyEachPass, DebugLogging))
+        return Err;
+      MPM.addPass(std::move(NestedMPM));
+      return Error::success();
+    }
+    if (Name == "cgscc") {
+      CGSCCPassManager CGPM(DebugLogging);
+      if (auto Err = parseCGSCCPassPipeline(CGPM, InnerPipeline, VerifyEachPass,
+                                            DebugLogging))
+        return Err;
+      MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(std::move(CGPM)));
+      return Error::success();
+    }
+    if (Name == "function") {
+      FunctionPassManager FPM(DebugLogging);
+      if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline,
+                                               VerifyEachPass, DebugLogging))
+        return Err;
+      MPM.addPass(createModuleToFunctionPassAdaptor(std::move(FPM)));
+      return Error::success();
+    }
+    if (auto Count = parseRepeatPassName(Name)) {
+      ModulePassManager NestedMPM(DebugLogging);
+      if (auto Err = parseModulePassPipeline(NestedMPM, InnerPipeline,
+                                             VerifyEachPass, DebugLogging))
+        return Err;
+      MPM.addPass(createRepeatedPass(*Count, std::move(NestedMPM)));
+      return Error::success();
+    }
+
+    for (auto &C : ModulePipelineParsingCallbacks)
+      if (C(Name, MPM, InnerPipeline))
+        return Error::success();
+
+    // Normal passes can't have pipelines.
+    return make_error<StringError>(
+        formatv("invalid use of '{0}' pass as module pipeline", Name).str(),
+        inconvertibleErrorCode());
+  }
+
+  // Manually handle aliases for pre-configured pipeline fragments.
+  if (startsWithDefaultPipelineAliasPrefix(Name)) {
+    SmallVector<StringRef, 3> Matches;
+    if (!DefaultAliasRegex.match(Name, &Matches))
+      return make_error<StringError>(
+          formatv("unknown default pipeline alias '{0}'", Name).str(),
+          inconvertibleErrorCode());
+
+    assert(Matches.size() == 3 && "Must capture two matched strings!");
+
+    OptimizationLevel L = StringSwitch<OptimizationLevel>(Matches[2])
+                              .Case("O0", O0)
+                              .Case("O1", O1)
+                              .Case("O2", O2)
+                              .Case("O3", O3)
+                              .Case("Os", Os)
+                              .Case("Oz", Oz);
+    if (L == O0)
+      // At O0 we do nothing at all!
+      return Error::success();
+
+    if (Matches[1] == "default") {
+      MPM.addPass(buildPerModuleDefaultPipeline(L, DebugLogging));
+    } else if (Matches[1] == "thinlto-pre-link") {
+      MPM.addPass(buildThinLTOPreLinkDefaultPipeline(L, DebugLogging));
+    } else if (Matches[1] == "thinlto") {
+      MPM.addPass(buildThinLTODefaultPipeline(L, DebugLogging, nullptr));
+    } else if (Matches[1] == "lto-pre-link") {
+      MPM.addPass(buildLTOPreLinkDefaultPipeline(L, DebugLogging));
+    } else {
+      assert(Matches[1] == "lto" && "Not one of the matched options!");
+      MPM.addPass(buildLTODefaultPipeline(L, DebugLogging, nullptr));
+    }
+    return Error::success();
+  }
+
+  // Finally expand the basic registered passes from the .inc file.
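+  // For example (illustrative), given MODULE_PASS("globaldce", GlobalDCEPass())
+  // in PassRegistry.def, the macro below expands to roughly:
+  //
+  //   if (Name == "globaldce") {
+  //     MPM.addPass(GlobalDCEPass());
+  //     return Error::success();
+  //   }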
+#define MODULE_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(CREATE_PASS); \ + return Error::success(); \ + } +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == "require<" NAME ">") { \ + MPM.addPass( \ + RequireAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type, Module>()); \ + return Error::success(); \ + } \ + if (Name == "invalidate<" NAME ">") { \ + MPM.addPass(InvalidateAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type>()); \ + return Error::success(); \ + } +#include "PassRegistry.def" + + for (auto &C : ModulePipelineParsingCallbacks) + if (C(Name, MPM, InnerPipeline)) + return Error::success(); + return make_error<StringError>( + formatv("unknown module pass '{0}'", Name).str(), + inconvertibleErrorCode()); +} + +Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, + const PipelineElement &E, bool VerifyEachPass, + bool DebugLogging) { + auto &Name = E.Name; + auto &InnerPipeline = E.InnerPipeline; + + // First handle complex passes like the pass managers which carry pipelines. + if (!InnerPipeline.empty()) { + if (Name == "cgscc") { + CGSCCPassManager NestedCGPM(DebugLogging); + if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + // Add the nested pass manager with the appropriate adaptor. + CGPM.addPass(std::move(NestedCGPM)); + return Error::success(); + } + if (Name == "function") { + FunctionPassManager FPM(DebugLogging); + if (auto Err = parseFunctionPassPipeline(FPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + // Add the nested pass manager with the appropriate adaptor. + CGPM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM))); + return Error::success(); + } + if (auto Count = parseRepeatPassName(Name)) { + CGSCCPassManager NestedCGPM(DebugLogging); + if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + CGPM.addPass(createRepeatedPass(*Count, std::move(NestedCGPM))); + return Error::success(); + } + if (auto MaxRepetitions = parseDevirtPassName(Name)) { + CGSCCPassManager NestedCGPM(DebugLogging); + if (auto Err = parseCGSCCPassPipeline(NestedCGPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + CGPM.addPass( + createDevirtSCCRepeatedPass(std::move(NestedCGPM), *MaxRepetitions)); + return Error::success(); + } + + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return Error::success(); + + // Normal passes can't have pipelines. + return make_error<StringError>( + formatv("invalid use of '{0}' pass as cgscc pipeline", Name).str(), + inconvertibleErrorCode()); + } + +// Now expand the basic registered passes from the .inc file. 
+#define CGSCC_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + CGPM.addPass(CREATE_PASS); \ + return Error::success(); \ + } +#define CGSCC_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == "require<" NAME ">") { \ + CGPM.addPass(RequireAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type, \ + LazyCallGraph::SCC, CGSCCAnalysisManager, LazyCallGraph &, \ + CGSCCUpdateResult &>()); \ + return Error::success(); \ + } \ + if (Name == "invalidate<" NAME ">") { \ + CGPM.addPass(InvalidateAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type>()); \ + return Error::success(); \ + } +#include "PassRegistry.def" + + for (auto &C : CGSCCPipelineParsingCallbacks) + if (C(Name, CGPM, InnerPipeline)) + return Error::success(); + return make_error<StringError>( + formatv("unknown cgscc pass '{0}'", Name).str(), + inconvertibleErrorCode()); +} + +Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, + const PipelineElement &E, + bool VerifyEachPass, bool DebugLogging) { + auto &Name = E.Name; + auto &InnerPipeline = E.InnerPipeline; + + // First handle complex passes like the pass managers which carry pipelines. + if (!InnerPipeline.empty()) { + if (Name == "function") { + FunctionPassManager NestedFPM(DebugLogging); + if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + // Add the nested pass manager with the appropriate adaptor. + FPM.addPass(std::move(NestedFPM)); + return Error::success(); + } + if (Name == "loop") { + LoopPassManager LPM(DebugLogging); + if (auto Err = parseLoopPassPipeline(LPM, InnerPipeline, VerifyEachPass, + DebugLogging)) + return Err; + // Add the nested pass manager with the appropriate adaptor. + FPM.addPass( + createFunctionToLoopPassAdaptor(std::move(LPM), DebugLogging)); + return Error::success(); + } + if (auto Count = parseRepeatPassName(Name)) { + FunctionPassManager NestedFPM(DebugLogging); + if (auto Err = parseFunctionPassPipeline(NestedFPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + FPM.addPass(createRepeatedPass(*Count, std::move(NestedFPM))); + return Error::success(); + } + + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return Error::success(); + + // Normal passes can't have pipelines. + return make_error<StringError>( + formatv("invalid use of '{0}' pass as function pipeline", Name).str(), + inconvertibleErrorCode()); + } + +// Now expand the basic registered passes from the .inc file. 
+#define FUNCTION_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + FPM.addPass(CREATE_PASS); \ + return Error::success(); \ + } +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + FPM.addPass(CREATE_PASS(Params.get())); \ + return Error::success(); \ + } +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == "require<" NAME ">") { \ + FPM.addPass( \ + RequireAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type, Function>()); \ + return Error::success(); \ + } \ + if (Name == "invalidate<" NAME ">") { \ + FPM.addPass(InvalidateAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type>()); \ + return Error::success(); \ + } +#include "PassRegistry.def" + + for (auto &C : FunctionPipelineParsingCallbacks) + if (C(Name, FPM, InnerPipeline)) + return Error::success(); + return make_error<StringError>( + formatv("unknown function pass '{0}'", Name).str(), + inconvertibleErrorCode()); +} + +Error PassBuilder::parseLoopPass(LoopPassManager &LPM, const PipelineElement &E, + bool VerifyEachPass, bool DebugLogging) { + StringRef Name = E.Name; + auto &InnerPipeline = E.InnerPipeline; + + // First handle complex passes like the pass managers which carry pipelines. + if (!InnerPipeline.empty()) { + if (Name == "loop") { + LoopPassManager NestedLPM(DebugLogging); + if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + // Add the nested pass manager with the appropriate adaptor. + LPM.addPass(std::move(NestedLPM)); + return Error::success(); + } + if (auto Count = parseRepeatPassName(Name)) { + LoopPassManager NestedLPM(DebugLogging); + if (auto Err = parseLoopPassPipeline(NestedLPM, InnerPipeline, + VerifyEachPass, DebugLogging)) + return Err; + LPM.addPass(createRepeatedPass(*Count, std::move(NestedLPM))); + return Error::success(); + } + + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return Error::success(); + + // Normal passes can't have pipelines. + return make_error<StringError>( + formatv("invalid use of '{0}' pass as loop pipeline", Name).str(), + inconvertibleErrorCode()); + } + +// Now expand the basic registered passes from the .inc file. 
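+// For illustration: analyses are never scheduled directly. LOOP_ANALYSIS
+// below only recognizes the wrapped spellings, so "require<ivusers>" expands
+// to adding a RequireAnalysisPass for IVUsersAnalysis and
+// "invalidate<ivusers>" to the matching InvalidateAnalysisPass, mirroring
+// the module, cgscc, and function cases above.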
+#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + LPM.addPass(CREATE_PASS); \ + return Error::success(); \ + } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + LPM.addPass(CREATE_PASS(Params.get())); \ + return Error::success(); \ + } +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == "require<" NAME ">") { \ + LPM.addPass(RequireAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type, Loop, \ + LoopAnalysisManager, LoopStandardAnalysisResults &, \ + LPMUpdater &>()); \ + return Error::success(); \ + } \ + if (Name == "invalidate<" NAME ">") { \ + LPM.addPass(InvalidateAnalysisPass< \ + std::remove_reference<decltype(CREATE_PASS)>::type>()); \ + return Error::success(); \ + } +#include "PassRegistry.def" + + for (auto &C : LoopPipelineParsingCallbacks) + if (C(Name, LPM, InnerPipeline)) + return Error::success(); + return make_error<StringError>(formatv("unknown loop pass '{0}'", Name).str(), + inconvertibleErrorCode()); +} + +bool PassBuilder::parseAAPassName(AAManager &AA, StringRef Name) { +#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + AA.registerModuleAnalysis< \ + std::remove_reference<decltype(CREATE_PASS)>::type>(); \ + return true; \ + } +#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + AA.registerFunctionAnalysis< \ + std::remove_reference<decltype(CREATE_PASS)>::type>(); \ + return true; \ + } +#include "PassRegistry.def" + + for (auto &C : AAParsingCallbacks) + if (C(Name, AA)) + return true; + return false; +} + +Error PassBuilder::parseLoopPassPipeline(LoopPassManager &LPM, + ArrayRef<PipelineElement> Pipeline, + bool VerifyEachPass, + bool DebugLogging) { + for (const auto &Element : Pipeline) { + if (auto Err = parseLoopPass(LPM, Element, VerifyEachPass, DebugLogging)) + return Err; + // FIXME: No verifier support for Loop passes! + } + return Error::success(); +} + +Error PassBuilder::parseFunctionPassPipeline(FunctionPassManager &FPM, + ArrayRef<PipelineElement> Pipeline, + bool VerifyEachPass, + bool DebugLogging) { + for (const auto &Element : Pipeline) { + if (auto Err = + parseFunctionPass(FPM, Element, VerifyEachPass, DebugLogging)) + return Err; + if (VerifyEachPass) + FPM.addPass(VerifierPass()); + } + return Error::success(); +} + +Error PassBuilder::parseCGSCCPassPipeline(CGSCCPassManager &CGPM, + ArrayRef<PipelineElement> Pipeline, + bool VerifyEachPass, + bool DebugLogging) { + for (const auto &Element : Pipeline) { + if (auto Err = parseCGSCCPass(CGPM, Element, VerifyEachPass, DebugLogging)) + return Err; + // FIXME: No verifier support for CGSCC passes! 
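+    // Note: VerifyEachPass is only honored by the function- and module-level
+    // pipeline parsers, which can interleave VerifierPass(); there is no
+    // SCC- or loop-scoped verifier pass to insert here.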
+ } + return Error::success(); +} + +void PassBuilder::crossRegisterProxies(LoopAnalysisManager &LAM, + FunctionAnalysisManager &FAM, + CGSCCAnalysisManager &CGAM, + ModuleAnalysisManager &MAM) { + MAM.registerPass([&] { return FunctionAnalysisManagerModuleProxy(FAM); }); + MAM.registerPass([&] { return CGSCCAnalysisManagerModuleProxy(CGAM); }); + CGAM.registerPass([&] { return ModuleAnalysisManagerCGSCCProxy(MAM); }); + FAM.registerPass([&] { return CGSCCAnalysisManagerFunctionProxy(CGAM); }); + FAM.registerPass([&] { return ModuleAnalysisManagerFunctionProxy(MAM); }); + FAM.registerPass([&] { return LoopAnalysisManagerFunctionProxy(LAM); }); + LAM.registerPass([&] { return FunctionAnalysisManagerLoopProxy(FAM); }); +} + +Error PassBuilder::parseModulePassPipeline(ModulePassManager &MPM, + ArrayRef<PipelineElement> Pipeline, + bool VerifyEachPass, + bool DebugLogging) { + for (const auto &Element : Pipeline) { + if (auto Err = parseModulePass(MPM, Element, VerifyEachPass, DebugLogging)) + return Err; + if (VerifyEachPass) + MPM.addPass(VerifierPass()); + } + return Error::success(); +} + +// Primary pass pipeline description parsing routine for a \c ModulePassManager +// FIXME: Should this routine accept a TargetMachine or require the caller to +// pre-populate the analysis managers with target-specific stuff? +Error PassBuilder::parsePassPipeline(ModulePassManager &MPM, + StringRef PipelineText, + bool VerifyEachPass, bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return make_error<StringError>( + formatv("invalid pipeline '{0}'", PipelineText).str(), + inconvertibleErrorCode()); + + // If the first name isn't at the module layer, wrap the pipeline up + // automatically. + StringRef FirstName = Pipeline->front().Name; + + if (!isModulePassName(FirstName, ModulePipelineParsingCallbacks)) { + if (isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) { + Pipeline = {{"cgscc", std::move(*Pipeline)}}; + } else if (isFunctionPassName(FirstName, + FunctionPipelineParsingCallbacks)) { + Pipeline = {{"function", std::move(*Pipeline)}}; + } else if (isLoopPassName(FirstName, LoopPipelineParsingCallbacks)) { + Pipeline = {{"function", {{"loop", std::move(*Pipeline)}}}}; + } else { + for (auto &C : TopLevelPipelineParsingCallbacks) + if (C(MPM, *Pipeline, VerifyEachPass, DebugLogging)) + return Error::success(); + + // Unknown pass or pipeline name! + auto &InnerPipeline = Pipeline->front().InnerPipeline; + return make_error<StringError>( + formatv("unknown {0} name '{1}'", + (InnerPipeline.empty() ? 
"pass" : "pipeline"), FirstName) + .str(), + inconvertibleErrorCode()); + } + } + + if (auto Err = + parseModulePassPipeline(MPM, *Pipeline, VerifyEachPass, DebugLogging)) + return Err; + return Error::success(); +} + +// Primary pass pipeline description parsing routine for a \c CGSCCPassManager +Error PassBuilder::parsePassPipeline(CGSCCPassManager &CGPM, + StringRef PipelineText, + bool VerifyEachPass, bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return make_error<StringError>( + formatv("invalid pipeline '{0}'", PipelineText).str(), + inconvertibleErrorCode()); + + StringRef FirstName = Pipeline->front().Name; + if (!isCGSCCPassName(FirstName, CGSCCPipelineParsingCallbacks)) + return make_error<StringError>( + formatv("unknown cgscc pass '{0}' in pipeline '{1}'", FirstName, + PipelineText) + .str(), + inconvertibleErrorCode()); + + if (auto Err = + parseCGSCCPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging)) + return Err; + return Error::success(); +} + +// Primary pass pipeline description parsing routine for a \c +// FunctionPassManager +Error PassBuilder::parsePassPipeline(FunctionPassManager &FPM, + StringRef PipelineText, + bool VerifyEachPass, bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return make_error<StringError>( + formatv("invalid pipeline '{0}'", PipelineText).str(), + inconvertibleErrorCode()); + + StringRef FirstName = Pipeline->front().Name; + if (!isFunctionPassName(FirstName, FunctionPipelineParsingCallbacks)) + return make_error<StringError>( + formatv("unknown function pass '{0}' in pipeline '{1}'", FirstName, + PipelineText) + .str(), + inconvertibleErrorCode()); + + if (auto Err = parseFunctionPassPipeline(FPM, *Pipeline, VerifyEachPass, + DebugLogging)) + return Err; + return Error::success(); +} + +// Primary pass pipeline description parsing routine for a \c LoopPassManager +Error PassBuilder::parsePassPipeline(LoopPassManager &CGPM, + StringRef PipelineText, + bool VerifyEachPass, bool DebugLogging) { + auto Pipeline = parsePipelineText(PipelineText); + if (!Pipeline || Pipeline->empty()) + return make_error<StringError>( + formatv("invalid pipeline '{0}'", PipelineText).str(), + inconvertibleErrorCode()); + + if (auto Err = + parseLoopPassPipeline(CGPM, *Pipeline, VerifyEachPass, DebugLogging)) + return Err; + + return Error::success(); +} + +Error PassBuilder::parseAAPipeline(AAManager &AA, StringRef PipelineText) { + // If the pipeline just consists of the word 'default' just replace the AA + // manager with our default one. + if (PipelineText == "default") { + AA = buildDefaultAAPipeline(); + return Error::success(); + } + + while (!PipelineText.empty()) { + StringRef Name; + std::tie(Name, PipelineText) = PipelineText.split(','); + if (!parseAAPassName(AA, Name)) + return make_error<StringError>( + formatv("unknown alias analysis name '{0}'", Name).str(), + inconvertibleErrorCode()); + } + + return Error::success(); +} diff --git a/hpvm/llvm_patches/lib/Passes/PassRegistry.def b/hpvm/llvm_patches/lib/Passes/PassRegistry.def new file mode 100644 index 0000000000000000000000000000000000000000..eab4026fcd4a9d2bf1b15ec24b4925958c0a31f0 --- /dev/null +++ b/hpvm/llvm_patches/lib/Passes/PassRegistry.def @@ -0,0 +1,317 @@ +//===- PassRegistry.def - Registry of passes --------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file is used as the registry of passes that are part of the core LLVM
+// libraries. This file describes both transformation passes and analyses.
+// Analyses are registered, while transformation passes have names registered
+// that can be used when providing a textual pass pipeline.
+//
+//===----------------------------------------------------------------------===//
+
+// NOTE: NO INCLUDE GUARD DESIRED!
+
+#ifndef MODULE_ANALYSIS
+#define MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ANALYSIS("callgraph", CallGraphAnalysis())
+MODULE_ANALYSIS("lcg", LazyCallGraphAnalysis())
+MODULE_ANALYSIS("module-summary", ModuleSummaryIndexAnalysis())
+MODULE_ANALYSIS("no-op-module", NoOpModuleAnalysis())
+MODULE_ANALYSIS("profile-summary", ProfileSummaryAnalysis())
+MODULE_ANALYSIS("stack-safety", StackSafetyGlobalAnalysis())
+MODULE_ANALYSIS("targetlibinfo", TargetLibraryAnalysis())
+MODULE_ANALYSIS("verify", VerifierAnalysis())
+MODULE_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC))
+MODULE_ANALYSIS("asan-globals-md", ASanGlobalsMetadataAnalysis())
+
+#ifndef MODULE_ALIAS_ANALYSIS
+#define MODULE_ALIAS_ANALYSIS(NAME, CREATE_PASS)                               \
+  MODULE_ANALYSIS(NAME, CREATE_PASS)
+#endif
+MODULE_ALIAS_ANALYSIS("globals-aa", GlobalsAA())
+#undef MODULE_ALIAS_ANALYSIS
+#undef MODULE_ANALYSIS
+
+#ifndef MODULE_PASS
+#define MODULE_PASS(NAME, CREATE_PASS)
+#endif
+MODULE_PASS("always-inline", AlwaysInlinerPass())
+MODULE_PASS("attributor", AttributorPass())
+MODULE_PASS("called-value-propagation", CalledValuePropagationPass())
+MODULE_PASS("canonicalize-aliases", CanonicalizeAliasesPass())
+MODULE_PASS("cg-profile", CGProfilePass())
+MODULE_PASS("constmerge", ConstantMergePass())
+MODULE_PASS("cross-dso-cfi", CrossDSOCFIPass())
+MODULE_PASS("deadargelim", DeadArgumentEliminationPass())
+MODULE_PASS("elim-avail-extern", EliminateAvailableExternallyPass())
+MODULE_PASS("forceattrs", ForceFunctionAttrsPass())
+MODULE_PASS("function-import", FunctionImportPass())
+MODULE_PASS("globaldce", GlobalDCEPass())
+MODULE_PASS("globalopt", GlobalOptPass())
+MODULE_PASS("globalsplit", GlobalSplitPass())
+MODULE_PASS("hotcoldsplit", HotColdSplittingPass())
+MODULE_PASS("hwasan", HWAddressSanitizerPass(false, false))
+MODULE_PASS("khwasan", HWAddressSanitizerPass(true, true))
+MODULE_PASS("inferattrs", InferFunctionAttrsPass())
+MODULE_PASS("insert-gcov-profiling", GCOVProfilerPass())
+MODULE_PASS("instrorderfile", InstrOrderFilePass())
+MODULE_PASS("instrprof", InstrProfiling())
+MODULE_PASS("internalize", InternalizePass())
+MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
+MODULE_PASS("ipsccp", IPSCCPPass())
+MODULE_PASS("lowertypetests", LowerTypeTestsPass(nullptr, nullptr))
+MODULE_PASS("name-anon-globals", NameAnonGlobalPass())
+MODULE_PASS("no-op-module", NoOpModulePass())
+MODULE_PASS("partial-inliner", PartialInlinerPass())
+MODULE_PASS("pgo-icall-prom", PGOIndirectCallPromotion())
+MODULE_PASS("pgo-instr-gen", PGOInstrumentationGen())
+MODULE_PASS("pgo-instr-use", PGOInstrumentationUse())
+MODULE_PASS("pre-isel-intrinsic-lowering", PreISelIntrinsicLoweringPass())
+MODULE_PASS("print-profile-summary", ProfileSummaryPrinterPass(dbgs()))
+MODULE_PASS("print-callgraph", CallGraphPrinterPass(dbgs()))
+MODULE_PASS("print", PrintModulePass(dbgs()))
+MODULE_PASS("print-lcg",
LazyCallGraphPrinterPass(dbgs())) +MODULE_PASS("print-lcg-dot", LazyCallGraphDOTPrinterPass(dbgs())) +MODULE_PASS("print-stack-safety", StackSafetyGlobalPrinterPass(dbgs())) +MODULE_PASS("rewrite-statepoints-for-gc", RewriteStatepointsForGC()) +MODULE_PASS("rewrite-symbols", RewriteSymbolPass()) +MODULE_PASS("rpo-functionattrs", ReversePostOrderFunctionAttrsPass()) +MODULE_PASS("sample-profile", SampleProfileLoaderPass()) +MODULE_PASS("strip-dead-prototypes", StripDeadPrototypesPass()) +MODULE_PASS("synthetic-counts-propagation", SyntheticCountsPropagation()) +MODULE_PASS("wholeprogramdevirt", WholeProgramDevirtPass(nullptr, nullptr)) +MODULE_PASS("verify", VerifierPass()) +MODULE_PASS("asan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/false, false, true, false)) +MODULE_PASS("kasan-module", ModuleAddressSanitizerPass(/*CompileKernel=*/true, false, true, false)) +MODULE_PASS("poison-checking", PoisonCheckingPass()) +#undef MODULE_PASS + +#ifndef CGSCC_ANALYSIS +#define CGSCC_ANALYSIS(NAME, CREATE_PASS) +#endif +CGSCC_ANALYSIS("no-op-cgscc", NoOpCGSCCAnalysis()) +CGSCC_ANALYSIS("fam-proxy", FunctionAnalysisManagerCGSCCProxy()) +CGSCC_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) +#undef CGSCC_ANALYSIS + +#ifndef CGSCC_PASS +#define CGSCC_PASS(NAME, CREATE_PASS) +#endif +CGSCC_PASS("argpromotion", ArgumentPromotionPass()) +CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass()) +CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass()) +CGSCC_PASS("inline", InlinerPass()) +CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass()) +#undef CGSCC_PASS + +#ifndef FUNCTION_ANALYSIS +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) +#endif +FUNCTION_ANALYSIS("aa", AAManager()) +FUNCTION_ANALYSIS("assumptions", AssumptionAnalysis()) +FUNCTION_ANALYSIS("block-freq", BlockFrequencyAnalysis()) +FUNCTION_ANALYSIS("branch-prob", BranchProbabilityAnalysis()) +FUNCTION_ANALYSIS("domtree", DominatorTreeAnalysis()) +FUNCTION_ANALYSIS("postdomtree", PostDominatorTreeAnalysis()) +FUNCTION_ANALYSIS("demanded-bits", DemandedBitsAnalysis()) +FUNCTION_ANALYSIS("domfrontier", DominanceFrontierAnalysis()) +FUNCTION_ANALYSIS("loops", LoopAnalysis()) +FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) +FUNCTION_ANALYSIS("da", DependenceAnalysis()) +FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis()) +FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis()) +FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) +FUNCTION_ANALYSIS("regions", RegionInfoAnalysis()) +FUNCTION_ANALYSIS("no-op-function", NoOpFunctionAnalysis()) +FUNCTION_ANALYSIS("opt-remark-emit", OptimizationRemarkEmitterAnalysis()) +FUNCTION_ANALYSIS("scalar-evolution", ScalarEvolutionAnalysis()) +FUNCTION_ANALYSIS("stack-safety-local", StackSafetyAnalysis()) +FUNCTION_ANALYSIS("targetlibinfo", TargetLibraryAnalysis()) +FUNCTION_ANALYSIS("targetir", + TM ? 
TM->getTargetIRAnalysis() : TargetIRAnalysis()) +FUNCTION_ANALYSIS("verify", VerifierAnalysis()) +FUNCTION_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) + +#ifndef FUNCTION_ALIAS_ANALYSIS +#define FUNCTION_ALIAS_ANALYSIS(NAME, CREATE_PASS) \ + FUNCTION_ANALYSIS(NAME, CREATE_PASS) +#endif +FUNCTION_ALIAS_ANALYSIS("basic-aa", BasicAA()) +FUNCTION_ALIAS_ANALYSIS("cfl-anders-aa", CFLAndersAA()) +FUNCTION_ALIAS_ANALYSIS("cfl-steens-aa", CFLSteensAA()) +FUNCTION_ALIAS_ANALYSIS("scev-aa", SCEVAA()) +FUNCTION_ALIAS_ANALYSIS("scoped-noalias-aa", ScopedNoAliasAA()) +FUNCTION_ALIAS_ANALYSIS("type-based-aa", TypeBasedAA()) +#undef FUNCTION_ALIAS_ANALYSIS +#undef FUNCTION_ANALYSIS + +#ifndef FUNCTION_PASS +#define FUNCTION_PASS(NAME, CREATE_PASS) +#endif +FUNCTION_PASS("aa-eval", AAEvaluator()) +FUNCTION_PASS("adce", ADCEPass()) +FUNCTION_PASS("add-discriminators", AddDiscriminatorsPass()) +FUNCTION_PASS("aggressive-instcombine", AggressiveInstCombinePass()) +FUNCTION_PASS("alignment-from-assumptions", AlignmentFromAssumptionsPass()) +FUNCTION_PASS("bdce", BDCEPass()) +FUNCTION_PASS("bounds-checking", BoundsCheckingPass()) +FUNCTION_PASS("break-crit-edges", BreakCriticalEdgesPass()) +FUNCTION_PASS("callsite-splitting", CallSiteSplittingPass()) +FUNCTION_PASS("consthoist", ConstantHoistingPass()) +FUNCTION_PASS("chr", ControlHeightReductionPass()) +FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass()) +FUNCTION_PASS("dce", DCEPass()) +FUNCTION_PASS("div-rem-pairs", DivRemPairsPass()) +FUNCTION_PASS("dse", DSEPass()) +FUNCTION_PASS("dot-cfg", CFGPrinterPass()) +FUNCTION_PASS("dot-cfg-only", CFGOnlyPrinterPass()) +FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false)) +FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true)) +FUNCTION_PASS("ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/false)) +FUNCTION_PASS("make-guards-explicit", MakeGuardsExplicitPass()) +FUNCTION_PASS("post-inline-ee-instrument", EntryExitInstrumenterPass(/*PostInlining=*/true)) +FUNCTION_PASS("gvn-hoist", GVNHoistPass()) +FUNCTION_PASS("instcombine", InstCombinePass()) +FUNCTION_PASS("instsimplify", InstSimplifyPass()) +FUNCTION_PASS("invalidate<all>", InvalidateAllAnalysesPass()) +FUNCTION_PASS("float2int", Float2IntPass()) +FUNCTION_PASS("no-op-function", NoOpFunctionPass()) +FUNCTION_PASS("libcalls-shrinkwrap", LibCallsShrinkWrapPass()) +FUNCTION_PASS("loweratomic", LowerAtomicPass()) +FUNCTION_PASS("lower-expect", LowerExpectIntrinsicPass()) +FUNCTION_PASS("lower-guard-intrinsic", LowerGuardIntrinsicPass()) +FUNCTION_PASS("lower-widenable-condition", LowerWidenableConditionPass()) +FUNCTION_PASS("guard-widening", GuardWideningPass()) +FUNCTION_PASS("gvn", GVN()) +FUNCTION_PASS("load-store-vectorizer", LoadStoreVectorizerPass()) +FUNCTION_PASS("loop-simplify", LoopSimplifyPass()) +FUNCTION_PASS("loop-sink", LoopSinkPass()) +FUNCTION_PASS("lowerinvoke", LowerInvokePass()) +FUNCTION_PASS("mem2reg", PromotePass()) +FUNCTION_PASS("memcpyopt", MemCpyOptPass()) +FUNCTION_PASS("mergeicmps", MergeICmpsPass()) +FUNCTION_PASS("mldst-motion", MergedLoadStoreMotionPass()) +FUNCTION_PASS("nary-reassociate", NaryReassociatePass()) +FUNCTION_PASS("newgvn", NewGVNPass()) +FUNCTION_PASS("jump-threading", JumpThreadingPass()) +FUNCTION_PASS("partially-inline-libcalls", PartiallyInlineLibCallsPass()) +FUNCTION_PASS("lcssa", LCSSAPass()) +FUNCTION_PASS("loop-data-prefetch", LoopDataPrefetchPass()) +FUNCTION_PASS("loop-load-elim", LoopLoadEliminationPass()) 
+FUNCTION_PASS("loop-fuse", LoopFusePass()) +FUNCTION_PASS("loop-distribute", LoopDistributePass()) +FUNCTION_PASS("pgo-memop-opt", PGOMemOPSizeOpt()) +FUNCTION_PASS("print", PrintFunctionPass(dbgs())) +FUNCTION_PASS("print<assumptions>", AssumptionPrinterPass(dbgs())) +FUNCTION_PASS("print<block-freq>", BlockFrequencyPrinterPass(dbgs())) +FUNCTION_PASS("print<branch-prob>", BranchProbabilityPrinterPass(dbgs())) +FUNCTION_PASS("print<da>", DependenceAnalysisPrinterPass(dbgs())) +FUNCTION_PASS("print<domtree>", DominatorTreePrinterPass(dbgs())) +FUNCTION_PASS("print<postdomtree>", PostDominatorTreePrinterPass(dbgs())) +FUNCTION_PASS("print<demanded-bits>", DemandedBitsPrinterPass(dbgs())) +FUNCTION_PASS("print<domfrontier>", DominanceFrontierPrinterPass(dbgs())) +FUNCTION_PASS("print<loops>", LoopPrinterPass(dbgs())) +FUNCTION_PASS("print<memoryssa>", MemorySSAPrinterPass(dbgs())) +FUNCTION_PASS("print<phi-values>", PhiValuesPrinterPass(dbgs())) +FUNCTION_PASS("print<regions>", RegionInfoPrinterPass(dbgs())) +FUNCTION_PASS("print<scalar-evolution>", ScalarEvolutionPrinterPass(dbgs())) +FUNCTION_PASS("print<stack-safety-local>", StackSafetyPrinterPass(dbgs())) +FUNCTION_PASS("reassociate", ReassociatePass()) +FUNCTION_PASS("scalarizer", ScalarizerPass()) +FUNCTION_PASS("sccp", SCCPPass()) +FUNCTION_PASS("sink", SinkingPass()) +FUNCTION_PASS("slp-vectorizer", SLPVectorizerPass()) +FUNCTION_PASS("speculative-execution", SpeculativeExecutionPass()) +FUNCTION_PASS("spec-phis", SpeculateAroundPHIsPass()) +FUNCTION_PASS("sroa", SROA()) +FUNCTION_PASS("tailcallelim", TailCallElimPass()) +FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass()) +FUNCTION_PASS("verify", VerifierPass()) +FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass()) +FUNCTION_PASS("verify<loops>", LoopVerifierPass()) +FUNCTION_PASS("verify<memoryssa>", MemorySSAVerifierPass()) +FUNCTION_PASS("verify<regions>", RegionInfoVerifierPass()) +FUNCTION_PASS("verify<safepoint-ir>", SafepointIRVerifierPass()) +FUNCTION_PASS("view-cfg", CFGViewerPass()) +FUNCTION_PASS("view-cfg-only", CFGOnlyViewerPass()) +FUNCTION_PASS("transform-warning", WarnMissedTransformationsPass()) +FUNCTION_PASS("asan", AddressSanitizerPass(false, false, false)) +FUNCTION_PASS("kasan", AddressSanitizerPass(true, false, false)) +FUNCTION_PASS("msan", MemorySanitizerPass({})) +FUNCTION_PASS("kmsan", MemorySanitizerPass({0, false, /*Kernel=*/true})) +FUNCTION_PASS("tsan", ThreadSanitizerPass()) +#undef FUNCTION_PASS + +#ifndef FUNCTION_PASS_WITH_PARAMS +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) +#endif +FUNCTION_PASS_WITH_PARAMS("unroll", + [](LoopUnrollOptions Opts) { + return LoopUnrollPass(Opts); + }, + parseLoopUnrollOptions) +FUNCTION_PASS_WITH_PARAMS("msan", + [](MemorySanitizerOptions Opts) { + return MemorySanitizerPass(Opts); + }, + parseMSanPassOptions) +FUNCTION_PASS_WITH_PARAMS("simplify-cfg", + [](SimplifyCFGOptions Opts) { + return SimplifyCFGPass(Opts); + }, + parseSimplifyCFGOptions) +FUNCTION_PASS_WITH_PARAMS("loop-vectorize", + [](LoopVectorizeOptions Opts) { + return LoopVectorizePass(Opts); + }, + parseLoopVectorizeOptions) +#undef FUNCTION_PASS_WITH_PARAMS + +#ifndef LOOP_ANALYSIS +#define LOOP_ANALYSIS(NAME, CREATE_PASS) +#endif +LOOP_ANALYSIS("no-op-loop", NoOpLoopAnalysis()) +LOOP_ANALYSIS("access-info", LoopAccessAnalysis()) +LOOP_ANALYSIS("ddg", DDGAnalysis()) +LOOP_ANALYSIS("ivusers", IVUsersAnalysis()) +LOOP_ANALYSIS("pass-instrumentation", PassInstrumentationAnalysis(PIC)) +#undef LOOP_ANALYSIS + 
+#ifndef LOOP_PASS +#define LOOP_PASS(NAME, CREATE_PASS) +#endif +LOOP_PASS("invalidate<all>", InvalidateAllAnalysesPass()) +LOOP_PASS("licm", LICMPass()) +LOOP_PASS("loop-idiom", LoopIdiomRecognizePass()) +LOOP_PASS("loop-instsimplify", LoopInstSimplifyPass()) +LOOP_PASS("rotate", LoopRotatePass()) +LOOP_PASS("no-op-loop", NoOpLoopPass()) +LOOP_PASS("print", PrintLoopPass(dbgs())) +LOOP_PASS("loop-deletion", LoopDeletionPass()) +LOOP_PASS("simplify-cfg", LoopSimplifyCFGPass()) +LOOP_PASS("strength-reduce", LoopStrengthReducePass()) +LOOP_PASS("indvars", IndVarSimplifyPass()) +LOOP_PASS("irce", IRCEPass()) +LOOP_PASS("unroll-and-jam", LoopUnrollAndJamPass()) +LOOP_PASS("unroll-full", LoopFullUnrollPass()) +LOOP_PASS("print-access-info", LoopAccessInfoPrinterPass(dbgs())) +LOOP_PASS("print<DDG>", DDGAnalysisPrinterPass(dbgs())) +LOOP_PASS("print<ivusers>", IVUsersPrinterPass(dbgs())) +LOOP_PASS("loop-predication", LoopPredicationPass()) +LOOP_PASS("guard-widening", GuardWideningPass()) +#undef LOOP_PASS + +#ifndef LOOP_PASS_WITH_PARAMS +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) +#endif +LOOP_PASS_WITH_PARAMS("unswitch", + [](bool NonTrivial) { + return SimpleLoopUnswitchPass(NonTrivial); + }, + parseLoopUnswitchOptions) +#undef LOOP_PASS_WITH_PARAMS diff --git a/hpvm/llvm_patches/lib/Transforms/Scalar/ADCE.cpp b/hpvm/llvm_patches/lib/Transforms/Scalar/ADCE.cpp new file mode 100644 index 0000000000000000000000000000000000000000..4a3b257ac957fdd3f7cb90da13650f1c5f2a0669 --- /dev/null +++ b/hpvm/llvm_patches/lib/Transforms/Scalar/ADCE.cpp @@ -0,0 +1,608 @@ +//===- ADCE.cpp - Code to perform dead code elimination -------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements the Aggressive Dead Code Elimination pass. This pass +// optimistically assumes that all instructions are dead until proven otherwise, +// allowing it to eliminate dead computations that other DCE passes do not +// catch, particularly involving loop computations. 
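+// Liveness is seeded from "root" instructions with observable behavior and
+// propagated backward through data and control dependences; whatever is left
+// unmarked afterwards is deleted.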
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/Scalar/ADCE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DepthFirstIterator.h"
+#include "llvm/ADT/GraphTraits.h"
+#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PostOrderIterator.h"
+#include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/IteratedDominanceFrontier.h"
+#include "llvm/Analysis/PostDominators.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/DebugInfoMetadata.h"
+#include "llvm/IR/DebugLoc.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/IRBuilder.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/InstrTypes.h"
+#include "llvm/IR/Instruction.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/IR/Use.h"
+#include "llvm/IR/Value.h"
+#include "llvm/Pass.h"
+#include "llvm/ProfileData/InstrProf.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Scalar.h"
+#include <cassert>
+#include <cstddef>
+#include <utility>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "adce"
+
+STATISTIC(NumRemoved, "Number of instructions removed");
+STATISTIC(NumBranchesRemoved, "Number of branch instructions removed");
+
+// This is a temporary option until we change the interface to this pass based
+// on optimization level.
+static cl::opt<bool> RemoveControlFlowFlag("adce-remove-control-flow",
+                                           cl::init(true), cl::Hidden);
+
+// This option enables removal of may-be-infinite loops which have no other
+// effect.
+static cl::opt<bool> RemoveLoops("adce-remove-loops", cl::init(false),
+                                 cl::Hidden);
+
+bool AggressiveDeadCodeElimination::performDeadCodeElimination() {
+  initialize();
+  markLiveInstructions();
+  return removeDeadInstructions();
+}
+
+static bool isUnconditionalBranch(Instruction *Term) {
+  auto *BR = dyn_cast<BranchInst>(Term);
+  return BR && BR->isUnconditional();
+}
+
+void AggressiveDeadCodeElimination::initialize() {
+  auto NumBlocks = F.size();
+
+  // We will have an entry in the map for each block, so we grow the structure
+  // to twice that size to keep the load factor low in the hash table.
+  BlockInfo.reserve(NumBlocks);
+  size_t NumInsts = 0;
+
+  // Iterate over blocks to initialize the BlockInfo entries, and count
+  // instructions to size the InstInfo hash table.
+  for (auto &BB : F) {
+    NumInsts += BB.size();
+    auto &Info = BlockInfo[&BB];
+    Info.BB = &BB;
+    Info.Terminator = BB.getTerminator();
+    Info.UnconditionalBranch = isUnconditionalBranch(Info.Terminator);
+  }
+
+  // Initialize the instruction map and set pointers to block info.
+  InstInfo.reserve(NumInsts);
+  for (auto &BBInfo : BlockInfo)
+    for (Instruction &I : *BBInfo.second.BB)
+      InstInfo[&I].Block = &BBInfo.second;
+
+  // Since BlockInfo holds pointers into InstInfo and vice versa, we may not
+  // add any more elements to either after this point.
+  for (auto &BBInfo : BlockInfo)
+    BBInfo.second.TerminatorLiveInfo = &InstInfo[BBInfo.second.Terminator];
+
+  // Collect the set of "root" instructions that are known live.
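+  // (Roots are instructions with observable behavior: EH pads, instructions
+  // that may have side effects, and terminators other than the branches and
+  // switches this pass is allowed to remove; see isAlwaysLive() below.)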
+  for (Instruction &I : instructions(F))
+    if (isAlwaysLive(I))
+      markLive(&I);
+
+  if (!RemoveControlFlowFlag)
+    return;
+
+  if (!RemoveLoops) {
+    // This stores state for the depth-first iterator. In addition to
+    // recording which nodes have been visited, we also record whether a node
+    // is currently on the "stack" of active ancestors of the current node.
+    using StatusMap = DenseMap<BasicBlock *, bool>;
+
+    class DFState : public StatusMap {
+    public:
+      std::pair<StatusMap::iterator, bool> insert(BasicBlock *BB) {
+        return StatusMap::insert(std::make_pair(BB, true));
+      }
+
+      // Invoked after we have visited all children of a node.
+      void completed(BasicBlock *BB) { (*this)[BB] = false; }
+
+      // Return true if \p BB is currently on the active stack
+      // of ancestors.
+      bool onStack(BasicBlock *BB) {
+        auto Iter = find(BB);
+        return Iter != end() && Iter->second;
+      }
+    } State;
+
+    State.reserve(F.size());
+    // Iterate over blocks in depth-first pre-order and treat all edges to a
+    // block already seen as loop back edges, marking the branch live if there
+    // is a back edge.
+    for (auto *BB : depth_first_ext(&F.getEntryBlock(), State)) {
+      Instruction *Term = BB->getTerminator();
+      if (isLive(Term))
+        continue;
+
+      for (auto *Succ : successors(BB))
+        if (State.onStack(Succ)) {
+          // Back edge.
+          markLive(Term);
+          break;
+        }
+    }
+  }
+
+  // Mark blocks live if there is no path from the block to a
+  // return of the function.
+  // We do this by seeing which of the postdomtree root children exit the
+  // program, and for all others, mark the subtree live.
+  for (auto &PDTChild : children<DomTreeNode *>(PDT.getRootNode())) {
+    auto *BB = PDTChild->getBlock();
+    auto &Info = BlockInfo[BB];
+    // A real function return.
+    if (isa<ReturnInst>(Info.Terminator)) {
+      LLVM_DEBUG(dbgs() << "post-dom root child is a return: " << BB->getName()
+                        << '\n';);
+      continue;
+    }
+
+    // This child is something else, like an infinite loop.
+    for (auto DFNode : depth_first(PDTChild))
+      markLive(BlockInfo[DFNode->getBlock()].Terminator);
+  }
+
+  // Treat the entry block as always live.
+  auto *BB = &F.getEntryBlock();
+  auto &EntryInfo = BlockInfo[BB];
+  EntryInfo.Live = true;
+  if (EntryInfo.UnconditionalBranch)
+    markLive(EntryInfo.Terminator);
+
+  // Build the initial collection of blocks with dead terminators.
+  for (auto &BBInfo : BlockInfo)
+    if (!BBInfo.second.terminatorIsLive())
+      BlocksWithDeadTerminators.insert(BBInfo.second.BB);
+}
+
+bool AggressiveDeadCodeElimination::isAlwaysLive(Instruction &I) {
+  // TODO -- use llvm::isInstructionTriviallyDead
+  if (I.isEHPad() || I.mayHaveSideEffects()) {
+    // Skip any value profile instrumentation calls if they are
+    // instrumenting constants.
+    if (isInstrumentsConstant(I))
+      return false;
+    return true;
+  }
+  if (!I.isTerminator())
+    return false;
+  if (RemoveControlFlowFlag && (isa<BranchInst>(I) || isa<SwitchInst>(I)))
+    return false;
+  return true;
+}
+
+// Check if this instruction is a runtime call for value profiling and
+// if it's instrumenting a constant.
+bool AggressiveDeadCodeElimination::isInstrumentsConstant(Instruction &I) {
+  // TODO -- move this test into llvm::isInstructionTriviallyDead
+  if (CallInst *CI = dyn_cast<CallInst>(&I))
+    if (Function *Callee = CI->getCalledFunction())
+      if (Callee->getName().equals(getInstrProfValueProfFuncName()))
+        if (isa<Constant>(CI->getArgOperand(0)))
+          return true;
+  return false;
+}
+
+void AggressiveDeadCodeElimination::markLiveInstructions() {
+  // Propagate liveness backwards to operands.
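+  // (The outer loop below runs to a fixed point: draining the worklist can
+  // mark new blocks control-flow live, which in turn forces more branch
+  // terminators live and refills the worklist.)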
+  do {
+    // The worklist holds newly discovered live instructions
+    // whose inputs we need to mark as live.
+    while (!Worklist.empty()) {
+      Instruction *LiveInst = Worklist.pop_back_val();
+      LLVM_DEBUG(dbgs() << "work live: "; LiveInst->dump(););
+
+      for (Use &OI : LiveInst->operands())
+        if (Instruction *Inst = dyn_cast<Instruction>(OI))
+          markLive(Inst);
+
+      if (auto *PN = dyn_cast<PHINode>(LiveInst))
+        markPhiLive(PN);
+    }
+
+    // After data flow liveness has been identified, examine which branch
+    // decisions are required to ensure that the live instructions execute.
+    markLiveBranchesFromControlDependences();
+
+  } while (!Worklist.empty());
+}
+
+void AggressiveDeadCodeElimination::markLive(Instruction *I) {
+  auto &Info = InstInfo[I];
+  if (Info.Live)
+    return;
+
+  LLVM_DEBUG(dbgs() << "mark live: "; I->dump());
+  Info.Live = true;
+  Worklist.push_back(I);
+
+  // Collect the live debug info scopes attached to this instruction.
+  if (const DILocation *DL = I->getDebugLoc())
+    collectLiveScopes(*DL);
+
+  // Mark the containing block live.
+  auto &BBInfo = *Info.Block;
+  if (BBInfo.Terminator == I) {
+    BlocksWithDeadTerminators.remove(BBInfo.BB);
+    // For live terminators, mark destination blocks
+    // live to preserve these control-flow edges.
+    if (!BBInfo.UnconditionalBranch)
+      for (auto *BB : successors(I->getParent()))
+        markLive(BB);
+  }
+  markLive(BBInfo);
+}
+
+void AggressiveDeadCodeElimination::markLive(BlockInfoType &BBInfo) {
+  if (BBInfo.Live)
+    return;
+  LLVM_DEBUG(dbgs() << "mark block live: " << BBInfo.BB->getName() << '\n');
+  BBInfo.Live = true;
+  if (!BBInfo.CFLive) {
+    BBInfo.CFLive = true;
+    NewLiveBlocks.insert(BBInfo.BB);
+  }
+
+  // Mark unconditional branches at the end of live blocks as live,
+  // since there is no work to do for them later.
+  if (BBInfo.UnconditionalBranch)
+    markLive(BBInfo.Terminator);
+}
+
+void AggressiveDeadCodeElimination::collectLiveScopes(const DILocalScope &LS) {
+  if (!AliveScopes.insert(&LS).second)
+    return;
+
+  if (isa<DISubprogram>(LS))
+    return;
+
+  // Tail-recurse through the scope chain.
+  collectLiveScopes(cast<DILocalScope>(*LS.getScope()));
+}
+
+void AggressiveDeadCodeElimination::collectLiveScopes(const DILocation &DL) {
+  // Even though DILocations are not scopes, shove them into AliveScopes so we
+  // don't revisit them.
+  if (!AliveScopes.insert(&DL).second)
+    return;
+
+  // Collect live scopes from the scope chain.
+  collectLiveScopes(*DL.getScope());
+
+  // Tail-recurse through the inlined-at chain.
+  if (const DILocation *IA = DL.getInlinedAt())
+    collectLiveScopes(*IA);
+}
+
+void AggressiveDeadCodeElimination::markPhiLive(PHINode *PN) {
+  auto &Info = BlockInfo[PN->getParent()];
+  // Only need to check this once per block.
+  if (Info.HasLivePhiNodes)
+    return;
+  Info.HasLivePhiNodes = true;
+
+  // If a predecessor block is not live, mark it as control-flow live,
+  // which will trigger marking live branches upon which
+  // that block is control dependent.
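+  // (A PHI's value depends on which incoming edge was taken, so the control
+  // flow into each predecessor must be kept live even if the predecessor
+  // computes nothing live itself.)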
+ for (auto *PredBB : predecessors(Info.BB)) { + auto &Info = BlockInfo[PredBB]; + if (!Info.CFLive) { + Info.CFLive = true; + NewLiveBlocks.insert(PredBB); + } + } +} + +void AggressiveDeadCodeElimination::markLiveBranchesFromControlDependences() { + if (BlocksWithDeadTerminators.empty()) + return; + + LLVM_DEBUG({ + dbgs() << "new live blocks:\n"; + for (auto *BB : NewLiveBlocks) + dbgs() << "\t" << BB->getName() << '\n'; + dbgs() << "dead terminator blocks:\n"; + for (auto *BB : BlocksWithDeadTerminators) + dbgs() << "\t" << BB->getName() << '\n'; + }); + + // The dominance frontier of a live block X in the reverse + // control graph is the set of blocks upon which X is control + // dependent. The following sequence computes the set of blocks + // which currently have dead terminators that are control + // dependence sources of a block which is in NewLiveBlocks. + + const SmallPtrSet<BasicBlock *, 16> BWDT{ + BlocksWithDeadTerminators.begin(), + BlocksWithDeadTerminators.end() + }; + SmallVector<BasicBlock *, 32> IDFBlocks; + ReverseIDFCalculator IDFs(PDT); + IDFs.setDefiningBlocks(NewLiveBlocks); + IDFs.setLiveInBlocks(BWDT); + IDFs.calculate(IDFBlocks); + NewLiveBlocks.clear(); + + // Dead terminators which control live blocks are now marked live. + for (auto *BB : IDFBlocks) { + LLVM_DEBUG(dbgs() << "live control in: " << BB->getName() << '\n'); + markLive(BB->getTerminator()); + } +} + +//===----------------------------------------------------------------------===// +// +// Routines to update the CFG and SSA information before removing dead code. +// +//===----------------------------------------------------------------------===// +bool AggressiveDeadCodeElimination::removeDeadInstructions() { + // Updates control and dataflow around dead blocks + updateDeadRegions(); + + LLVM_DEBUG({ + for (Instruction &I : instructions(F)) { + // Check if the instruction is alive. + if (isLive(&I)) + continue; + + if (auto *DII = dyn_cast<DbgVariableIntrinsic>(&I)) { + // Check if the scope of this variable location is alive. + if (AliveScopes.count(DII->getDebugLoc()->getScope())) + continue; + + // If intrinsic is pointing at a live SSA value, there may be an + // earlier optimization bug: if we know the location of the variable, + // why isn't the scope of the location alive? + if (Value *V = DII->getVariableLocation()) + if (Instruction *II = dyn_cast<Instruction>(V)) + if (isLive(II)) + dbgs() << "Dropping debug info for " << *DII << "\n"; + } + } + }); + + // The inverse of the live set is the dead set. These are those instructions + // that have no side effects and do not influence the control flow or return + // value of the function, and may therefore be deleted safely. + // NOTE: We reuse the Worklist vector here for memory efficiency. + for (Instruction &I : instructions(F)) { + // Check if the instruction is alive. + if (isLive(&I)) + continue; + + if (auto *DII = dyn_cast<DbgInfoIntrinsic>(&I)) { + // Check if the scope of this variable location is alive. + if (AliveScopes.count(DII->getDebugLoc()->getScope())) + continue; + + // Fallthrough and drop the intrinsic. + } + + // Prepare to delete. + Worklist.push_back(&I); + I.dropAllReferences(); + } + + for (Instruction *&I : Worklist) { + ++NumRemoved; + I->eraseFromParent(); + } + + return !Worklist.empty(); +} + +// A dead region is the set of dead blocks with a common live post-dominator. 
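+// (updateDeadRegions() rewrites each dead conditional terminator into an
+// unconditional branch to a single live successor, then informs the
+// (post)dominator trees about the CFG edges that disappeared.)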
+void AggressiveDeadCodeElimination::updateDeadRegions() {
+  LLVM_DEBUG({
+    dbgs() << "final dead terminator blocks: " << '\n';
+    for (auto *BB : BlocksWithDeadTerminators)
+      dbgs() << '\t' << BB->getName()
+             << (BlockInfo[BB].Live ? " LIVE\n" : "\n");
+  });
+
+  // Don't compute the post ordering unless we need it.
+  bool HavePostOrder = false;
+
+  for (auto *BB : BlocksWithDeadTerminators) {
+    auto &Info = BlockInfo[BB];
+    if (Info.UnconditionalBranch) {
+      InstInfo[Info.Terminator].Live = true;
+      continue;
+    }
+
+    if (!HavePostOrder) {
+      computeReversePostOrder();
+      HavePostOrder = true;
+    }
+
+    // Add an unconditional branch to the successor closest to the
+    // end of the function, which ensures a path to the exit for each
+    // live edge.
+    BlockInfoType *PreferredSucc = nullptr;
+    for (auto *Succ : successors(BB)) {
+      auto *Info = &BlockInfo[Succ];
+      if (!PreferredSucc || PreferredSucc->PostOrder < Info->PostOrder)
+        PreferredSucc = Info;
+    }
+    assert((PreferredSucc && PreferredSucc->PostOrder > 0) &&
+           "Failed to find safe successor for dead branch");
+
+    // Collect removed successors to update the (Post)DominatorTrees.
+    SmallPtrSet<BasicBlock *, 4> RemovedSuccessors;
+    bool First = true;
+    for (auto *Succ : successors(BB)) {
+      if (!First || Succ != PreferredSucc->BB) {
+        Succ->removePredecessor(BB);
+        RemovedSuccessors.insert(Succ);
+      } else
+        First = false;
+    }
+    makeUnconditional(BB, PreferredSucc->BB);
+
+    // Inform the dominators about the deleted CFG edges.
+    SmallVector<DominatorTree::UpdateType, 4> DeletedEdges;
+    for (auto *Succ : RemovedSuccessors) {
+      // It might have happened that the same successor appeared multiple times
+      // and the CFG edge wasn't really removed.
+      if (Succ != PreferredSucc->BB) {
+        LLVM_DEBUG(dbgs() << "ADCE: (Post)DomTree edge enqueued for deletion "
+                          << BB->getName() << " -> " << Succ->getName()
+                          << "\n");
+        DeletedEdges.push_back({DominatorTree::Delete, BB, Succ});
+      }
+    }
+
+    DomTreeUpdater(DT, &PDT, DomTreeUpdater::UpdateStrategy::Eager)
+        .applyUpdates(DeletedEdges);
+
+    NumBranchesRemoved += 1;
+  }
+}
+
+// Reverse top-sort order.
+void AggressiveDeadCodeElimination::computeReversePostOrder() {
+  // This provides a post-order numbering of the reverse control flow graph.
+  // Note that it is incomplete in the presence of infinite loops, but we
+  // don't need to number blocks which don't reach the end of the function,
+  // since all branches in those blocks are forced live.
+
+  // For each block without successors, extend the DFS from the block
+  // backward through the graph.
+  SmallPtrSet<BasicBlock *, 16> Visited;
+  unsigned PostOrder = 0;
+  for (auto &BB : F) {
+    if (succ_begin(&BB) != succ_end(&BB))
+      continue;
+    for (BasicBlock *Block : inverse_post_order_ext(&BB, Visited))
+      BlockInfo[Block].PostOrder = PostOrder++;
+  }
+}
+
+void AggressiveDeadCodeElimination::makeUnconditional(BasicBlock *BB,
+                                                      BasicBlock *Target) {
+  Instruction *PredTerm = BB->getTerminator();
+  // Collect the live debug info scopes attached to this instruction.
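+  // (The terminator being replaced may carry a debug location; keeping its
+  // scope alive prevents removeDeadInstructions() from dropping debug
+  // intrinsics that still refer to that scope.)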
+  if (const DILocation *DL = PredTerm->getDebugLoc())
+    collectLiveScopes(*DL);
+
+  // Just mark an existing unconditional branch live.
+  if (isUnconditionalBranch(PredTerm)) {
+    PredTerm->setSuccessor(0, Target);
+    InstInfo[PredTerm].Live = true;
+    return;
+  }
+  LLVM_DEBUG(dbgs() << "making unconditional " << BB->getName() << '\n');
+  NumBranchesRemoved += 1;
+  IRBuilder<> Builder(PredTerm);
+  auto *NewTerm = Builder.CreateBr(Target);
+  InstInfo[NewTerm].Live = true;
+  if (const DILocation *DL = PredTerm->getDebugLoc())
+    NewTerm->setDebugLoc(DL);
+
+  InstInfo.erase(PredTerm);
+  PredTerm->eraseFromParent();
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Pass Manager integration code
+//
+//===----------------------------------------------------------------------===//
+PreservedAnalyses ADCEPass::run(Function &F, FunctionAnalysisManager &FAM) {
+  // ADCE does not need DominatorTree, but requests it here anyway so the
+  // analysis can be updated if it is already available.
+  auto *DT = FAM.getCachedResult<DominatorTreeAnalysis>(F);
+  auto &PDT = FAM.getResult<PostDominatorTreeAnalysis>(F);
+  if (!AggressiveDeadCodeElimination(F, DT, PDT).performDeadCodeElimination())
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<GlobalsAA>();
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<PostDominatorTreeAnalysis>();
+  return PA;
+}
+
+namespace {
+
+struct ADCELegacyPass : public FunctionPass {
+  static char ID; // Pass identification, replacement for typeid.
+
+  ADCELegacyPass() : FunctionPass(ID) {
+    initializeADCELegacyPassPass(*PassRegistry::getPassRegistry());
+  }
+
+  bool runOnFunction(Function &F) override {
+    if (skipFunction(F))
+      return false;
+
+    // ADCE does not need DominatorTree, but requests it here anyway so the
+    // analysis can be updated if it is already available.
+    auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
+    auto *DT = DTWP ? &DTWP->getDomTree() : nullptr;
+    auto &PDT = getAnalysis<PostDominatorTreeWrapperPass>().getPostDomTree();
+    return AggressiveDeadCodeElimination(F, DT, PDT)
+        .performDeadCodeElimination();
+  }
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired<PostDominatorTreeWrapperPass>();
+    if (!RemoveControlFlowFlag)
+      AU.setPreservesCFG();
+    else {
+      AU.addPreserved<DominatorTreeWrapperPass>();
+      AU.addPreserved<PostDominatorTreeWrapperPass>();
+    }
+    AU.addPreserved<GlobalsAAWrapperPass>();
+  }
+};
+
+} // end anonymous namespace
+
+char ADCELegacyPass::ID = 0;
+
+INITIALIZE_PASS_BEGIN(ADCELegacyPass, "adce",
+                      "Aggressive Dead Code Elimination", false, false)
+INITIALIZE_PASS_DEPENDENCY(PostDominatorTreeWrapperPass)
+INITIALIZE_PASS_END(ADCELegacyPass, "adce", "Aggressive Dead Code Elimination",
+                    false, false)
+
+FunctionPass *llvm::createAggressiveDCEPass() { return new ADCELegacyPass(); }
diff --git a/hpvm/llvm_patches/lib/Transforms/Scalar/EarlyCSE.cpp b/hpvm/llvm_patches/lib/Transforms/Scalar/EarlyCSE.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..4c2bdd6f4a93caaeb7cc4d7d2c0cf63bbc34ccac
--- /dev/null
+++ b/hpvm/llvm_patches/lib/Transforms/Scalar/EarlyCSE.cpp
@@ -0,0 +1,1388 @@
+//===- EarlyCSE.cpp - Simple and fast CSE pass ----------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass performs a simple dominator tree walk that eliminates trivially +// redundant instructions. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Scalar/EarlyCSE.h" +#include "llvm/ADT/DenseMapInfo.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopedHashTable.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/GuardUtils.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/PassManager.h" +#include "llvm/IR/PatternMatch.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Use.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/AtomicOrdering.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/DebugCounter.h" +#include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Scalar.h" +#include "llvm/Transforms/Utils/GuardUtils.h" +#include <cassert> +#include <deque> +#include <memory> +#include <utility> + +using namespace llvm; +using namespace llvm::PatternMatch; + +#define DEBUG_TYPE "early-cse" + +STATISTIC(NumSimplify, "Number of instructions simplified or DCE'd"); +STATISTIC(NumCSE, "Number of instructions CSE'd"); +STATISTIC(NumCSECVP, "Number of compare instructions CVP'd"); +STATISTIC(NumCSELoad, "Number of load instructions CSE'd"); +STATISTIC(NumCSECall, "Number of call instructions CSE'd"); +STATISTIC(NumDSE, "Number of trivial dead stores removed"); + +DEBUG_COUNTER(CSECounter, "early-cse", + "Controls which instructions are removed"); + +static cl::opt<unsigned> EarlyCSEMssaOptCap( + "earlycse-mssa-optimization-cap", cl::init(500), cl::Hidden, + cl::desc("Enable imprecision in EarlyCSE in pathological cases, in exchange " + "for faster compile. Caps the MemorySSA clobbering calls.")); + +static cl::opt<bool> EarlyCSEDebugHash( + "earlycse-debug-hash", cl::init(false), cl::Hidden, + cl::desc("Perform extra assertion checking to verify that SimpleValue's hash " + "function is well-behaved w.r.t. its isEqual predicate")); + +//===----------------------------------------------------------------------===// +// SimpleValue +//===----------------------------------------------------------------------===// + +namespace { + +/// Struct representing the available values in the scoped hash table. 
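+/// (A SimpleValue wraps an instruction whose result depends only on its
+/// opcode and operands -- casts, binary operators, compares, selects, GEPs,
+/// vector and aggregate operations, and non-void readnone calls -- so
+/// value-identical instructions can share one table entry.)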
+struct SimpleValue {
+  Instruction *Inst;
+
+  SimpleValue(Instruction *I) : Inst(I) {
+    assert((isSentinel() || canHandle(I)) && "Inst can't be handled!");
+  }
+
+  bool isSentinel() const {
+    return Inst == DenseMapInfo<Instruction *>::getEmptyKey() ||
+           Inst == DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static bool canHandle(Instruction *Inst) {
+    // This can only handle non-void readnone functions.
+    if (CallInst *CI = dyn_cast<CallInst>(Inst))
+      return CI->doesNotAccessMemory() && !CI->getType()->isVoidTy();
+    return isa<CastInst>(Inst) || isa<BinaryOperator>(Inst) ||
+           isa<GetElementPtrInst>(Inst) || isa<CmpInst>(Inst) ||
+           isa<SelectInst>(Inst) || isa<ExtractElementInst>(Inst) ||
+           isa<InsertElementInst>(Inst) || isa<ShuffleVectorInst>(Inst) ||
+           isa<ExtractValueInst>(Inst) || isa<InsertValueInst>(Inst);
+  }
+};
+
+} // end anonymous namespace
+
+namespace llvm {
+
+template <> struct DenseMapInfo<SimpleValue> {
+  static inline SimpleValue getEmptyKey() {
+    return DenseMapInfo<Instruction *>::getEmptyKey();
+  }
+
+  static inline SimpleValue getTombstoneKey() {
+    return DenseMapInfo<Instruction *>::getTombstoneKey();
+  }
+
+  static unsigned getHashValue(SimpleValue Val);
+  static bool isEqual(SimpleValue LHS, SimpleValue RHS);
+};
+
+} // end namespace llvm
+
+/// Match a 'select', including an optional 'not' of the condition.
+static bool matchSelectWithOptionalNotCond(Value *V, Value *&Cond, Value *&A,
+                                           Value *&B,
+                                           SelectPatternFlavor &Flavor) {
+  // Return false if V is not even a select.
+  if (!match(V, m_Select(m_Value(Cond), m_Value(A), m_Value(B))))
+    return false;
+
+  // Look through a 'not' of the condition operand by swapping A/B.
+  Value *CondNot;
+  if (match(Cond, m_Not(m_Value(CondNot)))) {
+    Cond = CondNot;
+    std::swap(A, B);
+  }
+
+  // Set the flavor if we find a match, or set it to unknown otherwise; in
+  // either case, return true to indicate that this is a select we can
+  // process.
+  if (auto *CmpI = dyn_cast<ICmpInst>(Cond))
+    Flavor = matchDecomposedSelectPattern(CmpI, A, B, A, B).Flavor;
+  else
+    Flavor = SPF_UNKNOWN;
+
+  return true;
+}
+
+static unsigned getHashValueImpl(SimpleValue Val) {
+  Instruction *Inst = Val.Inst;
+  // Hash in all of the operands as pointers.
+  if (BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Inst)) {
+    Value *LHS = BinOp->getOperand(0);
+    Value *RHS = BinOp->getOperand(1);
+    if (BinOp->isCommutative() && BinOp->getOperand(0) > BinOp->getOperand(1))
+      std::swap(LHS, RHS);
+
+    return hash_combine(BinOp->getOpcode(), LHS, RHS);
+  }
+
+  if (CmpInst *CI = dyn_cast<CmpInst>(Inst)) {
+    // Compares can be commuted by swapping the comparands and
+    // updating the predicate. Choose the form that has the
+    // comparands in sorted order, or in the case of a tie, the
+    // one with the lower predicate.
+    Value *LHS = CI->getOperand(0);
+    Value *RHS = CI->getOperand(1);
+    CmpInst::Predicate Pred = CI->getPredicate();
+    CmpInst::Predicate SwappedPred = CI->getSwappedPredicate();
+    if (std::tie(LHS, Pred) > std::tie(RHS, SwappedPred)) {
+      std::swap(LHS, RHS);
+      Pred = SwappedPred;
+    }
+    return hash_combine(Inst->getOpcode(), Pred, LHS, RHS);
+  }
+
+  // Hash general selects to allow matching commuted true/false operands.
+  SelectPatternFlavor SPF;
+  Value *Cond, *A, *B;
+  if (matchSelectWithOptionalNotCond(Inst, Cond, A, B, SPF)) {
+    // Hash min/max/abs (cmp + select) to allow for commuted operands.
+ // Min/max may also have non-canonical compare predicate (eg, the compare for + // smin may use 'sgt' rather than 'slt'), and non-canonical operands in the + // compare. + // TODO: We should also detect FP min/max. + if (SPF == SPF_SMIN || SPF == SPF_SMAX || + SPF == SPF_UMIN || SPF == SPF_UMAX) { + if (A > B) + std::swap(A, B); + return hash_combine(Inst->getOpcode(), SPF, A, B); + } + if (SPF == SPF_ABS || SPF == SPF_NABS) { + // ABS/NABS always puts the input in A and its negation in B. + return hash_combine(Inst->getOpcode(), SPF, A, B); + } + + // Hash general selects to allow matching commuted true/false operands. + + // If we do not have a compare as the condition, just hash in the condition. + CmpInst::Predicate Pred; + Value *X, *Y; + if (!match(Cond, m_Cmp(Pred, m_Value(X), m_Value(Y)))) + return hash_combine(Inst->getOpcode(), Cond, A, B); + + // Similar to cmp normalization (above) - canonicalize the predicate value: + // select (icmp Pred, X, Y), A, B --> select (icmp InvPred, X, Y), B, A + if (CmpInst::getInversePredicate(Pred) < Pred) { + Pred = CmpInst::getInversePredicate(Pred); + std::swap(A, B); + } + return hash_combine(Inst->getOpcode(), Pred, X, Y, A, B); + } + + if (CastInst *CI = dyn_cast<CastInst>(Inst)) + return hash_combine(CI->getOpcode(), CI->getType(), CI->getOperand(0)); + + if (const ExtractValueInst *EVI = dyn_cast<ExtractValueInst>(Inst)) + return hash_combine(EVI->getOpcode(), EVI->getOperand(0), + hash_combine_range(EVI->idx_begin(), EVI->idx_end())); + + if (const InsertValueInst *IVI = dyn_cast<InsertValueInst>(Inst)) + return hash_combine(IVI->getOpcode(), IVI->getOperand(0), + IVI->getOperand(1), + hash_combine_range(IVI->idx_begin(), IVI->idx_end())); + + assert((isa<CallInst>(Inst) || isa<GetElementPtrInst>(Inst) || + isa<ExtractElementInst>(Inst) || isa<InsertElementInst>(Inst) || + isa<ShuffleVectorInst>(Inst)) && + "Invalid/unknown instruction"); + + // Mix in the opcode. + return hash_combine( + Inst->getOpcode(), + hash_combine_range(Inst->value_op_begin(), Inst->value_op_end())); +} + +unsigned DenseMapInfo<SimpleValue>::getHashValue(SimpleValue Val) { +#ifndef NDEBUG + // If -earlycse-debug-hash was specified, return a constant -- this + // will force all hashing to collide, so we'll exhaustively search + // the table for a match, and the assertion in isEqual will fire if + // there's a bug causing equal keys to hash differently. 
+ if (EarlyCSEDebugHash) + return 0; +#endif + return getHashValueImpl(Val); +} + +static bool isEqualImpl(SimpleValue LHS, SimpleValue RHS) { + Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; + + if (LHS.isSentinel() || RHS.isSentinel()) + return LHSI == RHSI; + + if (LHSI->getOpcode() != RHSI->getOpcode()) + return false; + if (LHSI->isIdenticalToWhenDefined(RHSI)) + return true; + + // If we're not strictly identical, we still might be a commutable instruction + if (BinaryOperator *LHSBinOp = dyn_cast<BinaryOperator>(LHSI)) { + if (!LHSBinOp->isCommutative()) + return false; + + assert(isa<BinaryOperator>(RHSI) && + "same opcode, but different instruction type?"); + BinaryOperator *RHSBinOp = cast<BinaryOperator>(RHSI); + + // Commuted equality + return LHSBinOp->getOperand(0) == RHSBinOp->getOperand(1) && + LHSBinOp->getOperand(1) == RHSBinOp->getOperand(0); + } + if (CmpInst *LHSCmp = dyn_cast<CmpInst>(LHSI)) { + assert(isa<CmpInst>(RHSI) && + "same opcode, but different instruction type?"); + CmpInst *RHSCmp = cast<CmpInst>(RHSI); + // Commuted equality + return LHSCmp->getOperand(0) == RHSCmp->getOperand(1) && + LHSCmp->getOperand(1) == RHSCmp->getOperand(0) && + LHSCmp->getSwappedPredicate() == RHSCmp->getPredicate(); + } + + // Min/max/abs can occur with commuted operands, non-canonical predicates, + // and/or non-canonical operands. + // Selects can be non-trivially equivalent via inverted conditions and swaps. + SelectPatternFlavor LSPF, RSPF; + Value *CondL, *CondR, *LHSA, *RHSA, *LHSB, *RHSB; + if (matchSelectWithOptionalNotCond(LHSI, CondL, LHSA, LHSB, LSPF) && + matchSelectWithOptionalNotCond(RHSI, CondR, RHSA, RHSB, RSPF)) { + if (LSPF == RSPF) { + // TODO: We should also detect FP min/max. + if (LSPF == SPF_SMIN || LSPF == SPF_SMAX || + LSPF == SPF_UMIN || LSPF == SPF_UMAX) + return ((LHSA == RHSA && LHSB == RHSB) || + (LHSA == RHSB && LHSB == RHSA)); + + if (LSPF == SPF_ABS || LSPF == SPF_NABS) { + // Abs results are placed in a defined order by matchSelectPattern. + return LHSA == RHSA && LHSB == RHSB; + } + + // select Cond, A, B <--> select not(Cond), B, A + if (CondL == CondR && LHSA == RHSA && LHSB == RHSB) + return true; + } + + // If the true/false operands are swapped and the conditions are compares + // with inverted predicates, the selects are equal: + // select (icmp Pred, X, Y), A, B <--> select (icmp InvPred, X, Y), B, A + // + // This also handles patterns with a double-negation in the sense of not + + // inverse, because we looked through a 'not' in the matching function and + // swapped A/B: + // select (cmp Pred, X, Y), A, B <--> select (not (cmp InvPred, X, Y)), B, A + // + // This intentionally does NOT handle patterns with a double-negation in + // the sense of not + not, because doing so could result in values + // comparing + // as equal that hash differently in the min/max/abs cases like: + // select (cmp slt, X, Y), X, Y <--> select (not (not (cmp slt, X, Y))), X, Y + // ^ hashes as min ^ would not hash as min + // In the context of the EarlyCSE pass, however, such cases never reach + // this code, as we simplify the double-negation before hashing the second + // select (and so still succeed at CSEing them). 
+ if (LHSA == RHSB && LHSB == RHSA) { + CmpInst::Predicate PredL, PredR; + Value *X, *Y; + if (match(CondL, m_Cmp(PredL, m_Value(X), m_Value(Y))) && + match(CondR, m_Cmp(PredR, m_Specific(X), m_Specific(Y))) && + CmpInst::getInversePredicate(PredL) == PredR) + return true; + } + } + + return false; +} + +bool DenseMapInfo<SimpleValue>::isEqual(SimpleValue LHS, SimpleValue RHS) { + // These comparisons are nontrivial, so assert that equality implies + // hash equality (DenseMap demands this as an invariant). + bool Result = isEqualImpl(LHS, RHS); + assert(!Result || (LHS.isSentinel() && LHS.Inst == RHS.Inst) || + getHashValueImpl(LHS) == getHashValueImpl(RHS)); + return Result; +} + +//===----------------------------------------------------------------------===// +// CallValue +//===----------------------------------------------------------------------===// + +namespace { + +/// Struct representing the available call values in the scoped hash +/// table. +struct CallValue { + Instruction *Inst; + + CallValue(Instruction *I) : Inst(I) { + assert((isSentinel() || canHandle(I)) && "Inst can't be handled!"); + } + + bool isSentinel() const { + return Inst == DenseMapInfo<Instruction *>::getEmptyKey() || + Inst == DenseMapInfo<Instruction *>::getTombstoneKey(); + } + + static bool canHandle(Instruction *Inst) { + // Don't value number anything that returns void. + if (Inst->getType()->isVoidTy()) + return false; + + CallInst *CI = dyn_cast<CallInst>(Inst); + if (!CI || !CI->onlyReadsMemory()) + return false; + return true; + } +}; + +} // end anonymous namespace + +namespace llvm { + +template <> struct DenseMapInfo<CallValue> { + static inline CallValue getEmptyKey() { + return DenseMapInfo<Instruction *>::getEmptyKey(); + } + + static inline CallValue getTombstoneKey() { + return DenseMapInfo<Instruction *>::getTombstoneKey(); + } + + static unsigned getHashValue(CallValue Val); + static bool isEqual(CallValue LHS, CallValue RHS); +}; + +} // end namespace llvm + +unsigned DenseMapInfo<CallValue>::getHashValue(CallValue Val) { + Instruction *Inst = Val.Inst; + // Hash all of the operands as pointers and mix in the opcode. + return hash_combine( + Inst->getOpcode(), + hash_combine_range(Inst->value_op_begin(), Inst->value_op_end())); +} + +bool DenseMapInfo<CallValue>::isEqual(CallValue LHS, CallValue RHS) { + Instruction *LHSI = LHS.Inst, *RHSI = RHS.Inst; + if (LHS.isSentinel() || RHS.isSentinel()) + return LHSI == RHSI; + return LHSI->isIdenticalTo(RHSI); +} + +//===----------------------------------------------------------------------===// +// EarlyCSE implementation +//===----------------------------------------------------------------------===// + +namespace { + +/// A simple and fast domtree-based CSE pass. +/// +/// This pass does a simple depth-first walk over the dominator tree, +/// eliminating trivially redundant instructions and using instsimplify to +/// canonicalize things as it goes. It is intended to be fast and catch obvious +/// cases so that instcombine and other passes are more effective. It is +/// expected that a later pass of GVN will catch the interesting/hard cases. 
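+/// For example (illustrative IR, not from the original source), given:
+///   %a = add i32 %x, %y
+///   %b = add i32 %x, %y
+/// the pass replaces all uses of %b with %a and erases %b.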
+class EarlyCSE {
+public:
+  const TargetLibraryInfo &TLI;
+  const TargetTransformInfo &TTI;
+  DominatorTree &DT;
+  AssumptionCache &AC;
+  const SimplifyQuery SQ;
+  MemorySSA *MSSA;
+  std::unique_ptr<MemorySSAUpdater> MSSAUpdater;
+
+  using AllocatorTy =
+      RecyclingAllocator<BumpPtrAllocator,
+                         ScopedHashTableVal<SimpleValue, Value *>>;
+  using ScopedHTType =
+      ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
+                      AllocatorTy>;
+
+  /// A scoped hash table of the current values of all of our simple
+  /// scalar expressions.
+  ///
+  /// As we walk down the domtree, we look to see if instructions are in this:
+  /// if so, we replace them with what we find, otherwise we insert them so
+  /// that dominated values can succeed in their lookup.
+  ScopedHTType AvailableValues;
+
+  /// A scoped hash table of the current values of previously encountered
+  /// memory locations.
+  ///
+  /// This allows us to get efficient access to dominating loads or stores when
+  /// we have a fully redundant load. In addition to the most recent load, we
+  /// keep track of a generation count of the read, which is compared against
+  /// the current generation count. The current generation count is incremented
+  /// after every possibly writing memory operation, which ensures that we only
+  /// CSE loads with other loads that have no intervening store. Ordering
+  /// events (such as fences or atomic instructions) increment the generation
+  /// count as well; essentially, we model these as writes to all possible
+  /// locations. Note that atomic and/or volatile loads and stores can be
+  /// present in the table; it is the responsibility of the consumer to inspect
+  /// the atomicity/volatility if needed.
+  struct LoadValue {
+    Instruction *DefInst = nullptr;
+    unsigned Generation = 0;
+    int MatchingId = -1;
+    bool IsAtomic = false;
+
+    LoadValue() = default;
+    LoadValue(Instruction *Inst, unsigned Generation, unsigned MatchingId,
+              bool IsAtomic)
+        : DefInst(Inst), Generation(Generation), MatchingId(MatchingId),
+          IsAtomic(IsAtomic) {}
+  };
+
+  using LoadMapAllocator =
+      RecyclingAllocator<BumpPtrAllocator,
+                         ScopedHashTableVal<Value *, LoadValue>>;
+  using LoadHTType =
+      ScopedHashTable<Value *, LoadValue, DenseMapInfo<Value *>,
+                      LoadMapAllocator>;
+
+  LoadHTType AvailableLoads;
+
+  // A scoped hash table mapping memory locations (represented as typed
+  // addresses) to generation numbers at which that memory location became
+  // (henceforth indefinitely) invariant.
+  using InvariantMapAllocator =
+      RecyclingAllocator<BumpPtrAllocator,
+                         ScopedHashTableVal<MemoryLocation, unsigned>>;
+  using InvariantHTType =
+      ScopedHashTable<MemoryLocation, unsigned, DenseMapInfo<MemoryLocation>,
+                      InvariantMapAllocator>;
+  InvariantHTType AvailableInvariants;
+
+  /// A scoped hash table of the current values of read-only call
+  /// values.
+  ///
+  /// It uses the same generation count as loads.
+  using CallHTType =
+      ScopedHashTable<CallValue, std::pair<Instruction *, unsigned>>;
+  CallHTType AvailableCalls;
+
+  /// This is the current generation of the memory value.
+  unsigned CurrentGeneration = 0;
+
+  /// Set up the EarlyCSE runner for a particular function.
+  EarlyCSE(const DataLayout &DL, const TargetLibraryInfo &TLI,
+           const TargetTransformInfo &TTI, DominatorTree &DT,
+           AssumptionCache &AC, MemorySSA *MSSA)
+      : TLI(TLI), TTI(TTI), DT(DT), AC(AC), SQ(DL, &TLI, &DT, &AC), MSSA(MSSA),
+        MSSAUpdater(llvm::make_unique<MemorySSAUpdater>(MSSA)) {}
+
+  bool run();
+
+private:
+  unsigned ClobberCounter = 0;
+  // Almost a POD, but needs to call the constructors for the scoped hash
+  // tables so that a new scope gets pushed on. These are RAII so that the
+  // scope gets popped when the NodeScope is destroyed.
+  class NodeScope {
+  public:
+    NodeScope(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls)
+        : Scope(AvailableValues), LoadScope(AvailableLoads),
+          InvariantScope(AvailableInvariants), CallScope(AvailableCalls) {}
+    NodeScope(const NodeScope &) = delete;
+    NodeScope &operator=(const NodeScope &) = delete;
+
+  private:
+    ScopedHTType::ScopeTy Scope;
+    LoadHTType::ScopeTy LoadScope;
+    InvariantHTType::ScopeTy InvariantScope;
+    CallHTType::ScopeTy CallScope;
+  };
+
+  // Contains all the needed information to create a stack for doing a depth
+  // first traversal of the tree. This includes scopes for values, loads, and
+  // calls as well as the generation. There is a child iterator so that the
+  // children do not need to be stored separately.
+  class StackNode {
+  public:
+    StackNode(ScopedHTType &AvailableValues, LoadHTType &AvailableLoads,
+              InvariantHTType &AvailableInvariants, CallHTType &AvailableCalls,
+              unsigned cg, DomTreeNode *n, DomTreeNode::iterator child,
+              DomTreeNode::iterator end)
+        : CurrentGeneration(cg), ChildGeneration(cg), Node(n), ChildIter(child),
+          EndIter(end),
+          Scopes(AvailableValues, AvailableLoads, AvailableInvariants,
+                 AvailableCalls)
+      {}
+    StackNode(const StackNode &) = delete;
+    StackNode &operator=(const StackNode &) = delete;
+
+    // Accessors.
+    unsigned currentGeneration() { return CurrentGeneration; }
+    unsigned childGeneration() { return ChildGeneration; }
+    void childGeneration(unsigned generation) { ChildGeneration = generation; }
+    DomTreeNode *node() { return Node; }
+    DomTreeNode::iterator childIter() { return ChildIter; }
+
+    DomTreeNode *nextChild() {
+      DomTreeNode *child = *ChildIter;
+      ++ChildIter;
+      return child;
+    }
+
+    DomTreeNode::iterator end() { return EndIter; }
+    bool isProcessed() { return Processed; }
+    void process() { Processed = true; }
+
+  private:
+    unsigned CurrentGeneration;
+    unsigned ChildGeneration;
+    DomTreeNode *Node;
+    DomTreeNode::iterator ChildIter;
+    DomTreeNode::iterator EndIter;
+    NodeScope Scopes;
+    bool Processed = false;
+  };
+
+  /// Wrapper class to handle memory instructions, including loads,
+  /// stores and intrinsic loads and stores defined by the target.
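+  /// (Summary note, not upstream text: a target load/store intrinsic that
+  /// TTI.getTgtMemIntrinsic() can describe is treated much like an ordinary
+  /// LoadInst/StoreInst, so it participates in the CSE and DSE logic below.)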
+ class ParseMemoryInst { + public: + ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) + : Inst(Inst) { + if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) + if (TTI.getTgtMemIntrinsic(II, Info)) + IsTargetMemInst = true; + } + + bool isLoad() const { + if (IsTargetMemInst) return Info.ReadMem; + return isa<LoadInst>(Inst); + } + + bool isStore() const { + if (IsTargetMemInst) return Info.WriteMem; + return isa<StoreInst>(Inst); + } + + bool isAtomic() const { + if (IsTargetMemInst) + return Info.Ordering != AtomicOrdering::NotAtomic; + return Inst->isAtomic(); + } + + bool isUnordered() const { + if (IsTargetMemInst) + return Info.isUnordered(); + + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + return LI->isUnordered(); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + return SI->isUnordered(); + } + // Conservative answer + return !Inst->isAtomic(); + } + + bool isVolatile() const { + if (IsTargetMemInst) + return Info.IsVolatile; + + if (LoadInst *LI = dyn_cast<LoadInst>(Inst)) { + return LI->isVolatile(); + } else if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { + return SI->isVolatile(); + } + // Conservative answer + return true; + } + + bool isInvariantLoad() const { + if (auto *LI = dyn_cast<LoadInst>(Inst)) + return LI->getMetadata(LLVMContext::MD_invariant_load) != nullptr; + return false; + } + + bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { + return (getPointerOperand() == Inst.getPointerOperand() && + getMatchingId() == Inst.getMatchingId()); + } + + bool isValid() const { return getPointerOperand() != nullptr; } + + // For regular (non-intrinsic) loads/stores, this is set to -1. For + // intrinsic loads/stores, the id is retrieved from the corresponding + // field in the MemIntrinsicInfo structure. That field contains + // non-negative values only. + int getMatchingId() const { + if (IsTargetMemInst) return Info.MatchingId; + return -1; + } + + Value *getPointerOperand() const { + if (IsTargetMemInst) return Info.PtrVal; + return getLoadStorePointerOperand(Inst); + } + + bool mayReadFromMemory() const { + if (IsTargetMemInst) return Info.ReadMem; + return Inst->mayReadFromMemory(); + } + + bool mayWriteToMemory() const { + if (IsTargetMemInst) return Info.WriteMem; + return Inst->mayWriteToMemory(); + } + + private: + bool IsTargetMemInst = false; + MemIntrinsicInfo Info; + Instruction *Inst; + }; + + bool processNode(DomTreeNode *Node); + + bool handleBranchCondition(Instruction *CondInst, const BranchInst *BI, + const BasicBlock *BB, const BasicBlock *Pred); + + Value *getOrCreateResult(Value *Inst, Type *ExpectedType) const { + if (auto *LI = dyn_cast<LoadInst>(Inst)) + return LI; + if (auto *SI = dyn_cast<StoreInst>(Inst)) + return SI->getValueOperand(); + assert(isa<IntrinsicInst>(Inst) && "Instruction not supported"); + return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst), + ExpectedType); + } + + /// Return true if the instruction is known to only operate on memory + /// provably invariant in the given "generation". 
+ bool isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt); + + bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration, + Instruction *EarlierInst, Instruction *LaterInst); + + void removeMSSA(Instruction *Inst) { + if (!MSSA) + return; + if (VerifyMemorySSA) + MSSA->verifyMemorySSA(); + // Removing a store here can leave MemorySSA in an unoptimized state by + // creating MemoryPhis that have identical arguments and by creating + // MemoryUses whose defining access is not an actual clobber. The phi case + // is handled by MemorySSA when passing OptimizePhis = true to + // removeMemoryAccess. The non-optimized MemoryUse case is lazily updated + // by MemorySSA's getClobberingMemoryAccess. + MSSAUpdater->removeMemoryAccess(Inst, true); + } +}; + +} // end anonymous namespace + +/// Determine if the memory referenced by LaterInst is from the same heap +/// version as EarlierInst. +/// This is currently called in two scenarios: +/// +/// load p +/// ... +/// load p +/// +/// and +/// +/// x = load p +/// ... +/// store x, p +/// +/// in both cases we want to verify that there are no possible writes to the +/// memory referenced by p between the earlier and later instruction. +bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration, + unsigned LaterGeneration, + Instruction *EarlierInst, + Instruction *LaterInst) { + // Check the simple memory generation tracking first. + if (EarlierGeneration == LaterGeneration) + return true; + + if (!MSSA) + return false; + + // If MemorySSA has determined that one of EarlierInst or LaterInst does not + // read/write memory, then we can safely return true here. + // FIXME: We could be more aggressive when checking doesNotAccessMemory(), + // onlyReadsMemory(), mayReadFromMemory(), and mayWriteToMemory() in this pass + // by also checking the MemorySSA MemoryAccess on the instruction. Initial + // experiments suggest this isn't worthwhile, at least for C/C++ code compiled + // with the default optimization pipeline. + auto *EarlierMA = MSSA->getMemoryAccess(EarlierInst); + if (!EarlierMA) + return true; + auto *LaterMA = MSSA->getMemoryAccess(LaterInst); + if (!LaterMA) + return true; + + // Since we know LaterDef dominates LaterInst and EarlierInst dominates + // LaterInst, if LaterDef dominates EarlierInst then it can't occur between + // EarlierInst and LaterInst and neither can any other write that potentially + // clobbers LaterInst. + MemoryAccess *LaterDef; + if (ClobberCounter < EarlyCSEMssaOptCap) { + LaterDef = MSSA->getWalker()->getClobberingMemoryAccess(LaterInst); + ClobberCounter++; + } else + LaterDef = LaterMA->getDefiningAccess(); + + return MSSA->dominates(LaterDef, EarlierMA); +} + +bool EarlyCSE::isOperatingOnInvariantMemAt(Instruction *I, unsigned GenAt) { + // A location loaded from with an invariant_load is assumed to *never* change + // within the visible scope of the compilation. + if (auto *LI = dyn_cast<LoadInst>(I)) + if (LI->getMetadata(LLVMContext::MD_invariant_load)) + return true; + + auto MemLocOpt = MemoryLocation::getOrNone(I); + if (!MemLocOpt) + // "target" intrinsic forms of loads aren't currently known to + // MemoryLocation::get. TODO + return false; + MemoryLocation MemLoc = *MemLocOpt; + if (!AvailableInvariants.count(MemLoc)) + return false; + + // Is the generation at which this became invariant older than the + // current one? 
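+  // (Note, not upstream text: CurrentGeneration only ever increases, so a
+  // stored generation <= GenAt means the invariant scope was already open.)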
+  return AvailableInvariants.lookup(MemLoc) <= GenAt;
+}
+
+bool EarlyCSE::handleBranchCondition(Instruction *CondInst,
+                                     const BranchInst *BI, const BasicBlock *BB,
+                                     const BasicBlock *Pred) {
+  assert(BI->isConditional() && "Should be a conditional branch!");
+  assert(BI->getCondition() == CondInst && "Wrong condition?");
+  assert(BI->getSuccessor(0) == BB || BI->getSuccessor(1) == BB);
+  auto *TorF = (BI->getSuccessor(0) == BB)
+                   ? ConstantInt::getTrue(BB->getContext())
+                   : ConstantInt::getFalse(BB->getContext());
+  auto MatchBinOp = [](Instruction *I, unsigned Opcode) {
+    if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(I))
+      return BOp->getOpcode() == Opcode;
+    return false;
+  };
+  // If the condition is an AND operation, we can propagate its operands into
+  // the true branch. If it is an OR operation, we can propagate them into the
+  // false branch.
+  unsigned PropagateOpcode =
+      (BI->getSuccessor(0) == BB) ? Instruction::And : Instruction::Or;
+
+  bool MadeChanges = false;
+  SmallVector<Instruction *, 4> WorkList;
+  SmallPtrSet<Instruction *, 4> Visited;
+  WorkList.push_back(CondInst);
+  while (!WorkList.empty()) {
+    Instruction *Curr = WorkList.pop_back_val();
+
+    AvailableValues.insert(Curr, TorF);
+    LLVM_DEBUG(dbgs() << "EarlyCSE CVP: Add conditional value for '"
+                      << Curr->getName() << "' as " << *TorF << " in "
+                      << BB->getName() << "\n");
+    if (!DebugCounter::shouldExecute(CSECounter)) {
+      LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+    } else {
+      // Replace all dominated uses with the known value.
+      if (unsigned Count = replaceDominatedUsesWith(Curr, TorF, DT,
+                                                    BasicBlockEdge(Pred, BB))) {
+        NumCSECVP += Count;
+        MadeChanges = true;
+      }
+    }
+
+    if (MatchBinOp(Curr, PropagateOpcode))
+      for (auto &Op : cast<BinaryOperator>(Curr)->operands())
+        if (Instruction *OPI = dyn_cast<Instruction>(Op))
+          if (SimpleValue::canHandle(OPI) && Visited.insert(OPI).second)
+            WorkList.push_back(OPI);
+  }
+
+  return MadeChanges;
+}
+
+bool EarlyCSE::processNode(DomTreeNode *Node) {
+  bool Changed = false;
+  BasicBlock *BB = Node->getBlock();
+
+  // If this block has a single predecessor, then the predecessor is the parent
+  // of the domtree node and all of the live out memory values are still current
+  // in this block. If this block has multiple predecessors, then they could
+  // have invalidated the live-out memory values of our parent value. For now,
+  // just be conservative and invalidate memory if this block has multiple
+  // predecessors.
+  if (!BB->getSinglePredecessor())
+    ++CurrentGeneration;
+
+  // If this node has a single predecessor which ends in a conditional branch,
+  // we can infer the value of the branch condition given that we took this
+  // path. We need the single predecessor to ensure there's not another path
+  // which reaches this block where the condition might hold a different
+  // value. Since we're adding this to the scoped hash table (like any other
+  // def), it will have been popped if we encounter a future merge block.
+  if (BasicBlock *Pred = BB->getSinglePredecessor()) {
+    auto *BI = dyn_cast<BranchInst>(Pred->getTerminator());
+    if (BI && BI->isConditional()) {
+      auto *CondInst = dyn_cast<Instruction>(BI->getCondition());
+      if (CondInst && SimpleValue::canHandle(CondInst))
+        Changed |= handleBranchCondition(CondInst, BI, BB, Pred);
+    }
+  }
+
+  /// LastStore - Keep track of the last non-volatile store that we saw... for
+  /// as long as there is no instruction that reads memory. If we see a store
+  /// to the same location, we delete the dead store. This zaps trivial dead
+  /// stores which can occur in bitfield code among other things.
+  Instruction *LastStore = nullptr;
+
+  // See if any instructions in the block can be eliminated. If so, do it. If
+  // not, add them to AvailableValues.
+  for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;) {
+    Instruction *Inst = &*I++;
+
+    // Dead instructions should just be removed.
+    if (isInstructionTriviallyDead(Inst, &TLI)) {
+      LLVM_DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
+      if (!DebugCounter::shouldExecute(CSECounter)) {
+        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+        continue;
+      }
+      if (!salvageDebugInfo(*Inst))
+        replaceDbgUsesWithUndef(Inst);
+      removeMSSA(Inst);
+      Inst->eraseFromParent();
+      Changed = true;
+      ++NumSimplify;
+      continue;
+    }
+
+    // Skip assume intrinsics, they don't really have side effects (although
+    // they're marked as such to ensure preservation of control dependencies),
+    // and this pass will not bother with their removal. However, we should
+    // mark their conditions as true for all dominated blocks.
+    if (match(Inst, m_Intrinsic<Intrinsic::assume>())) {
+      auto *CondI =
+          dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0));
+      if (CondI && SimpleValue::canHandle(CondI)) {
+        LLVM_DEBUG(dbgs() << "EarlyCSE considering assumption: " << *Inst
+                          << '\n');
+        AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+      } else
+        LLVM_DEBUG(dbgs() << "EarlyCSE skipping assumption: " << *Inst << '\n');
+      continue;
+    }
+
+    // Skip sideeffect intrinsics, for the same reason as assume intrinsics.
+    if (match(Inst, m_Intrinsic<Intrinsic::sideeffect>())) {
+      LLVM_DEBUG(dbgs() << "EarlyCSE skipping sideeffect: " << *Inst << '\n');
+      continue;
+    }
+
+    // We can skip all invariant.start intrinsics since they only read memory,
+    // and we can forward values across it. For invariant starts without
+    // invariant ends, we can use the fact that the invariantness never ends to
+    // start a scope in the current generation which is true for all future
+    // generations. Also, we don't need to consume the last store since the
+    // semantics of invariant.start allow us to perform DSE of the last
+    // store, if there was a store following invariant.start. Consider:
+    //
+    //   store 30, i8* p
+    //   invariant.start(p)
+    //   store 40, i8* p
+    // We can DSE the store to 30, since the store 40 to invariant location p
+    // causes undefined behaviour.
+    if (match(Inst, m_Intrinsic<Intrinsic::invariant_start>())) {
+      // If there are any uses, the scope might end.
+      if (!Inst->use_empty())
+        continue;
+      auto *CI = cast<CallInst>(Inst);
+      MemoryLocation MemLoc = MemoryLocation::getForArgument(CI, 1, TLI);
+      // Don't start a scope if we already have a better one pushed
+      if (!AvailableInvariants.count(MemLoc))
+        AvailableInvariants.insert(MemLoc, CurrentGeneration);
+      continue;
+    }
+
+    if (isGuard(Inst)) {
+      if (auto *CondI =
+              dyn_cast<Instruction>(cast<CallInst>(Inst)->getArgOperand(0))) {
+        if (SimpleValue::canHandle(CondI)) {
+          // Do we already know the actual value of this condition?
+          if (auto *KnownCond = AvailableValues.lookup(CondI)) {
+            // Is the condition known to be true?
+            if (isa<ConstantInt>(KnownCond) &&
+                cast<ConstantInt>(KnownCond)->isOne()) {
+              LLVM_DEBUG(dbgs()
+                         << "EarlyCSE removing guard: " << *Inst << '\n');
+              removeMSSA(Inst);
+              Inst->eraseFromParent();
+              Changed = true;
+              continue;
+            } else
+              // Use the known value if it wasn't true.
+              cast<CallInst>(Inst)->setArgOperand(0, KnownCond);
+          }
+          // The condition we're guarding on here is true for all dominated
+          // locations.
+          AvailableValues.insert(CondI, ConstantInt::getTrue(BB->getContext()));
+        }
+      }
+
+      // Guard intrinsics read all memory, but don't write any memory.
+      // Accordingly, don't update the generation but consume the last store (to
+      // avoid an incorrect DSE).
+      LastStore = nullptr;
+      continue;
+    }
+
+    // If the instruction can be simplified (e.g. X+0 = X) then replace it with
+    // its simpler value.
+    if (Value *V = SimplifyInstruction(Inst, SQ)) {
+      LLVM_DEBUG(dbgs() << "EarlyCSE Simplify: " << *Inst << " to: " << *V
+                        << '\n');
+      if (!DebugCounter::shouldExecute(CSECounter)) {
+        LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+      } else {
+        bool Killed = false;
+        if (!Inst->use_empty()) {
+          Inst->replaceAllUsesWith(V);
+          Changed = true;
+        }
+        if (isInstructionTriviallyDead(Inst, &TLI)) {
+          removeMSSA(Inst);
+          Inst->eraseFromParent();
+          Changed = true;
+          Killed = true;
+        }
+        if (Changed)
+          ++NumSimplify;
+        if (Killed)
+          continue;
+      }
+    }
+
+    // If this is a simple instruction that we can value number, process it.
+    if (SimpleValue::canHandle(Inst)) {
+      // See if the instruction has an available value. If so, use it.
+      if (Value *V = AvailableValues.lookup(Inst)) {
+        LLVM_DEBUG(dbgs() << "EarlyCSE CSE: " << *Inst << " to: " << *V
+                          << '\n');
+        if (!DebugCounter::shouldExecute(CSECounter)) {
+          LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n");
+          continue;
+        }
+        if (auto *I = dyn_cast<Instruction>(V))
+          I->andIRFlags(Inst);
+        Inst->replaceAllUsesWith(V);
+        removeMSSA(Inst);
+        Inst->eraseFromParent();
+        Changed = true;
+        ++NumCSE;
+        continue;
+      }
+
+      // Otherwise, just remember that this value is available.
+      AvailableValues.insert(Inst, Inst);
+      continue;
+    }
+
+    ParseMemoryInst MemInst(Inst, TTI);
+    // If this is a non-volatile load, process it.
+    if (MemInst.isValid() && MemInst.isLoad()) {
+      // (conservatively) we can't peek past the ordering implied by this
+      // operation, but we can add this load to our set of available values
+      if (MemInst.isVolatile() || !MemInst.isUnordered()) {
+        LastStore = nullptr;
+        ++CurrentGeneration;
+      }
+
+      if (MemInst.isInvariantLoad()) {
+        // If we pass an invariant load, we know that memory location is
+        // indefinitely constant from the moment of first dereferenceability.
+        // We conservatively treat the invariant_load as that moment. If we
+        // pass an invariant load after already establishing a scope, don't
+        // restart it since we want to preserve the earliest point seen.
+        auto MemLoc = MemoryLocation::get(Inst);
+        if (!AvailableInvariants.count(MemLoc))
+          AvailableInvariants.insert(MemLoc, CurrentGeneration);
+      }
+
+      // If we have an available version of this load, and if it is the right
+      // generation or the load is known to be from an invariant location,
+      // replace this instruction.
+      //
+      // If either the dominating load or the current load is invariant, then
+      // we can assume the current load loads the same value as the dominating
+      // load.
+      LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
+      if (InVal.DefInst != nullptr &&
+          InVal.MatchingId == MemInst.getMatchingId() &&
+          // We don't yet handle removing loads with ordering of any kind.
+          !MemInst.isVolatile() && MemInst.isUnordered() &&
+          // We can't replace an atomic load with one which isn't also atomic.
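+          // (Note, not upstream text: the boolean comparison below lets an
+          // atomic available load serve a non-atomic request, never the
+          // reverse.)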
+ InVal.IsAtomic >= MemInst.isAtomic() && + (isOperatingOnInvariantMemAt(Inst, InVal.Generation) || + isSameMemGeneration(InVal.Generation, CurrentGeneration, + InVal.DefInst, Inst))) { + Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType()); + if (Op != nullptr) { + LLVM_DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst + << " to: " << *InVal.DefInst << '\n'); + if (!DebugCounter::shouldExecute(CSECounter)) { + LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); + continue; + } + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(Op); + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + ++NumCSELoad; + continue; + } + } + + // Otherwise, remember that we have this instruction. + AvailableLoads.insert( + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); + LastStore = nullptr; + continue; + } + + // If this instruction may read from memory or throw (and potentially read + // from memory in the exception handler), forget LastStore. Load/store + // intrinsics will indicate both a read and a write to memory. The target + // may override this (e.g. so that a store intrinsic does not read from + // memory, and thus will be treated the same as a regular store for + // commoning purposes). + if ((Inst->mayReadFromMemory() || Inst->mayThrow()) && + !(MemInst.isValid() && !MemInst.mayReadFromMemory())) + LastStore = nullptr; + + // If this is a read-only call, process it. + if (CallValue::canHandle(Inst)) { + // If we have an available version of this call, and if it is the right + // generation, replace this instruction. + std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst); + if (InVal.first != nullptr && + isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first, + Inst)) { + LLVM_DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst + << " to: " << *InVal.first << '\n'); + if (!DebugCounter::shouldExecute(CSECounter)) { + LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); + continue; + } + if (!Inst->use_empty()) + Inst->replaceAllUsesWith(InVal.first); + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + ++NumCSECall; + continue; + } + + // Otherwise, remember that we have this instruction. + AvailableCalls.insert( + Inst, std::pair<Instruction *, unsigned>(Inst, CurrentGeneration)); + continue; + } + + // A release fence requires that all stores complete before it, but does + // not prevent the reordering of following loads 'before' the fence. As a + // result, we don't need to consider it as writing to memory and don't need + // to advance the generation. We do need to prevent DSE across the fence, + // but that's handled above. + if (FenceInst *FI = dyn_cast<FenceInst>(Inst)) + if (FI->getOrdering() == AtomicOrdering::Release) { + assert(Inst->mayReadFromMemory() && "relied on to prevent DSE above"); + continue; + } + + // write back DSE - If we write back the same value we just loaded from + // the same location and haven't passed any intervening writes or ordering + // operations, we can remove the write. The primary benefit is in allowing + // the available load table to remain valid and value forward past where + // the store originally was. 
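+    // Illustrative IR sketch (not from the original source) of the store
+    // removed here:
+    //   %v = load i32, i32* %p
+    //   ...                      ; no intervening writes or ordering events
+    //   store i32 %v, i32* %p    ; dead: writes back the value just loaded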
+ if (MemInst.isValid() && MemInst.isStore()) { + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); + if (InVal.DefInst && + InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) && + InVal.MatchingId == MemInst.getMatchingId() && + // We don't yet handle removing stores with ordering of any kind. + !MemInst.isVolatile() && MemInst.isUnordered() && + (isOperatingOnInvariantMemAt(Inst, InVal.Generation) || + isSameMemGeneration(InVal.Generation, CurrentGeneration, + InVal.DefInst, Inst))) { + // It is okay to have a LastStore to a different pointer here if MemorySSA + // tells us that the load and store are from the same memory generation. + // In that case, LastStore should keep its present value since we're + // removing the current store. + assert((!LastStore || + ParseMemoryInst(LastStore, TTI).getPointerOperand() == + MemInst.getPointerOperand() || + MSSA) && + "can't have an intervening store if not using MemorySSA!"); + LLVM_DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n'); + if (!DebugCounter::shouldExecute(CSECounter)) { + LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); + continue; + } + removeMSSA(Inst); + Inst->eraseFromParent(); + Changed = true; + ++NumDSE; + // We can avoid incrementing the generation count since we were able + // to eliminate this store. + continue; + } + } + + // Okay, this isn't something we can CSE at all. Check to see if it is + // something that could modify memory. If so, our available memory values + // cannot be used so bump the generation count. + if (Inst->mayWriteToMemory()) { + ++CurrentGeneration; + + if (MemInst.isValid() && MemInst.isStore()) { + // We do a trivial form of DSE if there are two stores to the same + // location with no intervening loads. Delete the earlier store. + // At the moment, we don't remove ordered stores, but do remove + // unordered atomic stores. There's no special requirement (for + // unordered atomics) about removing atomic stores only in favor of + // other atomic stores since we were going to execute the non-atomic + // one anyway and the atomic one might never have become visible. + if (LastStore) { + ParseMemoryInst LastStoreMemInst(LastStore, TTI); + assert(LastStoreMemInst.isUnordered() && + !LastStoreMemInst.isVolatile() && + "Violated invariant"); + if (LastStoreMemInst.isMatchingMemLoc(MemInst)) { + LLVM_DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore + << " due to: " << *Inst << '\n'); + if (!DebugCounter::shouldExecute(CSECounter)) { + LLVM_DEBUG(dbgs() << "Skipping due to debug counter\n"); + } else { + removeMSSA(LastStore); + LastStore->eraseFromParent(); + Changed = true; + ++NumDSE; + LastStore = nullptr; + } + } + // fallthrough - we can exploit information about this store + } + + // Okay, we just invalidated anything we knew about loaded values. Try + // to salvage *something* by remembering that the stored value is a live + // version of the pointer. It is safe to forward from volatile stores + // to non-volatile loads, so we don't have to check for volatility of + // the store. + AvailableLoads.insert( + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); + + // Remember that this was the last unordered store we saw for DSE. We + // don't yet handle DSE on ordered or volatile stores since we don't + // have a good way to model the ordering requirement for following + // passes once the store is removed. 
We could insert a fence, but
+        // since fences are slightly stronger than stores in their ordering,
+        // it's not clear this is a profitable transform. Another option would
+        // be to merge the ordering with that of the post dominating store.
+        if (MemInst.isUnordered() && !MemInst.isVolatile())
+          LastStore = Inst;
+        else
+          LastStore = nullptr;
+      }
+    }
+  }
+
+  return Changed;
+}
+
+bool EarlyCSE::run() {
+  // Note, deque is being used here because there are significant performance
+  // gains over vector when the container becomes very large due to the
+  // specific access patterns. For more information see the mailing list
+  // discussion on this:
+  // http://lists.llvm.org/pipermail/llvm-commits/Week-of-Mon-20120116/135228.html
+  std::deque<StackNode *> nodesToProcess;
+
+  bool Changed = false;
+
+  // Process the root node.
+  nodesToProcess.push_back(new StackNode(
+      AvailableValues, AvailableLoads, AvailableInvariants, AvailableCalls,
+      CurrentGeneration, DT.getRootNode(),
+      DT.getRootNode()->begin(), DT.getRootNode()->end()));
+
+  assert(!CurrentGeneration && "Create a new EarlyCSE instance to rerun it.");
+
+  // Process the stack.
+  while (!nodesToProcess.empty()) {
+    // Grab the first item off the stack. Set the current generation, remove
+    // the node from the stack, and process it.
+    StackNode *NodeToProcess = nodesToProcess.back();
+
+    // Initialize class members.
+    CurrentGeneration = NodeToProcess->currentGeneration();
+
+    // Check if the node needs to be processed.
+    if (!NodeToProcess->isProcessed()) {
+      // Process the node.
+      Changed |= processNode(NodeToProcess->node());
+      NodeToProcess->childGeneration(CurrentGeneration);
+      NodeToProcess->process();
+    } else if (NodeToProcess->childIter() != NodeToProcess->end()) {
+      // Push the next child onto the stack.
+      DomTreeNode *child = NodeToProcess->nextChild();
+      nodesToProcess.push_back(
+          new StackNode(AvailableValues, AvailableLoads, AvailableInvariants,
+                        AvailableCalls, NodeToProcess->childGeneration(),
+                        child, child->begin(), child->end()));
+    } else {
+      // It has been processed, and there are no more children to process,
+      // so delete it and pop it off the stack.
+      delete NodeToProcess;
+      nodesToProcess.pop_back();
+    }
+  } // while (!nodes...)
+
+  return Changed;
+}
+
+PreservedAnalyses EarlyCSEPass::run(Function &F,
+                                    FunctionAnalysisManager &AM) {
+  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
+  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+  auto &AC = AM.getResult<AssumptionAnalysis>(F);
+  auto *MSSA =
+      UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
+
+  EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA);
+
+  if (!CSE.run())
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserveSet<CFGAnalyses>();
+  PA.preserve<GlobalsAA>();
+  if (UseMemorySSA)
+    PA.preserve<MemorySSAAnalysis>();
+  return PA;
+}
+
+template<bool UseMemorySSA>
+bool EarlyCSELegacyCommonPass<UseMemorySSA>::runOnFunction(Function &F) {
+  if (skipFunction(F))
+    return false;
+
+  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
+  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  auto *MSSA =
+      UseMemorySSA ?
&getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr; + + EarlyCSE CSE(F.getParent()->getDataLayout(), TLI, TTI, DT, AC, MSSA); + + return CSE.run(); + } + +using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>; + +template<> +char EarlyCSELegacyPass::ID = 0; + +INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false) + +using EarlyCSEMemSSALegacyPass = + EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>; + +template<> +char EarlyCSEMemSSALegacyPass::ID = 0; + +FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) { + if (UseMemorySSA) + return new EarlyCSEMemSSALegacyPass(); + else + return new EarlyCSELegacyPass(); +} + +INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa", + "Early CSE w/ MemorySSA", false, false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) +INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass) +INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa", + "Early CSE w/ MemorySSA", false, false) diff --git a/hpvm/llvm_patches/lib/Transforms/Scalar/SimplifyCFGPass.cpp b/hpvm/llvm_patches/lib/Transforms/Scalar/SimplifyCFGPass.cpp new file mode 100644 index 0000000000000000000000000000000000000000..d7004dbbfb445b261711e84e2815176b8cd1d383 --- /dev/null +++ b/hpvm/llvm_patches/lib/Transforms/Scalar/SimplifyCFGPass.cpp @@ -0,0 +1,272 @@ +//===- SimplifyCFGPass.cpp - CFG Simplification Pass ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements dead code elimination and basic block merging, along +// with a collection of other peephole control flow optimizations. For example: +// +// * Removes basic blocks with no predecessors. +// * Merges a basic block into its predecessor if there is only one and the +// predecessor only has one successor. +// * Eliminates PHI nodes for basic blocks with a single predecessor. +// * Eliminates a basic block that only contains an unconditional branch. +// * Changes invoke instructions to nounwind functions to be calls. +// * Change things like "if (x) if (y)" into "if (x&y)". +// * etc.. 
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/GlobalsModRef.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/IR/Attributes.h"
+#include "llvm/IR/CFG.h"
+#include "llvm/IR/Constants.h"
+#include "llvm/IR/DataLayout.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Pass.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Transforms/Scalar.h"
+#include "llvm/Transforms/Scalar/SimplifyCFG.h"
+#include <utility>
+using namespace llvm;
+
+#define DEBUG_TYPE "simplifycfg"
+
+static cl::opt<unsigned> UserBonusInstThreshold(
+    "bonus-inst-threshold", cl::Hidden, cl::init(1),
+    cl::desc("Control the number of bonus instructions (default = 1)"));
+
+static cl::opt<bool> UserKeepLoops(
+    "keep-loops", cl::Hidden, cl::init(true),
+    cl::desc("Preserve canonical loop structure (default = true)"));
+
+static cl::opt<bool> UserSwitchToLookup(
+    "switch-to-lookup", cl::Hidden, cl::init(false),
+    cl::desc("Convert switches to lookup tables (default = false)"));
+
+static cl::opt<bool> UserForwardSwitchCond(
+    "forward-switch-cond", cl::Hidden, cl::init(false),
+    cl::desc("Forward switch condition to phi ops (default = false)"));
+
+static cl::opt<bool> UserSinkCommonInsts(
+    "sink-common-insts", cl::Hidden, cl::init(false),
+    cl::desc("Sink common instructions (default = false)"));
+
+
+STATISTIC(NumSimpl, "Number of blocks simplified");
+
+/// If we have more than one empty (other than phi node) return block,
+/// merge them together to promote recursive block merging.
+static bool mergeEmptyReturnBlocks(Function &F) {
+  bool Changed = false;
+
+  BasicBlock *RetBlock = nullptr;
+
+  // Scan all the blocks in the function, looking for empty return blocks.
+  for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; ) {
+    BasicBlock &BB = *BBI++;
+
+    // Only look at return blocks.
+    ReturnInst *Ret = dyn_cast<ReturnInst>(BB.getTerminator());
+    if (!Ret) continue;
+
+    // Only look at the block if it is empty or the only other thing in it is a
+    // single PHI node that is the operand to the return.
+    if (Ret != &BB.front()) {
+      // Check for something else in the block.
+      BasicBlock::iterator I(Ret);
+      --I;
+      // Skip over debug info.
+      while (isa<DbgInfoIntrinsic>(I) && I != BB.begin())
+        --I;
+      if (!isa<DbgInfoIntrinsic>(I) &&
+          (!isa<PHINode>(I) || I != BB.begin() || Ret->getNumOperands() == 0 ||
+           Ret->getOperand(0) != &*I))
+        continue;
+    }
+
+    // If this is the first returning block, remember it and keep going.
+    if (!RetBlock) {
+      RetBlock = &BB;
+      continue;
+    }
+
+    // Otherwise, we found a duplicate return block. Merge the two.
+    Changed = true;
+
+    // Case when there is no input to the return or when the returned values
+    // agree is trivial. Note that they can't agree if there are phis in the
+    // blocks.
+    if (Ret->getNumOperands() == 0 ||
+        Ret->getOperand(0) ==
+            cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0)) {
+      BB.replaceAllUsesWith(RetBlock);
+      BB.eraseFromParent();
+      continue;
+    }
+
+    // If the canonical return block has no PHI node, create one now.
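+    // Illustrative sketch (not from the original source): given two blocks
+    // ending in "ret i32 %x" and "ret i32 %y", the second is rewired to
+    // branch to the first, whose return operand becomes
+    //   %merge = phi i32 [ %x, ... ], [ %y, ... ]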
+ PHINode *RetBlockPHI = dyn_cast<PHINode>(RetBlock->begin()); + if (!RetBlockPHI) { + Value *InVal = cast<ReturnInst>(RetBlock->getTerminator())->getOperand(0); + pred_iterator PB = pred_begin(RetBlock), PE = pred_end(RetBlock); + RetBlockPHI = PHINode::Create(Ret->getOperand(0)->getType(), + std::distance(PB, PE), "merge", + &RetBlock->front()); + + for (pred_iterator PI = PB; PI != PE; ++PI) + RetBlockPHI->addIncoming(InVal, *PI); + RetBlock->getTerminator()->setOperand(0, RetBlockPHI); + } + + // Turn BB into a block that just unconditionally branches to the return + // block. This handles the case when the two return blocks have a common + // predecessor but that return different things. + RetBlockPHI->addIncoming(Ret->getOperand(0), &BB); + BB.getTerminator()->eraseFromParent(); + BranchInst::Create(RetBlock, &BB); + } + + return Changed; +} + +/// Call SimplifyCFG on all the blocks in the function, +/// iterating until no more changes are made. +static bool iterativelySimplifyCFG(Function &F, const TargetTransformInfo &TTI, + const SimplifyCFGOptions &Options) { + bool Changed = false; + bool LocalChange = true; + + SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32> Edges; + FindFunctionBackedges(F, Edges); + SmallPtrSet<BasicBlock *, 16> LoopHeaders; + for (unsigned i = 0, e = Edges.size(); i != e; ++i) + LoopHeaders.insert(const_cast<BasicBlock *>(Edges[i].second)); + + while (LocalChange) { + LocalChange = false; + + // Loop over all of the basic blocks and remove them if they are unneeded. + for (Function::iterator BBIt = F.begin(); BBIt != F.end(); ) { + if (simplifyCFG(&*BBIt++, TTI, Options, &LoopHeaders)) { + LocalChange = true; + ++NumSimpl; + } + } + Changed |= LocalChange; + } + return Changed; +} + +bool llvm::simplifyFunctionCFG(Function &F, const TargetTransformInfo &TTI, + const SimplifyCFGOptions &Options) { + bool EverChanged = removeUnreachableBlocks(F); + EverChanged |= mergeEmptyReturnBlocks(F); + EverChanged |= iterativelySimplifyCFG(F, TTI, Options); + + // If neither pass changed anything, we're done. + if (!EverChanged) return false; + + // iterativelySimplifyCFG can (rarely) make some loops dead. If this happens, + // removeUnreachableBlocks is needed to nuke them, which means we should + // iterate between the two optimizations. We structure the code like this to + // avoid rerunning iterativelySimplifyCFG if the second pass of + // removeUnreachableBlocks doesn't do anything. + if (!removeUnreachableBlocks(F)) + return true; + + do { + EverChanged = iterativelySimplifyCFG(F, TTI, Options); + EverChanged |= removeUnreachableBlocks(F); + } while (EverChanged); + + return true; +} + +// Command-line settings override compile-time settings. +SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) { + Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences() + ? UserBonusInstThreshold + : Opts.BonusInstThreshold; + Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences() + ? UserForwardSwitchCond + : Opts.ForwardSwitchCondToPhi; + Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences() + ? UserSwitchToLookup + : Opts.ConvertSwitchToLookupTable; + Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences() + ? UserKeepLoops + : Opts.NeedCanonicalLoop; + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? 
UserSinkCommonInsts + : Opts.SinkCommonInsts; +} + +PreservedAnalyses SimplifyCFGPass::run(Function &F, + FunctionAnalysisManager &AM) { + auto &TTI = AM.getResult<TargetIRAnalysis>(F); + Options.AC = &AM.getResult<AssumptionAnalysis>(F); + if (!simplifyFunctionCFG(F, TTI, Options)) + return PreservedAnalyses::all(); + PreservedAnalyses PA; + PA.preserve<GlobalsAA>(); + return PA; +} + +CFGSimplifyPass::CFGSimplifyPass(unsigned Threshold, bool ForwardSwitchCond, + bool ConvertSwitch, bool KeepLoops, + bool SinkCommon, + std::function<bool(const Function &)> Ftor) + : FunctionPass(ID), PredicateFtor(std::move(Ftor)) { + + initializeCFGSimplifyPassPass(*PassRegistry::getPassRegistry()); + + // Check for command-line overrides of options for debug/customization. + Options.BonusInstThreshold = UserBonusInstThreshold.getNumOccurrences() + ? UserBonusInstThreshold + : Threshold; + + Options.ForwardSwitchCondToPhi = UserForwardSwitchCond.getNumOccurrences() + ? UserForwardSwitchCond + : ForwardSwitchCond; + + Options.ConvertSwitchToLookupTable = UserSwitchToLookup.getNumOccurrences() + ? UserSwitchToLookup + : ConvertSwitch; + + Options.NeedCanonicalLoop = + UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops; + + Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences() + ? UserSinkCommonInsts + : SinkCommon; + } +char CFGSimplifyPass::ID = 0; +INITIALIZE_PASS_BEGIN(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, + false) +INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) +INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false, + false) + +// Public interface to the CFGSimplification pass +FunctionPass * +llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond, + bool ConvertSwitch, bool KeepLoops, + bool SinkCommon, + std::function<bool(const Function &)> Ftor) { + return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch, + KeepLoops, SinkCommon, std::move(Ftor)); +} diff --git a/hpvm/llvm_patches/lib/Transforms/Utils/LoopSimplify.cpp b/hpvm/llvm_patches/lib/Transforms/Utils/LoopSimplify.cpp new file mode 100644 index 0000000000000000000000000000000000000000..614d38b58cb2742ef9b2e1ef576f78cc9f8f09c7 --- /dev/null +++ b/hpvm/llvm_patches/lib/Transforms/Utils/LoopSimplify.cpp @@ -0,0 +1,882 @@ +//===- LoopSimplify.cpp - Loop Canonicalization Pass ----------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This pass performs several transformations to transform natural loops into a +// simpler form, which makes subsequent analyses and transformations simpler and +// more effective. +// +// Loop pre-header insertion guarantees that there is a single, non-critical +// entry edge from outside of the loop to the loop header. This simplifies a +// number of analyses and transformations, such as LICM. +// +// Loop exit-block insertion guarantees that all exit blocks from the loop +// (blocks which are outside of the loop that have predecessors inside of the +// loop) only have predecessors from inside of the loop (and are thus dominated +// by the loop header). This simplifies transformations such as store-sinking +// that are built into LICM. 
+// +// This pass also guarantees that loops will have exactly one backedge. +// +// Indirectbr instructions introduce several complications. If the loop +// contains or is entered by an indirectbr instruction, it may not be possible +// to transform the loop and make these guarantees. Client code should check +// that these conditions are true before relying on them. +// +// Similar complications arise from callbr instructions, particularly in +// asm-goto where blockaddress expressions are used. +// +// Note that the simplifycfg pass will clean up blocks which are split out but +// end up being unnecessary, so usage of this pass should not pessimize +// generated code. +// +// This pass obviously modifies the CFG, but updates loop information and +// dominator information. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/LoopSimplify.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/AssumptionCache.h" +#include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/GlobalsModRef.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/MemorySSA.h" +#include "llvm/Analysis/MemorySSAUpdater.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" +#include "llvm/IR/CFG.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" +using namespace llvm; + +#define DEBUG_TYPE "loop-simplify" + +STATISTIC(NumNested , "Number of nested loops split out"); + +// If the block isn't already, move the new block to right after some 'outside +// block' block. This prevents the preheader from being placed inside the loop +// body, e.g. when the loop hasn't been rotated. +static void placeSplitBlockCarefully(BasicBlock *NewBB, + SmallVectorImpl<BasicBlock *> &SplitPreds, + Loop *L) { + // Check to see if NewBB is already well placed. + Function::iterator BBI = --NewBB->getIterator(); + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + if (&*BBI == SplitPreds[i]) + return; + } + + // If it isn't already after an outside block, move it after one. This is + // always good as it makes the uncond branch from the outside block into a + // fall-through. + + // Figure out *which* outside block to put this after. Prefer an outside + // block that neighbors a BB actually in the loop. + BasicBlock *FoundBB = nullptr; + for (unsigned i = 0, e = SplitPreds.size(); i != e; ++i) { + Function::iterator BBI = SplitPreds[i]->getIterator(); + if (++BBI != NewBB->getParent()->end() && L->contains(&*BBI)) { + FoundBB = SplitPreds[i]; + break; + } + } + + // If our heuristic for a *good* bb to place this after doesn't find + // anything, just pick something. It's likely better than leaving it within + // the loop. 
+ if (!FoundBB) + FoundBB = SplitPreds[0]; + NewBB->moveAfter(FoundBB); +} + +/// InsertPreheaderForLoop - Once we discover that a loop doesn't have a +/// preheader, this method is called to insert one. This method has two phases: +/// preheader insertion and analysis updating. +/// +BasicBlock *llvm::InsertPreheaderForLoop(Loop *L, DominatorTree *DT, + LoopInfo *LI, MemorySSAUpdater *MSSAU, + bool PreserveLCSSA) { + BasicBlock *Header = L->getHeader(); + + // Compute the set of predecessors of the loop that are not in the loop. + SmallVector<BasicBlock*, 8> OutsideBlocks; + for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header); + PI != PE; ++PI) { + BasicBlock *P = *PI; + if (!L->contains(P)) { // Coming in from outside the loop? + // If the loop is branched to from an indirect terminator, we won't + // be able to fully transform the loop, because it prohibits + // edge splitting. + if (P->getTerminator()->isIndirectTerminator()) + return nullptr; + + // Keep track of it. + OutsideBlocks.push_back(P); + } + } + + // Split out the loop pre-header. + BasicBlock *PreheaderBB; + PreheaderBB = SplitBlockPredecessors(Header, OutsideBlocks, ".preheader", DT, + LI, MSSAU, PreserveLCSSA); + if (!PreheaderBB) + return nullptr; + + LLVM_DEBUG(dbgs() << "LoopSimplify: Creating pre-header " + << PreheaderBB->getName() << "\n"); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + placeSplitBlockCarefully(PreheaderBB, OutsideBlocks, L); + + return PreheaderBB; +} + +/// Add the specified block, and all of its predecessors, to the specified set, +/// if it's not already in there. Stop predecessor traversal when we reach +/// StopBlock. +static void addBlockAndPredsToSet(BasicBlock *InputBB, BasicBlock *StopBlock, + std::set<BasicBlock*> &Blocks) { + SmallVector<BasicBlock *, 8> Worklist; + Worklist.push_back(InputBB); + do { + BasicBlock *BB = Worklist.pop_back_val(); + if (Blocks.insert(BB).second && BB != StopBlock) + // If BB is not already processed and it is not a stop block then + // insert its predecessor in the work list + for (pred_iterator I = pred_begin(BB), E = pred_end(BB); I != E; ++I) { + BasicBlock *WBB = *I; + Worklist.push_back(WBB); + } + } while (!Worklist.empty()); +} + +/// The first part of loop-nestification is to find a PHI node that tells +/// us how to partition the loops. +static PHINode *findPHIToPartitionLoops(Loop *L, DominatorTree *DT, + AssumptionCache *AC) { + const DataLayout &DL = L->getHeader()->getModule()->getDataLayout(); + for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ) { + PHINode *PN = cast<PHINode>(I); + ++I; + if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) { + // This is a degenerate PHI already, don't modify it! + PN->replaceAllUsesWith(V); + PN->eraseFromParent(); + continue; + } + + // Scan this PHI node looking for a use of the PHI node by itself. + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) + if (PN->getIncomingValue(i) == PN && + L->contains(PN->getIncomingBlock(i))) + // We found something tasty to remove. + return PN; + } + return nullptr; +} + +/// If this loop has multiple backedges, try to pull one of them out into +/// a nested loop. +/// +/// This is important for code that looks like +/// this: +/// +/// Loop: +/// ... +/// br cond, Loop, Next +/// ... +/// br cond2, Loop, Out +/// +/// To identify this common case, we look at the PHI nodes in the header of the +/// loop. 
PHI nodes with unchanging values on one backedge correspond to values +/// that change in the "outer" loop, but not in the "inner" loop. +/// +/// If we are able to separate out a loop, return the new outer loop that was +/// created. +/// +static Loop *separateNestedLoop(Loop *L, BasicBlock *Preheader, + DominatorTree *DT, LoopInfo *LI, + ScalarEvolution *SE, bool PreserveLCSSA, + AssumptionCache *AC, MemorySSAUpdater *MSSAU) { + // Don't try to separate loops without a preheader. + if (!Preheader) + return nullptr; + + // The header is not a landing pad; preheader insertion should ensure this. + BasicBlock *Header = L->getHeader(); + assert(!Header->isEHPad() && "Can't insert backedge to EH pad"); + + PHINode *PN = findPHIToPartitionLoops(L, DT, AC); + if (!PN) return nullptr; // No known way to partition. + + // Pull out all predecessors that have varying values in the loop. This + // handles the case when a PHI node has multiple instances of itself as + // arguments. + SmallVector<BasicBlock*, 8> OuterLoopPreds; + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { + if (PN->getIncomingValue(i) != PN || + !L->contains(PN->getIncomingBlock(i))) { + // We can't split indirect control flow edges. + if (PN->getIncomingBlock(i)->getTerminator()->isIndirectTerminator()) + return nullptr; + OuterLoopPreds.push_back(PN->getIncomingBlock(i)); + } + } + LLVM_DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n"); + + // If ScalarEvolution is around and knows anything about values in + // this loop, tell it to forget them, because we're about to + // substantially change it. + if (SE) + SE->forgetLoop(L); + + BasicBlock *NewBB = SplitBlockPredecessors(Header, OuterLoopPreds, ".outer", + DT, LI, MSSAU, PreserveLCSSA); + + // Make sure that NewBB is put someplace intelligent, which doesn't mess up + // code layout too horribly. + placeSplitBlockCarefully(NewBB, OuterLoopPreds, L); + + // Create the new outer loop. + Loop *NewOuter = LI->AllocateLoop(); + + // Change the parent loop to use the outer loop as its child now. + if (Loop *Parent = L->getParentLoop()) + Parent->replaceChildLoopWith(L, NewOuter); + else + LI->changeTopLevelLoop(L, NewOuter); + + // L is now a subloop of our outer loop. + NewOuter->addChildLoop(L); + + for (Loop::block_iterator I = L->block_begin(), E = L->block_end(); + I != E; ++I) + NewOuter->addBlockEntry(*I); + + // Now reset the header in L, which had been moved by + // SplitBlockPredecessors for the outer loop. + L->moveToHeader(Header); + + // Determine which blocks should stay in L and which should be moved out to + // the Outer loop now. + std::set<BasicBlock*> BlocksInL; + for (pred_iterator PI=pred_begin(Header), E = pred_end(Header); PI!=E; ++PI) { + BasicBlock *P = *PI; + if (DT->dominates(Header, P)) + addBlockAndPredsToSet(P, Header, BlocksInL); + } + + // Scan all of the loop children of L, moving them to OuterLoop if they are + // not part of the inner loop. + const std::vector<Loop*> &SubLoops = L->getSubLoops(); + for (size_t I = 0; I != SubLoops.size(); ) + if (BlocksInL.count(SubLoops[I]->getHeader())) + ++I; // Loop remains in L + else + NewOuter->addChildLoop(L->removeChildLoop(SubLoops.begin() + I)); + + SmallVector<BasicBlock *, 8> OuterLoopBlocks; + OuterLoopBlocks.push_back(NewBB); + // Now that we know which blocks are in L and which need to be moved to + // OuterLoop, move any blocks that need it. 
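+  // (Note, not upstream text: the index is re-checked after a removal below,
+  // since removeBlockFromLoop shrinks L->getBlocks() in place.)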
+  for (unsigned i = 0; i != L->getBlocks().size(); ++i) {
+    BasicBlock *BB = L->getBlocks()[i];
+    if (!BlocksInL.count(BB)) {
+      // Move this block to the parent, updating the exit blocks sets
+      L->removeBlockFromLoop(BB);
+      if ((*LI)[BB] == L) {
+        LI->changeLoopFor(BB, NewOuter);
+        OuterLoopBlocks.push_back(BB);
+      }
+      --i;
+    }
+  }
+
+  // Split edges to exit blocks from the inner loop, if they emerged in the
+  // process of separating the outer one.
+  formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA);
+
+  if (PreserveLCSSA) {
+    // Fix LCSSA form for L. Some values, which previously were only used inside
+    // L, can now be used in NewOuter loop. We need to insert phi-nodes for them
+    // in corresponding exit blocks.
+    // We don't need to form LCSSA recursively, because there cannot be uses
+    // inside a newly created loop of defs from inner loops as those would
+    // already be a use of an LCSSA phi node.
+    formLCSSA(*L, *DT, LI, SE);
+
+    assert(NewOuter->isRecursivelyLCSSAForm(*DT, *LI) &&
+           "LCSSA is broken after separating nested loops!");
+  }
+
+  return NewOuter;
+}
+
+/// This method is called when the specified loop has more than one
+/// backedge in it.
+///
+/// If this occurs, revector all of these backedges to target a new basic block
+/// and have that block branch to the loop header.  This ensures that loops
+/// have exactly one backedge.
+static BasicBlock *insertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader,
+                                             DominatorTree *DT, LoopInfo *LI,
+                                             MemorySSAUpdater *MSSAU) {
+  assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
+
+  // Get information about the loop
+  BasicBlock *Header = L->getHeader();
+  Function *F = Header->getParent();
+
+  // Unique backedge insertion currently depends on having a preheader.
+  if (!Preheader)
+    return nullptr;
+
+  // The header is not an EH pad; preheader insertion should ensure this.
+  assert(!Header->isEHPad() && "Can't insert backedge to EH pad");
+
+  // Figure out which basic blocks contain back-edges to the loop header.
+  std::vector<BasicBlock*> BackedgeBlocks;
+  for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I){
+    BasicBlock *P = *I;
+
+    // Indirect edges cannot be split, so we must fail if we find one.
+    if (P->getTerminator()->isIndirectTerminator())
+      return nullptr;
+
+    if (P != Preheader) BackedgeBlocks.push_back(P);
+  }
+
+  // Create and insert the new backedge block...
+  BasicBlock *BEBlock = BasicBlock::Create(Header->getContext(),
+                                           Header->getName() + ".backedge", F);
+  BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+  BETerminator->setDebugLoc(Header->getFirstNonPHI()->getDebugLoc());
+
+  LLVM_DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block "
+                    << BEBlock->getName() << "\n");
+
+  // Move the new backedge block to right after the last backedge block.
+  Function::iterator InsertPos = ++BackedgeBlocks.back()->getIterator();
+  F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
+
+  // Now that the block has been inserted into the function, create PHI nodes in
+  // the backedge block which correspond to any PHI nodes in the header block.
+  for (BasicBlock::iterator I = Header->begin(); isa<PHINode>(I); ++I) {
+    PHINode *PN = cast<PHINode>(I);
+    PHINode *NewPN = PHINode::Create(PN->getType(), BackedgeBlocks.size(),
+                                     PN->getName()+".be", BETerminator);
+
+    // Loop over the PHI node, moving all entries except the one for the
+    // preheader over to the new PHI node.
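+    // For instance (value and block names are illustrative), with two
+    // backedges a header PHI such as
+    //   %iv = phi i32 [ 0, %preheader ], [ %a, %latch1 ], [ %b, %latch2 ]
+    // is rewritten into
+    //   %iv.be = phi i32 [ %a, %latch1 ], [ %b, %latch2 ]  ; in %header.backedge
+    //   %iv    = phi i32 [ 0, %preheader ], [ %iv.be, %header.backedge ]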
+    unsigned PreheaderIdx = ~0U;
+    bool HasUniqueIncomingValue = true;
+    Value *UniqueValue = nullptr;
+    for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+      BasicBlock *IBB = PN->getIncomingBlock(i);
+      Value *IV = PN->getIncomingValue(i);
+      if (IBB == Preheader) {
+        PreheaderIdx = i;
+      } else {
+        NewPN->addIncoming(IV, IBB);
+        if (HasUniqueIncomingValue) {
+          if (!UniqueValue)
+            UniqueValue = IV;
+          else if (UniqueValue != IV)
+            HasUniqueIncomingValue = false;
+        }
+      }
+    }
+
+    // Delete all of the incoming values from the old PN except the preheader's
+    assert(PreheaderIdx != ~0U && "PHI has no preheader entry??");
+    if (PreheaderIdx != 0) {
+      PN->setIncomingValue(0, PN->getIncomingValue(PreheaderIdx));
+      PN->setIncomingBlock(0, PN->getIncomingBlock(PreheaderIdx));
+    }
+    // Nuke all entries except the zero'th.
+    for (unsigned i = 0, e = PN->getNumIncomingValues()-1; i != e; ++i)
+      PN->removeIncomingValue(e-i, false);
+
+    // Finally, add the newly constructed PHI node as the entry for the BEBlock.
+    PN->addIncoming(NewPN, BEBlock);
+
+    // As an optimization, if all incoming values in the new PhiNode (which is a
+    // subset of the incoming values of the old PHI node) have the same value,
+    // eliminate the PHI Node.
+    if (HasUniqueIncomingValue) {
+      NewPN->replaceAllUsesWith(UniqueValue);
+      BEBlock->getInstList().erase(NewPN);
+    }
+  }
+
+  // Now that all of the PHI nodes have been inserted and adjusted, modify the
+  // backedge blocks to jump to the BEBlock instead of the header.
+  // If one of the backedges has llvm.loop metadata attached, we remove
+  // it from the backedge and add it to BEBlock.
+  unsigned LoopMDKind = BEBlock->getContext().getMDKindID("llvm.loop");
+  MDNode *LoopMD = nullptr;
+  for (unsigned i = 0, e = BackedgeBlocks.size(); i != e; ++i) {
+    Instruction *TI = BackedgeBlocks[i]->getTerminator();
+    if (!LoopMD)
+      LoopMD = TI->getMetadata(LoopMDKind);
+    TI->setMetadata(LoopMDKind, nullptr);
+    TI->replaceSuccessorWith(Header, BEBlock);
+  }
+  BEBlock->getTerminator()->setMetadata(LoopMDKind, LoopMD);
+
+  //===--- Update all analyses which we must preserve now -----------------===//
+
+  // Update Loop Information - we know that this block is now in the current
+  // loop and all parent loops.
+  L->addBasicBlockToLoop(BEBlock, *LI);
+
+  // Update dominator information
+  DT->splitBlock(BEBlock);
+
+  if (MSSAU)
+    MSSAU->updatePhisWhenInsertingUniqueBackedgeBlock(Header, Preheader,
+                                                      BEBlock);
+
+  return BEBlock;
+}
+
+/// Simplify one loop and queue further loops for simplification.
+static bool simplifyOneLoop(Loop *L, SmallVectorImpl<Loop *> &Worklist,
+                            DominatorTree *DT, LoopInfo *LI,
+                            ScalarEvolution *SE, AssumptionCache *AC,
+                            MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+  bool Changed = false;
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+ReprocessLoop:
+
+  // Check to see that no blocks (other than the header) in this loop have
+  // predecessors that are not in the loop.  This is not valid for natural
+  // loops, but can occur if the blocks are unreachable.  Since they are
+  // unreachable we can just shamelessly delete those CFG edges!
+  for (Loop::block_iterator BB = L->block_begin(), E = L->block_end();
+       BB != E; ++BB) {
+    if (*BB == L->getHeader()) continue;
+
+    SmallPtrSet<BasicBlock*, 4> BadPreds;
+    for (pred_iterator PI = pred_begin(*BB),
+         PE = pred_end(*BB); PI != PE; ++PI) {
+      BasicBlock *P = *PI;
+      if (!L->contains(P))
+        BadPreds.insert(P);
+    }
+
+    // Delete each unique out-of-loop (and thus dead) predecessor.
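+    // E.g. an unreachable block left behind by an earlier pass that still
+    // branches into the middle of the loop: since it can never execute, its
+    // terminator is simply rewritten to 'unreachable' below.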
+    for (BasicBlock *P : BadPreds) {
+
+      LLVM_DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor "
+                        << P->getName() << "\n");
+
+      // Zap the dead pred's terminator and replace it with unreachable.
+      Instruction *TI = P->getTerminator();
+      changeToUnreachable(TI, /*UseLLVMTrap=*/false, PreserveLCSSA,
+                          /*DTU=*/nullptr, MSSAU);
+      Changed = true;
+    }
+  }
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  // If there are exiting blocks with branches on undef, resolve the undef in
+  // the direction which will exit the loop. This will help simplify loop
+  // trip count computations.
+  SmallVector<BasicBlock*, 8> ExitingBlocks;
+  L->getExitingBlocks(ExitingBlocks);
+  for (BasicBlock *ExitingBlock : ExitingBlocks)
+    if (BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator()))
+      if (BI->isConditional()) {
+        if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+          LLVM_DEBUG(dbgs()
+                     << "LoopSimplify: Resolving \"br i1 undef\" to exit in "
+                     << ExitingBlock->getName() << "\n");
+
+          BI->setCondition(ConstantInt::get(Cond->getType(),
+                                            !L->contains(BI->getSuccessor(0))));
+
+          Changed = true;
+        }
+      }
+
+  // Does the loop already have a preheader?  If so, don't insert one.
+  BasicBlock *Preheader = L->getLoopPreheader();
+  if (!Preheader) {
+    Preheader = InsertPreheaderForLoop(L, DT, LI, MSSAU, PreserveLCSSA);
+    if (Preheader)
+      Changed = true;
+  }
+
+  // Next, check to make sure that all exit nodes of the loop only have
+  // predecessors that are inside of the loop.  This check guarantees that the
+  // loop preheader/header will dominate the exit blocks.  If the exit block has
+  // predecessors from outside of the loop, split the edge now.
+  if (formDedicatedExitBlocks(L, DT, LI, MSSAU, PreserveLCSSA))
+    Changed = true;
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  // If the header has more than two predecessors at this point (from the
+  // preheader and from multiple backedges), we must adjust the loop.
+  BasicBlock *LoopLatch = L->getLoopLatch();
+  if (!LoopLatch) {
+    // If this is really a nested loop, rip it out into a child loop.  Don't do
+    // this for loops with a giant number of backedges, just factor them into a
+    // common backedge instead.
+    if (L->getNumBackEdges() < 8) {
+      if (Loop *OuterL = separateNestedLoop(L, Preheader, DT, LI, SE,
+                                            PreserveLCSSA, AC, MSSAU)) {
+        ++NumNested;
+        // Enqueue the outer loop as it should be processed next in our
+        // depth-first nest walk.
+        Worklist.push_back(OuterL);
+
+        // This is a big restructuring change, reprocess the whole loop.
+        Changed = true;
+        // GCC doesn't tail recursion eliminate this.
+        // FIXME: It isn't clear we can't rely on LLVM to TRE this.
+        goto ReprocessLoop;
+      }
+    }
+
+    // If we either couldn't, or didn't want to, identify nesting of the loops,
+    // insert a new block that all backedges target, then make it jump to the
+    // loop header.
+    LoopLatch = insertUniqueBackedgeBlock(L, Preheader, DT, LI, MSSAU);
+    if (LoopLatch)
+      Changed = true;
+  }
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  const DataLayout &DL = L->getHeader()->getModule()->getDataLayout();
+
+  // Scan over the PHI nodes in the loop header.  Since they now have only two
+  // incoming values (the loop is canonicalized), we may have simplified the PHI
+  // down to 'X = phi [X, Y]', which should be replaced with 'Y'.
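+  // In IR terms (names illustrative):
+  //   %x = phi i32 [ %x, %latch ], [ %y, %preheader ]
+  // always has the value %y, so all of its uses are replaced with %y.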
+  PHINode *PN;
+  for (BasicBlock::iterator I = L->getHeader()->begin();
+       (PN = dyn_cast<PHINode>(I++)); )
+    if (Value *V = SimplifyInstruction(PN, {DL, nullptr, DT, AC})) {
+      if (SE) SE->forgetValue(PN);
+      if (!PreserveLCSSA || LI->replacementPreservesLCSSAForm(PN, V)) {
+        PN->replaceAllUsesWith(V);
+        PN->eraseFromParent();
+      }
+    }
+
+  // If this loop has multiple exits and the exits all go to the same
+  // block, attempt to merge the exits. This helps several passes, such
+  // as LoopRotation, which do not support loops with multiple exits.
+  // SimplifyCFG also does this (and this code uses the same utility
+  // function), however this code is loop-aware, where SimplifyCFG is
+  // not. That gives it the advantage of being able to hoist
+  // loop-invariant instructions out of the way to open up more
+  // opportunities, and the disadvantage of having the responsibility
+  // to preserve dominator information.
+  auto HasUniqueExitBlock = [&]() {
+    BasicBlock *UniqueExit = nullptr;
+    for (auto *ExitingBB : ExitingBlocks)
+      for (auto *SuccBB : successors(ExitingBB)) {
+        if (L->contains(SuccBB))
+          continue;
+
+        if (!UniqueExit)
+          UniqueExit = SuccBB;
+        else if (UniqueExit != SuccBB)
+          return false;
+      }
+
+    return true;
+  };
+  if (HasUniqueExitBlock()) {
+    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+      BasicBlock *ExitingBlock = ExitingBlocks[i];
+      if (!ExitingBlock->getSinglePredecessor()) continue;
+      BranchInst *BI = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+      if (!BI || !BI->isConditional()) continue;
+      CmpInst *CI = dyn_cast<CmpInst>(BI->getCondition());
+      if (!CI || CI->getParent() != ExitingBlock) continue;
+
+      // Attempt to hoist out all instructions except for the
+      // comparison and the branch.
+      bool AllInvariant = true;
+      bool AnyInvariant = false;
+      for (auto I = ExitingBlock->instructionsWithoutDebug().begin(); &*I != BI; ) {
+        Instruction *Inst = &*I++;
+        if (Inst == CI)
+          continue;
+        if (!L->makeLoopInvariant(
+                Inst, AnyInvariant,
+                Preheader ? Preheader->getTerminator() : nullptr, MSSAU)) {
+          AllInvariant = false;
+          break;
+        }
+      }
+      if (AnyInvariant) {
+        Changed = true;
+        // The loop disposition of all SCEV expressions that depend on any
+        // hoisted values have also changed.
+        if (SE)
+          SE->forgetLoopDispositions(L);
+      }
+      if (!AllInvariant) continue;
+
+      // The block has now been cleared of all instructions except for
+      // a comparison and a conditional branch. SimplifyCFG may be able
+      // to fold it now.
+      if (!FoldBranchToCommonDest(BI, MSSAU))
+        continue;
+
+      // Success. The block is now dead, so remove it from the loop,
+      // update the dominator tree and delete it.
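+      // E.g. (illustrative): once the block is reduced to a compare and a
+      // conditional branch, FoldBranchToCommonDest has merged that branch
+      // into the block's single predecessor, so nothing reaches this block
+      // any more.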
+      LLVM_DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block "
+                        << ExitingBlock->getName() << "\n");
+
+      assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
+      Changed = true;
+      LI->removeBlock(ExitingBlock);
+
+      DomTreeNode *Node = DT->getNode(ExitingBlock);
+      const std::vector<DomTreeNodeBase<BasicBlock> *> &Children =
+          Node->getChildren();
+      while (!Children.empty()) {
+        DomTreeNode *Child = Children.front();
+        DT->changeImmediateDominator(Child, Node->getIDom());
+      }
+      DT->eraseNode(ExitingBlock);
+      if (MSSAU) {
+        SmallSetVector<BasicBlock *, 8> ExitBlockSet;
+        ExitBlockSet.insert(ExitingBlock);
+        MSSAU->removeBlocks(ExitBlockSet);
+      }
+
+      BI->getSuccessor(0)->removePredecessor(
+          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+      BI->getSuccessor(1)->removePredecessor(
+          ExitingBlock, /* KeepOneInputPHIs */ PreserveLCSSA);
+      ExitingBlock->eraseFromParent();
+    }
+  }
+
+  // Changing exit conditions for blocks may affect exit counts of this loop and
+  // any of its parents, so we must invalidate the entire subtree if we've made
+  // any changes.
+  if (Changed && SE)
+    SE->forgetTopmostLoop(L);
+
+  if (MSSAU && VerifyMemorySSA)
+    MSSAU->getMemorySSA()->verifyMemorySSA();
+
+  return Changed;
+}
+
+bool llvm::simplifyLoop(Loop *L, DominatorTree *DT, LoopInfo *LI,
+                        ScalarEvolution *SE, AssumptionCache *AC,
+                        MemorySSAUpdater *MSSAU, bool PreserveLCSSA) {
+  bool Changed = false;
+
+#ifndef NDEBUG
+  // If we're asked to preserve LCSSA, the loop nest needs to start in LCSSA
+  // form.
+  if (PreserveLCSSA) {
+    assert(DT && "DT not available.");
+    assert(LI && "LI not available.");
+    assert(L->isRecursivelyLCSSAForm(*DT, *LI) &&
+           "Requested to preserve LCSSA, but it's already broken.");
+  }
+#endif
+
+  // Worklist maintains our depth-first queue of loops in this nest to process.
+  SmallVector<Loop *, 4> Worklist;
+  Worklist.push_back(L);
+
+  // Walk the worklist from front to back, pushing newly found sub loops onto
+  // the back. This will let us process loops from back to front in depth-first
+  // order. We can use this simple process because loops form a tree.
+  for (unsigned Idx = 0; Idx != Worklist.size(); ++Idx) {
+    Loop *L2 = Worklist[Idx];
+    Worklist.append(L2->begin(), L2->end());
+  }
+
+  while (!Worklist.empty())
+    Changed |= simplifyOneLoop(Worklist.pop_back_val(), Worklist, DT, LI, SE,
+                               AC, MSSAU, PreserveLCSSA);
+
+  return Changed;
+}
+
+
+char LoopSimplify::ID = 0;
+INITIALIZE_PASS_BEGIN(LoopSimplify, "loop-simplify",
+                      "Canonicalize natural loops", false, false)
+INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
+INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
+INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
+INITIALIZE_PASS_END(LoopSimplify, "loop-simplify",
+                    "Canonicalize natural loops", false, false)
+
+// Publicly exposed interface to pass...
+char &llvm::LoopSimplifyID = LoopSimplify::ID;
+Pass *llvm::createLoopSimplifyPass() { return new LoopSimplify(); }
+
+/// runOnFunction - Run down all loops in the CFG (recursively, but we could do
+/// it in any convenient order) inserting preheaders...
+///
+bool LoopSimplify::runOnFunction(Function &F) {
+  bool Changed = false;
+  LoopInfo *LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+  auto *SEWP = getAnalysisIfAvailable<ScalarEvolutionWrapperPass>();
+  ScalarEvolution *SE = SEWP ? &SEWP->getSE() : nullptr;
+  AssumptionCache *AC =
+      &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+  MemorySSA *MSSA = nullptr;
+  std::unique_ptr<MemorySSAUpdater> MSSAU;
+  if (EnableMSSALoopDependency) {
+    auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
+    if (MSSAAnalysis) {
+      MSSA = &MSSAAnalysis->getMSSA();
+      MSSAU = make_unique<MemorySSAUpdater>(MSSA);
+    }
+  }
+
+  bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+  // Simplify each loop nest in the function.
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    Changed |= simplifyLoop(*I, DT, LI, SE, AC, MSSAU.get(), PreserveLCSSA);
+
+#ifndef NDEBUG
+  if (PreserveLCSSA) {
+    bool InLCSSA = all_of(
+        *LI, [&](Loop *L) { return L->isRecursivelyLCSSAForm(*DT, *LI); });
+    assert(InLCSSA && "LCSSA is broken after loop-simplify.");
+  }
+#endif
+  return Changed;
+}
+
+PreservedAnalyses LoopSimplifyPass::run(Function &F,
+                                        FunctionAnalysisManager &AM) {
+  bool Changed = false;
+  LoopInfo *LI = &AM.getResult<LoopAnalysis>(F);
+  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
+  ScalarEvolution *SE = AM.getCachedResult<ScalarEvolutionAnalysis>(F);
+  AssumptionCache *AC = &AM.getResult<AssumptionAnalysis>(F);
+
+  // Note that we don't preserve LCSSA in the new PM; if you need it, run LCSSA
+  // after simplifying the loops. MemorySSA is not preserved either.
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    Changed |=
+        simplifyLoop(*I, DT, LI, SE, AC, nullptr, /*PreserveLCSSA*/ false);
+
+  if (!Changed)
+    return PreservedAnalyses::all();
+
+  PreservedAnalyses PA;
+  PA.preserve<DominatorTreeAnalysis>();
+  PA.preserve<LoopAnalysis>();
+  PA.preserve<BasicAA>();
+  PA.preserve<GlobalsAA>();
+  PA.preserve<SCEVAA>();
+  PA.preserve<ScalarEvolutionAnalysis>();
+  PA.preserve<DependenceAnalysis>();
+  // BPI maps conditional terminators to probabilities; LoopSimplify can insert
+  // blocks, but it does so only by splitting existing blocks and edges. This
+  // results in the interesting property that all new terminators inserted are
+  // unconditional branches which do not appear in BPI. All deletions are
+  // handled via ValueHandle callbacks w/in BPI.
+  PA.preserve<BranchProbabilityAnalysis>();
+  return PA;
+}
+
+// FIXME: Restore this code when we re-enable verification in verifyAnalysis
+// below.
+#if 0
+static void verifyLoop(Loop *L) {
+  // Verify subloops.
+  for (Loop::iterator I = L->begin(), E = L->end(); I != E; ++I)
+    verifyLoop(*I);
+
+  // It used to be possible to just assert L->isLoopSimplifyForm(), however
+  // with the introduction of indirectbr, there are now cases where it's
+  // not possible to transform a loop as necessary. We can at least check
+  // that there is an indirectbr near any time there's trouble.
+
+  // Indirectbr can interfere with preheader and unique backedge insertion.
+  if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+    bool HasIndBrPred = false;
+    for (pred_iterator PI = pred_begin(L->getHeader()),
+         PE = pred_end(L->getHeader()); PI != PE; ++PI)
+      if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+        HasIndBrPred = true;
+        break;
+      }
+    assert(HasIndBrPred &&
+           "LoopSimplify has no excuse for missing loop header info!");
+    (void)HasIndBrPred;
+  }
+
+  // Indirectbr can interfere with exit block canonicalization.
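+  // An indirectbr edge cannot be split (its successors are reached through
+  // blockaddress values that cannot be rewritten), so a loop that exits via
+  // an indirectbr may legitimately lack dedicated exit blocks; that is the
+  // only excuse the check below accepts.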
+  if (!L->hasDedicatedExits()) {
+    bool HasIndBrExiting = false;
+    SmallVector<BasicBlock*, 8> ExitingBlocks;
+    L->getExitingBlocks(ExitingBlocks);
+    for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+      if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+        HasIndBrExiting = true;
+        break;
+      }
+    }
+
+    assert(HasIndBrExiting &&
+           "LoopSimplify has no excuse for missing exit block info!");
+    (void)HasIndBrExiting;
+  }
+}
+#endif
+
+void LoopSimplify::verifyAnalysis() const {
+  // FIXME: This routine is being called mid-way through the loop pass manager
+  // as loop passes destroy this analysis. That's actually fine, but we have no
+  // way of expressing that here. Once all of the passes that destroy this are
+  // hoisted out of the loop pass manager we can add back verification here.
+#if 0
+  for (LoopInfo::iterator I = LI->begin(), E = LI->end(); I != E; ++I)
+    verifyLoop(*I);
+#endif
+}
diff --git a/hpvm/llvm_patches/patch_llvm.sh b/hpvm/llvm_patches/patch_llvm.sh
new file mode 100644
index 0000000000000000000000000000000000000000..fb1ff433847667e3b44ceb8b897015111f9d4bb2
--- /dev/null
+++ b/hpvm/llvm_patches/patch_llvm.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+SH="$(readlink -f /proc/$$/exe)"
+if [[ "$SH" == "/bin/zsh" ]]; then
+  DIR="${0:A:h}"
+else
+  DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+fi
+
+cd "$DIR"
+shopt -s globstar
+for f in ./**/*
+do
+  if [ -f "$f" ]; then
+    if [[ ( $f == *.cpp ) || ( $f == *.h ) || ( $f == *.td ) || ( $f == *.txt ) || ( $f == *.def ) ]]; then
+      diff -Nu "$LLVM_SRC_ROOT/$f" "$f" > "$f.patch"
+      patch "$LLVM_SRC_ROOT/$f" < "$f.patch"
+    fi
+  fi
+done
diff --git a/hpvm/scripts/hpvm_installer.py b/hpvm/scripts/hpvm_installer.py
index ae06ca12f6914f748da061d5ed64a1106c5123aa..950c73515e3ff8474e6de17368514fb8123aa8ae 100755
--- a/hpvm/scripts/hpvm_installer.py
+++ b/hpvm/scripts/hpvm_installer.py
@@ -157,7 +157,7 @@ Example: "DCMAKE_BUILD_TYPE=Release DCMAKE_INSTALL_PREFIX=install". Arguments:
 """
     )
     args.cmake_args = input()
-    if args.cmake_args.strip() != "": 
+    if args.cmake_args.strip() != "":
         args.cmake_args = [f"-{arg}" for arg in args.cmake_args.split(" ")]
 
     args.no_pypkg = not input_with_check(
@@ -187,7 +187,7 @@ def print_args(args):
 
 def check_python_version():
     from sys import version_info, version, executable
-    
+
     lowest, highest = PYTHON_REQ
     if not (lowest <= version_info < highest):
         lowest_str = ".".join([str(n) for n in lowest])
@@ -276,8 +276,7 @@ def link_and_patch():
         symlink(ROOT_DIR / link, hpvm / link)
     print("Applying HPVM patches...")
     chdir(ROOT_DIR / "llvm_patches")
-    check_call(["bash", "./construct_patch.sh"])
-    check_call(["bash", "./apply_patch.sh"])
+    check_call(["bash", "./patch_llvm.sh"])
     print("Patches applied.")
     chdir(cwd)
@@ -370,7 +369,7 @@ def main():
         """
 HPVM not installed.
 To complete installation, follow these instructions:
-- Create and navigate to a folder "./build" 
+- Create and navigate to a folder "./build"
 - Run "cmake ../llvm [options]". Find potential options in README.md.
 - Run "make -j<number of threads> hpvm-clang" and then "make install"
 For more details refer to README.md.