Home | History | Annotate | Line # | Download | only in Analysis
      1 //===- MustExecute.h - Is an instruction known to execute--------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 /// \file
      9 /// Contains a collection of routines for determining if a given instruction is
     10 /// guaranteed to execute if a given point in control flow is reached. The most
     11 /// common example is an instruction within a loop being provably executed if we
     12 /// branch to the header of it's containing loop.
     13 ///
     14 /// There are two interfaces available to determine if an instruction is
     15 /// executed once a given point in the control flow is reached:
     16 /// 1) A loop-centric one derived from LoopSafetyInfo.
     17 /// 2) A "must be executed context"-based one implemented in the
     18 ///    MustBeExecutedContextExplorer.
     19 /// Please refer to the class comments for more information.
     20 ///
     21 //===----------------------------------------------------------------------===//
     22 
     23 #ifndef LLVM_ANALYSIS_MUSTEXECUTE_H
     24 #define LLVM_ANALYSIS_MUSTEXECUTE_H
     25 
     26 #include "llvm/ADT/DenseMap.h"
     27 #include "llvm/ADT/DenseSet.h"
     28 #include "llvm/Analysis/EHPersonalities.h"
     29 #include "llvm/Analysis/InstructionPrecedenceTracking.h"
     30 #include "llvm/IR/PassManager.h"
     31 #include "llvm/Support/raw_ostream.h"
     32 
     33 namespace llvm {
     34 
     35 namespace {
     36 template <typename T> using GetterTy = std::function<T *(const Function &F)>;
     37 }
     38 
     39 class BasicBlock;
     40 class DominatorTree;
     41 class Instruction;
     42 class Loop;
     43 class LoopInfo;
     44 class PostDominatorTree;
     45 
     46 /// Captures loop safety information.
     47 /// It keep information for loop blocks may throw exception or otherwise
     48 /// exit abnormally on any iteration of the loop which might actually execute
     49 /// at runtime.  The primary way to consume this information is via
     50 /// isGuaranteedToExecute below, but some callers bailout or fallback to
     51 /// alternate reasoning if a loop contains any implicit control flow.
     52 /// NOTE: LoopSafetyInfo contains cached information regarding loops and their
     53 /// particular blocks. This information is only dropped on invocation of
     54 /// computeLoopSafetyInfo. If the loop or any of its block is deleted, or if
     55 /// any thrower instructions have been added or removed from them, or if the
     56 /// control flow has changed, or in case of other meaningful modifications, the
     57 /// LoopSafetyInfo needs to be recomputed. If a meaningful modifications to the
     58 /// loop were made and the info wasn't recomputed properly, the behavior of all
     59 /// methods except for computeLoopSafetyInfo is undefined.
     60 class LoopSafetyInfo {
     61   // Used to update funclet bundle operands.
     62   DenseMap<BasicBlock *, ColorVector> BlockColors;
     63 
     64 protected:
     65   /// Computes block colors.
     66   void computeBlockColors(const Loop *CurLoop);
     67 
     68 public:
     69   /// Returns block colors map that is used to update funclet operand bundles.
     70   const DenseMap<BasicBlock *, ColorVector> &getBlockColors() const;
     71 
     72   /// Copy colors of block \p Old into the block \p New.
     73   void copyColors(BasicBlock *New, BasicBlock *Old);
     74 
     75   /// Returns true iff the block \p BB potentially may throw exception. It can
     76   /// be false-positive in cases when we want to avoid complex analysis.
     77   virtual bool blockMayThrow(const BasicBlock *BB) const = 0;
     78 
     79   /// Returns true iff any block of the loop for which this info is contains an
     80   /// instruction that may throw or otherwise exit abnormally.
     81   virtual bool anyBlockMayThrow() const = 0;
     82 
     83   /// Return true if we must reach the block \p BB under assumption that the
     84   /// loop \p CurLoop is entered.
     85   bool allLoopPathsLeadToBlock(const Loop *CurLoop, const BasicBlock *BB,
     86                                const DominatorTree *DT) const;
     87 
     88   /// Computes safety information for a loop checks loop body & header for
     89   /// the possibility of may throw exception, it takes LoopSafetyInfo and loop
     90   /// as argument. Updates safety information in LoopSafetyInfo argument.
     91   /// Note: This is defined to clear and reinitialize an already initialized
     92   /// LoopSafetyInfo.  Some callers rely on this fact.
     93   virtual void computeLoopSafetyInfo(const Loop *CurLoop) = 0;
     94 
     95   /// Returns true if the instruction in a loop is guaranteed to execute at
     96   /// least once (under the assumption that the loop is entered).
     97   virtual bool isGuaranteedToExecute(const Instruction &Inst,
     98                                      const DominatorTree *DT,
     99                                      const Loop *CurLoop) const = 0;
    100 
    101   LoopSafetyInfo() = default;
    102 
    103   virtual ~LoopSafetyInfo() = default;
    104 };
    105 
    106 
    107 /// Simple and conservative implementation of LoopSafetyInfo that can give
    108 /// false-positive answers to its queries in order to avoid complicated
    109 /// analysis.
    110 class SimpleLoopSafetyInfo: public LoopSafetyInfo {
    111   bool MayThrow = false;       // The current loop contains an instruction which
    112                                // may throw.
    113   bool HeaderMayThrow = false; // Same as previous, but specific to loop header
    114 
    115 public:
    116   bool blockMayThrow(const BasicBlock *BB) const override;
    117 
    118   bool anyBlockMayThrow() const override;
    119 
    120   void computeLoopSafetyInfo(const Loop *CurLoop) override;
    121 
    122   bool isGuaranteedToExecute(const Instruction &Inst,
    123                              const DominatorTree *DT,
    124                              const Loop *CurLoop) const override;
    125 };
    126 
    127 /// This implementation of LoopSafetyInfo use ImplicitControlFlowTracking to
    128 /// give precise answers on "may throw" queries. This implementation uses cache
    129 /// that should be invalidated by calling the methods insertInstructionTo and
    130 /// removeInstruction whenever we modify a basic block's contents by adding or
    131 /// removing instructions.
    132 class ICFLoopSafetyInfo: public LoopSafetyInfo {
    133   bool MayThrow = false;       // The current loop contains an instruction which
    134                                // may throw.
    135   // Contains information about implicit control flow in this loop's blocks.
    136   mutable ImplicitControlFlowTracking ICF;
    137   // Contains information about instruction that may possibly write memory.
    138   mutable MemoryWriteTracking MW;
    139 
    140 public:
    141   bool blockMayThrow(const BasicBlock *BB) const override;
    142 
    143   bool anyBlockMayThrow() const override;
    144 
    145   void computeLoopSafetyInfo(const Loop *CurLoop) override;
    146 
    147   bool isGuaranteedToExecute(const Instruction &Inst,
    148                              const DominatorTree *DT,
    149                              const Loop *CurLoop) const override;
    150 
    151   /// Returns true if we could not execute a memory-modifying instruction before
    152   /// we enter \p BB under assumption that \p CurLoop is entered.
    153   bool doesNotWriteMemoryBefore(const BasicBlock *BB, const Loop *CurLoop)
    154       const;
    155 
    156   /// Returns true if we could not execute a memory-modifying instruction before
    157   /// we execute \p I under assumption that \p CurLoop is entered.
    158   bool doesNotWriteMemoryBefore(const Instruction &I, const Loop *CurLoop)
    159       const;
    160 
    161   /// Inform the safety info that we are planning to insert a new instruction
    162   /// \p Inst into the basic block \p BB. It will make all cache updates to keep
    163   /// it correct after this insertion.
    164   void insertInstructionTo(const Instruction *Inst, const BasicBlock *BB);
    165 
    166   /// Inform safety info that we are planning to remove the instruction \p Inst
    167   /// from its block. It will make all cache updates to keep it correct after
    168   /// this removal.
    169   void removeInstruction(const Instruction *Inst);
    170 };
    171 
    172 bool mayContainIrreducibleControl(const Function &F, const LoopInfo *LI);
    173 
    174 struct MustBeExecutedContextExplorer;
    175 
    176 /// Enum that allows us to spell out the direction.
    177 enum class ExplorationDirection {
    178   BACKWARD = 0,
    179   FORWARD = 1,
    180 };
    181 
    182 /// Must be executed iterators visit stretches of instructions that are
    183 /// guaranteed to be executed together, potentially with other instruction
    184 /// executed in-between.
    185 ///
    186 /// Given the following code, and assuming all statements are single
    187 /// instructions which transfer execution to the successor (see
    188 /// isGuaranteedToTransferExecutionToSuccessor), there are two possible
    189 /// outcomes. If we start the iterator at A, B, or E, we will visit only A, B,
    190 /// and E. If we start at C or D, we will visit all instructions A-E.
    191 ///
    192 /// \code
    193 ///   A;
    194 ///   B;
    195 ///   if (...) {
    196 ///     C;
    197 ///     D;
    198 ///   }
    199 ///   E;
    200 /// \endcode
    201 ///
    202 ///
    203 /// Below is the example extneded with instructions F and G. Now we assume F
    204 /// might not transfer execution to it's successor G. As a result we get the
    205 /// following visit sets:
    206 ///
    207 /// Start Instruction   | Visit Set
    208 /// A                   | A, B,       E, F
    209 ///    B                | A, B,       E, F
    210 ///       C             | A, B, C, D, E, F
    211 ///          D          | A, B, C, D, E, F
    212 ///             E       | A, B,       E, F
    213 ///                F    | A, B,       E, F
    214 ///                   G | A, B,       E, F, G
    215 ///
    216 ///
    217 /// \code
    218 ///   A;
    219 ///   B;
    220 ///   if (...) {
    221 ///     C;
    222 ///     D;
    223 ///   }
    224 ///   E;
    225 ///   F;  // Might not transfer execution to its successor G.
    226 ///   G;
    227 /// \endcode
    228 ///
    229 ///
    230 /// A more complex example involving conditionals, loops, break, and continue
    231 /// is shown below. We again assume all instructions will transmit control to
    232 /// the successor and we assume we can prove the inner loop to be finite. We
    233 /// omit non-trivial branch conditions as the exploration is oblivious to them.
    234 /// Constant branches are assumed to be unconditional in the CFG. The resulting
    235 /// visist sets are shown in the table below.
    236 ///
    237 /// \code
    238 ///   A;
    239 ///   while (true) {
    240 ///     B;
    241 ///     if (...)
    242 ///       C;
    243 ///     if (...)
    244 ///       continue;
    245 ///     D;
    246 ///     if (...)
    247 ///       break;
    248 ///     do {
    249 ///       if (...)
    250 ///         continue;
    251 ///       E;
    252 ///     } while (...);
    253 ///     F;
    254 ///   }
    255 ///   G;
    256 /// \endcode
    257 ///
    258 /// Start Instruction    | Visit Set
    259 /// A                    | A, B
    260 ///    B                 | A, B
    261 ///       C              | A, B, C
    262 ///          D           | A, B,    D
    263 ///             E        | A, B,    D, E, F
    264 ///                F     | A, B,    D,    F
    265 ///                   G  | A, B,    D,       G
    266 ///
    267 ///
    268 /// Note that the examples show optimal visist sets but not necessarily the ones
    269 /// derived by the explorer depending on the available CFG analyses (see
    270 /// MustBeExecutedContextExplorer). Also note that we, depending on the options,
    271 /// the visit set can contain instructions from other functions.
    272 struct MustBeExecutedIterator {
    273   /// Type declarations that make his class an input iterator.
    274   ///{
    275   typedef const Instruction *value_type;
    276   typedef std::ptrdiff_t difference_type;
    277   typedef const Instruction **pointer;
    278   typedef const Instruction *&reference;
    279   typedef std::input_iterator_tag iterator_category;
    280   ///}
    281 
    282   using ExplorerTy = MustBeExecutedContextExplorer;
    283 
    284   MustBeExecutedIterator(const MustBeExecutedIterator &Other)
    285       : Visited(Other.Visited), Explorer(Other.Explorer),
    286         CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
    287 
    288   MustBeExecutedIterator(MustBeExecutedIterator &&Other)
    289       : Visited(std::move(Other.Visited)), Explorer(Other.Explorer),
    290         CurInst(Other.CurInst), Head(Other.Head), Tail(Other.Tail) {}
    291 
    292   MustBeExecutedIterator &operator=(MustBeExecutedIterator &&Other) {
    293     if (this != &Other) {
    294       std::swap(Visited, Other.Visited);
    295       std::swap(CurInst, Other.CurInst);
    296       std::swap(Head, Other.Head);
    297       std::swap(Tail, Other.Tail);
    298     }
    299     return *this;
    300   }
    301 
    302   ~MustBeExecutedIterator() {}
    303 
    304   /// Pre- and post-increment operators.
    305   ///{
    306   MustBeExecutedIterator &operator++() {
    307     CurInst = advance();
    308     return *this;
    309   }
    310 
    311   MustBeExecutedIterator operator++(int) {
    312     MustBeExecutedIterator tmp(*this);
    313     operator++();
    314     return tmp;
    315   }
    316   ///}
    317 
    318   /// Equality and inequality operators. Note that we ignore the history here.
    319   ///{
    320   bool operator==(const MustBeExecutedIterator &Other) const {
    321     return CurInst == Other.CurInst && Head == Other.Head && Tail == Other.Tail;
    322   }
    323 
    324   bool operator!=(const MustBeExecutedIterator &Other) const {
    325     return !(*this == Other);
    326   }
    327   ///}
    328 
    329   /// Return the underlying instruction.
    330   const Instruction *&operator*() { return CurInst; }
    331   const Instruction *getCurrentInst() const { return CurInst; }
    332 
    333   /// Return true if \p I was encountered by this iterator already.
    334   bool count(const Instruction *I) const {
    335     return Visited.count({I, ExplorationDirection::FORWARD}) ||
    336            Visited.count({I, ExplorationDirection::BACKWARD});
    337   }
    338 
    339 private:
    340   using VisitedSetTy =
    341       DenseSet<PointerIntPair<const Instruction *, 1, ExplorationDirection>>;
    342 
    343   /// Private constructors.
    344   MustBeExecutedIterator(ExplorerTy &Explorer, const Instruction *I);
    345 
    346   /// Reset the iterator to its initial state pointing at \p I.
    347   void reset(const Instruction *I);
    348 
    349   /// Reset the iterator to point at \p I, keep cached state.
    350   void resetInstruction(const Instruction *I);
    351 
    352   /// Try to advance one of the underlying positions (Head or Tail).
    353   ///
    354   /// \return The next instruction in the must be executed context, or nullptr
    355   ///         if none was found.
    356   const Instruction *advance();
    357 
    358   /// A set to track the visited instructions in order to deal with endless
    359   /// loops and recursion.
    360   VisitedSetTy Visited;
    361 
    362   /// A reference to the explorer that created this iterator.
    363   ExplorerTy &Explorer;
    364 
    365   /// The instruction we are currently exposing to the user. There is always an
    366   /// instruction that we know is executed with the given program point,
    367   /// initially the program point itself.
    368   const Instruction *CurInst;
    369 
    370   /// Two positions that mark the program points where this iterator will look
    371   /// for the next instruction. Note that the current instruction is either the
    372   /// one pointed to by Head, Tail, or both.
    373   const Instruction *Head, *Tail;
    374 
    375   friend struct MustBeExecutedContextExplorer;
    376 };
    377 
    378 /// A "must be executed context" for a given program point PP is the set of
    379 /// instructions, potentially before and after PP, that are executed always when
    380 /// PP is reached. The MustBeExecutedContextExplorer an interface to explore
    381 /// "must be executed contexts" in a module through the use of
    382 /// MustBeExecutedIterator.
    383 ///
    384 /// The explorer exposes "must be executed iterators" that traverse the must be
    385 /// executed context. There is little information sharing between iterators as
    386 /// the expected use case involves few iterators for "far apart" instructions.
    387 /// If that changes, we should consider caching more intermediate results.
    388 struct MustBeExecutedContextExplorer {
    389 
    390   /// In the description of the parameters we use PP to denote a program point
    391   /// for which the must be executed context is explored, or put differently,
    392   /// for which the MustBeExecutedIterator is created.
    393   ///
    394   /// \param ExploreInterBlock    Flag to indicate if instructions in blocks
    395   ///                             other than the parent of PP should be
    396   ///                             explored.
    397   /// \param ExploreCFGForward    Flag to indicate if instructions located after
    398   ///                             PP in the CFG, e.g., post-dominating PP,
    399   ///                             should be explored.
    400   /// \param ExploreCFGBackward   Flag to indicate if instructions located
    401   ///                             before PP in the CFG, e.g., dominating PP,
    402   ///                             should be explored.
    403   MustBeExecutedContextExplorer(
    404       bool ExploreInterBlock, bool ExploreCFGForward, bool ExploreCFGBackward,
    405       GetterTy<const LoopInfo> LIGetter =
    406           [](const Function &) { return nullptr; },
    407       GetterTy<const DominatorTree> DTGetter =
    408           [](const Function &) { return nullptr; },
    409       GetterTy<const PostDominatorTree> PDTGetter =
    410           [](const Function &) { return nullptr; })
    411       : ExploreInterBlock(ExploreInterBlock),
    412         ExploreCFGForward(ExploreCFGForward),
    413         ExploreCFGBackward(ExploreCFGBackward), LIGetter(LIGetter),
    414         DTGetter(DTGetter), PDTGetter(PDTGetter), EndIterator(*this, nullptr) {}
    415 
    416   /// Iterator-based interface. \see MustBeExecutedIterator.
    417   ///{
    418   using iterator = MustBeExecutedIterator;
    419   using const_iterator = const MustBeExecutedIterator;
    420 
    421   /// Return an iterator to explore the context around \p PP.
    422   iterator &begin(const Instruction *PP) {
    423     auto &It = InstructionIteratorMap[PP];
    424     if (!It)
    425       It.reset(new iterator(*this, PP));
    426     return *It;
    427   }
    428 
    429   /// Return an iterator to explore the cached context around \p PP.
    430   const_iterator &begin(const Instruction *PP) const {
    431     return *InstructionIteratorMap.find(PP)->second;
    432   }
    433 
    434   /// Return an universal end iterator.
    435   ///{
    436   iterator &end() { return EndIterator; }
    437   iterator &end(const Instruction *) { return EndIterator; }
    438 
    439   const_iterator &end() const { return EndIterator; }
    440   const_iterator &end(const Instruction *) const { return EndIterator; }
    441   ///}
    442 
    443   /// Return an iterator range to explore the context around \p PP.
    444   llvm::iterator_range<iterator> range(const Instruction *PP) {
    445     return llvm::make_range(begin(PP), end(PP));
    446   }
    447 
    448   /// Return an iterator range to explore the cached context around \p PP.
    449   llvm::iterator_range<const_iterator> range(const Instruction *PP) const {
    450     return llvm::make_range(begin(PP), end(PP));
    451   }
    452   ///}
    453 
    454   /// Check \p Pred on all instructions in the context.
    455   ///
    456   /// This method will evaluate \p Pred and return
    457   /// true if \p Pred holds in every instruction.
    458   bool checkForAllContext(const Instruction *PP,
    459                           function_ref<bool(const Instruction *)> Pred) {
    460     for (auto EIt = begin(PP), EEnd = end(PP); EIt != EEnd; ++EIt)
    461       if (!Pred(*EIt))
    462         return false;
    463     return true;
    464   }
    465 
    466   /// Helper to look for \p I in the context of \p PP.
    467   ///
    468   /// The context is expanded until \p I was found or no more expansion is
    469   /// possible.
    470   ///
    471   /// \returns True, iff \p I was found.
    472   bool findInContextOf(const Instruction *I, const Instruction *PP) {
    473     auto EIt = begin(PP), EEnd = end(PP);
    474     return findInContextOf(I, EIt, EEnd);
    475   }
    476 
    477   /// Helper to look for \p I in the context defined by \p EIt and \p EEnd.
    478   ///
    479   /// The context is expanded until \p I was found or no more expansion is
    480   /// possible.
    481   ///
    482   /// \returns True, iff \p I was found.
    483   bool findInContextOf(const Instruction *I, iterator &EIt, iterator &EEnd) {
    484     bool Found = EIt.count(I);
    485     while (!Found && EIt != EEnd)
    486       Found = (++EIt).getCurrentInst() == I;
    487     return Found;
    488   }
    489 
    490   /// Return the next instruction that is guaranteed to be executed after \p PP.
    491   ///
    492   /// \param It              The iterator that is used to traverse the must be
    493   ///                        executed context.
    494   /// \param PP              The program point for which the next instruction
    495   ///                        that is guaranteed to execute is determined.
    496   const Instruction *
    497   getMustBeExecutedNextInstruction(MustBeExecutedIterator &It,
    498                                    const Instruction *PP);
    499   /// Return the previous instr. that is guaranteed to be executed before \p PP.
    500   ///
    501   /// \param It              The iterator that is used to traverse the must be
    502   ///                        executed context.
    503   /// \param PP              The program point for which the previous instr.
    504   ///                        that is guaranteed to execute is determined.
    505   const Instruction *
    506   getMustBeExecutedPrevInstruction(MustBeExecutedIterator &It,
    507                                    const Instruction *PP);
    508 
    509   /// Find the next join point from \p InitBB in forward direction.
    510   const BasicBlock *findForwardJoinPoint(const BasicBlock *InitBB);
    511 
    512   /// Find the next join point from \p InitBB in backward direction.
    513   const BasicBlock *findBackwardJoinPoint(const BasicBlock *InitBB);
    514 
    515   /// Parameter that limit the performed exploration. See the constructor for
    516   /// their meaning.
    517   ///{
    518   const bool ExploreInterBlock;
    519   const bool ExploreCFGForward;
    520   const bool ExploreCFGBackward;
    521   ///}
    522 
    523 private:
    524   /// Getters for common CFG analyses: LoopInfo, DominatorTree, and
    525   /// PostDominatorTree.
    526   ///{
    527   GetterTy<const LoopInfo> LIGetter;
    528   GetterTy<const DominatorTree> DTGetter;
    529   GetterTy<const PostDominatorTree> PDTGetter;
    530   ///}
    531 
    532   /// Map to cache isGuaranteedToTransferExecutionToSuccessor results.
    533   DenseMap<const BasicBlock *, Optional<bool>> BlockTransferMap;
    534 
    535   /// Map to cache containsIrreducibleCFG results.
    536   DenseMap<const Function*, Optional<bool>> IrreducibleControlMap;
    537 
    538   /// Map from instructions to associated must be executed iterators.
    539   DenseMap<const Instruction *, std::unique_ptr<MustBeExecutedIterator>>
    540       InstructionIteratorMap;
    541 
    542   /// A unique end iterator.
    543   MustBeExecutedIterator EndIterator;
    544 };
    545 
    546 class MustExecutePrinterPass : public PassInfoMixin<MustExecutePrinterPass> {
    547   raw_ostream &OS;
    548 
    549 public:
    550   MustExecutePrinterPass(raw_ostream &OS) : OS(OS) {}
    551   PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
    552 };
    553 
    554 class MustBeExecutedContextPrinterPass
    555     : public PassInfoMixin<MustBeExecutedContextPrinterPass> {
    556   raw_ostream &OS;
    557 
    558 public:
    559   MustBeExecutedContextPrinterPass(raw_ostream &OS) : OS(OS) {}
    560   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    561 };
    562 
    563 } // namespace llvm
    564 
    565 #endif
    566