Home | History | Annotate | Line # | Download | only in Utils
      1 //===- Transforms/Utils/CodeExtractor.h - Code extraction util --*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // A utility to support extracting code from one function into its own
     10 // stand-alone function.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #ifndef LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H
     15 #define LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H
     16 
     17 #include "llvm/ADT/ArrayRef.h"
     18 #include "llvm/ADT/DenseMap.h"
     19 #include "llvm/ADT/SetVector.h"
     20 #include "llvm/ADT/SmallPtrSet.h"
     21 #include <limits>
     22 
     23 namespace llvm {
     24 
     25 class AllocaInst;
     26 class BasicBlock;
     27 class BlockFrequency;
     28 class BlockFrequencyInfo;
     29 class BranchProbabilityInfo;
     30 class AssumptionCache;
     31 class CallInst;
     32 class DominatorTree;
     33 class Function;
     34 class Instruction;
     35 class Loop;
     36 class Module;
     37 class Type;
     38 class Value;
        // The classes above are forward-declared only. Within this header they
        // appear solely behind pointers or references, or (BlockFrequency) as the
        // mapped type of a DenseMap that is itself taken by reference in a member
        // function declaration, so no defining header is included here.
     39 
     40 /// A cache for the CodeExtractor analysis. The operation \ref
     41 /// CodeExtractor::extractCodeRegion is guaranteed not to invalidate this
     42 /// object. This object should conservatively be considered invalid if any
     43 /// other mutating operations on the IR occur.
     44 ///
     45 /// Constructing this object is O(n) in the size of the function.
     46 class CodeExtractorAnalysisCache {
          // NOTE(review): SmallVector and DenseSet are used below, but this header
          // does not include their headers directly -- presumably they arrive
          // transitively through the ADT includes above; confirm.

     47   /// The allocas in the function, as recorded when the cache was built
          /// (see getAllocas() for the staleness caveat).
     48   SmallVector<AllocaInst *, 16> Allocas;
     49 
     50   /// Base memory addresses of load/store instructions, grouped by block.
     51   DenseMap<BasicBlock *, DenseSet<Value *>> BaseMemAddrs;
     52 
     53   /// Blocks which contain instructions which may have unknown side-effects
     54   /// on memory.
     55   DenseSet<BasicBlock *> SideEffectingBlocks;
     56 
          /// Private helper that gathers side-effect information for \p BB.
          /// NOTE(review): presumably invoked from the constructor to populate
          /// BaseMemAddrs and SideEffectingBlocks -- confirm in the implementation.
     57   void findSideEffectInfoForBlock(BasicBlock &BB);
     58 
     59 public:
          /// Build the cache for \p F. Per the class comment, this is O(n) in the
          /// size of the function.
          /// NOTE(review): this single-argument constructor is not marked explicit,
          /// so a Function& converts implicitly to a cache object -- confirm that
          /// this is intended.
     60   CodeExtractorAnalysisCache(Function &F);
     61 
     62   /// Get the allocas in the function at the time the analysis was created.
     63   /// Note that some of these allocas may no longer be present in the function,
     64   /// due to \ref CodeExtractor::extractCodeRegion.
     65   ArrayRef<AllocaInst *> getAllocas() const { return Allocas; }
     66 
     67   /// Check whether \p BB contains an instruction thought to load from, store
     68   /// to, or otherwise clobber the alloca \p Addr.
     69   bool doesBlockContainClobberOfAddr(BasicBlock &BB, AllocaInst *Addr) const;
     70 };
     71 
     72   /// Utility class for extracting code into a new function.
     73   ///
     74   /// This utility provides a simple interface for extracting some sequence of
     75   /// code into its own function, replacing it with a call to that function. It
     76   /// also provides various methods to query about the nature and result of
     77   /// such a transformation.
     78   ///
     79   /// The rough algorithm used is:
     80   /// 1) Find both the inputs and outputs for the extracted region.
     81   /// 2) Pass the inputs as arguments, remapping them within the extracted
     82   ///    function to arguments.
     83   /// 3) Add allocas for any scalar outputs, adding all of the outputs' allocas
     84   ///    as arguments, and inserting stores to the arguments for any scalars.
     85   class CodeExtractor {
            // Insertion-ordered set of values, used for the input/output/candidate
            // lists exchanged by the member functions below.
     86     using ValueSet = SetVector<Value *>;
     87 
     88     // Various bits of state computed on construction.
            // DT and AggregateArgs are const members, so they cannot change once the
            // extractor has been constructed; the remaining pointers are mutable.
     89     DominatorTree *const DT;
     90     const bool AggregateArgs;
     91     BlockFrequencyInfo *BFI;
     92     BranchProbabilityInfo *BPI;
     93     AssumptionCache *AC;
     94 
     95     // If true, varargs functions can be extracted.
     96     bool AllowVarArgs;
     97 
     98     // Bits of intermediate state computed at various phases of extraction.
     99     SetVector<BasicBlock *> Blocks;
    100     unsigned NumExitBlocks = std::numeric_limits<unsigned>::max();
            // NOTE(review): unlike NumExitBlocks, RetTy has no in-class initializer;
            // confirm it is always assigned before it is read.
    101     Type *RetTy;
    102 
    103     // Suffix to use when creating extracted function (appended to the original
    104     // function name + "."). If empty, the default is to use the entry block
    105     // label, if non-empty, otherwise "extracted".
            // NOTE(review): std::string is used here, but <string> is not included
            // directly by this header.
    106     std::string Suffix;
    107 
    108   public:
    109     /// Create a code extractor for a sequence of blocks.
    110     ///
    111     /// Given a sequence of basic blocks where the first block in the sequence
    112     /// dominates the rest, prepare a code extractor object for pulling this
    113     /// sequence out into its new function. When a DominatorTree is also given,
    114     /// extra checking and transformations are enabled. If AllowVarArgs is true,
    115     /// vararg functions can be extracted. This is safe, if all vararg handling
    116     /// code is extracted, including vastart. If AllowAlloca is true, then
    117     /// extraction of blocks containing alloca instructions would be possible,
    118     /// however code extractor won't validate whether extraction is legal.
    119     CodeExtractor(ArrayRef<BasicBlock *> BBs, DominatorTree *DT = nullptr,
    120                   bool AggregateArgs = false, BlockFrequencyInfo *BFI = nullptr,
    121                   BranchProbabilityInfo *BPI = nullptr,
    122                   AssumptionCache *AC = nullptr,
    123                   bool AllowVarArgs = false, bool AllowAlloca = false,
    124                   std::string Suffix = "");
    125 
    126     /// Create a code extractor for a loop body.
    127     ///
    128     /// Behaves just like the generic code sequence constructor, but uses the
    129     /// block sequence of the loop.
    130     CodeExtractor(DominatorTree &DT, Loop &L, bool AggregateArgs = false,
    131                   BlockFrequencyInfo *BFI = nullptr,
    132                   BranchProbabilityInfo *BPI = nullptr,
    133                   AssumptionCache *AC = nullptr,
    134                   std::string Suffix = "");
    135 
    136     /// Perform the extraction, returning the new function.
    137     ///
    138     /// Returns nullptr when called on a CodeExtractor instance where isEligible
    139     /// returns false.
    140     Function *extractCodeRegion(const CodeExtractorAnalysisCache &CEAC);
    141 
    142     /// Verify that assumption cache isn't stale after a region is extracted.
    143     /// Returns true when verifier finds errors. AssumptionCache is passed as
    144     /// parameter to make this function stateless.
    145     static bool verifyAssumptionCache(const Function &OldFunc,
    146                                       const Function &NewFunc,
    147                                       AssumptionCache *AC);
    148 
    149     /// Test whether this code extractor is eligible.
    150     ///
    151     /// Based on the blocks used when constructing the code extractor,
    152     /// determine whether it is eligible for extraction.
    153     ///
    154     /// Checks that varargs handling (with vastart and vaend) is only done in
    155     /// the outlined blocks.
    156     bool isEligible() const;
    157 
    158     /// Compute the set of input values and output values for the code.
    159     ///
    160     /// These can be used either when performing the extraction or to evaluate
    161     /// the expected size of a call to the extracted function. Note that this
    162     /// work cannot be cached between the two as once we decide to extract
    163     /// a code sequence, that sequence is modified, including changing these
    164     /// sets, before extraction occurs. These modifications won't have any
    165     /// significant impact on the cost however.
            ///
            /// \p Inputs and \p Outputs are written by this call; \p Allocas is
            /// read-only (const reference). NOTE(review): presumably \p Allocas is
            /// the sink-candidate set produced by findAllocas -- confirm.
    166     void findInputsOutputs(ValueSet &Inputs, ValueSet &Outputs,
    167                            const ValueSet &Allocas) const;
    168 
    169     /// Check if lifetime marker nodes can be hoisted/sunk into the outline
    170     /// region.
    171     ///
    172     /// Returns true if it is safe to do the code motion.
    173     bool
    174     isLegalToShrinkwrapLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
    175                                        Instruction *AllocaAddr) const;
    176 
    177     /// Find the set of allocas whose life ranges are contained within the
    178     /// outlined region.
    179     ///
    180     /// Allocas which have lifetime markers contained in the outlined region
    181     /// should be pushed to the outlined function. The address bitcasts that
    182     /// are used by the lifetime markers are also candidates for shrink-
    183     /// wrapping. The instructions that need to be sunk are collected in
    184     /// \p SinkCands.
            ///
            /// NOTE(review): this function has no parameter named 'Allocas'; the
            /// mapping of sunk instructions to \p SinkCands is inferred from the
            /// parameter names. \p HoistCands and \p ExitBlock are also non-const
            /// references and are presumably outputs as well -- confirm.
    185     void findAllocas(const CodeExtractorAnalysisCache &CEAC,
    186                      ValueSet &SinkCands, ValueSet &HoistCands,
    187                      BasicBlock *&ExitBlock) const;
    188 
    189     /// Find or create a block within the outline region for placing hoisted
    190     /// code.
    191     ///
    192     /// CommonExitBlock is a block outside the outline region. It is the common
    193     /// successor of blocks inside the region. If there exists a single block
    194     /// inside the region that is the predecessor of CommonExitBlock, that block
    195     /// will be returned. Otherwise CommonExitBlock will be split and the
    196     /// original block will be added to the outline region.
    197     BasicBlock *findOrCreateBlockForHoisting(BasicBlock *CommonExitBlock);
    198 
    199   private:
            // Value returned by getLifetimeMarkers. A default-constructed instance
            // has both flags false and both instruction pointers null.
    200     struct LifetimeMarkerInfo {
    201       bool SinkLifeStart = false;
    202       bool HoistLifeEnd = false;
    203       Instruction *LifeStart = nullptr;
    204       Instruction *LifeEnd = nullptr;
    205     };
    206 
            // NOTE(review): the private members below are undocumented in this
            // header; the remarks on them are limited to what their signatures show.
            // See the implementation file for their actual behavior.

            // Produces a LifetimeMarkerInfo for \p Addr given \p ExitBlock; does not
            // modify this object (const member).
    207     LifetimeMarkerInfo
    208     getLifetimeMarkers(const CodeExtractorAnalysisCache &CEAC,
    209                        Instruction *Addr, BasicBlock *ExitBlock) const;
    210 
            // \p Header is a reference to a pointer, so the callee is able to make the
            // caller's pointer refer to a different block.
    211     void severSplitPHINodesOfEntry(BasicBlock *&Header);
    212     void severSplitPHINodesOfExits(const SmallPtrSetImpl<BasicBlock *> &Exits);
    213     void splitReturnBlocks();
    214 
            // Returns a Function pointer; \p inputs and \p outputs are read-only here
            // (const references).
    215     Function *constructFunction(const ValueSet &inputs,
    216                                 const ValueSet &outputs,
    217                                 BasicBlock *header,
    218                                 BasicBlock *newRootNode, BasicBlock *newHeader,
    219                                 Function *oldFunction, Module *M);
    220 
    221     void moveCodeToFunction(Function *newFunction);
    222 
            // \p ExitWeights is taken by non-const reference. Note that \p BPI is an
            // explicit parameter here and shadows the BPI data member.
    223     void calculateNewCallTerminatorWeights(
    224         BasicBlock *CodeReplacer,
    225         DenseMap<BasicBlock *, BlockFrequency> &ExitWeights,
    226         BranchProbabilityInfo *BPI);
    227 
            // Returns a CallInst pointer; unlike constructFunction, \p inputs and
            // \p outputs are non-const references here.
    228     CallInst *emitCallAndSwitchStatement(Function *newFunction,
    229                                          BasicBlock *newHeader,
    230                                          ValueSet &inputs, ValueSet &outputs);
    231   };
    232 
    233 } // end namespace llvm
    234 
    235 #endif // LLVM_TRANSFORMS_UTILS_CODEEXTRACTOR_H
    236