Home | History | Annotate | Line # | Download | only in IPO
      1 //===- IROutliner.h - Extract similar IR regions into functions ------------==//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // \file
     10 // The interface file for the IROutliner which is used by the IROutliner Pass.
     11 //
     12 // The outliner uses the IRSimilarityIdentifier to identify the similar regions
     13 // of code.  It evaluates each set of IRSimilarityCandidates with an estimate of
     14 // whether it will provide code size reduction.  Each region is extracted using
     15 // the code extractor.  These extracted functions are consolidated into a single
     16 // function and called from the extracted call site.
     17 //
     18 // For example:
     19 // \code
     20 //   %1 = add i32 %a, %b
     21 //   %2 = add i32 %b, %a
     22 //   %3 = add i32 %b, %a
     23 //   %4 = add i32 %a, %b
     24 // \endcode
     25 // would become function
     26 // \code
     27 // define internal void outlined_ir_function(i32 %0, i32 %1) {
     28 //   %1 = add i32 %0, %1
     29 //   %2 = add i32 %1, %0
     30 //   ret void
     31 // }
     32 // \endcode
     33 // with calls:
     34 // \code
     35 //   call void outlined_ir_function(i32 %a, i32 %b)
     36 //   call void outlined_ir_function(i32 %b, i32 %a)
     37 // \endcode
     38 //
     39 //===----------------------------------------------------------------------===//
     40 
     41 #ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
     42 #define LLVM_TRANSFORMS_IPO_IROUTLINER_H
     43 
     44 #include "llvm/Analysis/IRSimilarityIdentifier.h"
     45 #include "llvm/IR/PassManager.h"
     46 #include "llvm/IR/ValueMap.h"
     47 #include "llvm/Support/InstructionCost.h"
     48 #include "llvm/Transforms/Utils/CodeExtractor.h"
     49 #include <set>
     50 
     51 struct OutlinableGroup;
     52 
     53 namespace llvm {
     54 using namespace IRSimilarity;
     55 
     56 class Module;
     57 class TargetTransformInfo;
     58 class OptimizationRemarkEmitter;
     59 
     60 /// The OutlinableRegion holds all the information for a specific region, or
     61 /// sequence of instructions. This includes what values need to be hoisted to
     62 /// arguments from the extracted function, inputs and outputs to the region, and
     63 /// mapping from the extracted function arguments to overall function arguments.
     64 struct OutlinableRegion {
     65   /// Describes the region of code.
     66   IRSimilarityCandidate *Candidate;
     67 
     68   /// If this region is outlined, the front and back IRInstructionData could
     69   /// potentially become invalidated if the only new instruction is a call.
     70   /// This ensures that we replace in the instruction in the IRInstructionData.
     71   IRInstructionData *NewFront = nullptr;
     72   IRInstructionData *NewBack = nullptr;
     73 
     74   /// The number of extracted inputs from the CodeExtractor.
     75   unsigned NumExtractedInputs;
     76 
     77   /// The corresponding BasicBlock with the appropriate stores for this
     78   /// OutlinableRegion in the overall function.
     79   unsigned OutputBlockNum;
     80 
     81   /// Mapping the extracted argument number to the argument number in the
     82   /// overall function.  Since there will be inputs, such as elevated constants
     83   /// that are not the same in each region in a SimilarityGroup, or values that
     84   /// cannot be sunk into the extracted section in every region, we must keep
     85   /// track of which extracted argument maps to which overall argument.
     86   DenseMap<unsigned, unsigned> ExtractedArgToAgg;
     87   DenseMap<unsigned, unsigned> AggArgToExtracted;
     88 
     89   /// Mapping of the argument number in the deduplicated function
     90   /// to a given constant, which is used when creating the arguments to the call
     91   /// to the newly created deduplicated function.  This is handled separately
     92   /// since the CodeExtractor does not recognize constants.
     93   DenseMap<unsigned, Constant *> AggArgToConstant;
     94 
     95   /// The global value numbers that are used as outputs for this section. Once
     96   /// extracted, each output will be stored to an output register.  This
     97   /// documents the global value numbers that are used in this pattern.
     98   SmallVector<unsigned, 4> GVNStores;
     99 
    100   /// Used to create an outlined function.
    101   CodeExtractor *CE = nullptr;
    102 
    103   /// The call site of the extracted region.
    104   CallInst *Call = nullptr;
    105 
    106   /// The function for the extracted region.
    107   Function *ExtractedFunction = nullptr;
    108 
    109   /// Flag for whether we have split out the IRSimilarityCanidate. That is,
    110   /// make the region contained the IRSimilarityCandidate its own BasicBlock.
    111   bool CandidateSplit = false;
    112 
    113   /// Flag for whether we should not consider this region for extraction.
    114   bool IgnoreRegion = false;
    115 
    116   /// The BasicBlock that is before the start of the region BasicBlock,
    117   /// only defined when the region has been split.
    118   BasicBlock *PrevBB = nullptr;
    119 
    120   /// The BasicBlock that contains the starting instruction of the region.
    121   BasicBlock *StartBB = nullptr;
    122 
    123   /// The BasicBlock that contains the ending instruction of the region.
    124   BasicBlock *EndBB = nullptr;
    125 
    126   /// The BasicBlock that is after the start of the region BasicBlock,
    127   /// only defined when the region has been split.
    128   BasicBlock *FollowBB = nullptr;
    129 
    130   /// The Outlinable Group that contains this region and structurally similar
    131   /// regions to this region.
    132   OutlinableGroup *Parent = nullptr;
    133 
    134   OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
    135       : Candidate(&C), Parent(&Group) {
    136     StartBB = C.getStartBB();
    137     EndBB = C.getEndBB();
    138   }
    139 
    140   /// For the contained region, split the parent BasicBlock at the starting and
    141   /// ending instructions of the contained IRSimilarityCandidate.
    142   void splitCandidate();
    143 
    144   /// For the contained region, reattach the BasicBlock at the starting and
    145   /// ending instructions of the contained IRSimilarityCandidate, or if the
    146   /// function has been extracted, the start and end of the BasicBlock
    147   /// containing the called function.
    148   void reattachCandidate();
    149 
    150   /// Get the size of the code removed from the region.
    151   ///
    152   /// \param [in] TTI - The TargetTransformInfo for the parent function.
    153   /// \returns the code size of the region
    154   InstructionCost getBenefit(TargetTransformInfo &TTI);
    155 };
    156 
    157 /// This class is a pass that identifies similarity in a Module, extracts
    158 /// instances of the similarity, and then consolidating the similar regions
    159 /// in an effort to reduce code size.  It uses the IRSimilarityIdentifier pass
    160 /// to identify the similar regions of code, and then extracts the similar
    161 /// sections into a single function.  See the above for an example as to
    162 /// how code is extracted and consolidated into a single function.
    163 class IROutliner {
    164 public:
    165   IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
    166              function_ref<IRSimilarityIdentifier &(Module &)> GIRSI,
    167              function_ref<OptimizationRemarkEmitter &(Function &)> GORE)
    168       : getTTI(GTTI), getIRSI(GIRSI), getORE(GORE) {}
    169   bool run(Module &M);
    170 
    171 private:
    172   /// Find repeated similar code sequences in \p M and outline them into new
    173   /// Functions.
    174   ///
    175   /// \param [in] M - The module to outline from.
    176   /// \returns The number of Functions created.
    177   unsigned doOutline(Module &M);
    178 
    179   /// Remove all the IRSimilarityCandidates from \p CandidateVec that have
    180   /// instructions contained in a previously outlined region and put the
    181   /// remaining regions in \p CurrentGroup.
    182   ///
    183   /// \param [in] CandidateVec - List of similarity candidates for regions with
    184   /// the same similarity structure.
    185   /// \param [in,out] CurrentGroup - Contains the potential sections to
    186   /// be outlined.
    187   void
    188   pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
    189                            OutlinableGroup &CurrentGroup);
    190 
    191   /// Create the function based on the overall types found in the current
    192   /// regions being outlined.
    193   ///
    194   /// \param M - The module to outline from.
    195   /// \param [in,out] CG - The OutlinableGroup for the regions to be outlined.
    196   /// \param [in] FunctionNameSuffix - How many functions have we previously
    197   /// created.
    198   /// \returns the newly created function.
    199   Function *createFunction(Module &M, OutlinableGroup &CG,
    200                            unsigned FunctionNameSuffix);
    201 
    202   /// Identify the needed extracted inputs in a section, and add to the overall
    203   /// function if needed.
    204   ///
    205   /// \param [in] M - The module to outline from.
    206   /// \param [in,out] Region - The region to be extracted.
    207   /// \param [in] NotSame - The global value numbers of the Values in the region
    208   /// that do not have the same Constant in each strucutrally similar region.
    209   void findAddInputsOutputs(Module &M, OutlinableRegion &Region,
    210                             DenseSet<unsigned> &NotSame);
    211 
    212   /// Find the number of instructions that will be removed by extracting the
    213   /// OutlinableRegions in \p CurrentGroup.
    214   ///
    215   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
    216   /// analyzed.
    217   /// \returns the number of outlined instructions across all regions.
    218   InstructionCost findBenefitFromAllRegions(OutlinableGroup &CurrentGroup);
    219 
    220   /// Find the number of instructions that will be added by reloading arguments.
    221   ///
    222   /// \param [in] CurrentGroup - The collection of OutlinableRegions to be
    223   /// analyzed.
    224   /// \returns the number of added reload instructions across all regions.
    225   InstructionCost findCostOutputReloads(OutlinableGroup &CurrentGroup);
    226 
    227   /// Find the cost and the benefit of \p CurrentGroup and save it back to
    228   /// \p CurrentGroup.
    229   ///
    230   /// \param [in] M - The module being analyzed
    231   /// \param [in,out] CurrentGroup - The overall outlined section
    232   void findCostBenefit(Module &M, OutlinableGroup &CurrentGroup);
    233 
    234   /// Update the output mapping based on the load instruction, and the outputs
    235   /// of the extracted function.
    236   ///
    237   /// \param Region - The region extracted
    238   /// \param Outputs - The outputs from the extracted function.
    239   /// \param LI - The load instruction used to update the mapping.
    240   void updateOutputMapping(OutlinableRegion &Region,
    241                            ArrayRef<Value *> Outputs, LoadInst *LI);
    242 
    243   /// Extract \p Region into its own function.
    244   ///
    245   /// \param [in] Region - The region to be extracted into its own function.
    246   /// \returns True if it was successfully outlined.
    247   bool extractSection(OutlinableRegion &Region);
    248 
    249   /// For the similarities found, and the extracted sections, create a single
    250   /// outlined function with appropriate output blocks as necessary.
    251   ///
    252   /// \param [in] M - The module to outline from
    253   /// \param [in] CurrentGroup - The set of extracted sections to consolidate.
    254   /// \param [in,out] FuncsToRemove - List of functions to remove from the
    255   /// module after outlining is completed.
    256   /// \param [in,out] OutlinedFunctionNum - the number of new outlined
    257   /// functions.
    258   void deduplicateExtractedSections(Module &M, OutlinableGroup &CurrentGroup,
    259                                     std::vector<Function *> &FuncsToRemove,
    260                                     unsigned &OutlinedFunctionNum);
    261 
    262   /// If true, enables us to outline from functions that have LinkOnceFromODR
    263   /// linkages.
    264   bool OutlineFromLinkODRs = false;
    265 
    266   /// If false, we do not worry if the cost is greater than the benefit.  This
    267   /// is for debugging and testing, so that we can test small cases to ensure
    268   /// that the outlining is being done correctly.
    269   bool CostModel = true;
    270 
    271   /// The set of outlined Instructions, identified by their location in the
    272   /// sequential ordering of instructions in a Module.
    273   DenseSet<unsigned> Outlined;
    274 
    275   /// TargetTransformInfo lambda for target specific information.
    276   function_ref<TargetTransformInfo &(Function &)> getTTI;
    277 
    278   /// A mapping from newly created reloaded output values to the original value.
    279   /// If an value is replace by an output from an outlined region, this maps
    280   /// that Value, back to its original Value.
    281   DenseMap<Value *, Value *> OutputMappings;
    282 
    283   /// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
    284   function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
    285 
    286   /// The optimization remark emitter for the pass.
    287   function_ref<OptimizationRemarkEmitter &(Function &)> getORE;
    288 
    289   /// The memory allocator used to allocate the CodeExtractors.
    290   SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
    291 
    292   /// The memory allocator used to allocate the OutlinableRegions.
    293   SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
    294 
    295   /// The memory allocator used to allocate new IRInstructionData.
    296   SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
    297 
    298   /// Custom InstVisitor to classify different instructions for whether it can
    299   /// be analyzed for similarity.  This is needed as there may be instruction we
    300   /// can identify as having similarity, but are more complicated to outline.
    301   struct InstructionAllowed : public InstVisitor<InstructionAllowed, bool> {
    302     InstructionAllowed() {}
    303 
    304     // TODO: Determine a scheme to resolve when the label is similar enough.
    305     bool visitBranchInst(BranchInst &BI) { return false; }
    306     // TODO: Determine a scheme to resolve when the labels are similar enough.
    307     bool visitPHINode(PHINode &PN) { return false; }
    308     // TODO: Handle allocas.
    309     bool visitAllocaInst(AllocaInst &AI) { return false; }
    310     // VAArg instructions are not allowed since this could cause difficulty when
    311     // differentiating between different sets of variable instructions in
    312     // the deduplicated outlined regions.
    313     bool visitVAArgInst(VAArgInst &VI) { return false; }
    314     // We exclude all exception handling cases since they are so context
    315     // dependent.
    316     bool visitLandingPadInst(LandingPadInst &LPI) { return false; }
    317     bool visitFuncletPadInst(FuncletPadInst &FPI) { return false; }
    318     // DebugInfo should be included in the regions, but should not be
    319     // analyzed for similarity as it has no bearing on the outcome of the
    320     // program.
    321     bool visitDbgInfoIntrinsic(DbgInfoIntrinsic &DII) { return true; }
    322     // TODO: Handle specific intrinsics individually from those that can be
    323     // handled.
    324     bool IntrinsicInst(IntrinsicInst &II) { return false; }
    325     // We only handle CallInsts that are not indirect, since we cannot guarantee
    326     // that they have a name in these cases.
    327     bool visitCallInst(CallInst &CI) {
    328       Function *F = CI.getCalledFunction();
    329       if (!F || CI.isIndirectCall() || !F->hasName())
    330         return false;
    331       return true;
    332     }
    333     // TODO: Handle FreezeInsts.  Since a frozen value could be frozen inside
    334     // the outlined region, and then returned as an output, this will have to be
    335     // handled differently.
    336     bool visitFreezeInst(FreezeInst &CI) { return false; }
    337     // TODO: We do not current handle similarity that changes the control flow.
    338     bool visitInvokeInst(InvokeInst &II) { return false; }
    339     // TODO: We do not current handle similarity that changes the control flow.
    340     bool visitCallBrInst(CallBrInst &CBI) { return false; }
    341     // TODO: Handle interblock similarity.
    342     bool visitTerminator(Instruction &I) { return false; }
    343     bool visitInstruction(Instruction &I) { return true; }
    344   };
    345 
    346   /// A InstVisitor used to exclude certain instructions from being outlined.
    347   InstructionAllowed InstructionClassifier;
    348 };
    349 
    350 /// Pass to outline similar regions.
    351 class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
    352 public:
    353   PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
    354 };
    355 
    356 } // end namespace llvm
    357 
    358 #endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H
    359