Home | History | Annotate | Line # | Download | only in Vectorize
      1 //===- LoopVectorizationPlanner.h - Planner for LoopVectorization ---------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 ///
      9 /// \file
     10 /// This file provides a LoopVectorizationPlanner class.
     11 /// InnerLoopVectorizer vectorizes loops which contain only one basic
     12 /// LoopVectorizationPlanner - drives the vectorization process after having
     13 /// passed Legality checks.
     14 /// The planner builds and optimizes the Vectorization Plans which record the
     15 /// decisions how to vectorize the given loop. In particular, represent the
     16 /// control-flow of the vectorized version, the replication of instructions that
     17 /// are to be scalarized, and interleave access groups.
     18 ///
     19 /// Also provides a VPlan-based builder utility analogous to IRBuilder.
     20 /// It provides an instruction-level API for generating VPInstructions while
     21 /// abstracting away the Recipe manipulation details.
     22 //===----------------------------------------------------------------------===//
     23 
     24 #ifndef LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
     25 #define LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
     26 
     27 #include "VPlan.h"
     28 
     29 namespace llvm {
     30 
     31 class LoopInfo;
     32 class LoopVectorizationLegality;
     33 class LoopVectorizationCostModel;
     34 class PredicatedScalarEvolution;
     35 class LoopVectorizationRequirements;
     36 class LoopVectorizeHints;
     37 class OptimizationRemarkEmitter;
     38 class TargetTransformInfo;
     39 class TargetLibraryInfo;
     40 class VPRecipeBuilder;
     41 
     42 /// VPlan-based builder utility analogous to IRBuilder.
     43 class VPBuilder {
     44   VPBasicBlock *BB = nullptr;
     45   VPBasicBlock::iterator InsertPt = VPBasicBlock::iterator();
     46 
     47   VPInstruction *createInstruction(unsigned Opcode,
     48                                    ArrayRef<VPValue *> Operands) {
     49     VPInstruction *Instr = new VPInstruction(Opcode, Operands);
     50     if (BB)
     51       BB->insert(Instr, InsertPt);
     52     return Instr;
     53   }
     54 
     55   VPInstruction *createInstruction(unsigned Opcode,
     56                                    std::initializer_list<VPValue *> Operands) {
     57     return createInstruction(Opcode, ArrayRef<VPValue *>(Operands));
     58   }
     59 
     60 public:
     61   VPBuilder() {}
     62 
     63   /// Clear the insertion point: created instructions will not be inserted into
     64   /// a block.
     65   void clearInsertionPoint() {
     66     BB = nullptr;
     67     InsertPt = VPBasicBlock::iterator();
     68   }
     69 
     70   VPBasicBlock *getInsertBlock() const { return BB; }
     71   VPBasicBlock::iterator getInsertPoint() const { return InsertPt; }
     72 
     73   /// InsertPoint - A saved insertion point.
     74   class VPInsertPoint {
     75     VPBasicBlock *Block = nullptr;
     76     VPBasicBlock::iterator Point;
     77 
     78   public:
     79     /// Creates a new insertion point which doesn't point to anything.
     80     VPInsertPoint() = default;
     81 
     82     /// Creates a new insertion point at the given location.
     83     VPInsertPoint(VPBasicBlock *InsertBlock, VPBasicBlock::iterator InsertPoint)
     84         : Block(InsertBlock), Point(InsertPoint) {}
     85 
     86     /// Returns true if this insert point is set.
     87     bool isSet() const { return Block != nullptr; }
     88 
     89     VPBasicBlock *getBlock() const { return Block; }
     90     VPBasicBlock::iterator getPoint() const { return Point; }
     91   };
     92 
     93   /// Sets the current insert point to a previously-saved location.
     94   void restoreIP(VPInsertPoint IP) {
     95     if (IP.isSet())
     96       setInsertPoint(IP.getBlock(), IP.getPoint());
     97     else
     98       clearInsertionPoint();
     99   }
    100 
    101   /// This specifies that created VPInstructions should be appended to the end
    102   /// of the specified block.
    103   void setInsertPoint(VPBasicBlock *TheBB) {
    104     assert(TheBB && "Attempting to set a null insert point");
    105     BB = TheBB;
    106     InsertPt = BB->end();
    107   }
    108 
    109   /// This specifies that created instructions should be inserted at the
    110   /// specified point.
    111   void setInsertPoint(VPBasicBlock *TheBB, VPBasicBlock::iterator IP) {
    112     BB = TheBB;
    113     InsertPt = IP;
    114   }
    115 
    116   /// Insert and return the specified instruction.
    117   VPInstruction *insert(VPInstruction *I) const {
    118     BB->insert(I, InsertPt);
    119     return I;
    120   }
    121 
    122   /// Create an N-ary operation with \p Opcode, \p Operands and set \p Inst as
    123   /// its underlying Instruction.
    124   VPValue *createNaryOp(unsigned Opcode, ArrayRef<VPValue *> Operands,
    125                         Instruction *Inst = nullptr) {
    126     VPInstruction *NewVPInst = createInstruction(Opcode, Operands);
    127     NewVPInst->setUnderlyingValue(Inst);
    128     return NewVPInst;
    129   }
    130   VPValue *createNaryOp(unsigned Opcode,
    131                         std::initializer_list<VPValue *> Operands,
    132                         Instruction *Inst = nullptr) {
    133     return createNaryOp(Opcode, ArrayRef<VPValue *>(Operands), Inst);
    134   }
    135 
    136   VPValue *createNot(VPValue *Operand) {
    137     return createInstruction(VPInstruction::Not, {Operand});
    138   }
    139 
    140   VPValue *createAnd(VPValue *LHS, VPValue *RHS) {
    141     return createInstruction(Instruction::BinaryOps::And, {LHS, RHS});
    142   }
    143 
    144   VPValue *createOr(VPValue *LHS, VPValue *RHS) {
    145     return createInstruction(Instruction::BinaryOps::Or, {LHS, RHS});
    146   }
    147 
    148   VPValue *createSelect(VPValue *Cond, VPValue *TrueVal, VPValue *FalseVal) {
    149     return createNaryOp(Instruction::Select, {Cond, TrueVal, FalseVal});
    150   }
    151 
    152   //===--------------------------------------------------------------------===//
    153   // RAII helpers.
    154   //===--------------------------------------------------------------------===//
    155 
    156   /// RAII object that stores the current insertion point and restores it when
    157   /// the object is destroyed.
    158   class InsertPointGuard {
    159     VPBuilder &Builder;
    160     VPBasicBlock *Block;
    161     VPBasicBlock::iterator Point;
    162 
    163   public:
    164     InsertPointGuard(VPBuilder &B)
    165         : Builder(B), Block(B.getInsertBlock()), Point(B.getInsertPoint()) {}
    166 
    167     InsertPointGuard(const InsertPointGuard &) = delete;
    168     InsertPointGuard &operator=(const InsertPointGuard &) = delete;
    169 
    170     ~InsertPointGuard() { Builder.restoreIP(VPInsertPoint(Block, Point)); }
    171   };
    172 };
    173 
    174 /// TODO: The following VectorizationFactor was pulled out of
    175 /// LoopVectorizationCostModel class. LV also deals with
    176 /// VectorizerParams::VectorizationFactor and VectorizationCostTy.
    177 /// We need to streamline them.
    178 
    179 /// Information about vectorization costs
    180 struct VectorizationFactor {
    181   // Vector width with best cost
    182   ElementCount Width;
    183   // Cost of the loop with that width
    184   InstructionCost Cost;
    185 
    186   VectorizationFactor(ElementCount Width, InstructionCost Cost)
    187       : Width(Width), Cost(Cost) {}
    188 
    189   // Width 1 means no vectorization, cost 0 means uncomputed cost.
    190   static VectorizationFactor Disabled() {
    191     return {ElementCount::getFixed(1), 0};
    192   }
    193 
    194   bool operator==(const VectorizationFactor &rhs) const {
    195     return Width == rhs.Width && Cost == rhs.Cost;
    196   }
    197 
    198   bool operator!=(const VectorizationFactor &rhs) const {
    199     return !(*this == rhs);
    200   }
    201 };
    202 
    203 /// A class that represents two vectorization factors (initialized with 0 by
    204 /// default). One for fixed-width vectorization and one for scalable
    205 /// vectorization. This can be used by the vectorizer to choose from a range of
    206 /// fixed and/or scalable VFs in order to find the most cost-effective VF to
    207 /// vectorize with.
    208 struct FixedScalableVFPair {
    209   ElementCount FixedVF;
    210   ElementCount ScalableVF;
    211 
    212   FixedScalableVFPair()
    213       : FixedVF(ElementCount::getFixed(0)),
    214         ScalableVF(ElementCount::getScalable(0)) {}
    215   FixedScalableVFPair(const ElementCount &Max) : FixedScalableVFPair() {
    216     *(Max.isScalable() ? &ScalableVF : &FixedVF) = Max;
    217   }
    218   FixedScalableVFPair(const ElementCount &FixedVF,
    219                       const ElementCount &ScalableVF)
    220       : FixedVF(FixedVF), ScalableVF(ScalableVF) {
    221     assert(!FixedVF.isScalable() && ScalableVF.isScalable() &&
    222            "Invalid scalable properties");
    223   }
    224 
    225   static FixedScalableVFPair getNone() { return FixedScalableVFPair(); }
    226 
    227   /// \return true if either fixed- or scalable VF is non-zero.
    228   explicit operator bool() const { return FixedVF || ScalableVF; }
    229 
    230   /// \return true if either fixed- or scalable VF is a valid vector VF.
    231   bool hasVector() const { return FixedVF.isVector() || ScalableVF.isVector(); }
    232 };
    233 
    234 /// Planner drives the vectorization process after having passed
    235 /// Legality checks.
    236 class LoopVectorizationPlanner {
    237   /// The loop that we evaluate.
    238   Loop *OrigLoop;
    239 
    240   /// Loop Info analysis.
    241   LoopInfo *LI;
    242 
    243   /// Target Library Info.
    244   const TargetLibraryInfo *TLI;
    245 
    246   /// Target Transform Info.
    247   const TargetTransformInfo *TTI;
    248 
    249   /// The legality analysis.
    250   LoopVectorizationLegality *Legal;
    251 
    252   /// The profitability analysis.
    253   LoopVectorizationCostModel &CM;
    254 
    255   /// The interleaved access analysis.
    256   InterleavedAccessInfo &IAI;
    257 
    258   PredicatedScalarEvolution &PSE;
    259 
    260   const LoopVectorizeHints &Hints;
    261 
    262   LoopVectorizationRequirements &Requirements;
    263 
    264   OptimizationRemarkEmitter *ORE;
    265 
    266   SmallVector<VPlanPtr, 4> VPlans;
    267 
    268   /// A builder used to construct the current plan.
    269   VPBuilder Builder;
    270 
    271   /// The best number of elements of the vector types used in the
    272   /// transformed loop. BestVF = None means that vectorization is
    273   /// disabled.
    274   Optional<ElementCount> BestVF = None;
    275   unsigned BestUF = 0;
    276 
    277 public:
    278   LoopVectorizationPlanner(Loop *L, LoopInfo *LI, const TargetLibraryInfo *TLI,
    279                            const TargetTransformInfo *TTI,
    280                            LoopVectorizationLegality *Legal,
    281                            LoopVectorizationCostModel &CM,
    282                            InterleavedAccessInfo &IAI,
    283                            PredicatedScalarEvolution &PSE,
    284                            const LoopVectorizeHints &Hints,
    285                            LoopVectorizationRequirements &Requirements,
    286                            OptimizationRemarkEmitter *ORE)
    287       : OrigLoop(L), LI(LI), TLI(TLI), TTI(TTI), Legal(Legal), CM(CM), IAI(IAI),
    288         PSE(PSE), Hints(Hints), Requirements(Requirements), ORE(ORE) {}
    289 
    290   /// Plan how to best vectorize, return the best VF and its cost, or None if
    291   /// vectorization and interleaving should be avoided up front.
    292   Optional<VectorizationFactor> plan(ElementCount UserVF, unsigned UserIC);
    293 
    294   /// Use the VPlan-native path to plan how to best vectorize, return the best
    295   /// VF and its cost.
    296   VectorizationFactor planInVPlanNativePath(ElementCount UserVF);
    297 
    298   /// Finalize the best decision and dispose of all other VPlans.
    299   void setBestPlan(ElementCount VF, unsigned UF);
    300 
    301   /// Generate the IR code for the body of the vectorized loop according to the
    302   /// best selected VPlan.
    303   void executePlan(InnerLoopVectorizer &LB, DominatorTree *DT);
    304 
    305 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    306   void printPlans(raw_ostream &O);
    307 #endif
    308 
    309   /// Look through the existing plans and return true if we have one with all
    310   /// the vectorization factors in question.
    311   bool hasPlanWithVFs(const ArrayRef<ElementCount> VFs) const {
    312     return any_of(VPlans, [&](const VPlanPtr &Plan) {
    313       return all_of(VFs, [&](const ElementCount &VF) {
    314         return Plan->hasVF(VF);
    315       });
    316     });
    317   }
    318 
    319   /// Test a \p Predicate on a \p Range of VF's. Return the value of applying
    320   /// \p Predicate on Range.Start, possibly decreasing Range.End such that the
    321   /// returned value holds for the entire \p Range.
    322   static bool
    323   getDecisionAndClampRange(const std::function<bool(ElementCount)> &Predicate,
    324                            VFRange &Range);
    325 
    326 protected:
    327   /// Collect the instructions from the original loop that would be trivially
    328   /// dead in the vectorized loop if generated.
    329   void collectTriviallyDeadInstructions(
    330       SmallPtrSetImpl<Instruction *> &DeadInstructions);
    331 
    332   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
    333   /// according to the information gathered by Legal when it checked if it is
    334   /// legal to vectorize the loop.
    335   void buildVPlans(ElementCount MinVF, ElementCount MaxVF);
    336 
    337 private:
    338   /// Build a VPlan according to the information gathered by Legal. \return a
    339   /// VPlan for vectorization factors \p Range.Start and up to \p Range.End
    340   /// exclusive, possibly decreasing \p Range.End.
    341   VPlanPtr buildVPlan(VFRange &Range);
    342 
    343   /// Build a VPlan using VPRecipes according to the information gather by
    344   /// Legal. This method is only used for the legacy inner loop vectorizer.
    345   VPlanPtr buildVPlanWithVPRecipes(
    346       VFRange &Range, SmallPtrSetImpl<Instruction *> &DeadInstructions,
    347       const DenseMap<Instruction *, Instruction *> &SinkAfter);
    348 
    349   /// Build VPlans for power-of-2 VF's between \p MinVF and \p MaxVF inclusive,
    350   /// according to the information gathered by Legal when it checked if it is
    351   /// legal to vectorize the loop. This method creates VPlans using VPRecipes.
    352   void buildVPlansWithVPRecipes(ElementCount MinVF, ElementCount MaxVF);
    353 
    354   /// Adjust the recipes for any inloop reductions. The chain of instructions
    355   /// leading from the loop exit instr to the phi need to be converted to
    356   /// reductions, with one operand being vector and the other being the scalar
    357   /// reduction chain.
    358   void adjustRecipesForInLoopReductions(VPlanPtr &Plan,
    359                                         VPRecipeBuilder &RecipeBuilder);
    360 };
    361 
    362 } // namespace llvm
    363 
    364 #endif // LLVM_TRANSFORMS_VECTORIZE_LOOPVECTORIZATIONPLANNER_H
    365