Home | History | Annotate | Line # | Download | only in Vectorize
      1 //===- LoopVectorizationLegality.cpp --------------------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file provides loop vectorization legality analysis. Original code
     10 // resided in LoopVectorize.cpp for a long time.
     11 //
     12 // At this point, it is implemented as a utility class, not as an analysis
     13 // pass. It should be easy to create an analysis pass around it if there
     14 // is a need (but D45420 needs to happen first).
     15 //
     16 
     17 #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
     18 #include "llvm/Analysis/Loads.h"
     19 #include "llvm/Analysis/LoopInfo.h"
     20 #include "llvm/Analysis/TargetLibraryInfo.h"
     21 #include "llvm/Analysis/ValueTracking.h"
     22 #include "llvm/Analysis/VectorUtils.h"
     23 #include "llvm/IR/IntrinsicInst.h"
     24 #include "llvm/IR/PatternMatch.h"
     25 #include "llvm/Transforms/Utils/SizeOpts.h"
     26 #include "llvm/Transforms/Vectorize/LoopVectorize.h"
     27 
     28 using namespace llvm;
     29 using namespace PatternMatch;
     30 
     31 #define LV_NAME "loop-vectorize"
     32 #define DEBUG_TYPE LV_NAME
     33 
     34 extern cl::opt<bool> EnableVPlanPredication;
     35 
     36 static cl::opt<bool>
     37     EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
     38                        cl::desc("Enable if-conversion during vectorization."));
     39 
     40 // TODO: Move size-based thresholds out of legality checking, make cost based
     41 // decisions instead of hard thresholds.
     42 static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
     43     "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
     44     cl::desc("The maximum number of SCEV checks allowed."));
     45 
     46 static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
     47     "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
     48     cl::desc("The maximum number of SCEV checks allowed with a "
     49              "vectorize(enable) pragma"));
     50 
     51 // FIXME: When scalable vectorization is stable enough, change the default
     52 // to SK_PreferFixedWidth.
     53 static cl::opt<LoopVectorizeHints::ScalableForceKind> ScalableVectorization(
     54     "scalable-vectorization", cl::init(LoopVectorizeHints::SK_FixedWidthOnly),
     55     cl::Hidden,
     56     cl::desc("Control whether the compiler can use scalable vectors to "
     57              "vectorize a loop"),
     58     cl::values(
     59         clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
     60                    "Scalable vectorization is disabled."),
     61         clEnumValN(LoopVectorizeHints::SK_PreferFixedWidth, "on",
     62                    "Scalable vectorization is available, but favor fixed-width "
     63                    "vectorization when the cost is inconclusive."),
     64         clEnumValN(LoopVectorizeHints::SK_PreferScalable, "preferred",
     65                    "Scalable vectorization is available and favored when the "
     66                    "cost is inconclusive.")));
     67 
     68 /// Maximum vectorization interleave count.
     69 static const unsigned MaxInterleaveFactor = 16;
     70 
     71 namespace llvm {
     72 
     73 bool LoopVectorizeHints::Hint::validate(unsigned Val) {
     74   switch (Kind) {
     75   case HK_WIDTH:
     76     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
     77   case HK_INTERLEAVE:
     78     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
     79   case HK_FORCE:
     80     return (Val <= 1);
     81   case HK_ISVECTORIZED:
     82   case HK_PREDICATE:
     83   case HK_SCALABLE:
     84     return (Val == 0 || Val == 1);
     85   }
     86   return false;
     87 }
     88 
     89 LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
     90                                        bool InterleaveOnlyWhenForced,
     91                                        OptimizationRemarkEmitter &ORE)
     92     : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
     93       Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
     94       Force("vectorize.enable", FK_Undefined, HK_FORCE),
     95       IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
     96       Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
     97       Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
     98       TheLoop(L), ORE(ORE) {
     99   // Populate values with existing loop metadata.
    100   getHintsFromMetadata();
    101 
    102   // force-vector-interleave overrides DisableInterleaving.
    103   if (VectorizerParams::isInterleaveForced())
    104     Interleave.Value = VectorizerParams::VectorizationInterleave;
    105 
    106   if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    107     // If the width is set, but the metadata says nothing about the scalable
    108     // property, then assume it concerns only a fixed-width UserVF.
    109     // If width is not set, the flag takes precedence.
    110     Scalable.Value = Width.Value ? SK_FixedWidthOnly : ScalableVectorization;
    111   else if (ScalableVectorization == SK_FixedWidthOnly)
    112     // If the flag is set to disable any use of scalable vectors, override the
    113     // loop hint.
    114     Scalable.Value = SK_FixedWidthOnly;
    115 
    116   if (IsVectorized.Value != 1)
    117     // If the vectorization width and interleaving count are both 1 then
    118     // consider the loop to have been already vectorized because there's
    119     // nothing more that we can do.
    120     IsVectorized.Value =
    121         getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
    122   LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
    123              << "LV: Interleaving disabled by the pass manager\n");
    124 }
    125 
    126 void LoopVectorizeHints::setAlreadyVectorized() {
    127   LLVMContext &Context = TheLoop->getHeader()->getContext();
    128 
    129   MDNode *IsVectorizedMD = MDNode::get(
    130       Context,
    131       {MDString::get(Context, "llvm.loop.isvectorized"),
    132        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
    133   MDNode *LoopID = TheLoop->getLoopID();
    134   MDNode *NewLoopID =
    135       makePostTransformationMetadata(Context, LoopID,
    136                                      {Twine(Prefix(), "vectorize.").str(),
    137                                       Twine(Prefix(), "interleave.").str()},
    138                                      {IsVectorizedMD});
    139   TheLoop->setLoopID(NewLoopID);
    140 
    141   // Update internal cache.
    142   IsVectorized.Value = 1;
    143 }
    144 
    145 bool LoopVectorizeHints::allowVectorization(
    146     Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
    147   if (getForce() == LoopVectorizeHints::FK_Disabled) {
    148     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    149     emitRemarkWithHints();
    150     return false;
    151   }
    152 
    153   if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    154     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    155     emitRemarkWithHints();
    156     return false;
    157   }
    158 
    159   if (getIsVectorized() == 1) {
    160     LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    161     // FIXME: Add interleave.disable metadata. This will allow
    162     // vectorize.disable to be used without disabling the pass and errors
    163     // to differentiate between disabled vectorization and a width of 1.
    164     ORE.emit([&]() {
    165       return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
    166                                         "AllDisabled", L->getStartLoc(),
    167                                         L->getHeader())
    168              << "loop not vectorized: vectorization and interleaving are "
    169                 "explicitly disabled, or the loop has already been "
    170                 "vectorized";
    171     });
    172     return false;
    173   }
    174 
    175   return true;
    176 }
    177 
    178 void LoopVectorizeHints::emitRemarkWithHints() const {
    179   using namespace ore;
    180 
    181   ORE.emit([&]() {
    182     if (Force.Value == LoopVectorizeHints::FK_Disabled)
    183       return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
    184                                       TheLoop->getStartLoc(),
    185                                       TheLoop->getHeader())
    186              << "loop not vectorized: vectorization is explicitly disabled";
    187     else {
    188       OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
    189                                  TheLoop->getStartLoc(), TheLoop->getHeader());
    190       R << "loop not vectorized";
    191       if (Force.Value == LoopVectorizeHints::FK_Enabled) {
    192         R << " (Force=" << NV("Force", true);
    193         if (Width.Value != 0)
    194           R << ", Vector Width=" << NV("VectorWidth", getWidth());
    195         if (getInterleave() != 0)
    196           R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
    197         R << ")";
    198       }
    199       return R;
    200     }
    201   });
    202 }
    203 
    204 const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
    205   if (getWidth() == ElementCount::getFixed(1))
    206     return LV_NAME;
    207   if (getForce() == LoopVectorizeHints::FK_Disabled)
    208     return LV_NAME;
    209   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    210     return LV_NAME;
    211   return OptimizationRemarkAnalysis::AlwaysPrint;
    212 }
    213 
    214 void LoopVectorizeHints::getHintsFromMetadata() {
    215   MDNode *LoopID = TheLoop->getLoopID();
    216   if (!LoopID)
    217     return;
    218 
    219   // First operand should refer to the loop id itself.
    220   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
    221   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
    222 
    223   for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) {
    224     const MDString *S = nullptr;
    225     SmallVector<Metadata *, 4> Args;
    226 
    227     // The expected hint is either a MDString or a MDNode with the first
    228     // operand a MDString.
    229     if (const MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i))) {
    230       if (!MD || MD->getNumOperands() == 0)
    231         continue;
    232       S = dyn_cast<MDString>(MD->getOperand(0));
    233       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
    234         Args.push_back(MD->getOperand(i));
    235     } else {
    236       S = dyn_cast<MDString>(LoopID->getOperand(i));
    237       assert(Args.size() == 0 && "too many arguments for MDString");
    238     }
    239 
    240     if (!S)
    241       continue;
    242 
    243     // Check if the hint starts with the loop metadata prefix.
    244     StringRef Name = S->getString();
    245     if (Args.size() == 1)
    246       setHint(Name, Args[0]);
    247   }
    248 }
    249 
    250 void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
    251   if (!Name.startswith(Prefix()))
    252     return;
    253   Name = Name.substr(Prefix().size(), StringRef::npos);
    254 
    255   const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
    256   if (!C)
    257     return;
    258   unsigned Val = C->getZExtValue();
    259 
    260   Hint *Hints[] = {&Width,        &Interleave, &Force,
    261                    &IsVectorized, &Predicate,  &Scalable};
    262   for (auto H : Hints) {
    263     if (Name == H->Name) {
    264       if (H->validate(Val))
    265         H->Value = Val;
    266       else
    267         LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
    268       break;
    269     }
    270   }
    271 }
    272 
    273 // Return true if the inner loop \p Lp is uniform with regard to the outer loop
    274 // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
    275 // executing the inner loop will execute the same iterations). This check is
    276 // very constrained for now but it will be relaxed in the future. \p Lp is
    277 // considered uniform if it meets all the following conditions:
    278 //   1) it has a canonical IV (starting from 0 and with stride 1),
    279 //   2) its latch terminator is a conditional branch and,
    280 //   3) its latch condition is a compare instruction whose operands are the
    281 //      canonical IV and an OuterLp invariant.
    282 // This check doesn't take into account the uniformity of other conditions not
    283 // related to the loop latch because they don't affect the loop uniformity.
    284 //
    285 // NOTE: We decided to keep all these checks and its associated documentation
    286 // together so that we can easily have a picture of the current supported loop
    287 // nests. However, some of the current checks don't depend on \p OuterLp and
    288 // would be redundantly executed for each \p Lp if we invoked this function for
    289 // different candidate outer loops. This is not the case for now because we
    290 // don't currently have the infrastructure to evaluate multiple candidate outer
    291 // loops and \p OuterLp will be a fixed parameter while we only support explicit
    292 // outer loop vectorization. It's also very likely that these checks go away
    293 // before introducing the aforementioned infrastructure. However, if this is not
    294 // the case, we should move the \p OuterLp independent checks to a separate
    295 // function that is only executed once for each \p Lp.
    296 static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
    297   assert(Lp->getLoopLatch() && "Expected loop with a single latch.");
    298 
    299   // If Lp is the outer loop, it's uniform by definition.
    300   if (Lp == OuterLp)
    301     return true;
    302   assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");
    303 
    304   // 1.
    305   PHINode *IV = Lp->getCanonicalInductionVariable();
    306   if (!IV) {
    307     LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    308     return false;
    309   }
    310 
    311   // 2.
    312   BasicBlock *Latch = Lp->getLoopLatch();
    313   auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
    314   if (!LatchBr || LatchBr->isUnconditional()) {
    315     LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    316     return false;
    317   }
    318 
    319   // 3.
    320   auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
    321   if (!LatchCmp) {
    322     LLVM_DEBUG(
    323         dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    324     return false;
    325   }
    326 
    327   Value *CondOp0 = LatchCmp->getOperand(0);
    328   Value *CondOp1 = LatchCmp->getOperand(1);
    329   Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
    330   if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
    331       !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    332     LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    333     return false;
    334   }
    335 
    336   return true;
    337 }
    338 
    339 // Return true if \p Lp and all its nested loops are uniform with regard to \p
    340 // OuterLp.
    341 static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
    342   if (!isUniformLoop(Lp, OuterLp))
    343     return false;
    344 
    345   // Check if nested loops are uniform.
    346   for (Loop *SubLp : *Lp)
    347     if (!isUniformLoopNest(SubLp, OuterLp))
    348       return false;
    349 
    350   return true;
    351 }
    352 
    353 /// Check whether it is safe to if-convert this phi node.
    354 ///
    355 /// Phi nodes with constant expressions that can trap are not safe to if
    356 /// convert.
    357 static bool canIfConvertPHINodes(BasicBlock *BB) {
    358   for (PHINode &Phi : BB->phis()) {
    359     for (Value *V : Phi.incoming_values())
    360       if (auto *C = dyn_cast<Constant>(V))
    361         if (C->canTrap())
    362           return false;
    363   }
    364   return true;
    365 }
    366 
    367 static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
    368   if (Ty->isPointerTy())
    369     return DL.getIntPtrType(Ty);
    370 
    371   // It is possible that char's or short's overflow when we ask for the loop's
    372   // trip count, work around this by changing the type size.
    373   if (Ty->getScalarSizeInBits() < 32)
    374     return Type::getInt32Ty(Ty->getContext());
    375 
    376   return Ty;
    377 }
    378 
    379 static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
    380   Ty0 = convertPointerToIntegerType(DL, Ty0);
    381   Ty1 = convertPointerToIntegerType(DL, Ty1);
    382   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
    383     return Ty0;
    384   return Ty1;
    385 }
    386 
    387 /// Check that the instruction has outside loop users and is not an
    388 /// identified reduction variable.
    389 static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
    390                                SmallPtrSetImpl<Value *> &AllowedExit) {
    391   // Reductions, Inductions and non-header phis are allowed to have exit users. All
    392   // other instructions must not have external users.
    393   if (!AllowedExit.count(Inst))
    394     // Check that all of the users of the loop are inside the BB.
    395     for (User *U : Inst->users()) {
    396       Instruction *UI = cast<Instruction>(U);
    397       // This user may be a reduction exit value.
    398       if (!TheLoop->contains(UI)) {
    399         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
    400         return true;
    401       }
    402     }
    403   return false;
    404 }
    405 
    406 int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) const {
    407   const ValueToValueMap &Strides =
    408       getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap();
    409 
    410   Function *F = TheLoop->getHeader()->getParent();
    411   bool OptForSize = F->hasOptSize() ||
    412                     llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
    413                                                 PGSOQueryType::IRPass);
    414   bool CanAddPredicate = !OptForSize;
    415   int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false);
    416   if (Stride == 1 || Stride == -1)
    417     return Stride;
    418   return 0;
    419 }
    420 
    421 bool LoopVectorizationLegality::isUniform(Value *V) {
    422   return LAI->isUniform(V);
    423 }
    424 
    425 bool LoopVectorizationLegality::canVectorizeOuterLoop() {
    426   assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
    427   // Store the result and return it at the end instead of exiting early, in case
    428   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
    429   bool Result = true;
    430   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
    431 
    432   for (BasicBlock *BB : TheLoop->blocks()) {
    433     // Check whether the BB terminator is a BranchInst. Any other terminator is
    434     // not supported yet.
    435     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    436     if (!Br) {
    437       reportVectorizationFailure("Unsupported basic block terminator",
    438           "loop control flow is not understood by vectorizer",
    439           "CFGNotUnderstood", ORE, TheLoop);
    440       if (DoExtraAnalysis)
    441         Result = false;
    442       else
    443         return false;
    444     }
    445 
    446     // Check whether the BranchInst is a supported one. Only unconditional
    447     // branches, conditional branches with an outer loop invariant condition or
    448     // backedges are supported.
    449     // FIXME: We skip these checks when VPlan predication is enabled as we
    450     // want to allow divergent branches. This whole check will be removed
    451     // once VPlan predication is on by default.
    452     if (!EnableVPlanPredication && Br && Br->isConditional() &&
    453         !TheLoop->isLoopInvariant(Br->getCondition()) &&
    454         !LI->isLoopHeader(Br->getSuccessor(0)) &&
    455         !LI->isLoopHeader(Br->getSuccessor(1))) {
    456       reportVectorizationFailure("Unsupported conditional branch",
    457           "loop control flow is not understood by vectorizer",
    458           "CFGNotUnderstood", ORE, TheLoop);
    459       if (DoExtraAnalysis)
    460         Result = false;
    461       else
    462         return false;
    463     }
    464   }
    465 
    466   // Check whether inner loops are uniform. At this point, we only support
    467   // simple outer loops scenarios with uniform nested loops.
    468   if (!isUniformLoopNest(TheLoop /*loop nest*/,
    469                          TheLoop /*context outer loop*/)) {
    470     reportVectorizationFailure("Outer loop contains divergent loops",
    471         "loop control flow is not understood by vectorizer",
    472         "CFGNotUnderstood", ORE, TheLoop);
    473     if (DoExtraAnalysis)
    474       Result = false;
    475     else
    476       return false;
    477   }
    478 
    479   // Check whether we are able to set up outer loop induction.
    480   if (!setupOuterLoopInductions()) {
    481     reportVectorizationFailure("Unsupported outer loop Phi(s)",
    482                                "Unsupported outer loop Phi(s)",
    483                                "UnsupportedPhi", ORE, TheLoop);
    484     if (DoExtraAnalysis)
    485       Result = false;
    486     else
    487       return false;
    488   }
    489 
    490   return Result;
    491 }
    492 
    493 void LoopVectorizationLegality::addInductionPhi(
    494     PHINode *Phi, const InductionDescriptor &ID,
    495     SmallPtrSetImpl<Value *> &AllowedExit) {
    496   Inductions[Phi] = ID;
    497 
    498   // In case this induction also comes with casts that we know we can ignore
    499   // in the vectorized loop body, record them here. All casts could be recorded
    500   // here for ignoring, but suffices to record only the first (as it is the
    501   // only one that may bw used outside the cast sequence).
    502   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
    503   if (!Casts.empty())
    504     InductionCastsToIgnore.insert(*Casts.begin());
    505 
    506   Type *PhiTy = Phi->getType();
    507   const DataLayout &DL = Phi->getModule()->getDataLayout();
    508 
    509   // Get the widest type.
    510   if (!PhiTy->isFloatingPointTy()) {
    511     if (!WidestIndTy)
    512       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
    513     else
    514       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
    515   }
    516 
    517   // Int inductions are special because we only allow one IV.
    518   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
    519       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
    520       isa<Constant>(ID.getStartValue()) &&
    521       cast<Constant>(ID.getStartValue())->isNullValue()) {
    522 
    523     // Use the phi node with the widest type as induction. Use the last
    524     // one if there are multiple (no good reason for doing this other
    525     // than it is expedient). We've checked that it begins at zero and
    526     // steps by one, so this is a canonical induction variable.
    527     if (!PrimaryInduction || PhiTy == WidestIndTy)
    528       PrimaryInduction = Phi;
    529   }
    530 
    531   // Both the PHI node itself, and the "post-increment" value feeding
    532   // back into the PHI node may have external users.
    533   // We can allow those uses, except if the SCEVs we have for them rely
    534   // on predicates that only hold within the loop, since allowing the exit
    535   // currently means re-using this SCEV outside the loop (see PR33706 for more
    536   // details).
    537   if (PSE.getUnionPredicate().isAlwaysTrue()) {
    538     AllowedExit.insert(Phi);
    539     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
    540   }
    541 
    542   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
    543 }
    544 
    545 bool LoopVectorizationLegality::setupOuterLoopInductions() {
    546   BasicBlock *Header = TheLoop->getHeader();
    547 
    548   // Returns true if a given Phi is a supported induction.
    549   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
    550     InductionDescriptor ID;
    551     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
    552         ID.getKind() == InductionDescriptor::IK_IntInduction) {
    553       addInductionPhi(&Phi, ID, AllowedExit);
    554       return true;
    555     } else {
    556       // Bail out for any Phi in the outer loop header that is not a supported
    557       // induction.
    558       LLVM_DEBUG(
    559           dbgs()
    560           << "LV: Found unsupported PHI for outer loop vectorization.\n");
    561       return false;
    562     }
    563   };
    564 
    565   if (llvm::all_of(Header->phis(), isSupportedPhi))
    566     return true;
    567   else
    568     return false;
    569 }
    570 
    571 /// Checks if a function is scalarizable according to the TLI, in
    572 /// the sense that it should be vectorized and then expanded in
    573 /// multiple scalarcalls. This is represented in the
    574 /// TLI via mappings that do not specify a vector name, as in the
    575 /// following example:
    576 ///
    577 ///    const VecDesc VecIntrinsics[] = {
    578 ///      {"llvm.phx.abs.i32", "", 4}
    579 ///    };
    580 static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
    581   const StringRef ScalarName = CI.getCalledFunction()->getName();
    582   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
    583   // Check that all known VFs are not associated to a vector
    584   // function, i.e. the vector name is emty.
    585   if (Scalarize) {
    586     ElementCount WidestFixedVF, WidestScalableVF;
    587     TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    588     for (ElementCount VF = ElementCount::getFixed(2);
    589          ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
    590       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    591     for (ElementCount VF = ElementCount::getScalable(1);
    592          ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
    593       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    594     assert((WidestScalableVF.isZero() || !Scalarize) &&
    595            "Caller may decide to scalarize a variant using a scalable VF");
    596   }
    597   return Scalarize;
    598 }
    599 
    600 bool LoopVectorizationLegality::canVectorizeInstrs() {
    601   BasicBlock *Header = TheLoop->getHeader();
    602 
    603   // For each block in the loop.
    604   for (BasicBlock *BB : TheLoop->blocks()) {
    605     // Scan the instructions in the block and look for hazards.
    606     for (Instruction &I : *BB) {
    607       if (auto *Phi = dyn_cast<PHINode>(&I)) {
    608         Type *PhiTy = Phi->getType();
    609         // Check that this PHI type is allowed.
    610         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
    611             !PhiTy->isPointerTy()) {
    612           reportVectorizationFailure("Found a non-int non-pointer PHI",
    613                                      "loop control flow is not understood by vectorizer",
    614                                      "CFGNotUnderstood", ORE, TheLoop);
    615           return false;
    616         }
    617 
    618         // If this PHINode is not in the header block, then we know that we
    619         // can convert it to select during if-conversion. No need to check if
    620         // the PHIs in this block are induction or reduction variables.
    621         if (BB != Header) {
    622           // Non-header phi nodes that have outside uses can be vectorized. Add
    623           // them to the list of allowed exits.
    624           // Unsafe cyclic dependencies with header phis are identified during
    625           // legalization for reduction, induction and first order
    626           // recurrences.
    627           AllowedExit.insert(&I);
    628           continue;
    629         }
    630 
    631         // We only allow if-converted PHIs with exactly two incoming values.
    632         if (Phi->getNumIncomingValues() != 2) {
    633           reportVectorizationFailure("Found an invalid PHI",
    634               "loop control flow is not understood by vectorizer",
    635               "CFGNotUnderstood", ORE, TheLoop, Phi);
    636           return false;
    637         }
    638 
    639         RecurrenceDescriptor RedDes;
    640         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
    641                                                  DT)) {
    642           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
    643           AllowedExit.insert(RedDes.getLoopExitInstr());
    644           Reductions[Phi] = RedDes;
    645           continue;
    646         }
    647 
    648         // TODO: Instead of recording the AllowedExit, it would be good to record the
    649         // complementary set: NotAllowedExit. These include (but may not be
    650         // limited to):
    651         // 1. Reduction phis as they represent the one-before-last value, which
    652         // is not available when vectorized
    653         // 2. Induction phis and increment when SCEV predicates cannot be used
    654         // outside the loop - see addInductionPhi
    655         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
    656         // outside the loop - see call to hasOutsideLoopUser in the non-phi
    657         // handling below
    658         // 4. FirstOrderRecurrence phis that can possibly be handled by
    659         // extraction.
    660         // By recording these, we can then reason about ways to vectorize each
    661         // of these NotAllowedExit.
    662         InductionDescriptor ID;
    663         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID)) {
    664           addInductionPhi(Phi, ID, AllowedExit);
    665           Requirements->addExactFPMathInst(ID.getExactFPMathInst());
    666           continue;
    667         }
    668 
    669         if (RecurrenceDescriptor::isFirstOrderRecurrence(Phi, TheLoop,
    670                                                          SinkAfter, DT)) {
    671           AllowedExit.insert(Phi);
    672           FirstOrderRecurrences.insert(Phi);
    673           continue;
    674         }
    675 
    676         // As a last resort, coerce the PHI to a AddRec expression
    677         // and re-try classifying it a an induction PHI.
    678         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true)) {
    679           addInductionPhi(Phi, ID, AllowedExit);
    680           continue;
    681         }
    682 
    683         reportVectorizationFailure("Found an unidentified PHI",
    684             "value that could not be identified as "
    685             "reduction is used outside the loop",
    686             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
    687         return false;
    688       } // end of PHI handling
    689 
    690       // We handle calls that:
    691       //   * Are debug info intrinsics.
    692       //   * Have a mapping to an IR intrinsic.
    693       //   * Have a vector version available.
    694       auto *CI = dyn_cast<CallInst>(&I);
    695 
    696       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
    697           !isa<DbgInfoIntrinsic>(CI) &&
    698           !(CI->getCalledFunction() && TLI &&
    699             (!VFDatabase::getMappings(*CI).empty() ||
    700              isTLIScalarize(*TLI, *CI)))) {
    701         // If the call is a recognized math libary call, it is likely that
    702         // we can vectorize it given loosened floating-point constraints.
    703         LibFunc Func;
    704         bool IsMathLibCall =
    705             TLI && CI->getCalledFunction() &&
    706             CI->getType()->isFloatingPointTy() &&
    707             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
    708             TLI->hasOptimizedCodeGen(Func);
    709 
    710         if (IsMathLibCall) {
    711           // TODO: Ideally, we should not use clang-specific language here,
    712           // but it's hard to provide meaningful yet generic advice.
    713           // Also, should this be guarded by allowExtraAnalysis() and/or be part
    714           // of the returned info from isFunctionVectorizable()?
    715           reportVectorizationFailure(
    716               "Found a non-intrinsic callsite",
    717               "library call cannot be vectorized. "
    718               "Try compiling with -fno-math-errno, -ffast-math, "
    719               "or similar flags",
    720               "CantVectorizeLibcall", ORE, TheLoop, CI);
    721         } else {
    722           reportVectorizationFailure("Found a non-intrinsic callsite",
    723                                      "call instruction cannot be vectorized",
    724                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
    725         }
    726         return false;
    727       }
    728 
    729       // Some intrinsics have scalar arguments and should be same in order for
    730       // them to be vectorized (i.e. loop invariant).
    731       if (CI) {
    732         auto *SE = PSE.getSE();
    733         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
    734         for (unsigned i = 0, e = CI->getNumArgOperands(); i != e; ++i)
    735           if (hasVectorInstrinsicScalarOpd(IntrinID, i)) {
    736             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
    737               reportVectorizationFailure("Found unvectorizable intrinsic",
    738                   "intrinsic instruction cannot be vectorized",
    739                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
    740               return false;
    741             }
    742           }
    743       }
    744 
    745       // Check that the instruction return type is vectorizable.
    746       // Also, we can't vectorize extractelement instructions.
    747       if ((!VectorType::isValidElementType(I.getType()) &&
    748            !I.getType()->isVoidTy()) ||
    749           isa<ExtractElementInst>(I)) {
    750         reportVectorizationFailure("Found unvectorizable type",
    751             "instruction return type cannot be vectorized",
    752             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
    753         return false;
    754       }
    755 
    756       // Check that the stored type is vectorizable.
    757       if (auto *ST = dyn_cast<StoreInst>(&I)) {
    758         Type *T = ST->getValueOperand()->getType();
    759         if (!VectorType::isValidElementType(T)) {
    760           reportVectorizationFailure("Store instruction cannot be vectorized",
    761                                      "store instruction cannot be vectorized",
    762                                      "CantVectorizeStore", ORE, TheLoop, ST);
    763           return false;
    764         }
    765 
    766         // For nontemporal stores, check that a nontemporal vector version is
    767         // supported on the target.
    768         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
    769           // Arbitrarily try a vector of 2 elements.
    770           auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
    771           assert(VecTy && "did not find vectorized version of stored type");
    772           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
    773             reportVectorizationFailure(
    774                 "nontemporal store instruction cannot be vectorized",
    775                 "nontemporal store instruction cannot be vectorized",
    776                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
    777             return false;
    778           }
    779         }
    780 
    781       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
    782         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
    783           // For nontemporal loads, check that a nontemporal vector version is
    784           // supported on the target (arbitrarily try a vector of 2 elements).
    785           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
    786           assert(VecTy && "did not find vectorized version of load type");
    787           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
    788             reportVectorizationFailure(
    789                 "nontemporal load instruction cannot be vectorized",
    790                 "nontemporal load instruction cannot be vectorized",
    791                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
    792             return false;
    793           }
    794         }
    795 
    796         // FP instructions can allow unsafe algebra, thus vectorizable by
    797         // non-IEEE-754 compliant SIMD units.
    798         // This applies to floating-point math operations and calls, not memory
    799         // operations, shuffles, or casts, as they don't change precision or
    800         // semantics.
    801       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
    802                  !I.isFast()) {
    803         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
    804         Hints->setPotentiallyUnsafe();
    805       }
    806 
    807       // Reduction instructions are allowed to have exit users.
    808       // All other instructions must not have external users.
    809       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
    810         // We can safely vectorize loops where instructions within the loop are
    811         // used outside the loop only if the SCEV predicates within the loop is
    812         // same as outside the loop. Allowing the exit means reusing the SCEV
    813         // outside the loop.
    814         if (PSE.getUnionPredicate().isAlwaysTrue()) {
    815           AllowedExit.insert(&I);
    816           continue;
    817         }
    818         reportVectorizationFailure("Value cannot be used outside the loop",
    819                                    "value cannot be used outside the loop",
    820                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
    821         return false;
    822       }
    823     } // next instr.
    824   }
    825 
    826   if (!PrimaryInduction) {
    827     if (Inductions.empty()) {
    828       reportVectorizationFailure("Did not find one integer induction var",
    829           "loop induction variable could not be identified",
    830           "NoInductionVariable", ORE, TheLoop);
    831       return false;
    832     } else if (!WidestIndTy) {
    833       reportVectorizationFailure("Did not find one integer induction var",
    834           "integer loop induction variable could not be identified",
    835           "NoIntegerInductionVariable", ORE, TheLoop);
    836       return false;
    837     } else {
    838       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
    839     }
    840   }
    841 
    842   // For first order recurrences, we use the previous value (incoming value from
    843   // the latch) to check if it dominates all users of the recurrence. Bail out
    844   // if we have to sink such an instruction for another recurrence, as the
    845   // dominance requirement may not hold after sinking.
    846   BasicBlock *LoopLatch = TheLoop->getLoopLatch();
    847   if (any_of(FirstOrderRecurrences, [LoopLatch, this](const PHINode *Phi) {
    848         Instruction *V =
    849             cast<Instruction>(Phi->getIncomingValueForBlock(LoopLatch));
    850         return SinkAfter.find(V) != SinkAfter.end();
    851       }))
    852     return false;
    853 
    854   // Now we know the widest induction type, check if our found induction
    855   // is the same size. If it's not, unset it here and InnerLoopVectorizer
    856   // will create another.
    857   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
    858     PrimaryInduction = nullptr;
    859 
    860   return true;
    861 }
    862 
    863 bool LoopVectorizationLegality::canVectorizeMemory() {
    864   LAI = &(*GetLAA)(*TheLoop);
    865   const OptimizationRemarkAnalysis *LAR = LAI->getReport();
    866   if (LAR) {
    867     ORE->emit([&]() {
    868       return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
    869                                         "loop not vectorized: ", *LAR);
    870     });
    871   }
    872   if (!LAI->canVectorizeMemory())
    873     return false;
    874 
    875   if (LAI->hasDependenceInvolvingLoopInvariantAddress()) {
    876     reportVectorizationFailure("Stores to a uniform address",
    877         "write to a loop invariant address could not be vectorized",
    878         "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
    879     return false;
    880   }
    881   Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks());
    882   PSE.addPredicate(LAI->getPSE().getUnionPredicate());
    883 
    884   return true;
    885 }
    886 
    887 bool LoopVectorizationLegality::isInductionPhi(const Value *V) {
    888   Value *In0 = const_cast<Value *>(V);
    889   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
    890   if (!PN)
    891     return false;
    892 
    893   return Inductions.count(PN);
    894 }
    895 
    896 bool LoopVectorizationLegality::isCastedInductionVariable(const Value *V) {
    897   auto *Inst = dyn_cast<Instruction>(V);
    898   return (Inst && InductionCastsToIgnore.count(Inst));
    899 }
    900 
    901 bool LoopVectorizationLegality::isInductionVariable(const Value *V) {
    902   return isInductionPhi(V) || isCastedInductionVariable(V);
    903 }
    904 
    905 bool LoopVectorizationLegality::isFirstOrderRecurrence(const PHINode *Phi) {
    906   return FirstOrderRecurrences.count(Phi);
    907 }
    908 
    909 bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
    910   return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
    911 }
    912 
    913 bool LoopVectorizationLegality::blockCanBePredicated(
    914     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
    915     SmallPtrSetImpl<const Instruction *> &MaskedOp,
    916     SmallPtrSetImpl<Instruction *> &ConditionalAssumes,
    917     bool PreserveGuards) const {
    918   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
    919 
    920   for (Instruction &I : *BB) {
    921     // Check that we don't have a constant expression that can trap as operand.
    922     for (Value *Operand : I.operands()) {
    923       if (auto *C = dyn_cast<Constant>(Operand))
    924         if (C->canTrap())
    925           return false;
    926     }
    927 
    928     // We can predicate blocks with calls to assume, as long as we drop them in
    929     // case we flatten the CFG via predication.
    930     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
    931       ConditionalAssumes.insert(&I);
    932       continue;
    933     }
    934 
    935     // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
    936     // TODO: there might be cases that it should block the vectorization. Let's
    937     // ignore those for now.
    938     if (isa<NoAliasScopeDeclInst>(&I))
    939       continue;
    940 
    941     // We might be able to hoist the load.
    942     if (I.mayReadFromMemory()) {
    943       auto *LI = dyn_cast<LoadInst>(&I);
    944       if (!LI)
    945         return false;
    946       if (!SafePtrs.count(LI->getPointerOperand())) {
    947         // !llvm.mem.parallel_loop_access implies if-conversion safety.
    948         // Otherwise, record that the load needs (real or emulated) masking
    949         // and let the cost model decide.
    950         if (!IsAnnotatedParallel || PreserveGuards)
    951           MaskedOp.insert(LI);
    952         continue;
    953       }
    954     }
    955 
    956     if (I.mayWriteToMemory()) {
    957       auto *SI = dyn_cast<StoreInst>(&I);
    958       if (!SI)
    959         return false;
    960       // Predicated store requires some form of masking:
    961       // 1) masked store HW instruction,
    962       // 2) emulation via load-blend-store (only if safe and legal to do so,
    963       //    be aware on the race conditions), or
    964       // 3) element-by-element predicate check and scalar store.
    965       MaskedOp.insert(SI);
    966       continue;
    967     }
    968     if (I.mayThrow())
    969       return false;
    970   }
    971 
    972   return true;
    973 }
    974 
    975 bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
    976   if (!EnableIfConversion) {
    977     reportVectorizationFailure("If-conversion is disabled",
    978                                "if-conversion is disabled",
    979                                "IfConversionDisabled",
    980                                ORE, TheLoop);
    981     return false;
    982   }
    983 
    984   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
    985 
    986   // A list of pointers which are known to be dereferenceable within scope of
    987   // the loop body for each iteration of the loop which executes.  That is,
    988   // the memory pointed to can be dereferenced (with the access size implied by
    989   // the value's type) unconditionally within the loop header without
    990   // introducing a new fault.
    991   SmallPtrSet<Value *, 8> SafePointers;
    992 
    993   // Collect safe addresses.
    994   for (BasicBlock *BB : TheLoop->blocks()) {
    995     if (!blockNeedsPredication(BB)) {
    996       for (Instruction &I : *BB)
    997         if (auto *Ptr = getLoadStorePointerOperand(&I))
    998           SafePointers.insert(Ptr);
    999       continue;
   1000     }
   1001 
   1002     // For a block which requires predication, a address may be safe to access
   1003     // in the loop w/o predication if we can prove dereferenceability facts
   1004     // sufficient to ensure it'll never fault within the loop. For the moment,
   1005     // we restrict this to loads; stores are more complicated due to
   1006     // concurrency restrictions.
   1007     ScalarEvolution &SE = *PSE.getSE();
   1008     for (Instruction &I : *BB) {
   1009       LoadInst *LI = dyn_cast<LoadInst>(&I);
   1010       if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
   1011           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT))
   1012         SafePointers.insert(LI->getPointerOperand());
   1013     }
   1014   }
   1015 
   1016   // Collect the blocks that need predication.
   1017   BasicBlock *Header = TheLoop->getHeader();
   1018   for (BasicBlock *BB : TheLoop->blocks()) {
   1019     // We don't support switch statements inside loops.
   1020     if (!isa<BranchInst>(BB->getTerminator())) {
   1021       reportVectorizationFailure("Loop contains a switch statement",
   1022                                  "loop contains a switch statement",
   1023                                  "LoopContainsSwitch", ORE, TheLoop,
   1024                                  BB->getTerminator());
   1025       return false;
   1026     }
   1027 
   1028     // We must be able to predicate all blocks that need to be predicated.
   1029     if (blockNeedsPredication(BB)) {
   1030       if (!blockCanBePredicated(BB, SafePointers, MaskedOp,
   1031                                 ConditionalAssumes)) {
   1032         reportVectorizationFailure(
   1033             "Control flow cannot be substituted for a select",
   1034             "control flow cannot be substituted for a select",
   1035             "NoCFGForSelect", ORE, TheLoop,
   1036             BB->getTerminator());
   1037         return false;
   1038       }
   1039     } else if (BB != Header && !canIfConvertPHINodes(BB)) {
   1040       reportVectorizationFailure(
   1041           "Control flow cannot be substituted for a select",
   1042           "control flow cannot be substituted for a select",
   1043           "NoCFGForSelect", ORE, TheLoop,
   1044           BB->getTerminator());
   1045       return false;
   1046     }
   1047   }
   1048 
   1049   // We can if-convert this loop.
   1050   return true;
   1051 }
   1052 
   1053 // Helper function to canVectorizeLoopNestCFG.
   1054 bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
   1055                                                     bool UseVPlanNativePath) {
   1056   assert((UseVPlanNativePath || Lp->isInnermost()) &&
   1057          "VPlan-native path is not enabled.");
   1058 
   1059   // TODO: ORE should be improved to show more accurate information when an
   1060   // outer loop can't be vectorized because a nested loop is not understood or
   1061   // legal. Something like: "outer_loop_location: loop not vectorized:
   1062   // (inner_loop_location) loop control flow is not understood by vectorizer".
   1063 
   1064   // Store the result and return it at the end instead of exiting early, in case
   1065   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
   1066   bool Result = true;
   1067   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
   1068 
   1069   // We must have a loop in canonical form. Loops with indirectbr in them cannot
   1070   // be canonicalized.
   1071   if (!Lp->getLoopPreheader()) {
   1072     reportVectorizationFailure("Loop doesn't have a legal pre-header",
   1073         "loop control flow is not understood by vectorizer",
   1074         "CFGNotUnderstood", ORE, TheLoop);
   1075     if (DoExtraAnalysis)
   1076       Result = false;
   1077     else
   1078       return false;
   1079   }
   1080 
   1081   // We must have a single backedge.
   1082   if (Lp->getNumBackEdges() != 1) {
   1083     reportVectorizationFailure("The loop must have a single backedge",
   1084         "loop control flow is not understood by vectorizer",
   1085         "CFGNotUnderstood", ORE, TheLoop);
   1086     if (DoExtraAnalysis)
   1087       Result = false;
   1088     else
   1089       return false;
   1090   }
   1091 
   1092   // We currently must have a single "exit block" after the loop. Note that
   1093   // multiple "exiting blocks" inside the loop are allowed, provided they all
   1094   // reach the single exit block.
   1095   // TODO: This restriction can be relaxed in the near future, it's here solely
   1096   // to allow separation of changes for review. We need to generalize the phi
   1097   // update logic in a number of places.
   1098   if (!Lp->getUniqueExitBlock()) {
   1099     reportVectorizationFailure("The loop must have a unique exit block",
   1100         "loop control flow is not understood by vectorizer",
   1101         "CFGNotUnderstood", ORE, TheLoop);
   1102     if (DoExtraAnalysis)
   1103       Result = false;
   1104     else
   1105       return false;
   1106   }
   1107   return Result;
   1108 }
   1109 
   1110 bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
   1111     Loop *Lp, bool UseVPlanNativePath) {
   1112   // Store the result and return it at the end instead of exiting early, in case
   1113   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
   1114   bool Result = true;
   1115   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
   1116   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
   1117     if (DoExtraAnalysis)
   1118       Result = false;
   1119     else
   1120       return false;
   1121   }
   1122 
   1123   // Recursively check whether the loop control flow of nested loops is
   1124   // understood.
   1125   for (Loop *SubLp : *Lp)
   1126     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
   1127       if (DoExtraAnalysis)
   1128         Result = false;
   1129       else
   1130         return false;
   1131     }
   1132 
   1133   return Result;
   1134 }
   1135 
   1136 bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
   1137   // Store the result and return it at the end instead of exiting early, in case
   1138   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
   1139   bool Result = true;
   1140 
   1141   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
   1142   // Check whether the loop-related control flow in the loop nest is expected by
   1143   // vectorizer.
   1144   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
   1145     if (DoExtraAnalysis)
   1146       Result = false;
   1147     else
   1148       return false;
   1149   }
   1150 
   1151   // We need to have a loop header.
   1152   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
   1153                     << '\n');
   1154 
   1155   // Specific checks for outer loops. We skip the remaining legal checks at this
   1156   // point because they don't support outer loops.
   1157   if (!TheLoop->isInnermost()) {
   1158     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
   1159 
   1160     if (!canVectorizeOuterLoop()) {
   1161       reportVectorizationFailure("Unsupported outer loop",
   1162                                  "unsupported outer loop",
   1163                                  "UnsupportedOuterLoop",
   1164                                  ORE, TheLoop);
   1165       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
   1166       // outer loops.
   1167       return false;
   1168     }
   1169 
   1170     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
   1171     return Result;
   1172   }
   1173 
   1174   assert(TheLoop->isInnermost() && "Inner loop expected.");
   1175   // Check if we can if-convert non-single-bb loops.
   1176   unsigned NumBlocks = TheLoop->getNumBlocks();
   1177   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
   1178     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
   1179     if (DoExtraAnalysis)
   1180       Result = false;
   1181     else
   1182       return false;
   1183   }
   1184 
   1185   // Check if we can vectorize the instructions and CFG in this loop.
   1186   if (!canVectorizeInstrs()) {
   1187     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
   1188     if (DoExtraAnalysis)
   1189       Result = false;
   1190     else
   1191       return false;
   1192   }
   1193 
   1194   // Go over each instruction and look at memory deps.
   1195   if (!canVectorizeMemory()) {
   1196     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
   1197     if (DoExtraAnalysis)
   1198       Result = false;
   1199     else
   1200       return false;
   1201   }
   1202 
   1203   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
   1204                     << (LAI->getRuntimePointerChecking()->Need
   1205                             ? " (with a runtime bound check)"
   1206                             : "")
   1207                     << "!\n");
   1208 
   1209   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
   1210   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
   1211     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
   1212 
   1213   if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) {
   1214     reportVectorizationFailure("Too many SCEV checks needed",
   1215         "Too many SCEV assumptions need to be made and checked at runtime",
   1216         "TooManySCEVRunTimeChecks", ORE, TheLoop);
   1217     if (DoExtraAnalysis)
   1218       Result = false;
   1219     else
   1220       return false;
   1221   }
   1222 
   1223   // Okay! We've done all the tests. If any have failed, return false. Otherwise
   1224   // we can vectorize, and at this point we don't have any other mem analysis
   1225   // which may limit our maximum vectorization factor, so just return true with
   1226   // no restrictions.
   1227   return Result;
   1228 }
   1229 
   1230 bool LoopVectorizationLegality::prepareToFoldTailByMasking() {
   1231 
   1232   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
   1233 
   1234   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
   1235 
   1236   for (auto &Reduction : getReductionVars())
   1237     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
   1238 
   1239   // TODO: handle non-reduction outside users when tail is folded by masking.
   1240   for (auto *AE : AllowedExit) {
   1241     // Check that all users of allowed exit values are inside the loop or
   1242     // are the live-out of a reduction.
   1243     if (ReductionLiveOuts.count(AE))
   1244       continue;
   1245     for (User *U : AE->users()) {
   1246       Instruction *UI = cast<Instruction>(U);
   1247       if (TheLoop->contains(UI))
   1248         continue;
   1249       LLVM_DEBUG(
   1250           dbgs()
   1251           << "LV: Cannot fold tail by masking, loop has an outside user for "
   1252           << *UI << "\n");
   1253       return false;
   1254     }
   1255   }
   1256 
   1257   // The list of pointers that we can safely read and write to remains empty.
   1258   SmallPtrSet<Value *, 8> SafePointers;
   1259 
   1260   SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
   1261   SmallPtrSet<Instruction *, 8> TmpConditionalAssumes;
   1262 
   1263   // Check and mark all blocks for predication, including those that ordinarily
   1264   // do not need predication such as the header block.
   1265   for (BasicBlock *BB : TheLoop->blocks()) {
   1266     if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp,
   1267                               TmpConditionalAssumes,
   1268                               /* MaskAllLoads= */ true)) {
   1269       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking as requested.\n");
   1270       return false;
   1271     }
   1272   }
   1273 
   1274   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
   1275 
   1276   MaskedOp.insert(TmpMaskedOp.begin(), TmpMaskedOp.end());
   1277   ConditionalAssumes.insert(TmpConditionalAssumes.begin(),
   1278                             TmpConditionalAssumes.end());
   1279 
   1280   return true;
   1281 }
   1282 
   1283 } // namespace llvm
   1284