Home | History | Annotate | Line # | Download | only in Vectorize
      1 //===- VPlan.cpp - Vectorizer Plan ----------------------------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 ///
      9 /// \file
     10 /// This is the LLVM vectorization plan. It represents a candidate for
     11 /// vectorization, allowing to plan and optimize how to vectorize a given loop
     12 /// before generating LLVM-IR.
     13 /// The vectorizer uses vectorization plans to estimate the costs of potential
     14 /// candidates and if profitable to execute the desired plan, generating vector
     15 /// LLVM-IR code.
     16 ///
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "VPlan.h"
     20 #include "VPlanDominatorTree.h"
     21 #include "llvm/ADT/DepthFirstIterator.h"
     22 #include "llvm/ADT/PostOrderIterator.h"
     23 #include "llvm/ADT/STLExtras.h"
     24 #include "llvm/ADT/SmallVector.h"
     25 #include "llvm/ADT/Twine.h"
     26 #include "llvm/Analysis/IVDescriptors.h"
     27 #include "llvm/Analysis/LoopInfo.h"
     28 #include "llvm/IR/BasicBlock.h"
     29 #include "llvm/IR/CFG.h"
     30 #include "llvm/IR/InstrTypes.h"
     31 #include "llvm/IR/Instruction.h"
     32 #include "llvm/IR/Instructions.h"
     33 #include "llvm/IR/Type.h"
     34 #include "llvm/IR/Value.h"
     35 #include "llvm/Support/Casting.h"
     36 #include "llvm/Support/CommandLine.h"
     37 #include "llvm/Support/Debug.h"
     38 #include "llvm/Support/ErrorHandling.h"
     39 #include "llvm/Support/GenericDomTreeConstruction.h"
     40 #include "llvm/Support/GraphWriter.h"
     41 #include "llvm/Support/raw_ostream.h"
     42 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     43 #include <cassert>
     44 #include <iterator>
     45 #include <string>
     46 #include <vector>
     47 
     48 using namespace llvm;
     49 extern cl::opt<bool> EnableVPlanNativePath;
     50 
     51 #define DEBUG_TYPE "vplan"
     52 
     53 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     54 raw_ostream &llvm::operator<<(raw_ostream &OS, const VPValue &V) {
     55   const VPInstruction *Instr = dyn_cast<VPInstruction>(&V);
     56   VPSlotTracker SlotTracker(
     57       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
     58   V.print(OS, SlotTracker);
     59   return OS;
     60 }
     61 #endif
     62 
     63 Value *VPLane::getAsRuntimeExpr(IRBuilder<> &Builder,
     64                                 const ElementCount &VF) const {
     65   switch (LaneKind) {
     66   case VPLane::Kind::ScalableLast:
     67     // Lane = RuntimeVF - VF.getKnownMinValue() + Lane
     68     return Builder.CreateSub(getRuntimeVF(Builder, Builder.getInt32Ty(), VF),
     69                              Builder.getInt32(VF.getKnownMinValue() - Lane));
     70   case VPLane::Kind::First:
     71     return Builder.getInt32(Lane);
     72   }
     73   llvm_unreachable("Unknown lane kind");
     74 }
     75 
     76 VPValue::VPValue(const unsigned char SC, Value *UV, VPDef *Def)
     77     : SubclassID(SC), UnderlyingVal(UV), Def(Def) {
     78   if (Def)
     79     Def->addDefinedValue(this);
     80 }
     81 
     82 VPValue::~VPValue() {
     83   assert(Users.empty() && "trying to delete a VPValue with remaining users");
     84   if (Def)
     85     Def->removeDefinedValue(this);
     86 }
     87 
     88 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
     89 void VPValue::print(raw_ostream &OS, VPSlotTracker &SlotTracker) const {
     90   if (const VPRecipeBase *R = dyn_cast_or_null<VPRecipeBase>(Def))
     91     R->print(OS, "", SlotTracker);
     92   else
     93     printAsOperand(OS, SlotTracker);
     94 }
     95 
     96 void VPValue::dump() const {
     97   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this->Def);
     98   VPSlotTracker SlotTracker(
     99       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
    100   print(dbgs(), SlotTracker);
    101   dbgs() << "\n";
    102 }
    103 
    104 void VPDef::dump() const {
    105   const VPRecipeBase *Instr = dyn_cast_or_null<VPRecipeBase>(this);
    106   VPSlotTracker SlotTracker(
    107       (Instr && Instr->getParent()) ? Instr->getParent()->getPlan() : nullptr);
    108   print(dbgs(), "", SlotTracker);
    109   dbgs() << "\n";
    110 }
    111 #endif
    112 
    113 // Get the top-most entry block of \p Start. This is the entry block of the
    114 // containing VPlan. This function is templated to support both const and non-const blocks
    115 template <typename T> static T *getPlanEntry(T *Start) {
    116   T *Next = Start;
    117   T *Current = Start;
    118   while ((Next = Next->getParent()))
    119     Current = Next;
    120 
    121   SmallSetVector<T *, 8> WorkList;
    122   WorkList.insert(Current);
    123 
    124   for (unsigned i = 0; i < WorkList.size(); i++) {
    125     T *Current = WorkList[i];
    126     if (Current->getNumPredecessors() == 0)
    127       return Current;
    128     auto &Predecessors = Current->getPredecessors();
    129     WorkList.insert(Predecessors.begin(), Predecessors.end());
    130   }
    131 
    132   llvm_unreachable("VPlan without any entry node without predecessors");
    133 }
    134 
    135 VPlan *VPBlockBase::getPlan() { return getPlanEntry(this)->Plan; }
    136 
    137 const VPlan *VPBlockBase::getPlan() const { return getPlanEntry(this)->Plan; }
    138 
    139 /// \return the VPBasicBlock that is the entry of Block, possibly indirectly.
    140 const VPBasicBlock *VPBlockBase::getEntryBasicBlock() const {
    141   const VPBlockBase *Block = this;
    142   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    143     Block = Region->getEntry();
    144   return cast<VPBasicBlock>(Block);
    145 }
    146 
    147 VPBasicBlock *VPBlockBase::getEntryBasicBlock() {
    148   VPBlockBase *Block = this;
    149   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    150     Block = Region->getEntry();
    151   return cast<VPBasicBlock>(Block);
    152 }
    153 
    154 void VPBlockBase::setPlan(VPlan *ParentPlan) {
    155   assert(ParentPlan->getEntry() == this &&
    156          "Can only set plan on its entry block.");
    157   Plan = ParentPlan;
    158 }
    159 
    160 /// \return the VPBasicBlock that is the exit of Block, possibly indirectly.
    161 const VPBasicBlock *VPBlockBase::getExitBasicBlock() const {
    162   const VPBlockBase *Block = this;
    163   while (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    164     Block = Region->getExit();
    165   return cast<VPBasicBlock>(Block);
    166 }
    167 
    168 VPBasicBlock *VPBlockBase::getExitBasicBlock() {
    169   VPBlockBase *Block = this;
    170   while (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    171     Block = Region->getExit();
    172   return cast<VPBasicBlock>(Block);
    173 }
    174 
    175 VPBlockBase *VPBlockBase::getEnclosingBlockWithSuccessors() {
    176   if (!Successors.empty() || !Parent)
    177     return this;
    178   assert(Parent->getExit() == this &&
    179          "Block w/o successors not the exit of its parent.");
    180   return Parent->getEnclosingBlockWithSuccessors();
    181 }
    182 
    183 VPBlockBase *VPBlockBase::getEnclosingBlockWithPredecessors() {
    184   if (!Predecessors.empty() || !Parent)
    185     return this;
    186   assert(Parent->getEntry() == this &&
    187          "Block w/o predecessors not the entry of its parent.");
    188   return Parent->getEnclosingBlockWithPredecessors();
    189 }
    190 
    191 VPValue *VPBlockBase::getCondBit() {
    192   return CondBitUser.getSingleOperandOrNull();
    193 }
    194 
    195 const VPValue *VPBlockBase::getCondBit() const {
    196   return CondBitUser.getSingleOperandOrNull();
    197 }
    198 
    199 void VPBlockBase::setCondBit(VPValue *CV) { CondBitUser.resetSingleOpUser(CV); }
    200 
    201 VPValue *VPBlockBase::getPredicate() {
    202   return PredicateUser.getSingleOperandOrNull();
    203 }
    204 
    205 const VPValue *VPBlockBase::getPredicate() const {
    206   return PredicateUser.getSingleOperandOrNull();
    207 }
    208 
    209 void VPBlockBase::setPredicate(VPValue *CV) {
    210   PredicateUser.resetSingleOpUser(CV);
    211 }
    212 
    213 void VPBlockBase::deleteCFG(VPBlockBase *Entry) {
    214   SmallVector<VPBlockBase *, 8> Blocks(depth_first(Entry));
    215 
    216   for (VPBlockBase *Block : Blocks)
    217     delete Block;
    218 }
    219 
    220 VPBasicBlock::iterator VPBasicBlock::getFirstNonPhi() {
    221   iterator It = begin();
    222   while (It != end() && It->isPhi())
    223     It++;
    224   return It;
    225 }
    226 
    227 Value *VPTransformState::get(VPValue *Def, const VPIteration &Instance) {
    228   if (!Def->getDef())
    229     return Def->getLiveInIRValue();
    230 
    231   if (hasScalarValue(Def, Instance)) {
    232     return Data
    233         .PerPartScalars[Def][Instance.Part][Instance.Lane.mapToCacheIndex(VF)];
    234   }
    235 
    236   assert(hasVectorValue(Def, Instance.Part));
    237   auto *VecPart = Data.PerPartOutput[Def][Instance.Part];
    238   if (!VecPart->getType()->isVectorTy()) {
    239     assert(Instance.Lane.isFirstLane() && "cannot get lane > 0 for scalar");
    240     return VecPart;
    241   }
    242   // TODO: Cache created scalar values.
    243   Value *Lane = Instance.Lane.getAsRuntimeExpr(Builder, VF);
    244   auto *Extract = Builder.CreateExtractElement(VecPart, Lane);
    245   // set(Def, Extract, Instance);
    246   return Extract;
    247 }
    248 
    249 BasicBlock *
    250 VPBasicBlock::createEmptyBasicBlock(VPTransformState::CFGState &CFG) {
    251   // BB stands for IR BasicBlocks. VPBB stands for VPlan VPBasicBlocks.
    252   // Pred stands for Predessor. Prev stands for Previous - last visited/created.
    253   BasicBlock *PrevBB = CFG.PrevBB;
    254   BasicBlock *NewBB = BasicBlock::Create(PrevBB->getContext(), getName(),
    255                                          PrevBB->getParent(), CFG.LastBB);
    256   LLVM_DEBUG(dbgs() << "LV: created " << NewBB->getName() << '\n');
    257 
    258   // Hook up the new basic block to its predecessors.
    259   for (VPBlockBase *PredVPBlock : getHierarchicalPredecessors()) {
    260     VPBasicBlock *PredVPBB = PredVPBlock->getExitBasicBlock();
    261     auto &PredVPSuccessors = PredVPBB->getSuccessors();
    262     BasicBlock *PredBB = CFG.VPBB2IRBB[PredVPBB];
    263 
    264     // In outer loop vectorization scenario, the predecessor BBlock may not yet
    265     // be visited(backedge). Mark the VPBasicBlock for fixup at the end of
    266     // vectorization. We do not encounter this case in inner loop vectorization
    267     // as we start out by building a loop skeleton with the vector loop header
    268     // and latch blocks. As a result, we never enter this function for the
    269     // header block in the non VPlan-native path.
    270     if (!PredBB) {
    271       assert(EnableVPlanNativePath &&
    272              "Unexpected null predecessor in non VPlan-native path");
    273       CFG.VPBBsToFix.push_back(PredVPBB);
    274       continue;
    275     }
    276 
    277     assert(PredBB && "Predecessor basic-block not found building successor.");
    278     auto *PredBBTerminator = PredBB->getTerminator();
    279     LLVM_DEBUG(dbgs() << "LV: draw edge from" << PredBB->getName() << '\n');
    280     if (isa<UnreachableInst>(PredBBTerminator)) {
    281       assert(PredVPSuccessors.size() == 1 &&
    282              "Predecessor ending w/o branch must have single successor.");
    283       PredBBTerminator->eraseFromParent();
    284       BranchInst::Create(NewBB, PredBB);
    285     } else {
    286       assert(PredVPSuccessors.size() == 2 &&
    287              "Predecessor ending with branch must have two successors.");
    288       unsigned idx = PredVPSuccessors.front() == this ? 0 : 1;
    289       assert(!PredBBTerminator->getSuccessor(idx) &&
    290              "Trying to reset an existing successor block.");
    291       PredBBTerminator->setSuccessor(idx, NewBB);
    292     }
    293   }
    294   return NewBB;
    295 }
    296 
    297 void VPBasicBlock::execute(VPTransformState *State) {
    298   bool Replica = State->Instance && !State->Instance->isFirstIteration();
    299   VPBasicBlock *PrevVPBB = State->CFG.PrevVPBB;
    300   VPBlockBase *SingleHPred = nullptr;
    301   BasicBlock *NewBB = State->CFG.PrevBB; // Reuse it if possible.
    302 
    303   // 1. Create an IR basic block, or reuse the last one if possible.
    304   // The last IR basic block is reused, as an optimization, in three cases:
    305   // A. the first VPBB reuses the loop header BB - when PrevVPBB is null;
    306   // B. when the current VPBB has a single (hierarchical) predecessor which
    307   //    is PrevVPBB and the latter has a single (hierarchical) successor; and
    308   // C. when the current VPBB is an entry of a region replica - where PrevVPBB
    309   //    is the exit of this region from a previous instance, or the predecessor
    310   //    of this region.
    311   if (PrevVPBB && /* A */
    312       !((SingleHPred = getSingleHierarchicalPredecessor()) &&
    313         SingleHPred->getExitBasicBlock() == PrevVPBB &&
    314         PrevVPBB->getSingleHierarchicalSuccessor()) && /* B */
    315       !(Replica && getPredecessors().empty())) {       /* C */
    316     NewBB = createEmptyBasicBlock(State->CFG);
    317     State->Builder.SetInsertPoint(NewBB);
    318     // Temporarily terminate with unreachable until CFG is rewired.
    319     UnreachableInst *Terminator = State->Builder.CreateUnreachable();
    320     State->Builder.SetInsertPoint(Terminator);
    321     // Register NewBB in its loop. In innermost loops its the same for all BB's.
    322     Loop *L = State->LI->getLoopFor(State->CFG.LastBB);
    323     L->addBasicBlockToLoop(NewBB, *State->LI);
    324     State->CFG.PrevBB = NewBB;
    325   }
    326 
    327   // 2. Fill the IR basic block with IR instructions.
    328   LLVM_DEBUG(dbgs() << "LV: vectorizing VPBB:" << getName()
    329                     << " in BB:" << NewBB->getName() << '\n');
    330 
    331   State->CFG.VPBB2IRBB[this] = NewBB;
    332   State->CFG.PrevVPBB = this;
    333 
    334   for (VPRecipeBase &Recipe : Recipes)
    335     Recipe.execute(*State);
    336 
    337   VPValue *CBV;
    338   if (EnableVPlanNativePath && (CBV = getCondBit())) {
    339     assert(CBV->getUnderlyingValue() &&
    340            "Unexpected null underlying value for condition bit");
    341 
    342     // Condition bit value in a VPBasicBlock is used as the branch selector. In
    343     // the VPlan-native path case, since all branches are uniform we generate a
    344     // branch instruction using the condition value from vector lane 0 and dummy
    345     // successors. The successors are fixed later when the successor blocks are
    346     // visited.
    347     Value *NewCond = State->get(CBV, {0, 0});
    348 
    349     // Replace the temporary unreachable terminator with the new conditional
    350     // branch.
    351     auto *CurrentTerminator = NewBB->getTerminator();
    352     assert(isa<UnreachableInst>(CurrentTerminator) &&
    353            "Expected to replace unreachable terminator with conditional "
    354            "branch.");
    355     auto *CondBr = BranchInst::Create(NewBB, nullptr, NewCond);
    356     CondBr->setSuccessor(0, nullptr);
    357     ReplaceInstWithInst(CurrentTerminator, CondBr);
    358   }
    359 
    360   LLVM_DEBUG(dbgs() << "LV: filled BB:" << *NewBB);
    361 }
    362 
    363 void VPBasicBlock::dropAllReferences(VPValue *NewValue) {
    364   for (VPRecipeBase &R : Recipes) {
    365     for (auto *Def : R.definedValues())
    366       Def->replaceAllUsesWith(NewValue);
    367 
    368     for (unsigned I = 0, E = R.getNumOperands(); I != E; I++)
    369       R.setOperand(I, NewValue);
    370   }
    371 }
    372 
    373 VPBasicBlock *VPBasicBlock::splitAt(iterator SplitAt) {
    374   assert((SplitAt == end() || SplitAt->getParent() == this) &&
    375          "can only split at a position in the same block");
    376 
    377   SmallVector<VPBlockBase *, 2> Succs(getSuccessors().begin(),
    378                                       getSuccessors().end());
    379   // First, disconnect the current block from its successors.
    380   for (VPBlockBase *Succ : Succs)
    381     VPBlockUtils::disconnectBlocks(this, Succ);
    382 
    383   // Create new empty block after the block to split.
    384   auto *SplitBlock = new VPBasicBlock(getName() + ".split");
    385   VPBlockUtils::insertBlockAfter(SplitBlock, this);
    386 
    387   // Add successors for block to split to new block.
    388   for (VPBlockBase *Succ : Succs)
    389     VPBlockUtils::connectBlocks(SplitBlock, Succ);
    390 
    391   // Finally, move the recipes starting at SplitAt to new block.
    392   for (VPRecipeBase &ToMove :
    393        make_early_inc_range(make_range(SplitAt, this->end())))
    394     ToMove.moveBefore(*SplitBlock, SplitBlock->end());
    395 
    396   return SplitBlock;
    397 }
    398 
    399 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    400 void VPBasicBlock::print(raw_ostream &O, const Twine &Indent,
    401                          VPSlotTracker &SlotTracker) const {
    402   O << Indent << getName() << ":\n";
    403   if (const VPValue *Pred = getPredicate()) {
    404     O << Indent << "BlockPredicate:";
    405     Pred->printAsOperand(O, SlotTracker);
    406     if (const auto *PredInst = dyn_cast<VPInstruction>(Pred))
    407       O << " (" << PredInst->getParent()->getName() << ")";
    408     O << '\n';
    409   }
    410 
    411   auto RecipeIndent = Indent + "  ";
    412   for (const VPRecipeBase &Recipe : *this) {
    413     Recipe.print(O, RecipeIndent, SlotTracker);
    414     O << '\n';
    415   }
    416 
    417   if (getSuccessors().empty()) {
    418     O << Indent << "No successors\n";
    419   } else {
    420     O << Indent << "Successor(s): ";
    421     ListSeparator LS;
    422     for (auto *Succ : getSuccessors())
    423       O << LS << Succ->getName();
    424     O << '\n';
    425   }
    426 
    427   if (const VPValue *CBV = getCondBit()) {
    428     O << Indent << "CondBit: ";
    429     CBV->printAsOperand(O, SlotTracker);
    430     if (const auto *CBI = dyn_cast<VPInstruction>(CBV))
    431       O << " (" << CBI->getParent()->getName() << ")";
    432     O << '\n';
    433   }
    434 }
    435 #endif
    436 
    437 void VPRegionBlock::dropAllReferences(VPValue *NewValue) {
    438   for (VPBlockBase *Block : depth_first(Entry))
    439     // Drop all references in VPBasicBlocks and replace all uses with
    440     // DummyValue.
    441     Block->dropAllReferences(NewValue);
    442 }
    443 
    444 void VPRegionBlock::execute(VPTransformState *State) {
    445   ReversePostOrderTraversal<VPBlockBase *> RPOT(Entry);
    446 
    447   if (!isReplicator()) {
    448     // Visit the VPBlocks connected to "this", starting from it.
    449     for (VPBlockBase *Block : RPOT) {
    450       if (EnableVPlanNativePath) {
    451         // The inner loop vectorization path does not represent loop preheader
    452         // and exit blocks as part of the VPlan. In the VPlan-native path, skip
    453         // vectorizing loop preheader block. In future, we may replace this
    454         // check with the check for loop preheader.
    455         if (Block->getNumPredecessors() == 0)
    456           continue;
    457 
    458         // Skip vectorizing loop exit block. In future, we may replace this
    459         // check with the check for loop exit.
    460         if (Block->getNumSuccessors() == 0)
    461           continue;
    462       }
    463 
    464       LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
    465       Block->execute(State);
    466     }
    467     return;
    468   }
    469 
    470   assert(!State->Instance && "Replicating a Region with non-null instance.");
    471 
    472   // Enter replicating mode.
    473   State->Instance = VPIteration(0, 0);
    474 
    475   for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part) {
    476     State->Instance->Part = Part;
    477     assert(!State->VF.isScalable() && "VF is assumed to be non scalable.");
    478     for (unsigned Lane = 0, VF = State->VF.getKnownMinValue(); Lane < VF;
    479          ++Lane) {
    480       State->Instance->Lane = VPLane(Lane, VPLane::Kind::First);
    481       // Visit the VPBlocks connected to \p this, starting from it.
    482       for (VPBlockBase *Block : RPOT) {
    483         LLVM_DEBUG(dbgs() << "LV: VPBlock in RPO " << Block->getName() << '\n');
    484         Block->execute(State);
    485       }
    486     }
    487   }
    488 
    489   // Exit replicating mode.
    490   State->Instance.reset();
    491 }
    492 
    493 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    494 void VPRegionBlock::print(raw_ostream &O, const Twine &Indent,
    495                           VPSlotTracker &SlotTracker) const {
    496   O << Indent << (isReplicator() ? "<xVFxUF> " : "<x1> ") << getName() << ": {";
    497   auto NewIndent = Indent + "  ";
    498   for (auto *BlockBase : depth_first(Entry)) {
    499     O << '\n';
    500     BlockBase->print(O, NewIndent, SlotTracker);
    501   }
    502   O << Indent << "}\n";
    503 }
    504 #endif
    505 
    506 bool VPRecipeBase::mayHaveSideEffects() const {
    507   switch (getVPDefID()) {
    508   case VPBranchOnMaskSC:
    509     return false;
    510   case VPBlendSC:
    511   case VPWidenSC:
    512   case VPWidenGEPSC:
    513   case VPReductionSC:
    514   case VPWidenSelectSC: {
    515     const Instruction *I =
    516         dyn_cast_or_null<Instruction>(getVPValue()->getUnderlyingValue());
    517     (void)I;
    518     assert((!I || !I->mayHaveSideEffects()) &&
    519            "underlying instruction has side-effects");
    520     return false;
    521   }
    522   case VPReplicateSC: {
    523     auto *R = cast<VPReplicateRecipe>(this);
    524     return R->getUnderlyingInstr()->mayHaveSideEffects();
    525   }
    526   default:
    527     return true;
    528   }
    529 }
    530 
    531 void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) {
    532   assert(!Parent && "Recipe already in some VPBasicBlock");
    533   assert(InsertPos->getParent() &&
    534          "Insertion position not in any VPBasicBlock");
    535   Parent = InsertPos->getParent();
    536   Parent->getRecipeList().insert(InsertPos->getIterator(), this);
    537 }
    538 
    539 void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) {
    540   assert(!Parent && "Recipe already in some VPBasicBlock");
    541   assert(InsertPos->getParent() &&
    542          "Insertion position not in any VPBasicBlock");
    543   Parent = InsertPos->getParent();
    544   Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this);
    545 }
    546 
    547 void VPRecipeBase::removeFromParent() {
    548   assert(getParent() && "Recipe not in any VPBasicBlock");
    549   getParent()->getRecipeList().remove(getIterator());
    550   Parent = nullptr;
    551 }
    552 
    553 iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() {
    554   assert(getParent() && "Recipe not in any VPBasicBlock");
    555   return getParent()->getRecipeList().erase(getIterator());
    556 }
    557 
    558 void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
    559   removeFromParent();
    560   insertAfter(InsertPos);
    561 }
    562 
    563 void VPRecipeBase::moveBefore(VPBasicBlock &BB,
    564                               iplist<VPRecipeBase>::iterator I) {
    565   assert(I == BB.end() || I->getParent() == &BB);
    566   removeFromParent();
    567   Parent = &BB;
    568   BB.getRecipeList().insert(I, this);
    569 }
    570 
    571 void VPInstruction::generateInstruction(VPTransformState &State,
    572                                         unsigned Part) {
    573   IRBuilder<> &Builder = State.Builder;
    574 
    575   if (Instruction::isBinaryOp(getOpcode())) {
    576     Value *A = State.get(getOperand(0), Part);
    577     Value *B = State.get(getOperand(1), Part);
    578     Value *V = Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B);
    579     State.set(this, V, Part);
    580     return;
    581   }
    582 
    583   switch (getOpcode()) {
    584   case VPInstruction::Not: {
    585     Value *A = State.get(getOperand(0), Part);
    586     Value *V = Builder.CreateNot(A);
    587     State.set(this, V, Part);
    588     break;
    589   }
    590   case VPInstruction::ICmpULE: {
    591     Value *IV = State.get(getOperand(0), Part);
    592     Value *TC = State.get(getOperand(1), Part);
    593     Value *V = Builder.CreateICmpULE(IV, TC);
    594     State.set(this, V, Part);
    595     break;
    596   }
    597   case Instruction::Select: {
    598     Value *Cond = State.get(getOperand(0), Part);
    599     Value *Op1 = State.get(getOperand(1), Part);
    600     Value *Op2 = State.get(getOperand(2), Part);
    601     Value *V = Builder.CreateSelect(Cond, Op1, Op2);
    602     State.set(this, V, Part);
    603     break;
    604   }
    605   case VPInstruction::ActiveLaneMask: {
    606     // Get first lane of vector induction variable.
    607     Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    608     // Get the original loop tripcount.
    609     Value *ScalarTC = State.TripCount;
    610 
    611     auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    612     auto *PredTy = FixedVectorType::get(Int1Ty, State.VF.getKnownMinValue());
    613     Instruction *Call = Builder.CreateIntrinsic(
    614         Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
    615         {VIVElem0, ScalarTC}, nullptr, "active.lane.mask");
    616     State.set(this, Call, Part);
    617     break;
    618   }
    619   default:
    620     llvm_unreachable("Unsupported opcode for instruction");
    621   }
    622 }
    623 
    624 void VPInstruction::execute(VPTransformState &State) {
    625   assert(!State.Instance && "VPInstruction executing an Instance");
    626   for (unsigned Part = 0; Part < State.UF; ++Part)
    627     generateInstruction(State, Part);
    628 }
    629 
    630 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    631 void VPInstruction::dump() const {
    632   VPSlotTracker SlotTracker(getParent()->getPlan());
    633   print(dbgs(), "", SlotTracker);
    634 }
    635 
    636 void VPInstruction::print(raw_ostream &O, const Twine &Indent,
    637                           VPSlotTracker &SlotTracker) const {
    638   O << Indent << "EMIT ";
    639 
    640   if (hasResult()) {
    641     printAsOperand(O, SlotTracker);
    642     O << " = ";
    643   }
    644 
    645   switch (getOpcode()) {
    646   case VPInstruction::Not:
    647     O << "not";
    648     break;
    649   case VPInstruction::ICmpULE:
    650     O << "icmp ule";
    651     break;
    652   case VPInstruction::SLPLoad:
    653     O << "combined load";
    654     break;
    655   case VPInstruction::SLPStore:
    656     O << "combined store";
    657     break;
    658   case VPInstruction::ActiveLaneMask:
    659     O << "active lane mask";
    660     break;
    661 
    662   default:
    663     O << Instruction::getOpcodeName(getOpcode());
    664   }
    665 
    666   for (const VPValue *Operand : operands()) {
    667     O << " ";
    668     Operand->printAsOperand(O, SlotTracker);
    669   }
    670 }
    671 #endif
    672 
    673 /// Generate the code inside the body of the vectorized loop. Assumes a single
    674 /// LoopVectorBody basic-block was created for this. Introduce additional
    675 /// basic-blocks as needed, and fill them all.
    676 void VPlan::execute(VPTransformState *State) {
    677   // -1. Check if the backedge taken count is needed, and if so build it.
    678   if (BackedgeTakenCount && BackedgeTakenCount->getNumUsers()) {
    679     Value *TC = State->TripCount;
    680     IRBuilder<> Builder(State->CFG.PrevBB->getTerminator());
    681     auto *TCMO = Builder.CreateSub(TC, ConstantInt::get(TC->getType(), 1),
    682                                    "trip.count.minus.1");
    683     auto VF = State->VF;
    684     Value *VTCMO =
    685         VF.isScalar() ? TCMO : Builder.CreateVectorSplat(VF, TCMO, "broadcast");
    686     for (unsigned Part = 0, UF = State->UF; Part < UF; ++Part)
    687       State->set(BackedgeTakenCount, VTCMO, Part);
    688   }
    689 
    690   // 0. Set the reverse mapping from VPValues to Values for code generation.
    691   for (auto &Entry : Value2VPValue)
    692     State->VPValue2Value[Entry.second] = Entry.first;
    693 
    694   BasicBlock *VectorPreHeaderBB = State->CFG.PrevBB;
    695   BasicBlock *VectorHeaderBB = VectorPreHeaderBB->getSingleSuccessor();
    696   assert(VectorHeaderBB && "Loop preheader does not have a single successor.");
    697 
    698   // 1. Make room to generate basic-blocks inside loop body if needed.
    699   BasicBlock *VectorLatchBB = VectorHeaderBB->splitBasicBlock(
    700       VectorHeaderBB->getFirstInsertionPt(), "vector.body.latch");
    701   Loop *L = State->LI->getLoopFor(VectorHeaderBB);
    702   L->addBasicBlockToLoop(VectorLatchBB, *State->LI);
    703   // Remove the edge between Header and Latch to allow other connections.
    704   // Temporarily terminate with unreachable until CFG is rewired.
    705   // Note: this asserts the generated code's assumption that
    706   // getFirstInsertionPt() can be dereferenced into an Instruction.
    707   VectorHeaderBB->getTerminator()->eraseFromParent();
    708   State->Builder.SetInsertPoint(VectorHeaderBB);
    709   UnreachableInst *Terminator = State->Builder.CreateUnreachable();
    710   State->Builder.SetInsertPoint(Terminator);
    711 
    712   // 2. Generate code in loop body.
    713   State->CFG.PrevVPBB = nullptr;
    714   State->CFG.PrevBB = VectorHeaderBB;
    715   State->CFG.LastBB = VectorLatchBB;
    716 
    717   for (VPBlockBase *Block : depth_first(Entry))
    718     Block->execute(State);
    719 
    720   // Setup branch terminator successors for VPBBs in VPBBsToFix based on
    721   // VPBB's successors.
    722   for (auto VPBB : State->CFG.VPBBsToFix) {
    723     assert(EnableVPlanNativePath &&
    724            "Unexpected VPBBsToFix in non VPlan-native path");
    725     BasicBlock *BB = State->CFG.VPBB2IRBB[VPBB];
    726     assert(BB && "Unexpected null basic block for VPBB");
    727 
    728     unsigned Idx = 0;
    729     auto *BBTerminator = BB->getTerminator();
    730 
    731     for (VPBlockBase *SuccVPBlock : VPBB->getHierarchicalSuccessors()) {
    732       VPBasicBlock *SuccVPBB = SuccVPBlock->getEntryBasicBlock();
    733       BBTerminator->setSuccessor(Idx, State->CFG.VPBB2IRBB[SuccVPBB]);
    734       ++Idx;
    735     }
    736   }
    737 
    738   // 3. Merge the temporary latch created with the last basic-block filled.
    739   BasicBlock *LastBB = State->CFG.PrevBB;
    740   // Connect LastBB to VectorLatchBB to facilitate their merge.
    741   assert((EnableVPlanNativePath ||
    742           isa<UnreachableInst>(LastBB->getTerminator())) &&
    743          "Expected InnerLoop VPlan CFG to terminate with unreachable");
    744   assert((!EnableVPlanNativePath || isa<BranchInst>(LastBB->getTerminator())) &&
    745          "Expected VPlan CFG to terminate with branch in NativePath");
    746   LastBB->getTerminator()->eraseFromParent();
    747   BranchInst::Create(VectorLatchBB, LastBB);
    748 
    749   // Merge LastBB with Latch.
    750   bool Merged = MergeBlockIntoPredecessor(VectorLatchBB, nullptr, State->LI);
    751   (void)Merged;
    752   assert(Merged && "Could not merge last basic block with latch.");
    753   VectorLatchBB = LastBB;
    754 
    755   // We do not attempt to preserve DT for outer loop vectorization currently.
    756   if (!EnableVPlanNativePath)
    757     updateDominatorTree(State->DT, VectorPreHeaderBB, VectorLatchBB,
    758                         L->getExitBlock());
    759 }
    760 
    761 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    762 LLVM_DUMP_METHOD
    763 void VPlan::print(raw_ostream &O) const {
    764   VPSlotTracker SlotTracker(this);
    765 
    766   O << "VPlan '" << Name << "' {";
    767   for (const VPBlockBase *Block : depth_first(getEntry())) {
    768     O << '\n';
    769     Block->print(O, "", SlotTracker);
    770   }
    771   O << "}\n";
    772 }
    773 
    774 LLVM_DUMP_METHOD
    775 void VPlan::printDOT(raw_ostream &O) const {
    776   VPlanPrinter Printer(O, *this);
    777   Printer.dump();
    778 }
    779 
    780 LLVM_DUMP_METHOD
    781 void VPlan::dump() const { print(dbgs()); }
    782 #endif
    783 
    784 void VPlan::updateDominatorTree(DominatorTree *DT, BasicBlock *LoopPreHeaderBB,
    785                                 BasicBlock *LoopLatchBB,
    786                                 BasicBlock *LoopExitBB) {
    787   BasicBlock *LoopHeaderBB = LoopPreHeaderBB->getSingleSuccessor();
    788   assert(LoopHeaderBB && "Loop preheader does not have a single successor.");
    789   // The vector body may be more than a single basic-block by this point.
    790   // Update the dominator tree information inside the vector body by propagating
    791   // it from header to latch, expecting only triangular control-flow, if any.
    792   BasicBlock *PostDomSucc = nullptr;
    793   for (auto *BB = LoopHeaderBB; BB != LoopLatchBB; BB = PostDomSucc) {
    794     // Get the list of successors of this block.
    795     std::vector<BasicBlock *> Succs(succ_begin(BB), succ_end(BB));
    796     assert(Succs.size() <= 2 &&
    797            "Basic block in vector loop has more than 2 successors.");
    798     PostDomSucc = Succs[0];
    799     if (Succs.size() == 1) {
    800       assert(PostDomSucc->getSinglePredecessor() &&
    801              "PostDom successor has more than one predecessor.");
    802       DT->addNewBlock(PostDomSucc, BB);
    803       continue;
    804     }
    805     BasicBlock *InterimSucc = Succs[1];
    806     if (PostDomSucc->getSingleSuccessor() == InterimSucc) {
    807       PostDomSucc = Succs[1];
    808       InterimSucc = Succs[0];
    809     }
    810     assert(InterimSucc->getSingleSuccessor() == PostDomSucc &&
    811            "One successor of a basic block does not lead to the other.");
    812     assert(InterimSucc->getSinglePredecessor() &&
    813            "Interim successor has more than one predecessor.");
    814     assert(PostDomSucc->hasNPredecessors(2) &&
    815            "PostDom successor has more than two predecessors.");
    816     DT->addNewBlock(InterimSucc, BB);
    817     DT->addNewBlock(PostDomSucc, BB);
    818   }
    819   // Latch block is a new dominator for the loop exit.
    820   DT->changeImmediateDominator(LoopExitBB, LoopLatchBB);
    821   assert(DT->verify(DominatorTree::VerificationLevel::Fast));
    822 }
    823 
    824 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
    825 const Twine VPlanPrinter::getUID(const VPBlockBase *Block) {
    826   return (isa<VPRegionBlock>(Block) ? "cluster_N" : "N") +
    827          Twine(getOrCreateBID(Block));
    828 }
    829 
    830 const Twine VPlanPrinter::getOrCreateName(const VPBlockBase *Block) {
    831   const std::string &Name = Block->getName();
    832   if (!Name.empty())
    833     return Name;
    834   return "VPB" + Twine(getOrCreateBID(Block));
    835 }
    836 
    837 void VPlanPrinter::dump() {
    838   Depth = 1;
    839   bumpIndent(0);
    840   OS << "digraph VPlan {\n";
    841   OS << "graph [labelloc=t, fontsize=30; label=\"Vectorization Plan";
    842   if (!Plan.getName().empty())
    843     OS << "\\n" << DOT::EscapeString(Plan.getName());
    844   if (Plan.BackedgeTakenCount) {
    845     OS << ", where:\\n";
    846     Plan.BackedgeTakenCount->print(OS, SlotTracker);
    847     OS << " := BackedgeTakenCount";
    848   }
    849   OS << "\"]\n";
    850   OS << "node [shape=rect, fontname=Courier, fontsize=30]\n";
    851   OS << "edge [fontname=Courier, fontsize=30]\n";
    852   OS << "compound=true\n";
    853 
    854   for (const VPBlockBase *Block : depth_first(Plan.getEntry()))
    855     dumpBlock(Block);
    856 
    857   OS << "}\n";
    858 }
    859 
    860 void VPlanPrinter::dumpBlock(const VPBlockBase *Block) {
    861   if (const VPBasicBlock *BasicBlock = dyn_cast<VPBasicBlock>(Block))
    862     dumpBasicBlock(BasicBlock);
    863   else if (const VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
    864     dumpRegion(Region);
    865   else
    866     llvm_unreachable("Unsupported kind of VPBlock.");
    867 }
    868 
    869 void VPlanPrinter::drawEdge(const VPBlockBase *From, const VPBlockBase *To,
    870                             bool Hidden, const Twine &Label) {
    871   // Due to "dot" we print an edge between two regions as an edge between the
    872   // exit basic block and the entry basic of the respective regions.
    873   const VPBlockBase *Tail = From->getExitBasicBlock();
    874   const VPBlockBase *Head = To->getEntryBasicBlock();
    875   OS << Indent << getUID(Tail) << " -> " << getUID(Head);
    876   OS << " [ label=\"" << Label << '\"';
    877   if (Tail != From)
    878     OS << " ltail=" << getUID(From);
    879   if (Head != To)
    880     OS << " lhead=" << getUID(To);
    881   if (Hidden)
    882     OS << "; splines=none";
    883   OS << "]\n";
    884 }
    885 
    886 void VPlanPrinter::dumpEdges(const VPBlockBase *Block) {
    887   auto &Successors = Block->getSuccessors();
    888   if (Successors.size() == 1)
    889     drawEdge(Block, Successors.front(), false, "");
    890   else if (Successors.size() == 2) {
    891     drawEdge(Block, Successors.front(), false, "T");
    892     drawEdge(Block, Successors.back(), false, "F");
    893   } else {
    894     unsigned SuccessorNumber = 0;
    895     for (auto *Successor : Successors)
    896       drawEdge(Block, Successor, false, Twine(SuccessorNumber++));
    897   }
    898 }
    899 
    900 void VPlanPrinter::dumpBasicBlock(const VPBasicBlock *BasicBlock) {
    901   // Implement dot-formatted dump by performing plain-text dump into the
    902   // temporary storage followed by some post-processing.
    903   OS << Indent << getUID(BasicBlock) << " [label =\n";
    904   bumpIndent(1);
    905   std::string Str;
    906   raw_string_ostream SS(Str);
    907   // Use no indentation as we need to wrap the lines into quotes ourselves.
    908   BasicBlock->print(SS, "", SlotTracker);
    909 
    910   // We need to process each line of the output separately, so split
    911   // single-string plain-text dump.
    912   SmallVector<StringRef, 0> Lines;
    913   StringRef(Str).rtrim('\n').split(Lines, "\n");
    914 
    915   auto EmitLine = [&](StringRef Line, StringRef Suffix) {
    916     OS << Indent << '"' << DOT::EscapeString(Line.str()) << "\\l\"" << Suffix;
    917   };
    918 
    919   // Don't need the "+" after the last line.
    920   for (auto Line : make_range(Lines.begin(), Lines.end() - 1))
    921     EmitLine(Line, " +\n");
    922   EmitLine(Lines.back(), "\n");
    923 
    924   bumpIndent(-1);
    925   OS << Indent << "]\n";
    926 
    927   dumpEdges(BasicBlock);
    928 }
    929 
    930 void VPlanPrinter::dumpRegion(const VPRegionBlock *Region) {
    931   OS << Indent << "subgraph " << getUID(Region) << " {\n";
    932   bumpIndent(1);
    933   OS << Indent << "fontname=Courier\n"
    934      << Indent << "label=\""
    935      << DOT::EscapeString(Region->isReplicator() ? "<xVFxUF> " : "<x1> ")
    936      << DOT::EscapeString(Region->getName()) << "\"\n";
    937   // Dump the blocks of the region.
    938   assert(Region->getEntry() && "Region contains no inner blocks.");
    939   for (const VPBlockBase *Block : depth_first(Region->getEntry()))
    940     dumpBlock(Block);
    941   bumpIndent(-1);
    942   OS << Indent << "}\n";
    943   dumpEdges(Region);
    944 }
    945 
    946 void VPlanIngredient::print(raw_ostream &O) const {
    947   if (auto *Inst = dyn_cast<Instruction>(V)) {
    948     if (!Inst->getType()->isVoidTy()) {
    949       Inst->printAsOperand(O, false);
    950       O << " = ";
    951     }
    952     O << Inst->getOpcodeName() << " ";
    953     unsigned E = Inst->getNumOperands();
    954     if (E > 0) {
    955       Inst->getOperand(0)->printAsOperand(O, false);
    956       for (unsigned I = 1; I < E; ++I)
    957         Inst->getOperand(I)->printAsOperand(O << ", ", false);
    958     }
    959   } else // !Inst
    960     V->printAsOperand(O, false);
    961 }
    962 
    963 void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent,
    964                               VPSlotTracker &SlotTracker) const {
    965   O << Indent << "WIDEN-CALL ";
    966 
    967   auto *CI = cast<CallInst>(getUnderlyingInstr());
    968   if (CI->getType()->isVoidTy())
    969     O << "void ";
    970   else {
    971     printAsOperand(O, SlotTracker);
    972     O << " = ";
    973   }
    974 
    975   O << "call @" << CI->getCalledFunction()->getName() << "(";
    976   printOperands(O, SlotTracker);
    977   O << ")";
    978 }
    979 
    980 void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent,
    981                                 VPSlotTracker &SlotTracker) const {
    982   O << Indent << "WIDEN-SELECT ";
    983   printAsOperand(O, SlotTracker);
    984   O << " = select ";
    985   getOperand(0)->printAsOperand(O, SlotTracker);
    986   O << ", ";
    987   getOperand(1)->printAsOperand(O, SlotTracker);
    988   O << ", ";
    989   getOperand(2)->printAsOperand(O, SlotTracker);
    990   O << (InvariantCond ? " (condition is loop invariant)" : "");
    991 }
    992 
    993 void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent,
    994                           VPSlotTracker &SlotTracker) const {
    995   O << Indent << "WIDEN ";
    996   printAsOperand(O, SlotTracker);
    997   O << " = " << getUnderlyingInstr()->getOpcodeName() << " ";
    998   printOperands(O, SlotTracker);
    999 }
   1000 
   1001 void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent,
   1002                                           VPSlotTracker &SlotTracker) const {
   1003   O << Indent << "WIDEN-INDUCTION";
   1004   if (getTruncInst()) {
   1005     O << "\\l\"";
   1006     O << " +\n" << Indent << "\"  " << VPlanIngredient(IV) << "\\l\"";
   1007     O << " +\n" << Indent << "\"  ";
   1008     getVPValue(0)->printAsOperand(O, SlotTracker);
   1009   } else
   1010     O << " " << VPlanIngredient(IV);
   1011 }
   1012 
   1013 void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent,
   1014                              VPSlotTracker &SlotTracker) const {
   1015   O << Indent << "WIDEN-GEP ";
   1016   O << (IsPtrLoopInvariant ? "Inv" : "Var");
   1017   size_t IndicesNumber = IsIndexLoopInvariant.size();
   1018   for (size_t I = 0; I < IndicesNumber; ++I)
   1019     O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]";
   1020 
   1021   O << " ";
   1022   printAsOperand(O, SlotTracker);
   1023   O << " = getelementptr ";
   1024   printOperands(O, SlotTracker);
   1025 }
   1026 
   1027 void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
   1028                              VPSlotTracker &SlotTracker) const {
   1029   O << Indent << "WIDEN-PHI ";
   1030 
   1031   auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
   1032   // Unless all incoming values are modeled in VPlan  print the original PHI
   1033   // directly.
   1034   // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
   1035   // values as VPValues.
   1036   if (getNumOperands() != OriginalPhi->getNumOperands()) {
   1037     O << VPlanIngredient(OriginalPhi);
   1038     return;
   1039   }
   1040 
   1041   printAsOperand(O, SlotTracker);
   1042   O << " = phi ";
   1043   printOperands(O, SlotTracker);
   1044 }
   1045 
   1046 void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent,
   1047                           VPSlotTracker &SlotTracker) const {
   1048   O << Indent << "BLEND ";
   1049   Phi->printAsOperand(O, false);
   1050   O << " =";
   1051   if (getNumIncomingValues() == 1) {
   1052     // Not a User of any mask: not really blending, this is a
   1053     // single-predecessor phi.
   1054     O << " ";
   1055     getIncomingValue(0)->printAsOperand(O, SlotTracker);
   1056   } else {
   1057     for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) {
   1058       O << " ";
   1059       getIncomingValue(I)->printAsOperand(O, SlotTracker);
   1060       O << "/";
   1061       getMask(I)->printAsOperand(O, SlotTracker);
   1062     }
   1063   }
   1064 }
   1065 
   1066 void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent,
   1067                               VPSlotTracker &SlotTracker) const {
   1068   O << Indent << "REDUCE ";
   1069   printAsOperand(O, SlotTracker);
   1070   O << " = ";
   1071   getChainOp()->printAsOperand(O, SlotTracker);
   1072   O << " + reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode())
   1073     << " (";
   1074   getVecOp()->printAsOperand(O, SlotTracker);
   1075   if (getCondOp()) {
   1076     O << ", ";
   1077     getCondOp()->printAsOperand(O, SlotTracker);
   1078   }
   1079   O << ")";
   1080 }
   1081 
   1082 void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent,
   1083                               VPSlotTracker &SlotTracker) const {
   1084   O << Indent << (IsUniform ? "CLONE " : "REPLICATE ");
   1085 
   1086   if (!getUnderlyingInstr()->getType()->isVoidTy()) {
   1087     printAsOperand(O, SlotTracker);
   1088     O << " = ";
   1089   }
   1090   O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " ";
   1091   printOperands(O, SlotTracker);
   1092 
   1093   if (AlsoPack)
   1094     O << " (S->V)";
   1095 }
   1096 
   1097 void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
   1098                                 VPSlotTracker &SlotTracker) const {
   1099   O << Indent << "PHI-PREDICATED-INSTRUCTION ";
   1100   printAsOperand(O, SlotTracker);
   1101   O << " = ";
   1102   printOperands(O, SlotTracker);
   1103 }
   1104 
   1105 void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
   1106                                            VPSlotTracker &SlotTracker) const {
   1107   O << Indent << "WIDEN ";
   1108 
   1109   if (!isStore()) {
   1110     getVPValue()->printAsOperand(O, SlotTracker);
   1111     O << " = ";
   1112   }
   1113   O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";
   1114 
   1115   printOperands(O, SlotTracker);
   1116 }
   1117 #endif
   1118 
   1119 void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) {
   1120   Value *CanonicalIV = State.CanonicalIV;
   1121   Type *STy = CanonicalIV->getType();
   1122   IRBuilder<> Builder(State.CFG.PrevBB->getTerminator());
   1123   ElementCount VF = State.VF;
   1124   assert(!VF.isScalable() && "the code following assumes non scalables ECs");
   1125   Value *VStart = VF.isScalar()
   1126                       ? CanonicalIV
   1127                       : Builder.CreateVectorSplat(VF.getKnownMinValue(),
   1128                                                   CanonicalIV, "broadcast");
   1129   for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) {
   1130     SmallVector<Constant *, 8> Indices;
   1131     for (unsigned Lane = 0; Lane < VF.getKnownMinValue(); ++Lane)
   1132       Indices.push_back(
   1133           ConstantInt::get(STy, Part * VF.getKnownMinValue() + Lane));
   1134     // If VF == 1, there is only one iteration in the loop above, thus the
   1135     // element pushed back into Indices is ConstantInt::get(STy, Part)
   1136     Constant *VStep =
   1137         VF.isScalar() ? Indices.back() : ConstantVector::get(Indices);
   1138     // Add the consecutive indices to the vector value.
   1139     Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv");
   1140     State.set(getVPSingleValue(), CanonicalVectorIV, Part);
   1141   }
   1142 }
   1143 
   1144 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   1145 void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
   1146                                      VPSlotTracker &SlotTracker) const {
   1147   O << Indent << "EMIT ";
   1148   getVPValue()->printAsOperand(O, SlotTracker);
   1149   O << " = WIDEN-CANONICAL-INDUCTION";
   1150 }
   1151 #endif
   1152 
   1153 template void DomTreeBuilder::Calculate<VPDominatorTree>(VPDominatorTree &DT);
   1154 
   1155 void VPValue::replaceAllUsesWith(VPValue *New) {
   1156   for (unsigned J = 0; J < getNumUsers();) {
   1157     VPUser *User = Users[J];
   1158     unsigned NumUsers = getNumUsers();
   1159     for (unsigned I = 0, E = User->getNumOperands(); I < E; ++I)
   1160       if (User->getOperand(I) == this)
   1161         User->setOperand(I, New);
   1162     // If a user got removed after updating the current user, the next user to
   1163     // update will be moved to the current position, so we only need to
   1164     // increment the index if the number of users did not change.
   1165     if (NumUsers == getNumUsers())
   1166       J++;
   1167   }
   1168 }
   1169 
   1170 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   1171 void VPValue::printAsOperand(raw_ostream &OS, VPSlotTracker &Tracker) const {
   1172   if (const Value *UV = getUnderlyingValue()) {
   1173     OS << "ir<";
   1174     UV->printAsOperand(OS, false);
   1175     OS << ">";
   1176     return;
   1177   }
   1178 
   1179   unsigned Slot = Tracker.getSlot(this);
   1180   if (Slot == unsigned(-1))
   1181     OS << "<badref>";
   1182   else
   1183     OS << "vp<%" << Tracker.getSlot(this) << ">";
   1184 }
   1185 
   1186 void VPUser::printOperands(raw_ostream &O, VPSlotTracker &SlotTracker) const {
   1187   interleaveComma(operands(), O, [&O, &SlotTracker](VPValue *Op) {
   1188     Op->printAsOperand(O, SlotTracker);
   1189   });
   1190 }
   1191 #endif
   1192 
   1193 void VPInterleavedAccessInfo::visitRegion(VPRegionBlock *Region,
   1194                                           Old2NewTy &Old2New,
   1195                                           InterleavedAccessInfo &IAI) {
   1196   ReversePostOrderTraversal<VPBlockBase *> RPOT(Region->getEntry());
   1197   for (VPBlockBase *Base : RPOT) {
   1198     visitBlock(Base, Old2New, IAI);
   1199   }
   1200 }
   1201 
   1202 void VPInterleavedAccessInfo::visitBlock(VPBlockBase *Block, Old2NewTy &Old2New,
   1203                                          InterleavedAccessInfo &IAI) {
   1204   if (VPBasicBlock *VPBB = dyn_cast<VPBasicBlock>(Block)) {
   1205     for (VPRecipeBase &VPI : *VPBB) {
   1206       if (isa<VPWidenPHIRecipe>(&VPI))
   1207         continue;
   1208       assert(isa<VPInstruction>(&VPI) && "Can only handle VPInstructions");
   1209       auto *VPInst = cast<VPInstruction>(&VPI);
   1210       auto *Inst = cast<Instruction>(VPInst->getUnderlyingValue());
   1211       auto *IG = IAI.getInterleaveGroup(Inst);
   1212       if (!IG)
   1213         continue;
   1214 
   1215       auto NewIGIter = Old2New.find(IG);
   1216       if (NewIGIter == Old2New.end())
   1217         Old2New[IG] = new InterleaveGroup<VPInstruction>(
   1218             IG->getFactor(), IG->isReverse(), IG->getAlign());
   1219 
   1220       if (Inst == IG->getInsertPos())
   1221         Old2New[IG]->setInsertPos(VPInst);
   1222 
   1223       InterleaveGroupMap[VPInst] = Old2New[IG];
   1224       InterleaveGroupMap[VPInst]->insertMember(
   1225           VPInst, IG->getIndex(Inst),
   1226           Align(IG->isReverse() ? (-1) * int(IG->getFactor())
   1227                                 : IG->getFactor()));
   1228     }
   1229   } else if (VPRegionBlock *Region = dyn_cast<VPRegionBlock>(Block))
   1230     visitRegion(Region, Old2New, IAI);
   1231   else
   1232     llvm_unreachable("Unsupported kind of VPBlock.");
   1233 }
   1234 
   1235 VPInterleavedAccessInfo::VPInterleavedAccessInfo(VPlan &Plan,
   1236                                                  InterleavedAccessInfo &IAI) {
   1237   Old2NewTy Old2New;
   1238   visitRegion(cast<VPRegionBlock>(Plan.getEntry()), Old2New, IAI);
   1239 }
   1240 
   1241 void VPSlotTracker::assignSlot(const VPValue *V) {
   1242   assert(Slots.find(V) == Slots.end() && "VPValue already has a slot!");
   1243   Slots[V] = NextSlot++;
   1244 }
   1245 
   1246 void VPSlotTracker::assignSlots(const VPlan &Plan) {
   1247 
   1248   for (const VPValue *V : Plan.VPExternalDefs)
   1249     assignSlot(V);
   1250 
   1251   if (Plan.BackedgeTakenCount)
   1252     assignSlot(Plan.BackedgeTakenCount);
   1253 
   1254   ReversePostOrderTraversal<
   1255       VPBlockRecursiveTraversalWrapper<const VPBlockBase *>>
   1256       RPOT(VPBlockRecursiveTraversalWrapper<const VPBlockBase *>(
   1257           Plan.getEntry()));
   1258   for (const VPBasicBlock *VPBB :
   1259        VPBlockUtils::blocksOnly<const VPBasicBlock>(RPOT))
   1260     for (const VPRecipeBase &Recipe : *VPBB)
   1261       for (VPValue *Def : Recipe.definedValues())
   1262         assignSlot(Def);
   1263 }
   1264