Home | History | Annotate | Line # | Download | only in CodeGen
      1 //===- TailDuplicator.cpp - Duplicate blocks into predecessors' tails -----===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This utility class duplicates basic blocks ending in unconditional branches
     10 // into the tails of their predecessors.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "llvm/CodeGen/TailDuplicator.h"
     15 #include "llvm/ADT/DenseMap.h"
     16 #include "llvm/ADT/DenseSet.h"
     17 #include "llvm/ADT/STLExtras.h"
     18 #include "llvm/ADT/SetVector.h"
     19 #include "llvm/ADT/SmallPtrSet.h"
     20 #include "llvm/ADT/SmallVector.h"
     21 #include "llvm/ADT/Statistic.h"
     22 #include "llvm/Analysis/ProfileSummaryInfo.h"
     23 #include "llvm/CodeGen/MachineBasicBlock.h"
     24 #include "llvm/CodeGen/MachineBranchProbabilityInfo.h"
     25 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
     26 #include "llvm/CodeGen/MachineFunction.h"
     27 #include "llvm/CodeGen/MachineInstr.h"
     28 #include "llvm/CodeGen/MachineInstrBuilder.h"
     29 #include "llvm/CodeGen/MachineOperand.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/CodeGen/MachineSizeOpts.h"
     32 #include "llvm/CodeGen/MachineSSAUpdater.h"
     33 #include "llvm/CodeGen/TargetInstrInfo.h"
     34 #include "llvm/CodeGen/TargetRegisterInfo.h"
     35 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     36 #include "llvm/IR/DebugLoc.h"
     37 #include "llvm/IR/Function.h"
     38 #include "llvm/Support/CommandLine.h"
     39 #include "llvm/Support/Debug.h"
     40 #include "llvm/Support/ErrorHandling.h"
     41 #include "llvm/Support/raw_ostream.h"
     42 #include "llvm/Target/TargetMachine.h"
     43 #include <algorithm>
     44 #include <cassert>
     45 #include <iterator>
     46 #include <utility>
     47 
     48 using namespace llvm;
     49 
     50 #define DEBUG_TYPE "tailduplication"
     51 
     52 STATISTIC(NumTails, "Number of tails duplicated");
     53 STATISTIC(NumTailDups, "Number of tail duplicated blocks");
     54 STATISTIC(NumTailDupAdded,
     55           "Number of instructions added due to tail duplication");
     56 STATISTIC(NumTailDupRemoved,
     57           "Number of instructions removed due to tail duplication");
     58 STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
     59 STATISTIC(NumAddedPHIs, "Number of phis added");
     60 
     61 // Heuristic for tail duplication.
     62 static cl::opt<unsigned> TailDuplicateSize(
     63     "tail-dup-size",
     64     cl::desc("Maximum instructions to consider tail duplicating"), cl::init(2),
     65     cl::Hidden);
     66 
     67 static cl::opt<unsigned> TailDupIndirectBranchSize(
     68     "tail-dup-indirect-size",
     69     cl::desc("Maximum instructions to consider tail duplicating blocks that "
     70              "end with indirect branches."), cl::init(20),
     71     cl::Hidden);
     72 
     73 static cl::opt<bool>
     74     TailDupVerify("tail-dup-verify",
     75                   cl::desc("Verify sanity of PHI instructions during taildup"),
     76                   cl::init(false), cl::Hidden);
     77 
     78 static cl::opt<unsigned> TailDupLimit("tail-dup-limit", cl::init(~0U),
     79                                       cl::Hidden);
     80 
     81 void TailDuplicator::initMF(MachineFunction &MFin, bool PreRegAlloc,
     82                             const MachineBranchProbabilityInfo *MBPIin,
     83                             MBFIWrapper *MBFIin,
     84                             ProfileSummaryInfo *PSIin,
     85                             bool LayoutModeIn, unsigned TailDupSizeIn) {
     86   MF = &MFin;
     87   TII = MF->getSubtarget().getInstrInfo();
     88   TRI = MF->getSubtarget().getRegisterInfo();
     89   MRI = &MF->getRegInfo();
     90   MMI = &MF->getMMI();
     91   MBPI = MBPIin;
     92   MBFI = MBFIin;
     93   PSI = PSIin;
     94   TailDupSize = TailDupSizeIn;
     95 
     96   assert(MBPI != nullptr && "Machine Branch Probability Info required");
     97 
     98   LayoutMode = LayoutModeIn;
     99   this->PreRegAlloc = PreRegAlloc;
    100 }
    101 
    102 static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
    103   for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
    104     MachineBasicBlock *MBB = &*I;
    105     SmallSetVector<MachineBasicBlock *, 8> Preds(MBB->pred_begin(),
    106                                                  MBB->pred_end());
    107     MachineBasicBlock::iterator MI = MBB->begin();
    108     while (MI != MBB->end()) {
    109       if (!MI->isPHI())
    110         break;
    111       for (MachineBasicBlock *PredBB : Preds) {
    112         bool Found = false;
    113         for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
    114           MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
    115           if (PHIBB == PredBB) {
    116             Found = true;
    117             break;
    118           }
    119         }
    120         if (!Found) {
    121           dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
    122                  << *MI;
    123           dbgs() << "  missing input from predecessor "
    124                  << printMBBReference(*PredBB) << '\n';
    125           llvm_unreachable(nullptr);
    126         }
    127       }
    128 
    129       for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
    130         MachineBasicBlock *PHIBB = MI->getOperand(i + 1).getMBB();
    131         if (CheckExtra && !Preds.count(PHIBB)) {
    132           dbgs() << "Warning: malformed PHI in " << printMBBReference(*MBB)
    133                  << ": " << *MI;
    134           dbgs() << "  extra input from predecessor "
    135                  << printMBBReference(*PHIBB) << '\n';
    136           llvm_unreachable(nullptr);
    137         }
    138         if (PHIBB->getNumber() < 0) {
    139           dbgs() << "Malformed PHI in " << printMBBReference(*MBB) << ": "
    140                  << *MI;
    141           dbgs() << "  non-existing " << printMBBReference(*PHIBB) << '\n';
    142           llvm_unreachable(nullptr);
    143         }
    144       }
    145       ++MI;
    146     }
    147   }
    148 }
    149 
    150 /// Tail duplicate the block and cleanup.
    151 /// \p IsSimple - return value of isSimpleBB
    152 /// \p MBB - block to be duplicated
    153 /// \p ForcedLayoutPred - If non-null, treat this block as the layout
    154 ///     predecessor, instead of using the ordering in MF
    155 /// \p DuplicatedPreds - if non-null, \p DuplicatedPreds will contain a list of
    156 ///     all Preds that received a copy of \p MBB.
    157 /// \p RemovalCallback - if non-null, called just before MBB is deleted.
    158 bool TailDuplicator::tailDuplicateAndUpdate(
    159     bool IsSimple, MachineBasicBlock *MBB,
    160     MachineBasicBlock *ForcedLayoutPred,
    161     SmallVectorImpl<MachineBasicBlock*> *DuplicatedPreds,
    162     function_ref<void(MachineBasicBlock *)> *RemovalCallback,
    163     SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) {
    164   // Save the successors list.
    165   SmallSetVector<MachineBasicBlock *, 8> Succs(MBB->succ_begin(),
    166                                                MBB->succ_end());
    167 
    168   SmallVector<MachineBasicBlock *, 8> TDBBs;
    169   SmallVector<MachineInstr *, 16> Copies;
    170   if (!tailDuplicate(IsSimple, MBB, ForcedLayoutPred,
    171                      TDBBs, Copies, CandidatePtr))
    172     return false;
    173 
    174   ++NumTails;
    175 
    176   SmallVector<MachineInstr *, 8> NewPHIs;
    177   MachineSSAUpdater SSAUpdate(*MF, &NewPHIs);
    178 
    179   // TailBB's immediate successors are now successors of those predecessors
    180   // which duplicated TailBB. Add the predecessors as sources to the PHI
    181   // instructions.
    182   bool isDead = MBB->pred_empty() && !MBB->hasAddressTaken();
    183   if (PreRegAlloc)
    184     updateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
    185 
    186   // If it is dead, remove it.
    187   if (isDead) {
    188     NumTailDupRemoved += MBB->size();
    189     removeDeadBlock(MBB, RemovalCallback);
    190     ++NumDeadBlocks;
    191   }
    192 
    193   // Update SSA form.
    194   if (!SSAUpdateVRs.empty()) {
    195     for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
    196       unsigned VReg = SSAUpdateVRs[i];
    197       SSAUpdate.Initialize(VReg);
    198 
    199       // If the original definition is still around, add it as an available
    200       // value.
    201       MachineInstr *DefMI = MRI->getVRegDef(VReg);
    202       MachineBasicBlock *DefBB = nullptr;
    203       if (DefMI) {
    204         DefBB = DefMI->getParent();
    205         SSAUpdate.AddAvailableValue(DefBB, VReg);
    206       }
    207 
    208       // Add the new vregs as available values.
    209       DenseMap<Register, AvailableValsTy>::iterator LI =
    210           SSAUpdateVals.find(VReg);
    211       for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
    212         MachineBasicBlock *SrcBB = LI->second[j].first;
    213         Register SrcReg = LI->second[j].second;
    214         SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
    215       }
    216 
    217       // Rewrite uses that are outside of the original def's block.
    218       MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
    219       while (UI != MRI->use_end()) {
    220         MachineOperand &UseMO = *UI;
    221         MachineInstr *UseMI = UseMO.getParent();
    222         ++UI;
    223         if (UseMI->isDebugValue()) {
    224           // SSAUpdate can replace the use with an undef. That creates
    225           // a debug instruction that is a kill.
    226           // FIXME: Should it SSAUpdate job to delete debug instructions
    227           // instead of replacing the use with undef?
    228           UseMI->eraseFromParent();
    229           continue;
    230         }
    231         if (UseMI->getParent() == DefBB && !UseMI->isPHI())
    232           continue;
    233         SSAUpdate.RewriteUse(UseMO);
    234       }
    235     }
    236 
    237     SSAUpdateVRs.clear();
    238     SSAUpdateVals.clear();
    239   }
    240 
    241   // Eliminate some of the copies inserted by tail duplication to maintain
    242   // SSA form.
    243   for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
    244     MachineInstr *Copy = Copies[i];
    245     if (!Copy->isCopy())
    246       continue;
    247     Register Dst = Copy->getOperand(0).getReg();
    248     Register Src = Copy->getOperand(1).getReg();
    249     if (MRI->hasOneNonDBGUse(Src) &&
    250         MRI->constrainRegClass(Src, MRI->getRegClass(Dst))) {
    251       // Copy is the only use. Do trivial copy propagation here.
    252       MRI->replaceRegWith(Dst, Src);
    253       Copy->eraseFromParent();
    254     }
    255   }
    256 
    257   if (NewPHIs.size())
    258     NumAddedPHIs += NewPHIs.size();
    259 
    260   if (DuplicatedPreds)
    261     *DuplicatedPreds = std::move(TDBBs);
    262 
    263   return true;
    264 }
    265 
    266 /// Look for small blocks that are unconditionally branched to and do not fall
    267 /// through. Tail-duplicate their instructions into their predecessors to
    268 /// eliminate (dynamic) branches.
    269 bool TailDuplicator::tailDuplicateBlocks() {
    270   bool MadeChange = false;
    271 
    272   if (PreRegAlloc && TailDupVerify) {
    273     LLVM_DEBUG(dbgs() << "\n*** Before tail-duplicating\n");
    274     VerifyPHIs(*MF, true);
    275   }
    276 
    277   for (MachineFunction::iterator I = ++MF->begin(), E = MF->end(); I != E;) {
    278     MachineBasicBlock *MBB = &*I++;
    279 
    280     if (NumTails == TailDupLimit)
    281       break;
    282 
    283     bool IsSimple = isSimpleBB(MBB);
    284 
    285     if (!shouldTailDuplicate(IsSimple, *MBB))
    286       continue;
    287 
    288     MadeChange |= tailDuplicateAndUpdate(IsSimple, MBB, nullptr);
    289   }
    290 
    291   if (PreRegAlloc && TailDupVerify)
    292     VerifyPHIs(*MF, false);
    293 
    294   return MadeChange;
    295 }
    296 
    297 static bool isDefLiveOut(Register Reg, MachineBasicBlock *BB,
    298                          const MachineRegisterInfo *MRI) {
    299   for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
    300     if (UseMI.isDebugValue())
    301       continue;
    302     if (UseMI.getParent() != BB)
    303       return true;
    304   }
    305   return false;
    306 }
    307 
    308 static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
    309   for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
    310     if (MI->getOperand(i + 1).getMBB() == SrcBB)
    311       return i;
    312   return 0;
    313 }
    314 
    315 // Remember which registers are used by phis in this block. This is
    316 // used to determine which registers are liveout while modifying the
    317 // block (which is why we need to copy the information).
    318 static void getRegsUsedByPHIs(const MachineBasicBlock &BB,
    319                               DenseSet<Register> *UsedByPhi) {
    320   for (const auto &MI : BB) {
    321     if (!MI.isPHI())
    322       break;
    323     for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
    324       Register SrcReg = MI.getOperand(i).getReg();
    325       UsedByPhi->insert(SrcReg);
    326     }
    327   }
    328 }
    329 
    330 /// Add a definition and source virtual registers pair for SSA update.
    331 void TailDuplicator::addSSAUpdateEntry(Register OrigReg, Register NewReg,
    332                                        MachineBasicBlock *BB) {
    333   DenseMap<Register, AvailableValsTy>::iterator LI =
    334       SSAUpdateVals.find(OrigReg);
    335   if (LI != SSAUpdateVals.end())
    336     LI->second.push_back(std::make_pair(BB, NewReg));
    337   else {
    338     AvailableValsTy Vals;
    339     Vals.push_back(std::make_pair(BB, NewReg));
    340     SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
    341     SSAUpdateVRs.push_back(OrigReg);
    342   }
    343 }
    344 
    345 /// Process PHI node in TailBB by turning it into a copy in PredBB. Remember the
    346 /// source register that's contributed by PredBB and update SSA update map.
    347 void TailDuplicator::processPHI(
    348     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
    349     DenseMap<Register, RegSubRegPair> &LocalVRMap,
    350     SmallVectorImpl<std::pair<Register, RegSubRegPair>> &Copies,
    351     const DenseSet<Register> &RegsUsedByPhi, bool Remove) {
    352   Register DefReg = MI->getOperand(0).getReg();
    353   unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
    354   assert(SrcOpIdx && "Unable to find matching PHI source?");
    355   Register SrcReg = MI->getOperand(SrcOpIdx).getReg();
    356   unsigned SrcSubReg = MI->getOperand(SrcOpIdx).getSubReg();
    357   const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
    358   LocalVRMap.insert(std::make_pair(DefReg, RegSubRegPair(SrcReg, SrcSubReg)));
    359 
    360   // Insert a copy from source to the end of the block. The def register is the
    361   // available value liveout of the block.
    362   Register NewDef = MRI->createVirtualRegister(RC);
    363   Copies.push_back(std::make_pair(NewDef, RegSubRegPair(SrcReg, SrcSubReg)));
    364   if (isDefLiveOut(DefReg, TailBB, MRI) || RegsUsedByPhi.count(DefReg))
    365     addSSAUpdateEntry(DefReg, NewDef, PredBB);
    366 
    367   if (!Remove)
    368     return;
    369 
    370   // Remove PredBB from the PHI node.
    371   MI->RemoveOperand(SrcOpIdx + 1);
    372   MI->RemoveOperand(SrcOpIdx);
    373   if (MI->getNumOperands() == 1)
    374     MI->eraseFromParent();
    375 }
    376 
    377 /// Duplicate a TailBB instruction to PredBB and update
    378 /// the source operands due to earlier PHI translation.
    379 void TailDuplicator::duplicateInstruction(
    380     MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB,
    381     DenseMap<Register, RegSubRegPair> &LocalVRMap,
    382     const DenseSet<Register> &UsedByPhi) {
    383   // Allow duplication of CFI instructions.
    384   if (MI->isCFIInstruction()) {
    385     BuildMI(*PredBB, PredBB->end(), PredBB->findDebugLoc(PredBB->begin()),
    386       TII->get(TargetOpcode::CFI_INSTRUCTION)).addCFIIndex(
    387       MI->getOperand(0).getCFIIndex());
    388     return;
    389   }
    390   MachineInstr &NewMI = TII->duplicate(*PredBB, PredBB->end(), *MI);
    391   if (PreRegAlloc) {
    392     for (unsigned i = 0, e = NewMI.getNumOperands(); i != e; ++i) {
    393       MachineOperand &MO = NewMI.getOperand(i);
    394       if (!MO.isReg())
    395         continue;
    396       Register Reg = MO.getReg();
    397       if (!Register::isVirtualRegister(Reg))
    398         continue;
    399       if (MO.isDef()) {
    400         const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    401         Register NewReg = MRI->createVirtualRegister(RC);
    402         MO.setReg(NewReg);
    403         LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
    404         if (isDefLiveOut(Reg, TailBB, MRI) || UsedByPhi.count(Reg))
    405           addSSAUpdateEntry(Reg, NewReg, PredBB);
    406       } else {
    407         auto VI = LocalVRMap.find(Reg);
    408         if (VI != LocalVRMap.end()) {
    409           // Need to make sure that the register class of the mapped register
    410           // will satisfy the constraints of the class of the register being
    411           // replaced.
    412           auto *OrigRC = MRI->getRegClass(Reg);
    413           auto *MappedRC = MRI->getRegClass(VI->second.Reg);
    414           const TargetRegisterClass *ConstrRC;
    415           if (VI->second.SubReg != 0) {
    416             ConstrRC = TRI->getMatchingSuperRegClass(MappedRC, OrigRC,
    417                                                      VI->second.SubReg);
    418             if (ConstrRC) {
    419               // The actual constraining (as in "find appropriate new class")
    420               // is done by getMatchingSuperRegClass, so now we only need to
    421               // change the class of the mapped register.
    422               MRI->setRegClass(VI->second.Reg, ConstrRC);
    423             }
    424           } else {
    425             // For mapped registers that do not have sub-registers, simply
    426             // restrict their class to match the original one.
    427             ConstrRC = MRI->constrainRegClass(VI->second.Reg, OrigRC);
    428           }
    429 
    430           if (ConstrRC) {
    431             // If the class constraining succeeded, we can simply replace
    432             // the old register with the mapped one.
    433             MO.setReg(VI->second.Reg);
    434             // We have Reg -> VI.Reg:VI.SubReg, so if Reg is used with a
    435             // sub-register, we need to compose the sub-register indices.
    436             MO.setSubReg(TRI->composeSubRegIndices(MO.getSubReg(),
    437                                                    VI->second.SubReg));
    438           } else {
    439             // The direct replacement is not possible, due to failing register
    440             // class constraints. An explicit COPY is necessary. Create one
    441             // that can be reused
    442             auto *NewRC = MI->getRegClassConstraint(i, TII, TRI);
    443             if (NewRC == nullptr)
    444               NewRC = OrigRC;
    445             Register NewReg = MRI->createVirtualRegister(NewRC);
    446             BuildMI(*PredBB, NewMI, NewMI.getDebugLoc(),
    447                     TII->get(TargetOpcode::COPY), NewReg)
    448                 .addReg(VI->second.Reg, 0, VI->second.SubReg);
    449             LocalVRMap.erase(VI);
    450             LocalVRMap.insert(std::make_pair(Reg, RegSubRegPair(NewReg, 0)));
    451             MO.setReg(NewReg);
    452             // The composed VI.Reg:VI.SubReg is replaced with NewReg, which
    453             // is equivalent to the whole register Reg. Hence, Reg:subreg
    454             // is same as NewReg:subreg, so keep the sub-register index
    455             // unchanged.
    456           }
    457           // Clear any kill flags from this operand.  The new register could
    458           // have uses after this one, so kills are not valid here.
    459           MO.setIsKill(false);
    460         }
    461       }
    462     }
    463   }
    464 }
    465 
    466 /// After FromBB is tail duplicated into its predecessor blocks, the successors
    467 /// have gained new predecessors. Update the PHI instructions in them
    468 /// accordingly.
    469 void TailDuplicator::updateSuccessorsPHIs(
    470     MachineBasicBlock *FromBB, bool isDead,
    471     SmallVectorImpl<MachineBasicBlock *> &TDBBs,
    472     SmallSetVector<MachineBasicBlock *, 8> &Succs) {
    473   for (MachineBasicBlock *SuccBB : Succs) {
    474     for (MachineInstr &MI : *SuccBB) {
    475       if (!MI.isPHI())
    476         break;
    477       MachineInstrBuilder MIB(*FromBB->getParent(), MI);
    478       unsigned Idx = 0;
    479       for (unsigned i = 1, e = MI.getNumOperands(); i != e; i += 2) {
    480         MachineOperand &MO = MI.getOperand(i + 1);
    481         if (MO.getMBB() == FromBB) {
    482           Idx = i;
    483           break;
    484         }
    485       }
    486 
    487       assert(Idx != 0);
    488       MachineOperand &MO0 = MI.getOperand(Idx);
    489       Register Reg = MO0.getReg();
    490       if (isDead) {
    491         // Folded into the previous BB.
    492         // There could be duplicate phi source entries. FIXME: Should sdisel
    493         // or earlier pass fixed this?
    494         for (unsigned i = MI.getNumOperands() - 2; i != Idx; i -= 2) {
    495           MachineOperand &MO = MI.getOperand(i + 1);
    496           if (MO.getMBB() == FromBB) {
    497             MI.RemoveOperand(i + 1);
    498             MI.RemoveOperand(i);
    499           }
    500         }
    501       } else
    502         Idx = 0;
    503 
    504       // If Idx is set, the operands at Idx and Idx+1 must be removed.
    505       // We reuse the location to avoid expensive RemoveOperand calls.
    506 
    507       DenseMap<Register, AvailableValsTy>::iterator LI =
    508           SSAUpdateVals.find(Reg);
    509       if (LI != SSAUpdateVals.end()) {
    510         // This register is defined in the tail block.
    511         for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
    512           MachineBasicBlock *SrcBB = LI->second[j].first;
    513           // If we didn't duplicate a bb into a particular predecessor, we
    514           // might still have added an entry to SSAUpdateVals to correcly
    515           // recompute SSA. If that case, avoid adding a dummy extra argument
    516           // this PHI.
    517           if (!SrcBB->isSuccessor(SuccBB))
    518             continue;
    519 
    520           Register SrcReg = LI->second[j].second;
    521           if (Idx != 0) {
    522             MI.getOperand(Idx).setReg(SrcReg);
    523             MI.getOperand(Idx + 1).setMBB(SrcBB);
    524             Idx = 0;
    525           } else {
    526             MIB.addReg(SrcReg).addMBB(SrcBB);
    527           }
    528         }
    529       } else {
    530         // Live in tail block, must also be live in predecessors.
    531         for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
    532           MachineBasicBlock *SrcBB = TDBBs[j];
    533           if (Idx != 0) {
    534             MI.getOperand(Idx).setReg(Reg);
    535             MI.getOperand(Idx + 1).setMBB(SrcBB);
    536             Idx = 0;
    537           } else {
    538             MIB.addReg(Reg).addMBB(SrcBB);
    539           }
    540         }
    541       }
    542       if (Idx != 0) {
    543         MI.RemoveOperand(Idx + 1);
    544         MI.RemoveOperand(Idx);
    545       }
    546     }
    547   }
    548 }
    549 
    550 /// Determine if it is profitable to duplicate this block.
    551 bool TailDuplicator::shouldTailDuplicate(bool IsSimple,
    552                                          MachineBasicBlock &TailBB) {
    553   // When doing tail-duplication during layout, the block ordering is in flux,
    554   // so canFallThrough returns a result based on incorrect information and
    555   // should just be ignored.
    556   if (!LayoutMode && TailBB.canFallThrough())
    557     return false;
    558 
    559   // Don't try to tail-duplicate single-block loops.
    560   if (TailBB.isSuccessor(&TailBB))
    561     return false;
    562 
    563   // Set the limit on the cost to duplicate. When optimizing for size,
    564   // duplicate only one, because one branch instruction can be eliminated to
    565   // compensate for the duplication.
    566   unsigned MaxDuplicateCount;
    567   bool OptForSize = MF->getFunction().hasOptSize() ||
    568                     llvm::shouldOptimizeForSize(&TailBB, PSI, MBFI);
    569   if (TailDupSize == 0)
    570     MaxDuplicateCount = TailDuplicateSize;
    571   else
    572     MaxDuplicateCount = TailDupSize;
    573   if (OptForSize)
    574     MaxDuplicateCount = 1;
    575 
    576   // If the block to be duplicated ends in an unanalyzable fallthrough, don't
    577   // duplicate it.
    578   // A similar check is necessary in MachineBlockPlacement to make sure pairs of
    579   // blocks with unanalyzable fallthrough get layed out contiguously.
    580   MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
    581   SmallVector<MachineOperand, 4> PredCond;
    582   if (TII->analyzeBranch(TailBB, PredTBB, PredFBB, PredCond) &&
    583       TailBB.canFallThrough())
    584     return false;
    585 
    586   // If the target has hardware branch prediction that can handle indirect
    587   // branches, duplicating them can often make them predictable when there
    588   // are common paths through the code.  The limit needs to be high enough
    589   // to allow undoing the effects of tail merging and other optimizations
    590   // that rearrange the predecessors of the indirect branch.
    591 
    592   bool HasIndirectbr = false;
    593   if (!TailBB.empty())
    594     HasIndirectbr = TailBB.back().isIndirectBranch();
    595 
    596   if (HasIndirectbr && PreRegAlloc)
    597     MaxDuplicateCount = TailDupIndirectBranchSize;
    598 
    599   // Check the instructions in the block to determine whether tail-duplication
    600   // is invalid or unlikely to be profitable.
    601   unsigned InstrCount = 0;
    602   for (MachineInstr &MI : TailBB) {
    603     // Non-duplicable things shouldn't be tail-duplicated.
    604     // CFI instructions are marked as non-duplicable, because Darwin compact
    605     // unwind info emission can't handle multiple prologue setups. In case of
    606     // DWARF, allow them be duplicated, so that their existence doesn't prevent
    607     // tail duplication of some basic blocks, that would be duplicated otherwise.
    608     if (MI.isNotDuplicable() &&
    609         (TailBB.getParent()->getTarget().getTargetTriple().isOSDarwin() ||
    610         !MI.isCFIInstruction()))
    611       return false;
    612 
    613     // Convergent instructions can be duplicated only if doing so doesn't add
    614     // new control dependencies, which is what we're going to do here.
    615     if (MI.isConvergent())
    616       return false;
    617 
    618     // Do not duplicate 'return' instructions if this is a pre-regalloc run.
    619     // A return may expand into a lot more instructions (e.g. reload of callee
    620     // saved registers) after PEI.
    621     if (PreRegAlloc && MI.isReturn())
    622       return false;
    623 
    624     // Avoid duplicating calls before register allocation. Calls presents a
    625     // barrier to register allocation so duplicating them may end up increasing
    626     // spills.
    627     if (PreRegAlloc && MI.isCall())
    628       return false;
    629 
    630     // TailDuplicator::appendCopies will erroneously place COPYs after
    631     // INLINEASM_BR instructions after 4b0aa5724fea, which demonstrates the same
    632     // bug that was fixed in f7a53d82c090.
    633     // FIXME: Use findPHICopyInsertPoint() to find the correct insertion point
    634     //        for the COPY when replacing PHIs.
    635     if (MI.getOpcode() == TargetOpcode::INLINEASM_BR)
    636       return false;
    637 
    638     if (MI.isBundle())
    639       InstrCount += MI.getBundleSize();
    640     else if (!MI.isPHI() && !MI.isMetaInstruction())
    641       InstrCount += 1;
    642 
    643     if (InstrCount > MaxDuplicateCount)
    644       return false;
    645   }
    646 
    647   // Check if any of the successors of TailBB has a PHI node in which the
    648   // value corresponding to TailBB uses a subregister.
    649   // If a phi node uses a register paired with a subregister, the actual
    650   // "value type" of the phi may differ from the type of the register without
    651   // any subregisters. Due to a bug, tail duplication may add a new operand
    652   // without a necessary subregister, producing an invalid code. This is
    653   // demonstrated by test/CodeGen/Hexagon/tail-dup-subreg-abort.ll.
    654   // Disable tail duplication for this case for now, until the problem is
    655   // fixed.
    656   for (auto SB : TailBB.successors()) {
    657     for (auto &I : *SB) {
    658       if (!I.isPHI())
    659         break;
    660       unsigned Idx = getPHISrcRegOpIdx(&I, &TailBB);
    661       assert(Idx != 0);
    662       MachineOperand &PU = I.getOperand(Idx);
    663       if (PU.getSubReg() != 0)
    664         return false;
    665     }
    666   }
    667 
    668   if (HasIndirectbr && PreRegAlloc)
    669     return true;
    670 
    671   if (IsSimple)
    672     return true;
    673 
    674   if (!PreRegAlloc)
    675     return true;
    676 
    677   return canCompletelyDuplicateBB(TailBB);
    678 }
    679 
    680 /// True if this BB has only one unconditional jump.
    681 bool TailDuplicator::isSimpleBB(MachineBasicBlock *TailBB) {
    682   if (TailBB->succ_size() != 1)
    683     return false;
    684   if (TailBB->pred_empty())
    685     return false;
    686   MachineBasicBlock::iterator I = TailBB->getFirstNonDebugInstr(true);
    687   if (I == TailBB->end())
    688     return true;
    689   return I->isUnconditionalBranch();
    690 }
    691 
    692 static bool bothUsedInPHI(const MachineBasicBlock &A,
    693                           const SmallPtrSet<MachineBasicBlock *, 8> &SuccsB) {
    694   for (MachineBasicBlock *BB : A.successors())
    695     if (SuccsB.count(BB) && !BB->empty() && BB->begin()->isPHI())
    696       return true;
    697 
    698   return false;
    699 }
    700 
    701 bool TailDuplicator::canCompletelyDuplicateBB(MachineBasicBlock &BB) {
    702   for (MachineBasicBlock *PredBB : BB.predecessors()) {
    703     if (PredBB->succ_size() > 1)
    704       return false;
    705 
    706     MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
    707     SmallVector<MachineOperand, 4> PredCond;
    708     if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
    709       return false;
    710 
    711     if (!PredCond.empty())
    712       return false;
    713   }
    714   return true;
    715 }
    716 
    717 bool TailDuplicator::duplicateSimpleBB(
    718     MachineBasicBlock *TailBB, SmallVectorImpl<MachineBasicBlock *> &TDBBs,
    719     const DenseSet<Register> &UsedByPhi,
    720     SmallVectorImpl<MachineInstr *> &Copies) {
    721   SmallPtrSet<MachineBasicBlock *, 8> Succs(TailBB->succ_begin(),
    722                                             TailBB->succ_end());
    723   SmallVector<MachineBasicBlock *, 8> Preds(TailBB->predecessors());
    724   bool Changed = false;
    725   for (MachineBasicBlock *PredBB : Preds) {
    726     if (PredBB->hasEHPadSuccessor() || PredBB->mayHaveInlineAsmBr())
    727       continue;
    728 
    729     if (bothUsedInPHI(*PredBB, Succs))
    730       continue;
    731 
    732     MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
    733     SmallVector<MachineOperand, 4> PredCond;
    734     if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
    735       continue;
    736 
    737     Changed = true;
    738     LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
    739                       << "From simple Succ: " << *TailBB);
    740 
    741     MachineBasicBlock *NewTarget = *TailBB->succ_begin();
    742     MachineBasicBlock *NextBB = PredBB->getNextNode();
    743 
    744     // Make PredFBB explicit.
    745     if (PredCond.empty())
    746       PredFBB = PredTBB;
    747 
    748     // Make fall through explicit.
    749     if (!PredTBB)
    750       PredTBB = NextBB;
    751     if (!PredFBB)
    752       PredFBB = NextBB;
    753 
    754     // Redirect
    755     if (PredFBB == TailBB)
    756       PredFBB = NewTarget;
    757     if (PredTBB == TailBB)
    758       PredTBB = NewTarget;
    759 
    760     // Make the branch unconditional if possible
    761     if (PredTBB == PredFBB) {
    762       PredCond.clear();
    763       PredFBB = nullptr;
    764     }
    765 
    766     // Avoid adding fall through branches.
    767     if (PredFBB == NextBB)
    768       PredFBB = nullptr;
    769     if (PredTBB == NextBB && PredFBB == nullptr)
    770       PredTBB = nullptr;
    771 
    772     auto DL = PredBB->findBranchDebugLoc();
    773     TII->removeBranch(*PredBB);
    774 
    775     if (!PredBB->isSuccessor(NewTarget))
    776       PredBB->replaceSuccessor(TailBB, NewTarget);
    777     else {
    778       PredBB->removeSuccessor(TailBB, true);
    779       assert(PredBB->succ_size() <= 1);
    780     }
    781 
    782     // For AutoFDO, since BB is going to be removed, we won't be able to sample
    783     // it. To avoid assigning a zero weight for BB, move all its pseudo probes
    784     // into Succ and mark them dangling. This should allow the counts inference
    785     // a chance to get a more reasonable weight for BB.
    786     TailBB->moveAndDanglePseudoProbes(PredBB);
    787 
    788     if (PredTBB)
    789       TII->insertBranch(*PredBB, PredTBB, PredFBB, PredCond, DL);
    790 
    791     TDBBs.push_back(PredBB);
    792   }
    793   return Changed;
    794 }
    795 
    796 bool TailDuplicator::canTailDuplicate(MachineBasicBlock *TailBB,
    797                                       MachineBasicBlock *PredBB) {
    798   // EH edges are ignored by analyzeBranch.
    799   if (PredBB->succ_size() > 1)
    800     return false;
    801 
    802   MachineBasicBlock *PredTBB = nullptr, *PredFBB = nullptr;
    803   SmallVector<MachineOperand, 4> PredCond;
    804   if (TII->analyzeBranch(*PredBB, PredTBB, PredFBB, PredCond))
    805     return false;
    806   if (!PredCond.empty())
    807     return false;
    808   return true;
    809 }
    810 
    811 /// If it is profitable, duplicate TailBB's contents in each
    812 /// of its predecessors.
    813 /// \p IsSimple result of isSimpleBB
    814 /// \p TailBB   Block to be duplicated.
    815 /// \p ForcedLayoutPred  When non-null, use this block as the layout predecessor
    816 ///                      instead of the previous block in MF's order.
    817 /// \p TDBBs             A vector to keep track of all blocks tail-duplicated
    818 ///                      into.
    819 /// \p Copies            A vector of copy instructions inserted. Used later to
    820 ///                      walk all the inserted copies and remove redundant ones.
    821 bool TailDuplicator::tailDuplicate(bool IsSimple, MachineBasicBlock *TailBB,
    822                           MachineBasicBlock *ForcedLayoutPred,
    823                           SmallVectorImpl<MachineBasicBlock *> &TDBBs,
    824                           SmallVectorImpl<MachineInstr *> &Copies,
    825                           SmallVectorImpl<MachineBasicBlock *> *CandidatePtr) {
    826   LLVM_DEBUG(dbgs() << "\n*** Tail-duplicating " << printMBBReference(*TailBB)
    827                     << '\n');
    828 
    829   bool ShouldUpdateTerminators = TailBB->canFallThrough();
    830 
    831   DenseSet<Register> UsedByPhi;
    832   getRegsUsedByPHIs(*TailBB, &UsedByPhi);
    833 
    834   if (IsSimple)
    835     return duplicateSimpleBB(TailBB, TDBBs, UsedByPhi, Copies);
    836 
    837   // Iterate through all the unique predecessors and tail-duplicate this
    838   // block into them, if possible. Copying the list ahead of time also
    839   // avoids trouble with the predecessor list reallocating.
    840   bool Changed = false;
    841   SmallSetVector<MachineBasicBlock *, 8> Preds;
    842   if (CandidatePtr)
    843     Preds.insert(CandidatePtr->begin(), CandidatePtr->end());
    844   else
    845     Preds.insert(TailBB->pred_begin(), TailBB->pred_end());
    846 
    847   for (MachineBasicBlock *PredBB : Preds) {
    848     assert(TailBB != PredBB &&
    849            "Single-block loop should have been rejected earlier!");
    850 
    851     if (!canTailDuplicate(TailBB, PredBB))
    852       continue;
    853 
    854     // Don't duplicate into a fall-through predecessor (at least for now).
    855     // If profile is available, findDuplicateCandidates can choose better
    856     // fall-through predecessor.
    857     if (!(MF->getFunction().hasProfileData() && LayoutMode)) {
    858       bool IsLayoutSuccessor = false;
    859       if (ForcedLayoutPred)
    860         IsLayoutSuccessor = (ForcedLayoutPred == PredBB);
    861       else if (PredBB->isLayoutSuccessor(TailBB) && PredBB->canFallThrough())
    862         IsLayoutSuccessor = true;
    863       if (IsLayoutSuccessor)
    864         continue;
    865     }
    866 
    867     LLVM_DEBUG(dbgs() << "\nTail-duplicating into PredBB: " << *PredBB
    868                       << "From Succ: " << *TailBB);
    869 
    870     TDBBs.push_back(PredBB);
    871 
    872     // Remove PredBB's unconditional branch.
    873     TII->removeBranch(*PredBB);
    874 
    875     // Clone the contents of TailBB into PredBB.
    876     DenseMap<Register, RegSubRegPair> LocalVRMap;
    877     SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
    878     for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
    879          I != E; /* empty */) {
    880       MachineInstr *MI = &*I;
    881       ++I;
    882       if (MI->isPHI()) {
    883         // Replace the uses of the def of the PHI with the register coming
    884         // from PredBB.
    885         processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, true);
    886       } else {
    887         // Replace def of virtual registers with new registers, and update
    888         // uses with PHI source register or the new registers.
    889         duplicateInstruction(MI, TailBB, PredBB, LocalVRMap, UsedByPhi);
    890       }
    891     }
    892     appendCopies(PredBB, CopyInfos, Copies);
    893 
    894     NumTailDupAdded += TailBB->size() - 1; // subtract one for removed branch
    895 
    896     // Update the CFG.
    897     PredBB->removeSuccessor(PredBB->succ_begin());
    898     assert(PredBB->succ_empty() &&
    899            "TailDuplicate called on block with multiple successors!");
    900     for (MachineBasicBlock *Succ : TailBB->successors())
    901       PredBB->addSuccessor(Succ, MBPI->getEdgeProbability(TailBB, Succ));
    902 
    903     // Update branches in pred to jump to tail's layout successor if needed.
    904     if (ShouldUpdateTerminators)
    905       PredBB->updateTerminator(TailBB->getNextNode());
    906 
    907     Changed = true;
    908     ++NumTailDups;
    909   }
    910 
    911   // If TailBB was duplicated into all its predecessors except for the prior
    912   // block, which falls through unconditionally, move the contents of this
    913   // block into the prior block.
    914   MachineBasicBlock *PrevBB = ForcedLayoutPred;
    915   if (!PrevBB)
    916     PrevBB = &*std::prev(TailBB->getIterator());
    917   MachineBasicBlock *PriorTBB = nullptr, *PriorFBB = nullptr;
    918   SmallVector<MachineOperand, 4> PriorCond;
    919   // This has to check PrevBB->succ_size() because EH edges are ignored by
    920   // analyzeBranch.
    921   if (PrevBB->succ_size() == 1 &&
    922       // Layout preds are not always CFG preds. Check.
    923       *PrevBB->succ_begin() == TailBB &&
    924       !TII->analyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond) &&
    925       PriorCond.empty() &&
    926       (!PriorTBB || PriorTBB == TailBB) &&
    927       TailBB->pred_size() == 1 &&
    928       !TailBB->hasAddressTaken()) {
    929     LLVM_DEBUG(dbgs() << "\nMerging into block: " << *PrevBB
    930                       << "From MBB: " << *TailBB);
    931     // There may be a branch to the layout successor. This is unlikely but it
    932     // happens. The correct thing to do is to remove the branch before
    933     // duplicating the instructions in all cases.
    934     TII->removeBranch(*PrevBB);
    935     if (PreRegAlloc) {
    936       DenseMap<Register, RegSubRegPair> LocalVRMap;
    937       SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
    938       MachineBasicBlock::iterator I = TailBB->begin();
    939       // Process PHI instructions first.
    940       while (I != TailBB->end() && I->isPHI()) {
    941         // Replace the uses of the def of the PHI with the register coming
    942         // from PredBB.
    943         MachineInstr *MI = &*I++;
    944         processPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos, UsedByPhi, true);
    945       }
    946 
    947       // Now copy the non-PHI instructions.
    948       while (I != TailBB->end()) {
    949         // Replace def of virtual registers with new registers, and update
    950         // uses with PHI source register or the new registers.
    951         MachineInstr *MI = &*I++;
    952         assert(!MI->isBundle() && "Not expecting bundles before regalloc!");
    953         duplicateInstruction(MI, TailBB, PrevBB, LocalVRMap, UsedByPhi);
    954         MI->eraseFromParent();
    955       }
    956       appendCopies(PrevBB, CopyInfos, Copies);
    957     } else {
    958       TII->removeBranch(*PrevBB);
    959       // No PHIs to worry about, just splice the instructions over.
    960       PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
    961     }
    962     PrevBB->removeSuccessor(PrevBB->succ_begin());
    963     assert(PrevBB->succ_empty());
    964     PrevBB->transferSuccessors(TailBB);
    965 
    966     // Update branches in PrevBB based on Tail's layout successor.
    967     if (ShouldUpdateTerminators)
    968       PrevBB->updateTerminator(TailBB->getNextNode());
    969 
    970     TDBBs.push_back(PrevBB);
    971     Changed = true;
    972   }
    973 
    974   // If this is after register allocation, there are no phis to fix.
    975   if (!PreRegAlloc)
    976     return Changed;
    977 
    978   // If we made no changes so far, we are safe.
    979   if (!Changed)
    980     return Changed;
    981 
    982   // Handle the nasty case in that we duplicated a block that is part of a loop
    983   // into some but not all of its predecessors. For example:
    984   //    1 -> 2 <-> 3                 |
    985   //          \                      |
    986   //           \---> rest            |
    987   // if we duplicate 2 into 1 but not into 3, we end up with
    988   // 12 -> 3 <-> 2 -> rest           |
    989   //   \             /               |
    990   //    \----->-----/                |
    991   // If there was a "var = phi(1, 3)" in 2, it has to be ultimately replaced
    992   // with a phi in 3 (which now dominates 2).
    993   // What we do here is introduce a copy in 3 of the register defined by the
    994   // phi, just like when we are duplicating 2 into 3, but we don't copy any
    995   // real instructions or remove the 3 -> 2 edge from the phi in 2.
    996   for (MachineBasicBlock *PredBB : Preds) {
    997     if (is_contained(TDBBs, PredBB))
    998       continue;
    999 
   1000     // EH edges
   1001     if (PredBB->succ_size() != 1)
   1002       continue;
   1003 
   1004     DenseMap<Register, RegSubRegPair> LocalVRMap;
   1005     SmallVector<std::pair<Register, RegSubRegPair>, 4> CopyInfos;
   1006     MachineBasicBlock::iterator I = TailBB->begin();
   1007     // Process PHI instructions first.
   1008     while (I != TailBB->end() && I->isPHI()) {
   1009       // Replace the uses of the def of the PHI with the register coming
   1010       // from PredBB.
   1011       MachineInstr *MI = &*I++;
   1012       processPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos, UsedByPhi, false);
   1013     }
   1014     appendCopies(PredBB, CopyInfos, Copies);
   1015   }
   1016 
   1017   return Changed;
   1018 }
   1019 
   1020 /// At the end of the block \p MBB generate COPY instructions between registers
   1021 /// described by \p CopyInfos. Append resulting instructions to \p Copies.
   1022 void TailDuplicator::appendCopies(MachineBasicBlock *MBB,
   1023       SmallVectorImpl<std::pair<Register, RegSubRegPair>> &CopyInfos,
   1024       SmallVectorImpl<MachineInstr*> &Copies) {
   1025   MachineBasicBlock::iterator Loc = MBB->getFirstTerminator();
   1026   const MCInstrDesc &CopyD = TII->get(TargetOpcode::COPY);
   1027   for (auto &CI : CopyInfos) {
   1028     auto C = BuildMI(*MBB, Loc, DebugLoc(), CopyD, CI.first)
   1029                 .addReg(CI.second.Reg, 0, CI.second.SubReg);
   1030     Copies.push_back(C);
   1031   }
   1032 }
   1033 
   1034 /// Remove the specified dead machine basic block from the function, updating
   1035 /// the CFG.
   1036 void TailDuplicator::removeDeadBlock(
   1037     MachineBasicBlock *MBB,
   1038     function_ref<void(MachineBasicBlock *)> *RemovalCallback) {
   1039   assert(MBB->pred_empty() && "MBB must be dead!");
   1040   LLVM_DEBUG(dbgs() << "\nRemoving MBB: " << *MBB);
   1041 
   1042   MachineFunction *MF = MBB->getParent();
   1043   // Update the call site info.
   1044   for (const MachineInstr &MI : *MBB)
   1045     if (MI.shouldUpdateCallSiteInfo())
   1046       MF->eraseCallSiteInfo(&MI);
   1047 
   1048   if (RemovalCallback)
   1049     (*RemovalCallback)(MBB);
   1050 
   1051   // Remove all successors.
   1052   while (!MBB->succ_empty())
   1053     MBB->removeSuccessor(MBB->succ_end() - 1);
   1054 
   1055   // Remove the block.
   1056   MBB->eraseFromParent();
   1057 }
   1058