Home | History | Annotate | Line # | Download | only in PowerPC
      1 //===-------------- PPCMIPeephole.cpp - MI Peephole Cleanups -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===---------------------------------------------------------------------===//
      8 //
      9 // This pass performs peephole optimizations to clean up ugly code
     10 // sequences at the MachineInstruction layer.  It runs at the end of
     11 // the SSA phases, following VSX swap removal.  A pass of dead code
     12 // elimination follows this one for quick clean-up of any dead
     13 // instructions introduced here.  Although we could do this as callbacks
     14 // from the generic peephole pass, this would have a couple of bad
     15 // effects:  it might remove optimization opportunities for VSX swap
     16 // removal, and it would miss cleanups made possible following VSX
     17 // swap removal.
     18 //
     19 //===---------------------------------------------------------------------===//
     20 
     21 #include "MCTargetDesc/PPCMCTargetDesc.h"
     22 #include "MCTargetDesc/PPCPredicates.h"
     23 #include "PPC.h"
     24 #include "PPCInstrBuilder.h"
     25 #include "PPCInstrInfo.h"
     26 #include "PPCMachineFunctionInfo.h"
     27 #include "PPCTargetMachine.h"
     28 #include "llvm/ADT/Statistic.h"
     29 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
     30 #include "llvm/CodeGen/MachineDominators.h"
     31 #include "llvm/CodeGen/MachineFunctionPass.h"
     32 #include "llvm/CodeGen/MachineInstrBuilder.h"
     33 #include "llvm/CodeGen/MachinePostDominators.h"
     34 #include "llvm/CodeGen/MachineRegisterInfo.h"
     35 #include "llvm/InitializePasses.h"
     36 #include "llvm/Support/Debug.h"
     37 
     38 using namespace llvm;
     39 
     40 #define DEBUG_TYPE "ppc-mi-peepholes"
     41 
     42 STATISTIC(RemoveTOCSave, "Number of TOC saves removed");
     43 STATISTIC(MultiTOCSaves,
     44           "Number of functions with multiple TOC saves that must be kept");
     45 STATISTIC(NumTOCSavesInPrologue, "Number of TOC saves placed in the prologue");
     46 STATISTIC(NumEliminatedSExt, "Number of eliminated sign-extensions");
     47 STATISTIC(NumEliminatedZExt, "Number of eliminated zero-extensions");
     48 STATISTIC(NumOptADDLIs, "Number of optimized ADD instruction fed by LI");
     49 STATISTIC(NumConvertedToImmediateForm,
     50           "Number of instructions converted to their immediate form");
     51 STATISTIC(NumFunctionsEnteredInMIPeephole,
     52           "Number of functions entered in PPC MI Peepholes");
     53 STATISTIC(NumFixedPointIterations,
     54           "Number of fixed-point iterations converting reg-reg instructions "
     55           "to reg-imm ones");
     56 STATISTIC(NumRotatesCollapsed,
     57           "Number of pairs of rotate left, clear left/right collapsed");
     58 STATISTIC(NumEXTSWAndSLDICombined,
     59           "Number of pairs of EXTSW and SLDI combined as EXTSWSLI");
     60 STATISTIC(NumLoadImmZeroFoldedAndRemoved,
     61           "Number of LI(8) reg, 0 that are folded to r0 and removed");
     62 
     63 static cl::opt<bool>
     64 FixedPointRegToImm("ppc-reg-to-imm-fixed-point", cl::Hidden, cl::init(true),
     65                    cl::desc("Iterate to a fixed point when attempting to "
     66                             "convert reg-reg instructions to reg-imm"));
     67 
     68 static cl::opt<bool>
     69 ConvertRegReg("ppc-convert-rr-to-ri", cl::Hidden, cl::init(true),
     70               cl::desc("Convert eligible reg+reg instructions to reg+imm"));
     71 
     72 static cl::opt<bool>
     73     EnableSExtElimination("ppc-eliminate-signext",
     74                           cl::desc("enable elimination of sign-extensions"),
     75                           cl::init(false), cl::Hidden);
     76 
     77 static cl::opt<bool>
     78     EnableZExtElimination("ppc-eliminate-zeroext",
     79                           cl::desc("enable elimination of zero-extensions"),
     80                           cl::init(false), cl::Hidden);
     81 
     82 namespace {
     83 
     84 struct PPCMIPeephole : public MachineFunctionPass {
     85 
     86   static char ID;
     87   const PPCInstrInfo *TII;
     88   MachineFunction *MF;
     89   MachineRegisterInfo *MRI;
     90 
     91   PPCMIPeephole() : MachineFunctionPass(ID) {
     92     initializePPCMIPeepholePass(*PassRegistry::getPassRegistry());
     93   }
     94 
     95 private:
     96   MachineDominatorTree *MDT;
     97   MachinePostDominatorTree *MPDT;
     98   MachineBlockFrequencyInfo *MBFI;
     99   uint64_t EntryFreq;
    100 
    101   // Initialize class variables.
    102   void initialize(MachineFunction &MFParm);
    103 
    104   // Perform peepholes.
    105   bool simplifyCode(void);
    106 
    107   // Perform peepholes.
    108   bool eliminateRedundantCompare(void);
    109   bool eliminateRedundantTOCSaves(std::map<MachineInstr *, bool> &TOCSaves);
    110   bool combineSEXTAndSHL(MachineInstr &MI, MachineInstr *&ToErase);
    111   bool emitRLDICWhenLoweringJumpTables(MachineInstr &MI);
    112   void UpdateTOCSaves(std::map<MachineInstr *, bool> &TOCSaves,
    113                       MachineInstr *MI);
    114 
    115 public:
    116 
    117   void getAnalysisUsage(AnalysisUsage &AU) const override {
    118     AU.addRequired<MachineDominatorTree>();
    119     AU.addRequired<MachinePostDominatorTree>();
    120     AU.addRequired<MachineBlockFrequencyInfo>();
    121     AU.addPreserved<MachineDominatorTree>();
    122     AU.addPreserved<MachinePostDominatorTree>();
    123     AU.addPreserved<MachineBlockFrequencyInfo>();
    124     MachineFunctionPass::getAnalysisUsage(AU);
    125   }
    126 
    127   // Main entry point for this pass.
    128   bool runOnMachineFunction(MachineFunction &MF) override {
    129     initialize(MF);
    130     // At this point, TOC pointer should not be used in a function that uses
    131     // PC-Relative addressing.
    132     assert((MF.getRegInfo().use_empty(PPC::X2) ||
    133             !MF.getSubtarget<PPCSubtarget>().isUsingPCRelativeCalls()) &&
    134            "TOC pointer used in a function using PC-Relative addressing!");
    135     if (skipFunction(MF.getFunction()))
    136       return false;
    137     return simplifyCode();
    138   }
    139 };
    140 
    141 // Initialize class variables.
    142 void PPCMIPeephole::initialize(MachineFunction &MFParm) {
    143   MF = &MFParm;
    144   MRI = &MF->getRegInfo();
    145   MDT = &getAnalysis<MachineDominatorTree>();
    146   MPDT = &getAnalysis<MachinePostDominatorTree>();
    147   MBFI = &getAnalysis<MachineBlockFrequencyInfo>();
    148   EntryFreq = MBFI->getEntryFreq();
    149   TII = MF->getSubtarget<PPCSubtarget>().getInstrInfo();
    150   LLVM_DEBUG(dbgs() << "*** PowerPC MI peephole pass ***\n\n");
    151   LLVM_DEBUG(MF->dump());
    152 }
    153 
    154 static MachineInstr *getVRegDefOrNull(MachineOperand *Op,
    155                                       MachineRegisterInfo *MRI) {
    156   assert(Op && "Invalid Operand!");
    157   if (!Op->isReg())
    158     return nullptr;
    159 
    160   Register Reg = Op->getReg();
    161   if (!Register::isVirtualRegister(Reg))
    162     return nullptr;
    163 
    164   return MRI->getVRegDef(Reg);
    165 }
    166 
    167 // This function returns number of known zero bits in output of MI
    168 // starting from the most significant bit.
    169 static unsigned
    170 getKnownLeadingZeroCount(MachineInstr *MI, const PPCInstrInfo *TII) {
    171   unsigned Opcode = MI->getOpcode();
    172   if (Opcode == PPC::RLDICL || Opcode == PPC::RLDICL_rec ||
    173       Opcode == PPC::RLDCL || Opcode == PPC::RLDCL_rec)
    174     return MI->getOperand(3).getImm();
    175 
    176   if ((Opcode == PPC::RLDIC || Opcode == PPC::RLDIC_rec) &&
    177       MI->getOperand(3).getImm() <= 63 - MI->getOperand(2).getImm())
    178     return MI->getOperand(3).getImm();
    179 
    180   if ((Opcode == PPC::RLWINM || Opcode == PPC::RLWINM_rec ||
    181        Opcode == PPC::RLWNM || Opcode == PPC::RLWNM_rec ||
    182        Opcode == PPC::RLWINM8 || Opcode == PPC::RLWNM8) &&
    183       MI->getOperand(3).getImm() <= MI->getOperand(4).getImm())
    184     return 32 + MI->getOperand(3).getImm();
    185 
    186   if (Opcode == PPC::ANDI_rec) {
    187     uint16_t Imm = MI->getOperand(2).getImm();
    188     return 48 + countLeadingZeros(Imm);
    189   }
    190 
    191   if (Opcode == PPC::CNTLZW || Opcode == PPC::CNTLZW_rec ||
    192       Opcode == PPC::CNTTZW || Opcode == PPC::CNTTZW_rec ||
    193       Opcode == PPC::CNTLZW8 || Opcode == PPC::CNTTZW8)
    194     // The result ranges from 0 to 32.
    195     return 58;
    196 
    197   if (Opcode == PPC::CNTLZD || Opcode == PPC::CNTLZD_rec ||
    198       Opcode == PPC::CNTTZD || Opcode == PPC::CNTTZD_rec)
    199     // The result ranges from 0 to 64.
    200     return 57;
    201 
    202   if (Opcode == PPC::LHZ   || Opcode == PPC::LHZX  ||
    203       Opcode == PPC::LHZ8  || Opcode == PPC::LHZX8 ||
    204       Opcode == PPC::LHZU  || Opcode == PPC::LHZUX ||
    205       Opcode == PPC::LHZU8 || Opcode == PPC::LHZUX8)
    206     return 48;
    207 
    208   if (Opcode == PPC::LBZ   || Opcode == PPC::LBZX  ||
    209       Opcode == PPC::LBZ8  || Opcode == PPC::LBZX8 ||
    210       Opcode == PPC::LBZU  || Opcode == PPC::LBZUX ||
    211       Opcode == PPC::LBZU8 || Opcode == PPC::LBZUX8)
    212     return 56;
    213 
    214   if (TII->isZeroExtended(*MI))
    215     return 32;
    216 
    217   return 0;
    218 }
    219 
    220 // This function maintains a map for the pairs <TOC Save Instr, Keep>
    221 // Each time a new TOC save is encountered, it checks if any of the existing
    222 // ones are dominated by the new one. If so, it marks the existing one as
    223 // redundant by setting it's entry in the map as false. It then adds the new
    224 // instruction to the map with either true or false depending on if any
    225 // existing instructions dominated the new one.
    226 void PPCMIPeephole::UpdateTOCSaves(
    227   std::map<MachineInstr *, bool> &TOCSaves, MachineInstr *MI) {
    228   assert(TII->isTOCSaveMI(*MI) && "Expecting a TOC save instruction here");
    229   // FIXME: Saving TOC in prologue hasn't been implemented well in AIX ABI part,
    230   // here only support it under ELFv2.
    231   if (MF->getSubtarget<PPCSubtarget>().isELFv2ABI()) {
    232     PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
    233 
    234     MachineBasicBlock *Entry = &MF->front();
    235     uint64_t CurrBlockFreq = MBFI->getBlockFreq(MI->getParent()).getFrequency();
    236 
    237     // If the block in which the TOC save resides is in a block that
    238     // post-dominates Entry, or a block that is hotter than entry (keep in mind
    239     // that early MachineLICM has already run so the TOC save won't be hoisted)
    240     // we can just do the save in the prologue.
    241     if (CurrBlockFreq > EntryFreq || MPDT->dominates(MI->getParent(), Entry))
    242       FI->setMustSaveTOC(true);
    243 
    244     // If we are saving the TOC in the prologue, all the TOC saves can be
    245     // removed from the code.
    246     if (FI->mustSaveTOC()) {
    247       for (auto &TOCSave : TOCSaves)
    248         TOCSave.second = false;
    249       // Add new instruction to map.
    250       TOCSaves[MI] = false;
    251       return;
    252     }
    253   }
    254 
    255   bool Keep = true;
    256   for (auto It = TOCSaves.begin(); It != TOCSaves.end(); It++ ) {
    257     MachineInstr *CurrInst = It->first;
    258     // If new instruction dominates an existing one, mark existing one as
    259     // redundant.
    260     if (It->second && MDT->dominates(MI, CurrInst))
    261       It->second = false;
    262     // Check if the new instruction is redundant.
    263     if (MDT->dominates(CurrInst, MI)) {
    264       Keep = false;
    265       break;
    266     }
    267   }
    268   // Add new instruction to map.
    269   TOCSaves[MI] = Keep;
    270 }
    271 
    272 // This function returns a list of all PHI nodes in the tree starting from
    273 // the RootPHI node. We perform a BFS traversal to get an ordered list of nodes.
    274 // The list initially only contains the root PHI. When we visit a PHI node, we
    275 // add it to the list. We continue to look for other PHI node operands while
    276 // there are nodes to visit in the list. The function returns false if the
    277 // optimization cannot be applied on this tree.
    278 static bool collectUnprimedAccPHIs(MachineRegisterInfo *MRI,
    279                                    MachineInstr *RootPHI,
    280                                    SmallVectorImpl<MachineInstr *> &PHIs) {
    281   PHIs.push_back(RootPHI);
    282   unsigned VisitedIndex = 0;
    283   while (VisitedIndex < PHIs.size()) {
    284     MachineInstr *VisitedPHI = PHIs[VisitedIndex];
    285     for (unsigned PHIOp = 1, NumOps = VisitedPHI->getNumOperands();
    286          PHIOp != NumOps; PHIOp += 2) {
    287       Register RegOp = VisitedPHI->getOperand(PHIOp).getReg();
    288       if (!Register::isVirtualRegister(RegOp))
    289         return false;
    290       MachineInstr *Instr = MRI->getVRegDef(RegOp);
    291       // While collecting the PHI nodes, we check if they can be converted (i.e.
    292       // all the operands are either copies, implicit defs or PHI nodes).
    293       unsigned Opcode = Instr->getOpcode();
    294       if (Opcode == PPC::COPY) {
    295         Register Reg = Instr->getOperand(1).getReg();
    296         if (!Register::isVirtualRegister(Reg) ||
    297             MRI->getRegClass(Reg) != &PPC::ACCRCRegClass)
    298           return false;
    299       } else if (Opcode != PPC::IMPLICIT_DEF && Opcode != PPC::PHI)
    300         return false;
    301       // If we detect a cycle in the PHI nodes, we exit. It would be
    302       // possible to change cycles as well, but that would add a lot
    303       // of complexity for a case that is unlikely to occur with MMA
    304       // code.
    305       if (Opcode != PPC::PHI)
    306         continue;
    307       if (llvm::is_contained(PHIs, Instr))
    308         return false;
    309       PHIs.push_back(Instr);
    310     }
    311     VisitedIndex++;
    312   }
    313   return true;
    314 }
    315 
    316 // This function changes the unprimed accumulator PHI nodes in the PHIs list to
    317 // primed accumulator PHI nodes. The list is traversed in reverse order to
    318 // change all the PHI operands of a PHI node before changing the node itself.
    319 // We keep a map to associate each changed PHI node to its non-changed form.
    320 static void convertUnprimedAccPHIs(const PPCInstrInfo *TII,
    321                                    MachineRegisterInfo *MRI,
    322                                    SmallVectorImpl<MachineInstr *> &PHIs,
    323                                    Register Dst) {
    324   DenseMap<MachineInstr *, MachineInstr *> ChangedPHIMap;
    325   for (auto It = PHIs.rbegin(), End = PHIs.rend(); It != End; ++It) {
    326     MachineInstr *PHI = *It;
    327     SmallVector<std::pair<MachineOperand, MachineOperand>, 4> PHIOps;
    328     // We check if the current PHI node can be changed by looking at its
    329     // operands. If all the operands are either copies from primed
    330     // accumulators, implicit definitions or other unprimed accumulator
    331     // PHI nodes, we change it.
    332     for (unsigned PHIOp = 1, NumOps = PHI->getNumOperands(); PHIOp != NumOps;
    333          PHIOp += 2) {
    334       Register RegOp = PHI->getOperand(PHIOp).getReg();
    335       MachineInstr *PHIInput = MRI->getVRegDef(RegOp);
    336       unsigned Opcode = PHIInput->getOpcode();
    337       assert((Opcode == PPC::COPY || Opcode == PPC::IMPLICIT_DEF ||
    338               Opcode == PPC::PHI) &&
    339              "Unexpected instruction");
    340       if (Opcode == PPC::COPY) {
    341         assert(MRI->getRegClass(PHIInput->getOperand(1).getReg()) ==
    342                    &PPC::ACCRCRegClass &&
    343                "Unexpected register class");
    344         PHIOps.push_back({PHIInput->getOperand(1), PHI->getOperand(PHIOp + 1)});
    345       } else if (Opcode == PPC::IMPLICIT_DEF) {
    346         Register AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
    347         BuildMI(*PHIInput->getParent(), PHIInput, PHIInput->getDebugLoc(),
    348                 TII->get(PPC::IMPLICIT_DEF), AccReg);
    349         PHIOps.push_back({MachineOperand::CreateReg(AccReg, false),
    350                           PHI->getOperand(PHIOp + 1)});
    351       } else if (Opcode == PPC::PHI) {
    352         // We found a PHI operand. At this point we know this operand
    353         // has already been changed so we get its associated changed form
    354         // from the map.
    355         assert(ChangedPHIMap.count(PHIInput) == 1 &&
    356                "This PHI node should have already been changed.");
    357         MachineInstr *PrimedAccPHI = ChangedPHIMap.lookup(PHIInput);
    358         PHIOps.push_back({MachineOperand::CreateReg(
    359                               PrimedAccPHI->getOperand(0).getReg(), false),
    360                           PHI->getOperand(PHIOp + 1)});
    361       }
    362     }
    363     Register AccReg = Dst;
    364     // If the PHI node we are changing is the root node, the register it defines
    365     // will be the destination register of the original copy (of the PHI def).
    366     // For all other PHI's in the list, we need to create another primed
    367     // accumulator virtual register as the PHI will no longer define the
    368     // unprimed accumulator.
    369     if (PHI != PHIs[0])
    370       AccReg = MRI->createVirtualRegister(&PPC::ACCRCRegClass);
    371     MachineInstrBuilder NewPHI = BuildMI(
    372         *PHI->getParent(), PHI, PHI->getDebugLoc(), TII->get(PPC::PHI), AccReg);
    373     for (auto RegMBB : PHIOps)
    374       NewPHI.add(RegMBB.first).add(RegMBB.second);
    375     ChangedPHIMap[PHI] = NewPHI.getInstr();
    376   }
    377 }
    378 
    379 // Perform peephole optimizations.
    380 bool PPCMIPeephole::simplifyCode(void) {
    381   bool Simplified = false;
    382   MachineInstr* ToErase = nullptr;
    383   std::map<MachineInstr *, bool> TOCSaves;
    384   const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
    385   NumFunctionsEnteredInMIPeephole++;
    386   if (ConvertRegReg) {
    387     // Fixed-point conversion of reg/reg instructions fed by load-immediate
    388     // into reg/imm instructions. FIXME: This is expensive, control it with
    389     // an option.
    390     bool SomethingChanged = false;
    391     do {
    392       NumFixedPointIterations++;
    393       SomethingChanged = false;
    394       for (MachineBasicBlock &MBB : *MF) {
    395         for (MachineInstr &MI : MBB) {
    396           if (MI.isDebugInstr())
    397             continue;
    398 
    399           if (TII->convertToImmediateForm(MI)) {
    400             // We don't erase anything in case the def has other uses. Let DCE
    401             // remove it if it can be removed.
    402             LLVM_DEBUG(dbgs() << "Converted instruction to imm form: ");
    403             LLVM_DEBUG(MI.dump());
    404             NumConvertedToImmediateForm++;
    405             SomethingChanged = true;
    406             Simplified = true;
    407             continue;
    408           }
    409         }
    410       }
    411     } while (SomethingChanged && FixedPointRegToImm);
    412   }
    413 
    414   for (MachineBasicBlock &MBB : *MF) {
    415     for (MachineInstr &MI : MBB) {
    416 
    417       // If the previous instruction was marked for elimination,
    418       // remove it now.
    419       if (ToErase) {
    420         ToErase->eraseFromParent();
    421         ToErase = nullptr;
    422       }
    423 
    424       // Ignore debug instructions.
    425       if (MI.isDebugInstr())
    426         continue;
    427 
    428       // Per-opcode peepholes.
    429       switch (MI.getOpcode()) {
    430 
    431       default:
    432         break;
    433       case PPC::COPY: {
    434         Register Src = MI.getOperand(1).getReg();
    435         Register Dst = MI.getOperand(0).getReg();
    436         if (!Register::isVirtualRegister(Src) ||
    437             !Register::isVirtualRegister(Dst))
    438           break;
    439         if (MRI->getRegClass(Src) != &PPC::UACCRCRegClass ||
    440             MRI->getRegClass(Dst) != &PPC::ACCRCRegClass)
    441           break;
    442 
    443         // We are copying an unprimed accumulator to a primed accumulator.
    444         // If the input to the copy is a PHI that is fed only by (i) copies in
    445         // the other direction (ii) implicitly defined unprimed accumulators or
    446         // (iii) other PHI nodes satisfying (i) and (ii), we can change
    447         // the PHI to a PHI on primed accumulators (as long as we also change
    448         // its operands). To detect and change such copies, we first get a list
    449         // of all the PHI nodes starting from the root PHI node in BFS order.
    450         // We then visit all these PHI nodes to check if they can be changed to
    451         // primed accumulator PHI nodes and if so, we change them.
    452         MachineInstr *RootPHI = MRI->getVRegDef(Src);
    453         if (RootPHI->getOpcode() != PPC::PHI)
    454           break;
    455 
    456         SmallVector<MachineInstr *, 4> PHIs;
    457         if (!collectUnprimedAccPHIs(MRI, RootPHI, PHIs))
    458           break;
    459 
    460         convertUnprimedAccPHIs(TII, MRI, PHIs, Dst);
    461 
    462         ToErase = &MI;
    463         break;
    464       }
    465       case PPC::LI:
    466       case PPC::LI8: {
    467         // If we are materializing a zero, look for any use operands for which
    468         // zero means immediate zero. All such operands can be replaced with
    469         // PPC::ZERO.
    470         if (!MI.getOperand(1).isImm() || MI.getOperand(1).getImm() != 0)
    471           break;
    472         unsigned MIDestReg = MI.getOperand(0).getReg();
    473         for (MachineInstr& UseMI : MRI->use_instructions(MIDestReg))
    474           Simplified |= TII->onlyFoldImmediate(UseMI, MI, MIDestReg);
    475         if (MRI->use_nodbg_empty(MIDestReg)) {
    476           ++NumLoadImmZeroFoldedAndRemoved;
    477           ToErase = &MI;
    478         }
    479         break;
    480       }
    481       case PPC::STW:
    482       case PPC::STD: {
    483         MachineFrameInfo &MFI = MF->getFrameInfo();
    484         if (MFI.hasVarSizedObjects() ||
    485             (!MF->getSubtarget<PPCSubtarget>().isELFv2ABI() &&
    486              !MF->getSubtarget<PPCSubtarget>().isAIXABI()))
    487           break;
    488         // When encountering a TOC save instruction, call UpdateTOCSaves
    489         // to add it to the TOCSaves map and mark any existing TOC saves
    490         // it dominates as redundant.
    491         if (TII->isTOCSaveMI(MI))
    492           UpdateTOCSaves(TOCSaves, &MI);
    493         break;
    494       }
    495       case PPC::XXPERMDI: {
    496         // Perform simplifications of 2x64 vector swaps and splats.
    497         // A swap is identified by an immediate value of 2, and a splat
    498         // is identified by an immediate value of 0 or 3.
    499         int Immed = MI.getOperand(3).getImm();
    500 
    501         if (Immed == 1)
    502           break;
    503 
    504         // For each of these simplifications, we need the two source
    505         // regs to match.  Unfortunately, MachineCSE ignores COPY and
    506         // SUBREG_TO_REG, so for example we can see
    507         //   XXPERMDI t, SUBREG_TO_REG(s), SUBREG_TO_REG(s), immed.
    508         // We have to look through chains of COPY and SUBREG_TO_REG
    509         // to find the real source values for comparison.
    510         unsigned TrueReg1 =
    511           TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
    512         unsigned TrueReg2 =
    513           TRI->lookThruCopyLike(MI.getOperand(2).getReg(), MRI);
    514 
    515         if (!(TrueReg1 == TrueReg2 && Register::isVirtualRegister(TrueReg1)))
    516           break;
    517 
    518         MachineInstr *DefMI = MRI->getVRegDef(TrueReg1);
    519 
    520         if (!DefMI)
    521           break;
    522 
    523         unsigned DefOpc = DefMI->getOpcode();
    524 
    525         // If this is a splat fed by a splatting load, the splat is
    526         // redundant. Replace with a copy. This doesn't happen directly due
    527         // to code in PPCDAGToDAGISel.cpp, but it can happen when converting
    528         // a load of a double to a vector of 64-bit integers.
    529         auto isConversionOfLoadAndSplat = [=]() -> bool {
    530           if (DefOpc != PPC::XVCVDPSXDS && DefOpc != PPC::XVCVDPUXDS)
    531             return false;
    532           unsigned FeedReg1 =
    533             TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
    534           if (Register::isVirtualRegister(FeedReg1)) {
    535             MachineInstr *LoadMI = MRI->getVRegDef(FeedReg1);
    536             if (LoadMI && LoadMI->getOpcode() == PPC::LXVDSX)
    537               return true;
    538           }
    539           return false;
    540         };
    541         if ((Immed == 0 || Immed == 3) &&
    542             (DefOpc == PPC::LXVDSX || isConversionOfLoadAndSplat())) {
    543           LLVM_DEBUG(dbgs() << "Optimizing load-and-splat/splat "
    544                                "to load-and-splat/copy: ");
    545           LLVM_DEBUG(MI.dump());
    546           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
    547                   MI.getOperand(0).getReg())
    548               .add(MI.getOperand(1));
    549           ToErase = &MI;
    550           Simplified = true;
    551         }
    552 
    553         // If this is a splat or a swap fed by another splat, we
    554         // can replace it with a copy.
    555         if (DefOpc == PPC::XXPERMDI) {
    556           unsigned DefReg1 = DefMI->getOperand(1).getReg();
    557           unsigned DefReg2 = DefMI->getOperand(2).getReg();
    558           unsigned DefImmed = DefMI->getOperand(3).getImm();
    559 
    560           // If the two inputs are not the same register, check to see if
    561           // they originate from the same virtual register after only
    562           // copy-like instructions.
    563           if (DefReg1 != DefReg2) {
    564             unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
    565             unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
    566 
    567             if (!(FeedReg1 == FeedReg2 &&
    568                   Register::isVirtualRegister(FeedReg1)))
    569               break;
    570           }
    571 
    572           if (DefImmed == 0 || DefImmed == 3) {
    573             LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
    574                                  "to splat/copy: ");
    575             LLVM_DEBUG(MI.dump());
    576             BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
    577                     MI.getOperand(0).getReg())
    578                 .add(MI.getOperand(1));
    579             ToErase = &MI;
    580             Simplified = true;
    581           }
    582 
    583           // If this is a splat fed by a swap, we can simplify modify
    584           // the splat to splat the other value from the swap's input
    585           // parameter.
    586           else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
    587             LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
    588             LLVM_DEBUG(MI.dump());
    589             MI.getOperand(1).setReg(DefReg1);
    590             MI.getOperand(2).setReg(DefReg2);
    591             MI.getOperand(3).setImm(3 - Immed);
    592             Simplified = true;
    593           }
    594 
    595           // If this is a swap fed by a swap, we can replace it
    596           // with a copy from the first swap's input.
    597           else if (Immed == 2 && DefImmed == 2) {
    598             LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
    599             LLVM_DEBUG(MI.dump());
    600             BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
    601                     MI.getOperand(0).getReg())
    602                 .add(DefMI->getOperand(1));
    603             ToErase = &MI;
    604             Simplified = true;
    605           }
    606         } else if ((Immed == 0 || Immed == 3) && DefOpc == PPC::XXPERMDIs &&
    607                    (DefMI->getOperand(2).getImm() == 0 ||
    608                     DefMI->getOperand(2).getImm() == 3)) {
    609           // Splat fed by another splat - switch the output of the first
    610           // and remove the second.
    611           DefMI->getOperand(0).setReg(MI.getOperand(0).getReg());
    612           ToErase = &MI;
    613           Simplified = true;
    614           LLVM_DEBUG(dbgs() << "Removing redundant splat: ");
    615           LLVM_DEBUG(MI.dump());
    616         }
    617         break;
    618       }
    619       case PPC::VSPLTB:
    620       case PPC::VSPLTH:
    621       case PPC::XXSPLTW: {
    622         unsigned MyOpcode = MI.getOpcode();
    623         unsigned OpNo = MyOpcode == PPC::XXSPLTW ? 1 : 2;
    624         unsigned TrueReg =
    625           TRI->lookThruCopyLike(MI.getOperand(OpNo).getReg(), MRI);
    626         if (!Register::isVirtualRegister(TrueReg))
    627           break;
    628         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
    629         if (!DefMI)
    630           break;
    631         unsigned DefOpcode = DefMI->getOpcode();
    632         auto isConvertOfSplat = [=]() -> bool {
    633           if (DefOpcode != PPC::XVCVSPSXWS && DefOpcode != PPC::XVCVSPUXWS)
    634             return false;
    635           Register ConvReg = DefMI->getOperand(1).getReg();
    636           if (!Register::isVirtualRegister(ConvReg))
    637             return false;
    638           MachineInstr *Splt = MRI->getVRegDef(ConvReg);
    639           return Splt && (Splt->getOpcode() == PPC::LXVWSX ||
    640             Splt->getOpcode() == PPC::XXSPLTW);
    641         };
    642         bool AlreadySplat = (MyOpcode == DefOpcode) ||
    643           (MyOpcode == PPC::VSPLTB && DefOpcode == PPC::VSPLTBs) ||
    644           (MyOpcode == PPC::VSPLTH && DefOpcode == PPC::VSPLTHs) ||
    645           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::XXSPLTWs) ||
    646           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::LXVWSX) ||
    647           (MyOpcode == PPC::XXSPLTW && DefOpcode == PPC::MTVSRWS)||
    648           (MyOpcode == PPC::XXSPLTW && isConvertOfSplat());
    649         // If the instruction[s] that feed this splat have already splat
    650         // the value, this splat is redundant.
    651         if (AlreadySplat) {
    652           LLVM_DEBUG(dbgs() << "Changing redundant splat to a copy: ");
    653           LLVM_DEBUG(MI.dump());
    654           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
    655                   MI.getOperand(0).getReg())
    656               .add(MI.getOperand(OpNo));
    657           ToErase = &MI;
    658           Simplified = true;
    659         }
    660         // Splat fed by a shift. Usually when we align value to splat into
    661         // vector element zero.
    662         if (DefOpcode == PPC::XXSLDWI) {
    663           Register ShiftRes = DefMI->getOperand(0).getReg();
    664           Register ShiftOp1 = DefMI->getOperand(1).getReg();
    665           Register ShiftOp2 = DefMI->getOperand(2).getReg();
    666           unsigned ShiftImm = DefMI->getOperand(3).getImm();
    667           unsigned SplatImm = MI.getOperand(2).getImm();
    668           if (ShiftOp1 == ShiftOp2) {
    669             unsigned NewElem = (SplatImm + ShiftImm) & 0x3;
    670             if (MRI->hasOneNonDBGUse(ShiftRes)) {
    671               LLVM_DEBUG(dbgs() << "Removing redundant shift: ");
    672               LLVM_DEBUG(DefMI->dump());
    673               ToErase = DefMI;
    674             }
    675             Simplified = true;
    676             LLVM_DEBUG(dbgs() << "Changing splat immediate from " << SplatImm
    677                               << " to " << NewElem << " in instruction: ");
    678             LLVM_DEBUG(MI.dump());
    679             MI.getOperand(1).setReg(ShiftOp1);
    680             MI.getOperand(2).setImm(NewElem);
    681           }
    682         }
    683         break;
    684       }
    685       case PPC::XVCVDPSP: {
    686         // If this is a DP->SP conversion fed by an FRSP, the FRSP is redundant.
    687         unsigned TrueReg =
    688           TRI->lookThruCopyLike(MI.getOperand(1).getReg(), MRI);
    689         if (!Register::isVirtualRegister(TrueReg))
    690           break;
    691         MachineInstr *DefMI = MRI->getVRegDef(TrueReg);
    692 
    693         // This can occur when building a vector of single precision or integer
    694         // values.
    695         if (DefMI && DefMI->getOpcode() == PPC::XXPERMDI) {
    696           unsigned DefsReg1 =
    697             TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
    698           unsigned DefsReg2 =
    699             TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
    700           if (!Register::isVirtualRegister(DefsReg1) ||
    701               !Register::isVirtualRegister(DefsReg2))
    702             break;
    703           MachineInstr *P1 = MRI->getVRegDef(DefsReg1);
    704           MachineInstr *P2 = MRI->getVRegDef(DefsReg2);
    705 
    706           if (!P1 || !P2)
    707             break;
    708 
    709           // Remove the passed FRSP/XSRSP instruction if it only feeds this MI
    710           // and set any uses of that FRSP/XSRSP (in this MI) to the source of
    711           // the FRSP/XSRSP.
    712           auto removeFRSPIfPossible = [&](MachineInstr *RoundInstr) {
    713             unsigned Opc = RoundInstr->getOpcode();
    714             if ((Opc == PPC::FRSP || Opc == PPC::XSRSP) &&
    715                 MRI->hasOneNonDBGUse(RoundInstr->getOperand(0).getReg())) {
    716               Simplified = true;
    717               Register ConvReg1 = RoundInstr->getOperand(1).getReg();
    718               Register FRSPDefines = RoundInstr->getOperand(0).getReg();
    719               MachineInstr &Use = *(MRI->use_instr_nodbg_begin(FRSPDefines));
    720               for (int i = 0, e = Use.getNumOperands(); i < e; ++i)
    721                 if (Use.getOperand(i).isReg() &&
    722                     Use.getOperand(i).getReg() == FRSPDefines)
    723                   Use.getOperand(i).setReg(ConvReg1);
    724               LLVM_DEBUG(dbgs() << "Removing redundant FRSP/XSRSP:\n");
    725               LLVM_DEBUG(RoundInstr->dump());
    726               LLVM_DEBUG(dbgs() << "As it feeds instruction:\n");
    727               LLVM_DEBUG(MI.dump());
    728               LLVM_DEBUG(dbgs() << "Through instruction:\n");
    729               LLVM_DEBUG(DefMI->dump());
    730               RoundInstr->eraseFromParent();
    731             }
    732           };
    733 
    734           // If the input to XVCVDPSP is a vector that was built (even
    735           // partially) out of FRSP's, the FRSP(s) can safely be removed
    736           // since this instruction performs the same operation.
    737           if (P1 != P2) {
    738             removeFRSPIfPossible(P1);
    739             removeFRSPIfPossible(P2);
    740             break;
    741           }
    742           removeFRSPIfPossible(P1);
    743         }
    744         break;
    745       }
    746       case PPC::EXTSH:
    747       case PPC::EXTSH8:
    748       case PPC::EXTSH8_32_64: {
    749         if (!EnableSExtElimination) break;
    750         Register NarrowReg = MI.getOperand(1).getReg();
    751         if (!Register::isVirtualRegister(NarrowReg))
    752           break;
    753 
    754         MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
    755         // If we've used a zero-extending load that we will sign-extend,
    756         // just do a sign-extending load.
    757         if (SrcMI->getOpcode() == PPC::LHZ ||
    758             SrcMI->getOpcode() == PPC::LHZX) {
    759           if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
    760             break;
    761           auto is64Bit = [] (unsigned Opcode) {
    762             return Opcode == PPC::EXTSH8;
    763           };
    764           auto isXForm = [] (unsigned Opcode) {
    765             return Opcode == PPC::LHZX;
    766           };
    767           auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
    768             if (is64Bit)
    769               if (isXForm) return PPC::LHAX8;
    770               else         return PPC::LHA8;
    771             else
    772               if (isXForm) return PPC::LHAX;
    773               else         return PPC::LHA;
    774           };
    775           unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
    776                                        isXForm(SrcMI->getOpcode()));
    777           LLVM_DEBUG(dbgs() << "Zero-extending load\n");
    778           LLVM_DEBUG(SrcMI->dump());
    779           LLVM_DEBUG(dbgs() << "and sign-extension\n");
    780           LLVM_DEBUG(MI.dump());
    781           LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
    782           SrcMI->setDesc(TII->get(Opc));
    783           SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
    784           ToErase = &MI;
    785           Simplified = true;
    786           NumEliminatedSExt++;
    787         }
    788         break;
    789       }
    790       case PPC::EXTSW:
    791       case PPC::EXTSW_32:
    792       case PPC::EXTSW_32_64: {
    793         if (!EnableSExtElimination) break;
    794         Register NarrowReg = MI.getOperand(1).getReg();
    795         if (!Register::isVirtualRegister(NarrowReg))
    796           break;
    797 
    798         MachineInstr *SrcMI = MRI->getVRegDef(NarrowReg);
    799         // If we've used a zero-extending load that we will sign-extend,
    800         // just do a sign-extending load.
    801         if (SrcMI->getOpcode() == PPC::LWZ ||
    802             SrcMI->getOpcode() == PPC::LWZX) {
    803           if (!MRI->hasOneNonDBGUse(SrcMI->getOperand(0).getReg()))
    804             break;
    805           auto is64Bit = [] (unsigned Opcode) {
    806             return Opcode == PPC::EXTSW || Opcode == PPC::EXTSW_32_64;
    807           };
    808           auto isXForm = [] (unsigned Opcode) {
    809             return Opcode == PPC::LWZX;
    810           };
    811           auto getSextLoadOp = [] (bool is64Bit, bool isXForm) {
    812             if (is64Bit)
    813               if (isXForm) return PPC::LWAX;
    814               else         return PPC::LWA;
    815             else
    816               if (isXForm) return PPC::LWAX_32;
    817               else         return PPC::LWA_32;
    818           };
    819           unsigned Opc = getSextLoadOp(is64Bit(MI.getOpcode()),
    820                                        isXForm(SrcMI->getOpcode()));
    821           LLVM_DEBUG(dbgs() << "Zero-extending load\n");
    822           LLVM_DEBUG(SrcMI->dump());
    823           LLVM_DEBUG(dbgs() << "and sign-extension\n");
    824           LLVM_DEBUG(MI.dump());
    825           LLVM_DEBUG(dbgs() << "are merged into sign-extending load\n");
    826           SrcMI->setDesc(TII->get(Opc));
    827           SrcMI->getOperand(0).setReg(MI.getOperand(0).getReg());
    828           ToErase = &MI;
    829           Simplified = true;
    830           NumEliminatedSExt++;
    831         } else if (MI.getOpcode() == PPC::EXTSW_32_64 &&
    832                    TII->isSignExtended(*SrcMI)) {
    833           // We can eliminate EXTSW if the input is known to be already
    834           // sign-extended.
    835           LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
    836           Register TmpReg =
    837               MF->getRegInfo().createVirtualRegister(&PPC::G8RCRegClass);
    838           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::IMPLICIT_DEF),
    839                   TmpReg);
    840           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::INSERT_SUBREG),
    841                   MI.getOperand(0).getReg())
    842               .addReg(TmpReg)
    843               .addReg(NarrowReg)
    844               .addImm(PPC::sub_32);
    845           ToErase = &MI;
    846           Simplified = true;
    847           NumEliminatedSExt++;
    848         }
    849         break;
    850       }
    851       case PPC::RLDICL: {
    852         // We can eliminate RLDICL (e.g. for zero-extension)
    853         // if all bits to clear are already zero in the input.
    854         // This code assume following code sequence for zero-extension.
    855         //   %6 = COPY %5:sub_32; (optional)
    856         //   %8 = IMPLICIT_DEF;
    857         //   %7<def,tied1> = INSERT_SUBREG %8<tied0>, %6, sub_32;
    858         if (!EnableZExtElimination) break;
    859 
    860         if (MI.getOperand(2).getImm() != 0)
    861           break;
    862 
    863         Register SrcReg = MI.getOperand(1).getReg();
    864         if (!Register::isVirtualRegister(SrcReg))
    865           break;
    866 
    867         MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
    868         if (!(SrcMI && SrcMI->getOpcode() == PPC::INSERT_SUBREG &&
    869               SrcMI->getOperand(0).isReg() && SrcMI->getOperand(1).isReg()))
    870           break;
    871 
    872         MachineInstr *ImpDefMI, *SubRegMI;
    873         ImpDefMI = MRI->getVRegDef(SrcMI->getOperand(1).getReg());
    874         SubRegMI = MRI->getVRegDef(SrcMI->getOperand(2).getReg());
    875         if (ImpDefMI->getOpcode() != PPC::IMPLICIT_DEF) break;
    876 
    877         SrcMI = SubRegMI;
    878         if (SubRegMI->getOpcode() == PPC::COPY) {
    879           Register CopyReg = SubRegMI->getOperand(1).getReg();
    880           if (Register::isVirtualRegister(CopyReg))
    881             SrcMI = MRI->getVRegDef(CopyReg);
    882         }
    883 
    884         unsigned KnownZeroCount = getKnownLeadingZeroCount(SrcMI, TII);
    885         if (MI.getOperand(3).getImm() <= KnownZeroCount) {
    886           LLVM_DEBUG(dbgs() << "Removing redundant zero-extension\n");
    887           BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
    888                   MI.getOperand(0).getReg())
    889               .addReg(SrcReg);
    890           ToErase = &MI;
    891           Simplified = true;
    892           NumEliminatedZExt++;
    893         }
    894         break;
    895       }
    896 
    897       // TODO: Any instruction that has an immediate form fed only by a PHI
    898       // whose operands are all load immediate can be folded away. We currently
    899       // do this for ADD instructions, but should expand it to arithmetic and
    900       // binary instructions with immediate forms in the future.
    901       case PPC::ADD4:
    902       case PPC::ADD8: {
    903         auto isSingleUsePHI = [&](MachineOperand *PhiOp) {
    904           assert(PhiOp && "Invalid Operand!");
    905           MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
    906 
    907           return DefPhiMI && (DefPhiMI->getOpcode() == PPC::PHI) &&
    908                  MRI->hasOneNonDBGUse(DefPhiMI->getOperand(0).getReg());
    909         };
    910 
    911         auto dominatesAllSingleUseLIs = [&](MachineOperand *DominatorOp,
    912                                             MachineOperand *PhiOp) {
    913           assert(PhiOp && "Invalid Operand!");
    914           assert(DominatorOp && "Invalid Operand!");
    915           MachineInstr *DefPhiMI = getVRegDefOrNull(PhiOp, MRI);
    916           MachineInstr *DefDomMI = getVRegDefOrNull(DominatorOp, MRI);
    917 
    918           // Note: the vregs only show up at odd indices position of PHI Node,
    919           // the even indices position save the BB info.
    920           for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
    921             MachineInstr *LiMI =
    922                 getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
    923             if (!LiMI ||
    924                 (LiMI->getOpcode() != PPC::LI && LiMI->getOpcode() != PPC::LI8)
    925                 || !MRI->hasOneNonDBGUse(LiMI->getOperand(0).getReg()) ||
    926                 !MDT->dominates(DefDomMI, LiMI))
    927               return false;
    928           }
    929 
    930           return true;
    931         };
    932 
    933         MachineOperand Op1 = MI.getOperand(1);
    934         MachineOperand Op2 = MI.getOperand(2);
    935         if (isSingleUsePHI(&Op2) && dominatesAllSingleUseLIs(&Op1, &Op2))
    936           std::swap(Op1, Op2);
    937         else if (!isSingleUsePHI(&Op1) || !dominatesAllSingleUseLIs(&Op2, &Op1))
    938           break; // We don't have an ADD fed by LI's that can be transformed
    939 
    940         // Now we know that Op1 is the PHI node and Op2 is the dominator
    941         Register DominatorReg = Op2.getReg();
    942 
    943         const TargetRegisterClass *TRC = MI.getOpcode() == PPC::ADD8
    944                                              ? &PPC::G8RC_and_G8RC_NOX0RegClass
    945                                              : &PPC::GPRC_and_GPRC_NOR0RegClass;
    946         MRI->setRegClass(DominatorReg, TRC);
    947 
    948         // replace LIs with ADDIs
    949         MachineInstr *DefPhiMI = getVRegDefOrNull(&Op1, MRI);
    950         for (unsigned i = 1; i < DefPhiMI->getNumOperands(); i += 2) {
    951           MachineInstr *LiMI = getVRegDefOrNull(&DefPhiMI->getOperand(i), MRI);
    952           LLVM_DEBUG(dbgs() << "Optimizing LI to ADDI: ");
    953           LLVM_DEBUG(LiMI->dump());
    954 
    955           // There could be repeated registers in the PHI, e.g: %1 =
    956           // PHI %6, <%bb.2>, %8, <%bb.3>, %8, <%bb.6>; So if we've
    957           // already replaced the def instruction, skip.
    958           if (LiMI->getOpcode() == PPC::ADDI || LiMI->getOpcode() == PPC::ADDI8)
    959             continue;
    960 
    961           assert((LiMI->getOpcode() == PPC::LI ||
    962                   LiMI->getOpcode() == PPC::LI8) &&
    963                  "Invalid Opcode!");
    964           auto LiImm = LiMI->getOperand(1).getImm(); // save the imm of LI
    965           LiMI->RemoveOperand(1);                    // remove the imm of LI
    966           LiMI->setDesc(TII->get(LiMI->getOpcode() == PPC::LI ? PPC::ADDI
    967                                                               : PPC::ADDI8));
    968           MachineInstrBuilder(*LiMI->getParent()->getParent(), *LiMI)
    969               .addReg(DominatorReg)
    970               .addImm(LiImm); // restore the imm of LI
    971           LLVM_DEBUG(LiMI->dump());
    972         }
    973 
    974         // Replace ADD with COPY
    975         LLVM_DEBUG(dbgs() << "Optimizing ADD to COPY: ");
    976         LLVM_DEBUG(MI.dump());
    977         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
    978                 MI.getOperand(0).getReg())
    979             .add(Op1);
    980         ToErase = &MI;
    981         Simplified = true;
    982         NumOptADDLIs++;
    983         break;
    984       }
    985       case PPC::RLDICR: {
    986         Simplified |= emitRLDICWhenLoweringJumpTables(MI) ||
    987                       combineSEXTAndSHL(MI, ToErase);
    988         break;
    989       }
    990       case PPC::RLWINM:
    991       case PPC::RLWINM_rec:
    992       case PPC::RLWINM8:
    993       case PPC::RLWINM8_rec: {
    994         Simplified = TII->combineRLWINM(MI, &ToErase);
    995         if (Simplified)
    996           ++NumRotatesCollapsed;
    997         break;
    998       }
    999       }
   1000     }
   1001 
   1002     // If the last instruction was marked for elimination,
   1003     // remove it now.
   1004     if (ToErase) {
   1005       ToErase->eraseFromParent();
   1006       ToErase = nullptr;
   1007     }
   1008   }
   1009 
   1010   // Eliminate all the TOC save instructions which are redundant.
   1011   Simplified |= eliminateRedundantTOCSaves(TOCSaves);
   1012   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
   1013   if (FI->mustSaveTOC())
   1014     NumTOCSavesInPrologue++;
   1015 
   1016   // We try to eliminate redundant compare instruction.
   1017   Simplified |= eliminateRedundantCompare();
   1018 
   1019   return Simplified;
   1020 }
   1021 
   1022 // helper functions for eliminateRedundantCompare
   1023 static bool isEqOrNe(MachineInstr *BI) {
   1024   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
   1025   unsigned PredCond = PPC::getPredicateCondition(Pred);
   1026   return (PredCond == PPC::PRED_EQ || PredCond == PPC::PRED_NE);
   1027 }
   1028 
   1029 static bool isSupportedCmpOp(unsigned opCode) {
   1030   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD  ||
   1031           opCode == PPC::CMPLW  || opCode == PPC::CMPW  ||
   1032           opCode == PPC::CMPLDI || opCode == PPC::CMPDI ||
   1033           opCode == PPC::CMPLWI || opCode == PPC::CMPWI);
   1034 }
   1035 
   1036 static bool is64bitCmpOp(unsigned opCode) {
   1037   return (opCode == PPC::CMPLD  || opCode == PPC::CMPD ||
   1038           opCode == PPC::CMPLDI || opCode == PPC::CMPDI);
   1039 }
   1040 
   1041 static bool isSignedCmpOp(unsigned opCode) {
   1042   return (opCode == PPC::CMPD  || opCode == PPC::CMPW ||
   1043           opCode == PPC::CMPDI || opCode == PPC::CMPWI);
   1044 }
   1045 
   1046 static unsigned getSignedCmpOpCode(unsigned opCode) {
   1047   if (opCode == PPC::CMPLD)  return PPC::CMPD;
   1048   if (opCode == PPC::CMPLW)  return PPC::CMPW;
   1049   if (opCode == PPC::CMPLDI) return PPC::CMPDI;
   1050   if (opCode == PPC::CMPLWI) return PPC::CMPWI;
   1051   return opCode;
   1052 }
   1053 
   1054 // We can decrement immediate x in (GE x) by changing it to (GT x-1) or
   1055 // (LT x) to (LE x-1)
   1056 static unsigned getPredicateToDecImm(MachineInstr *BI, MachineInstr *CMPI) {
   1057   uint64_t Imm = CMPI->getOperand(2).getImm();
   1058   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
   1059   if ((!SignedCmp && Imm == 0) || (SignedCmp && Imm == 0x8000))
   1060     return 0;
   1061 
   1062   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
   1063   unsigned PredCond = PPC::getPredicateCondition(Pred);
   1064   unsigned PredHint = PPC::getPredicateHint(Pred);
   1065   if (PredCond == PPC::PRED_GE)
   1066     return PPC::getPredicate(PPC::PRED_GT, PredHint);
   1067   if (PredCond == PPC::PRED_LT)
   1068     return PPC::getPredicate(PPC::PRED_LE, PredHint);
   1069 
   1070   return 0;
   1071 }
   1072 
   1073 // We can increment immediate x in (GT x) by changing it to (GE x+1) or
   1074 // (LE x) to (LT x+1)
   1075 static unsigned getPredicateToIncImm(MachineInstr *BI, MachineInstr *CMPI) {
   1076   uint64_t Imm = CMPI->getOperand(2).getImm();
   1077   bool SignedCmp = isSignedCmpOp(CMPI->getOpcode());
   1078   if ((!SignedCmp && Imm == 0xFFFF) || (SignedCmp && Imm == 0x7FFF))
   1079     return 0;
   1080 
   1081   PPC::Predicate Pred = (PPC::Predicate)BI->getOperand(0).getImm();
   1082   unsigned PredCond = PPC::getPredicateCondition(Pred);
   1083   unsigned PredHint = PPC::getPredicateHint(Pred);
   1084   if (PredCond == PPC::PRED_GT)
   1085     return PPC::getPredicate(PPC::PRED_GE, PredHint);
   1086   if (PredCond == PPC::PRED_LE)
   1087     return PPC::getPredicate(PPC::PRED_LT, PredHint);
   1088 
   1089   return 0;
   1090 }
   1091 
   1092 // This takes a Phi node and returns a register value for the specified BB.
   1093 static unsigned getIncomingRegForBlock(MachineInstr *Phi,
   1094                                        MachineBasicBlock *MBB) {
   1095   for (unsigned I = 2, E = Phi->getNumOperands() + 1; I != E; I += 2) {
   1096     MachineOperand &MO = Phi->getOperand(I);
   1097     if (MO.getMBB() == MBB)
   1098       return Phi->getOperand(I-1).getReg();
   1099   }
   1100   llvm_unreachable("invalid src basic block for this Phi node\n");
   1101   return 0;
   1102 }
   1103 
   1104 // This function tracks the source of the register through register copy.
   1105 // If BB1 and BB2 are non-NULL, we also track PHI instruction in BB2
   1106 // assuming that the control comes from BB1 into BB2.
   1107 static unsigned getSrcVReg(unsigned Reg, MachineBasicBlock *BB1,
   1108                            MachineBasicBlock *BB2, MachineRegisterInfo *MRI) {
   1109   unsigned SrcReg = Reg;
   1110   while (1) {
   1111     unsigned NextReg = SrcReg;
   1112     MachineInstr *Inst = MRI->getVRegDef(SrcReg);
   1113     if (BB1 && Inst->getOpcode() == PPC::PHI && Inst->getParent() == BB2) {
   1114       NextReg = getIncomingRegForBlock(Inst, BB1);
   1115       // We track through PHI only once to avoid infinite loop.
   1116       BB1 = nullptr;
   1117     }
   1118     else if (Inst->isFullCopy())
   1119       NextReg = Inst->getOperand(1).getReg();
   1120     if (NextReg == SrcReg || !Register::isVirtualRegister(NextReg))
   1121       break;
   1122     SrcReg = NextReg;
   1123   }
   1124   return SrcReg;
   1125 }
   1126 
   1127 static bool eligibleForCompareElimination(MachineBasicBlock &MBB,
   1128                                           MachineBasicBlock *&PredMBB,
   1129                                           MachineBasicBlock *&MBBtoMoveCmp,
   1130                                           MachineRegisterInfo *MRI) {
   1131 
   1132   auto isEligibleBB = [&](MachineBasicBlock &BB) {
   1133     auto BII = BB.getFirstInstrTerminator();
   1134     // We optimize BBs ending with a conditional branch.
   1135     // We check only for BCC here, not BCCLR, because BCCLR
   1136     // will be formed only later in the pipeline.
   1137     if (BB.succ_size() == 2 &&
   1138         BII != BB.instr_end() &&
   1139         (*BII).getOpcode() == PPC::BCC &&
   1140         (*BII).getOperand(1).isReg()) {
   1141       // We optimize only if the condition code is used only by one BCC.
   1142       Register CndReg = (*BII).getOperand(1).getReg();
   1143       if (!Register::isVirtualRegister(CndReg) || !MRI->hasOneNonDBGUse(CndReg))
   1144         return false;
   1145 
   1146       MachineInstr *CMPI = MRI->getVRegDef(CndReg);
   1147       // We assume compare and branch are in the same BB for ease of analysis.
   1148       if (CMPI->getParent() != &BB)
   1149         return false;
   1150 
   1151       // We skip this BB if a physical register is used in comparison.
   1152       for (MachineOperand &MO : CMPI->operands())
   1153         if (MO.isReg() && !Register::isVirtualRegister(MO.getReg()))
   1154           return false;
   1155 
   1156       return true;
   1157     }
   1158     return false;
   1159   };
   1160 
   1161   // If this BB has more than one successor, we can create a new BB and
   1162   // move the compare instruction in the new BB.
   1163   // So far, we do not move compare instruction to a BB having multiple
   1164   // successors to avoid potentially increasing code size.
   1165   auto isEligibleForMoveCmp = [](MachineBasicBlock &BB) {
   1166     return BB.succ_size() == 1;
   1167   };
   1168 
   1169   if (!isEligibleBB(MBB))
   1170     return false;
   1171 
   1172   unsigned NumPredBBs = MBB.pred_size();
   1173   if (NumPredBBs == 1) {
   1174     MachineBasicBlock *TmpMBB = *MBB.pred_begin();
   1175     if (isEligibleBB(*TmpMBB)) {
   1176       PredMBB = TmpMBB;
   1177       MBBtoMoveCmp = nullptr;
   1178       return true;
   1179     }
   1180   }
   1181   else if (NumPredBBs == 2) {
   1182     // We check for partially redundant case.
   1183     // So far, we support cases with only two predecessors
   1184     // to avoid increasing the number of instructions.
   1185     MachineBasicBlock::pred_iterator PI = MBB.pred_begin();
   1186     MachineBasicBlock *Pred1MBB = *PI;
   1187     MachineBasicBlock *Pred2MBB = *(PI+1);
   1188 
   1189     if (isEligibleBB(*Pred1MBB) && isEligibleForMoveCmp(*Pred2MBB)) {
   1190       // We assume Pred1MBB is the BB containing the compare to be merged and
   1191       // Pred2MBB is the BB to which we will append a compare instruction.
   1192       // Hence we can proceed as is.
   1193     }
   1194     else if (isEligibleBB(*Pred2MBB) && isEligibleForMoveCmp(*Pred1MBB)) {
   1195       // We need to swap Pred1MBB and Pred2MBB to canonicalize.
   1196       std::swap(Pred1MBB, Pred2MBB);
   1197     }
   1198     else return false;
   1199 
   1200     // Here, Pred2MBB is the BB to which we need to append a compare inst.
   1201     // We cannot move the compare instruction if operands are not available
   1202     // in Pred2MBB (i.e. defined in MBB by an instruction other than PHI).
   1203     MachineInstr *BI = &*MBB.getFirstInstrTerminator();
   1204     MachineInstr *CMPI = MRI->getVRegDef(BI->getOperand(1).getReg());
   1205     for (int I = 1; I <= 2; I++)
   1206       if (CMPI->getOperand(I).isReg()) {
   1207         MachineInstr *Inst = MRI->getVRegDef(CMPI->getOperand(I).getReg());
   1208         if (Inst->getParent() == &MBB && Inst->getOpcode() != PPC::PHI)
   1209           return false;
   1210       }
   1211 
   1212     PredMBB = Pred1MBB;
   1213     MBBtoMoveCmp = Pred2MBB;
   1214     return true;
   1215   }
   1216 
   1217   return false;
   1218 }
   1219 
   1220 // This function will iterate over the input map containing a pair of TOC save
   1221 // instruction and a flag. The flag will be set to false if the TOC save is
   1222 // proven redundant. This function will erase from the basic block all the TOC
   1223 // saves marked as redundant.
   1224 bool PPCMIPeephole::eliminateRedundantTOCSaves(
   1225     std::map<MachineInstr *, bool> &TOCSaves) {
   1226   bool Simplified = false;
   1227   int NumKept = 0;
   1228   for (auto TOCSave : TOCSaves) {
   1229     if (!TOCSave.second) {
   1230       TOCSave.first->eraseFromParent();
   1231       RemoveTOCSave++;
   1232       Simplified = true;
   1233     } else {
   1234       NumKept++;
   1235     }
   1236   }
   1237 
   1238   if (NumKept > 1)
   1239     MultiTOCSaves++;
   1240 
   1241   return Simplified;
   1242 }
   1243 
   1244 // If multiple conditional branches are executed based on the (essentially)
   1245 // same comparison, we merge compare instructions into one and make multiple
   1246 // conditional branches on this comparison.
   1247 // For example,
   1248 //   if (a == 0) { ... }
   1249 //   else if (a < 0) { ... }
   1250 // can be executed by one compare and two conditional branches instead of
   1251 // two pairs of a compare and a conditional branch.
   1252 //
   1253 // This method merges two compare instructions in two MBBs and modifies the
   1254 // compare and conditional branch instructions if needed.
   1255 // For the above example, the input for this pass looks like:
   1256 //   cmplwi r3, 0
   1257 //   beq    0, .LBB0_3
   1258 //   cmpwi  r3, -1
   1259 //   bgt    0, .LBB0_4
   1260 // So, before merging two compares, we need to modify these instructions as
   1261 //   cmpwi  r3, 0       ; cmplwi and cmpwi yield same result for beq
   1262 //   beq    0, .LBB0_3
   1263 //   cmpwi  r3, 0       ; greather than -1 means greater or equal to 0
   1264 //   bge    0, .LBB0_4
   1265 
   1266 bool PPCMIPeephole::eliminateRedundantCompare(void) {
   1267   bool Simplified = false;
   1268 
   1269   for (MachineBasicBlock &MBB2 : *MF) {
   1270     MachineBasicBlock *MBB1 = nullptr, *MBBtoMoveCmp = nullptr;
   1271 
   1272     // For fully redundant case, we select two basic blocks MBB1 and MBB2
   1273     // as an optimization target if
   1274     // - both MBBs end with a conditional branch,
   1275     // - MBB1 is the only predecessor of MBB2, and
   1276     // - compare does not take a physical register as a operand in both MBBs.
   1277     // In this case, eligibleForCompareElimination sets MBBtoMoveCmp nullptr.
   1278     //
   1279     // As partially redundant case, we additionally handle if MBB2 has one
   1280     // additional predecessor, which has only one successor (MBB2).
   1281     // In this case, we move the compare instruction originally in MBB2 into
   1282     // MBBtoMoveCmp. This partially redundant case is typically appear by
   1283     // compiling a while loop; here, MBBtoMoveCmp is the loop preheader.
   1284     //
   1285     // Overview of CFG of related basic blocks
   1286     // Fully redundant case        Partially redundant case
   1287     //   --------                   ----------------  --------
   1288     //   | MBB1 | (w/ 2 succ)       | MBBtoMoveCmp |  | MBB1 | (w/ 2 succ)
   1289     //   --------                   ----------------  --------
   1290     //      |    \                     (w/ 1 succ) \     |    \
   1291     //      |     \                                 \    |     \
   1292     //      |                                        \   |
   1293     //   --------                                     --------
   1294     //   | MBB2 | (w/ 1 pred                          | MBB2 | (w/ 2 pred
   1295     //   -------- and 2 succ)                         -------- and 2 succ)
   1296     //      |    \                                       |    \
   1297     //      |     \                                      |     \
   1298     //
   1299     if (!eligibleForCompareElimination(MBB2, MBB1, MBBtoMoveCmp, MRI))
   1300       continue;
   1301 
   1302     MachineInstr *BI1   = &*MBB1->getFirstInstrTerminator();
   1303     MachineInstr *CMPI1 = MRI->getVRegDef(BI1->getOperand(1).getReg());
   1304 
   1305     MachineInstr *BI2   = &*MBB2.getFirstInstrTerminator();
   1306     MachineInstr *CMPI2 = MRI->getVRegDef(BI2->getOperand(1).getReg());
   1307     bool IsPartiallyRedundant = (MBBtoMoveCmp != nullptr);
   1308 
   1309     // We cannot optimize an unsupported compare opcode or
   1310     // a mix of 32-bit and 64-bit comaprisons
   1311     if (!isSupportedCmpOp(CMPI1->getOpcode()) ||
   1312         !isSupportedCmpOp(CMPI2->getOpcode()) ||
   1313         is64bitCmpOp(CMPI1->getOpcode()) != is64bitCmpOp(CMPI2->getOpcode()))
   1314       continue;
   1315 
   1316     unsigned NewOpCode = 0;
   1317     unsigned NewPredicate1 = 0, NewPredicate2 = 0;
   1318     int16_t Imm1 = 0, NewImm1 = 0, Imm2 = 0, NewImm2 = 0;
   1319     bool SwapOperands = false;
   1320 
   1321     if (CMPI1->getOpcode() != CMPI2->getOpcode()) {
   1322       // Typically, unsigned comparison is used for equality check, but
   1323       // we replace it with a signed comparison if the comparison
   1324       // to be merged is a signed comparison.
   1325       // In other cases of opcode mismatch, we cannot optimize this.
   1326 
   1327       // We cannot change opcode when comparing against an immediate
   1328       // if the most significant bit of the immediate is one
   1329       // due to the difference in sign extension.
   1330       auto CmpAgainstImmWithSignBit = [](MachineInstr *I) {
   1331         if (!I->getOperand(2).isImm())
   1332           return false;
   1333         int16_t Imm = (int16_t)I->getOperand(2).getImm();
   1334         return Imm < 0;
   1335       };
   1336 
   1337       if (isEqOrNe(BI2) && !CmpAgainstImmWithSignBit(CMPI2) &&
   1338           CMPI1->getOpcode() == getSignedCmpOpCode(CMPI2->getOpcode()))
   1339         NewOpCode = CMPI1->getOpcode();
   1340       else if (isEqOrNe(BI1) && !CmpAgainstImmWithSignBit(CMPI1) &&
   1341                getSignedCmpOpCode(CMPI1->getOpcode()) == CMPI2->getOpcode())
   1342         NewOpCode = CMPI2->getOpcode();
   1343       else continue;
   1344     }
   1345 
   1346     if (CMPI1->getOperand(2).isReg() && CMPI2->getOperand(2).isReg()) {
   1347       // In case of comparisons between two registers, these two registers
   1348       // must be same to merge two comparisons.
   1349       unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
   1350                                          nullptr, nullptr, MRI);
   1351       unsigned Cmp1Operand2 = getSrcVReg(CMPI1->getOperand(2).getReg(),
   1352                                          nullptr, nullptr, MRI);
   1353       unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
   1354                                          MBB1, &MBB2, MRI);
   1355       unsigned Cmp2Operand2 = getSrcVReg(CMPI2->getOperand(2).getReg(),
   1356                                          MBB1, &MBB2, MRI);
   1357 
   1358       if (Cmp1Operand1 == Cmp2Operand1 && Cmp1Operand2 == Cmp2Operand2) {
   1359         // Same pair of registers in the same order; ready to merge as is.
   1360       }
   1361       else if (Cmp1Operand1 == Cmp2Operand2 && Cmp1Operand2 == Cmp2Operand1) {
   1362         // Same pair of registers in different order.
   1363         // We reverse the predicate to merge compare instructions.
   1364         PPC::Predicate Pred = (PPC::Predicate)BI2->getOperand(0).getImm();
   1365         NewPredicate2 = (unsigned)PPC::getSwappedPredicate(Pred);
   1366         // In case of partial redundancy, we need to swap operands
   1367         // in another compare instruction.
   1368         SwapOperands = true;
   1369       }
   1370       else continue;
   1371     }
   1372     else if (CMPI1->getOperand(2).isImm() && CMPI2->getOperand(2).isImm()) {
   1373       // In case of comparisons between a register and an immediate,
   1374       // the operand register must be same for two compare instructions.
   1375       unsigned Cmp1Operand1 = getSrcVReg(CMPI1->getOperand(1).getReg(),
   1376                                          nullptr, nullptr, MRI);
   1377       unsigned Cmp2Operand1 = getSrcVReg(CMPI2->getOperand(1).getReg(),
   1378                                          MBB1, &MBB2, MRI);
   1379       if (Cmp1Operand1 != Cmp2Operand1)
   1380         continue;
   1381 
   1382       NewImm1 = Imm1 = (int16_t)CMPI1->getOperand(2).getImm();
   1383       NewImm2 = Imm2 = (int16_t)CMPI2->getOperand(2).getImm();
   1384 
   1385       // If immediate are not same, we try to adjust by changing predicate;
   1386       // e.g. GT imm means GE (imm+1).
   1387       if (Imm1 != Imm2 && (!isEqOrNe(BI2) || !isEqOrNe(BI1))) {
   1388         int Diff = Imm1 - Imm2;
   1389         if (Diff < -2 || Diff > 2)
   1390           continue;
   1391 
   1392         unsigned PredToInc1 = getPredicateToIncImm(BI1, CMPI1);
   1393         unsigned PredToDec1 = getPredicateToDecImm(BI1, CMPI1);
   1394         unsigned PredToInc2 = getPredicateToIncImm(BI2, CMPI2);
   1395         unsigned PredToDec2 = getPredicateToDecImm(BI2, CMPI2);
   1396         if (Diff == 2) {
   1397           if (PredToInc2 && PredToDec1) {
   1398             NewPredicate2 = PredToInc2;
   1399             NewPredicate1 = PredToDec1;
   1400             NewImm2++;
   1401             NewImm1--;
   1402           }
   1403         }
   1404         else if (Diff == 1) {
   1405           if (PredToInc2) {
   1406             NewImm2++;
   1407             NewPredicate2 = PredToInc2;
   1408           }
   1409           else if (PredToDec1) {
   1410             NewImm1--;
   1411             NewPredicate1 = PredToDec1;
   1412           }
   1413         }
   1414         else if (Diff == -1) {
   1415           if (PredToDec2) {
   1416             NewImm2--;
   1417             NewPredicate2 = PredToDec2;
   1418           }
   1419           else if (PredToInc1) {
   1420             NewImm1++;
   1421             NewPredicate1 = PredToInc1;
   1422           }
   1423         }
   1424         else if (Diff == -2) {
   1425           if (PredToDec2 && PredToInc1) {
   1426             NewPredicate2 = PredToDec2;
   1427             NewPredicate1 = PredToInc1;
   1428             NewImm2--;
   1429             NewImm1++;
   1430           }
   1431         }
   1432       }
   1433 
   1434       // We cannot merge two compares if the immediates are not same.
   1435       if (NewImm2 != NewImm1)
   1436         continue;
   1437     }
   1438 
   1439     LLVM_DEBUG(dbgs() << "Optimize two pairs of compare and branch:\n");
   1440     LLVM_DEBUG(CMPI1->dump());
   1441     LLVM_DEBUG(BI1->dump());
   1442     LLVM_DEBUG(CMPI2->dump());
   1443     LLVM_DEBUG(BI2->dump());
   1444 
   1445     // We adjust opcode, predicates and immediate as we determined above.
   1446     if (NewOpCode != 0 && NewOpCode != CMPI1->getOpcode()) {
   1447       CMPI1->setDesc(TII->get(NewOpCode));
   1448     }
   1449     if (NewPredicate1) {
   1450       BI1->getOperand(0).setImm(NewPredicate1);
   1451     }
   1452     if (NewPredicate2) {
   1453       BI2->getOperand(0).setImm(NewPredicate2);
   1454     }
   1455     if (NewImm1 != Imm1) {
   1456       CMPI1->getOperand(2).setImm(NewImm1);
   1457     }
   1458 
   1459     if (IsPartiallyRedundant) {
   1460       // We touch up the compare instruction in MBB2 and move it to
   1461       // a previous BB to handle partially redundant case.
   1462       if (SwapOperands) {
   1463         Register Op1 = CMPI2->getOperand(1).getReg();
   1464         Register Op2 = CMPI2->getOperand(2).getReg();
   1465         CMPI2->getOperand(1).setReg(Op2);
   1466         CMPI2->getOperand(2).setReg(Op1);
   1467       }
   1468       if (NewImm2 != Imm2)
   1469         CMPI2->getOperand(2).setImm(NewImm2);
   1470 
   1471       for (int I = 1; I <= 2; I++) {
   1472         if (CMPI2->getOperand(I).isReg()) {
   1473           MachineInstr *Inst = MRI->getVRegDef(CMPI2->getOperand(I).getReg());
   1474           if (Inst->getParent() != &MBB2)
   1475             continue;
   1476 
   1477           assert(Inst->getOpcode() == PPC::PHI &&
   1478                  "We cannot support if an operand comes from this BB.");
   1479           unsigned SrcReg = getIncomingRegForBlock(Inst, MBBtoMoveCmp);
   1480           CMPI2->getOperand(I).setReg(SrcReg);
   1481         }
   1482       }
   1483       auto I = MachineBasicBlock::iterator(MBBtoMoveCmp->getFirstTerminator());
   1484       MBBtoMoveCmp->splice(I, &MBB2, MachineBasicBlock::iterator(CMPI2));
   1485 
   1486       DebugLoc DL = CMPI2->getDebugLoc();
   1487       Register NewVReg = MRI->createVirtualRegister(&PPC::CRRCRegClass);
   1488       BuildMI(MBB2, MBB2.begin(), DL,
   1489               TII->get(PPC::PHI), NewVReg)
   1490         .addReg(BI1->getOperand(1).getReg()).addMBB(MBB1)
   1491         .addReg(BI2->getOperand(1).getReg()).addMBB(MBBtoMoveCmp);
   1492       BI2->getOperand(1).setReg(NewVReg);
   1493     }
   1494     else {
   1495       // We finally eliminate compare instruction in MBB2.
   1496       BI2->getOperand(1).setReg(BI1->getOperand(1).getReg());
   1497       CMPI2->eraseFromParent();
   1498     }
   1499     BI2->getOperand(1).setIsKill(true);
   1500     BI1->getOperand(1).setIsKill(false);
   1501 
   1502     LLVM_DEBUG(dbgs() << "into a compare and two branches:\n");
   1503     LLVM_DEBUG(CMPI1->dump());
   1504     LLVM_DEBUG(BI1->dump());
   1505     LLVM_DEBUG(BI2->dump());
   1506     if (IsPartiallyRedundant) {
   1507       LLVM_DEBUG(dbgs() << "The following compare is moved into "
   1508                         << printMBBReference(*MBBtoMoveCmp)
   1509                         << " to handle partial redundancy.\n");
   1510       LLVM_DEBUG(CMPI2->dump());
   1511     }
   1512 
   1513     Simplified = true;
   1514   }
   1515 
   1516   return Simplified;
   1517 }
   1518 
   1519 // We miss the opportunity to emit an RLDIC when lowering jump tables
   1520 // since ISEL sees only a single basic block. When selecting, the clear
   1521 // and shift left will be in different blocks.
   1522 bool PPCMIPeephole::emitRLDICWhenLoweringJumpTables(MachineInstr &MI) {
   1523   if (MI.getOpcode() != PPC::RLDICR)
   1524     return false;
   1525 
   1526   Register SrcReg = MI.getOperand(1).getReg();
   1527   if (!Register::isVirtualRegister(SrcReg))
   1528     return false;
   1529 
   1530   MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
   1531   if (SrcMI->getOpcode() != PPC::RLDICL)
   1532     return false;
   1533 
   1534   MachineOperand MOpSHSrc = SrcMI->getOperand(2);
   1535   MachineOperand MOpMBSrc = SrcMI->getOperand(3);
   1536   MachineOperand MOpSHMI = MI.getOperand(2);
   1537   MachineOperand MOpMEMI = MI.getOperand(3);
   1538   if (!(MOpSHSrc.isImm() && MOpMBSrc.isImm() && MOpSHMI.isImm() &&
   1539         MOpMEMI.isImm()))
   1540     return false;
   1541 
   1542   uint64_t SHSrc = MOpSHSrc.getImm();
   1543   uint64_t MBSrc = MOpMBSrc.getImm();
   1544   uint64_t SHMI = MOpSHMI.getImm();
   1545   uint64_t MEMI = MOpMEMI.getImm();
   1546   uint64_t NewSH = SHSrc + SHMI;
   1547   uint64_t NewMB = MBSrc - SHMI;
   1548   if (NewMB > 63 || NewSH > 63)
   1549     return false;
   1550 
   1551   // The bits cleared with RLDICL are [0, MBSrc).
   1552   // The bits cleared with RLDICR are (MEMI, 63].
   1553   // After the sequence, the bits cleared are:
   1554   // [0, MBSrc-SHMI) and (MEMI, 63).
   1555   //
   1556   // The bits cleared with RLDIC are [0, NewMB) and (63-NewSH, 63].
   1557   if ((63 - NewSH) != MEMI)
   1558     return false;
   1559 
   1560   LLVM_DEBUG(dbgs() << "Converting pair: ");
   1561   LLVM_DEBUG(SrcMI->dump());
   1562   LLVM_DEBUG(MI.dump());
   1563 
   1564   MI.setDesc(TII->get(PPC::RLDIC));
   1565   MI.getOperand(1).setReg(SrcMI->getOperand(1).getReg());
   1566   MI.getOperand(2).setImm(NewSH);
   1567   MI.getOperand(3).setImm(NewMB);
   1568   MI.getOperand(1).setIsKill(SrcMI->getOperand(1).isKill());
   1569   SrcMI->getOperand(1).setIsKill(false);
   1570 
   1571   LLVM_DEBUG(dbgs() << "To: ");
   1572   LLVM_DEBUG(MI.dump());
   1573   NumRotatesCollapsed++;
   1574   // If SrcReg has no non-debug use it's safe to delete its def SrcMI.
   1575   if (MRI->use_nodbg_empty(SrcReg)) {
   1576     assert(!SrcMI->hasImplicitDef() &&
   1577            "Not expecting an implicit def with this instr.");
   1578     SrcMI->eraseFromParent();
   1579   }
   1580   return true;
   1581 }
   1582 
   1583 // For case in LLVM IR
   1584 // entry:
   1585 //   %iconv = sext i32 %index to i64
   1586 //   br i1 undef label %true, label %false
   1587 // true:
   1588 //   %ptr = getelementptr inbounds i32, i32* null, i64 %iconv
   1589 // ...
   1590 // PPCISelLowering::combineSHL fails to combine, because sext and shl are in
   1591 // different BBs when conducting instruction selection. We can do a peephole
   1592 // optimization to combine these two instructions into extswsli after
   1593 // instruction selection.
   1594 bool PPCMIPeephole::combineSEXTAndSHL(MachineInstr &MI,
   1595                                       MachineInstr *&ToErase) {
   1596   if (MI.getOpcode() != PPC::RLDICR)
   1597     return false;
   1598 
   1599   if (!MF->getSubtarget<PPCSubtarget>().isISA3_0())
   1600     return false;
   1601 
   1602   assert(MI.getNumOperands() == 4 && "RLDICR should have 4 operands");
   1603 
   1604   MachineOperand MOpSHMI = MI.getOperand(2);
   1605   MachineOperand MOpMEMI = MI.getOperand(3);
   1606   if (!(MOpSHMI.isImm() && MOpMEMI.isImm()))
   1607     return false;
   1608 
   1609   uint64_t SHMI = MOpSHMI.getImm();
   1610   uint64_t MEMI = MOpMEMI.getImm();
   1611   if (SHMI + MEMI != 63)
   1612     return false;
   1613 
   1614   Register SrcReg = MI.getOperand(1).getReg();
   1615   if (!Register::isVirtualRegister(SrcReg))
   1616     return false;
   1617 
   1618   MachineInstr *SrcMI = MRI->getVRegDef(SrcReg);
   1619   if (SrcMI->getOpcode() != PPC::EXTSW &&
   1620       SrcMI->getOpcode() != PPC::EXTSW_32_64)
   1621     return false;
   1622 
   1623   // If the register defined by extsw has more than one use, combination is not
   1624   // needed.
   1625   if (!MRI->hasOneNonDBGUse(SrcReg))
   1626     return false;
   1627 
   1628   assert(SrcMI->getNumOperands() == 2 && "EXTSW should have 2 operands");
   1629   assert(SrcMI->getOperand(1).isReg() &&
   1630          "EXTSW's second operand should be a register");
   1631   if (!Register::isVirtualRegister(SrcMI->getOperand(1).getReg()))
   1632     return false;
   1633 
   1634   LLVM_DEBUG(dbgs() << "Combining pair: ");
   1635   LLVM_DEBUG(SrcMI->dump());
   1636   LLVM_DEBUG(MI.dump());
   1637 
   1638   MachineInstr *NewInstr =
   1639       BuildMI(*MI.getParent(), &MI, MI.getDebugLoc(),
   1640               SrcMI->getOpcode() == PPC::EXTSW ? TII->get(PPC::EXTSWSLI)
   1641                                                : TII->get(PPC::EXTSWSLI_32_64),
   1642               MI.getOperand(0).getReg())
   1643           .add(SrcMI->getOperand(1))
   1644           .add(MOpSHMI);
   1645   (void)NewInstr;
   1646 
   1647   LLVM_DEBUG(dbgs() << "TO: ");
   1648   LLVM_DEBUG(NewInstr->dump());
   1649   ++NumEXTSWAndSLDICombined;
   1650   ToErase = &MI;
   1651   // SrcMI, which is extsw, is of no use now, erase it.
   1652   SrcMI->eraseFromParent();
   1653   return true;
   1654 }
   1655 
   1656 } // end default namespace
   1657 
   1658 INITIALIZE_PASS_BEGIN(PPCMIPeephole, DEBUG_TYPE,
   1659                       "PowerPC MI Peephole Optimization", false, false)
   1660 INITIALIZE_PASS_DEPENDENCY(MachineBlockFrequencyInfo)
   1661 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
   1662 INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
   1663 INITIALIZE_PASS_END(PPCMIPeephole, DEBUG_TYPE,
   1664                     "PowerPC MI Peephole Optimization", false, false)
   1665 
   1666 char PPCMIPeephole::ID = 0;
   1667 FunctionPass*
   1668 llvm::createPPCMIPeepholePass() { return new PPCMIPeephole(); }
   1669 
   1670