Home | History | Annotate | Line # | Download | only in X86
      1 //===-- X86FixupLEAs.cpp - use or replace LEA instructions -----------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file defines the pass that finds instructions that can be
     10 // re-written as LEA instructions in order to reduce pipeline delays.
     11 // It replaces LEAs with ADD/INC/DEC when that is better for size/speed.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "X86.h"
     16 #include "X86InstrInfo.h"
     17 #include "X86Subtarget.h"
     18 #include "llvm/ADT/Statistic.h"
     19 #include "llvm/Analysis/ProfileSummaryInfo.h"
     20 #include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
     21 #include "llvm/CodeGen/MachineFunctionPass.h"
     22 #include "llvm/CodeGen/MachineInstrBuilder.h"
     23 #include "llvm/CodeGen/MachineSizeOpts.h"
     24 #include "llvm/CodeGen/Passes.h"
     25 #include "llvm/CodeGen/TargetSchedule.h"
     26 #include "llvm/Support/Debug.h"
     27 #include "llvm/Support/raw_ostream.h"
     28 using namespace llvm;
     29 
     30 #define FIXUPLEA_DESC "X86 LEA Fixup"
     31 #define FIXUPLEA_NAME "x86-fixup-LEAs"
     32 
     33 #define DEBUG_TYPE FIXUPLEA_NAME
     34 
     35 STATISTIC(NumLEAs, "Number of LEA instructions created");
     36 
     37 namespace {
     38 class FixupLEAPass : public MachineFunctionPass {
     39   enum RegUsageState { RU_NotUsed, RU_Write, RU_Read };
     40 
     41   /// Given a machine register, look for the instruction
     42   /// which writes it in the current basic block. If found,
     43   /// try to replace it with an equivalent LEA instruction.
     44   /// If replacement succeeds, then also process the newly created
     45   /// instruction.
     46   void seekLEAFixup(MachineOperand &p, MachineBasicBlock::iterator &I,
     47                     MachineBasicBlock &MBB);
     48 
     49   /// Given a memory access or LEA instruction
     50   /// whose address mode uses a base and/or index register, look for
     51   /// an opportunity to replace the instruction which sets the base or index
     52   /// register with an equivalent LEA instruction.
     53   void processInstruction(MachineBasicBlock::iterator &I,
     54                           MachineBasicBlock &MBB);
     55 
     56   /// Given a LEA instruction which is unprofitable
     57   /// on SlowLEA targets try to replace it with an equivalent ADD instruction.
     58   void processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
     59                                     MachineBasicBlock &MBB);
     60 
     61   /// Given a LEA instruction which is unprofitable
     62   /// on SNB+ try to replace it with other instructions.
     63   /// According to Intel's Optimization Reference Manual:
     64   /// " For LEA instructions with three source operands and some specific
     65   ///   situations, instruction latency has increased to 3 cycles, and must
     66   ///   dispatch via port 1:
     67   /// - LEA that has all three source operands: base, index, and offset
     68   /// - LEA that uses base and index registers where the base is EBP, RBP,
     69   ///   or R13
     70   /// - LEA that uses RIP relative addressing mode
     71   /// - LEA that uses 16-bit addressing mode "
     72   /// This function currently handles the first 2 cases only.
     73   void processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
     74                                  MachineBasicBlock &MBB, bool OptIncDec);
     75 
     76   /// Look for LEAs that are really two address LEAs that we might be able to
     77   /// turn into regular ADD instructions.
     78   bool optTwoAddrLEA(MachineBasicBlock::iterator &I,
     79                      MachineBasicBlock &MBB, bool OptIncDec,
     80                      bool UseLEAForSP) const;
     81 
     82   /// Determine if an instruction references a machine register
     83   /// and, if so, whether it reads or writes the register.
     84   RegUsageState usesRegister(MachineOperand &p, MachineBasicBlock::iterator I);
     85 
     86   /// Step backwards through a basic block, looking
     87   /// for an instruction which writes a register within
     88   /// a maximum of INSTR_DISTANCE_THRESHOLD instruction latency cycles.
     89   MachineBasicBlock::iterator searchBackwards(MachineOperand &p,
     90                                               MachineBasicBlock::iterator &I,
     91                                               MachineBasicBlock &MBB);
     92 
     93   /// if an instruction can be converted to an
     94   /// equivalent LEA, insert the new instruction into the basic block
     95   /// and return a pointer to it. Otherwise, return zero.
     96   MachineInstr *postRAConvertToLEA(MachineBasicBlock &MBB,
     97                                    MachineBasicBlock::iterator &MBBI) const;
     98 
     99 public:
    100   static char ID;
    101 
    102   StringRef getPassName() const override { return FIXUPLEA_DESC; }
    103 
    104   FixupLEAPass() : MachineFunctionPass(ID) { }
    105 
    106   /// Loop over all of the basic blocks,
    107   /// replacing instructions by equivalent LEA instructions
    108   /// if needed and when possible.
    109   bool runOnMachineFunction(MachineFunction &MF) override;
    110 
    111   // This pass runs after regalloc and doesn't support VReg operands.
    112   MachineFunctionProperties getRequiredProperties() const override {
    113     return MachineFunctionProperties().set(
    114         MachineFunctionProperties::Property::NoVRegs);
    115   }
    116 
    117   void getAnalysisUsage(AnalysisUsage &AU) const override {
    118     AU.addRequired<ProfileSummaryInfoWrapperPass>();
    119     AU.addRequired<LazyMachineBlockFrequencyInfoPass>();
    120     MachineFunctionPass::getAnalysisUsage(AU);
    121   }
    122 
    123 private:
    124   TargetSchedModel TSM;
    125   const X86InstrInfo *TII = nullptr;
    126   const X86RegisterInfo *TRI = nullptr;
    127 };
    128 }
    129 
// Definition of the pass identifier that the FixupLEAPass constructor hands
// to the MachineFunctionPass base class.
char FixupLEAPass::ID = 0;

// Registers the pass using the FIXUPLEA_NAME / FIXUPLEA_DESC strings defined
// near the top of this file.
INITIALIZE_PASS(FixupLEAPass, FIXUPLEA_NAME, FIXUPLEA_DESC, false, false)
    133 
    134 MachineInstr *
    135 FixupLEAPass::postRAConvertToLEA(MachineBasicBlock &MBB,
    136                                  MachineBasicBlock::iterator &MBBI) const {
    137   MachineInstr &MI = *MBBI;
    138   switch (MI.getOpcode()) {
    139   case X86::MOV32rr:
    140   case X86::MOV64rr: {
    141     const MachineOperand &Src = MI.getOperand(1);
    142     const MachineOperand &Dest = MI.getOperand(0);
    143     MachineInstr *NewMI =
    144         BuildMI(MBB, MBBI, MI.getDebugLoc(),
    145                 TII->get(MI.getOpcode() == X86::MOV32rr ? X86::LEA32r
    146                                                         : X86::LEA64r))
    147             .add(Dest)
    148             .add(Src)
    149             .addImm(1)
    150             .addReg(0)
    151             .addImm(0)
    152             .addReg(0);
    153     return NewMI;
    154   }
    155   }
    156 
    157   if (!MI.isConvertibleTo3Addr())
    158     return nullptr;
    159 
    160   switch (MI.getOpcode()) {
    161   default:
    162     // Only convert instructions that we've verified are safe.
    163     return nullptr;
    164   case X86::ADD64ri32:
    165   case X86::ADD64ri8:
    166   case X86::ADD64ri32_DB:
    167   case X86::ADD64ri8_DB:
    168   case X86::ADD32ri:
    169   case X86::ADD32ri8:
    170   case X86::ADD32ri_DB:
    171   case X86::ADD32ri8_DB:
    172     if (!MI.getOperand(2).isImm()) {
    173       // convertToThreeAddress will call getImm()
    174       // which requires isImm() to be true
    175       return nullptr;
    176     }
    177     break;
    178   case X86::SHL64ri:
    179   case X86::SHL32ri:
    180   case X86::INC64r:
    181   case X86::INC32r:
    182   case X86::DEC64r:
    183   case X86::DEC32r:
    184   case X86::ADD64rr:
    185   case X86::ADD64rr_DB:
    186   case X86::ADD32rr:
    187   case X86::ADD32rr_DB:
    188     // These instructions are all fine to convert.
    189     break;
    190   }
    191   MachineFunction::iterator MFI = MBB.getIterator();
    192   return TII->convertToThreeAddress(MFI, MI, nullptr);
    193 }
    194 
    195 FunctionPass *llvm::createX86FixupLEAs() { return new FixupLEAPass(); }
    196 
    197 static bool isLEA(unsigned Opcode) {
    198   return Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
    199          Opcode == X86::LEA64_32r;
    200 }
    201 
    202 bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
    203   if (skipFunction(MF.getFunction()))
    204     return false;
    205 
    206   const X86Subtarget &ST = MF.getSubtarget<X86Subtarget>();
    207   bool IsSlowLEA = ST.slowLEA();
    208   bool IsSlow3OpsLEA = ST.slow3OpsLEA();
    209   bool LEAUsesAG = ST.LEAusesAG();
    210 
    211   bool OptIncDec = !ST.slowIncDec() || MF.getFunction().hasOptSize();
    212   bool UseLEAForSP = ST.useLeaForSP();
    213 
    214   TSM.init(&ST);
    215   TII = ST.getInstrInfo();
    216   TRI = ST.getRegisterInfo();
    217   auto *PSI = &getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
    218   auto *MBFI = (PSI && PSI->hasProfileSummary())
    219                    ? &getAnalysis<LazyMachineBlockFrequencyInfoPass>().getBFI()
    220                    : nullptr;
    221 
    222   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
    223   for (MachineBasicBlock &MBB : MF) {
    224     // First pass. Try to remove or optimize existing LEAs.
    225     bool OptIncDecPerBB =
    226         OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
    227     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
    228       if (!isLEA(I->getOpcode()))
    229         continue;
    230 
    231       if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
    232         continue;
    233 
    234       if (IsSlowLEA)
    235         processInstructionForSlowLEA(I, MBB);
    236       else if (IsSlow3OpsLEA)
    237         processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
    238     }
    239 
    240     // Second pass for creating LEAs. This may reverse some of the
    241     // transformations above.
    242     if (LEAUsesAG) {
    243       for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
    244         processInstruction(I, MBB);
    245     }
    246   }
    247 
    248   LLVM_DEBUG(dbgs() << "End X86FixupLEAs\n";);
    249 
    250   return true;
    251 }
    252 
    253 FixupLEAPass::RegUsageState
    254 FixupLEAPass::usesRegister(MachineOperand &p, MachineBasicBlock::iterator I) {
    255   RegUsageState RegUsage = RU_NotUsed;
    256   MachineInstr &MI = *I;
    257 
    258   for (unsigned i = 0; i < MI.getNumOperands(); ++i) {
    259     MachineOperand &opnd = MI.getOperand(i);
    260     if (opnd.isReg() && opnd.getReg() == p.getReg()) {
    261       if (opnd.isDef())
    262         return RU_Write;
    263       RegUsage = RU_Read;
    264     }
    265   }
    266   return RegUsage;
    267 }
    268 
    269 /// getPreviousInstr - Given a reference to an instruction in a basic
    270 /// block, return a reference to the previous instruction in the block,
    271 /// wrapping around to the last instruction of the block if the block
    272 /// branches to itself.
    273 static inline bool getPreviousInstr(MachineBasicBlock::iterator &I,
    274                                     MachineBasicBlock &MBB) {
    275   if (I == MBB.begin()) {
    276     if (MBB.isPredecessor(&MBB)) {
    277       I = --MBB.end();
    278       return true;
    279     } else
    280       return false;
    281   }
    282   --I;
    283   return true;
    284 }
    285 
    286 MachineBasicBlock::iterator
    287 FixupLEAPass::searchBackwards(MachineOperand &p, MachineBasicBlock::iterator &I,
    288                               MachineBasicBlock &MBB) {
    289   int InstrDistance = 1;
    290   MachineBasicBlock::iterator CurInst;
    291   static const int INSTR_DISTANCE_THRESHOLD = 5;
    292 
    293   CurInst = I;
    294   bool Found;
    295   Found = getPreviousInstr(CurInst, MBB);
    296   while (Found && I != CurInst) {
    297     if (CurInst->isCall() || CurInst->isInlineAsm())
    298       break;
    299     if (InstrDistance > INSTR_DISTANCE_THRESHOLD)
    300       break; // too far back to make a difference
    301     if (usesRegister(p, CurInst) == RU_Write) {
    302       return CurInst;
    303     }
    304     InstrDistance += TSM.computeInstrLatency(&*CurInst);
    305     Found = getPreviousInstr(CurInst, MBB);
    306   }
    307   return MachineBasicBlock::iterator();
    308 }
    309 
    310 static inline bool isInefficientLEAReg(unsigned Reg) {
    311   return Reg == X86::EBP || Reg == X86::RBP ||
    312          Reg == X86::R13D || Reg == X86::R13;
    313 }
    314 
    315 /// Returns true if this LEA uses base an index registers, and the base register
    316 /// is known to be inefficient for the subtarget.
    317 // TODO: use a variant scheduling class to model the latency profile
    318 // of LEA instructions, and implement this logic as a scheduling predicate.
    319 static inline bool hasInefficientLEABaseReg(const MachineOperand &Base,
    320                                             const MachineOperand &Index) {
    321   return Base.isReg() && isInefficientLEAReg(Base.getReg()) && Index.isReg() &&
    322          Index.getReg() != X86::NoRegister;
    323 }
    324 
    325 static inline bool hasLEAOffset(const MachineOperand &Offset) {
    326   return (Offset.isImm() && Offset.getImm() != 0) || Offset.isGlobal();
    327 }
    328 
    329 static inline unsigned getADDrrFromLEA(unsigned LEAOpcode) {
    330   switch (LEAOpcode) {
    331   default:
    332     llvm_unreachable("Unexpected LEA instruction");
    333   case X86::LEA32r:
    334   case X86::LEA64_32r:
    335     return X86::ADD32rr;
    336   case X86::LEA64r:
    337     return X86::ADD64rr;
    338   }
    339 }
    340 
    341 static inline unsigned getADDriFromLEA(unsigned LEAOpcode,
    342                                        const MachineOperand &Offset) {
    343   bool IsInt8 = Offset.isImm() && isInt<8>(Offset.getImm());
    344   switch (LEAOpcode) {
    345   default:
    346     llvm_unreachable("Unexpected LEA instruction");
    347   case X86::LEA32r:
    348   case X86::LEA64_32r:
    349     return IsInt8 ? X86::ADD32ri8 : X86::ADD32ri;
    350   case X86::LEA64r:
    351     return IsInt8 ? X86::ADD64ri8 : X86::ADD64ri32;
    352   }
    353 }
    354 
    355 static inline unsigned getINCDECFromLEA(unsigned LEAOpcode, bool IsINC) {
    356   switch (LEAOpcode) {
    357   default:
    358     llvm_unreachable("Unexpected LEA instruction");
    359   case X86::LEA32r:
    360   case X86::LEA64_32r:
    361     return IsINC ? X86::INC32r : X86::DEC32r;
    362   case X86::LEA64r:
    363     return IsINC ? X86::INC64r : X86::DEC64r;
    364   }
    365 }
    366 
    367 bool FixupLEAPass::optTwoAddrLEA(MachineBasicBlock::iterator &I,
    368                                  MachineBasicBlock &MBB, bool OptIncDec,
    369                                  bool UseLEAForSP) const {
    370   MachineInstr &MI = *I;
    371 
    372   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
    373   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
    374   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
    375   const MachineOperand &Disp =    MI.getOperand(1 + X86::AddrDisp);
    376   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
    377 
    378   if (Segment.getReg() != 0 || !Disp.isImm() || Scale.getImm() > 1 ||
    379       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I) !=
    380           MachineBasicBlock::LQR_Dead)
    381     return false;
    382 
    383   Register DestReg = MI.getOperand(0).getReg();
    384   Register BaseReg = Base.getReg();
    385   Register IndexReg = Index.getReg();
    386 
    387   // Don't change stack adjustment LEAs.
    388   if (UseLEAForSP && (DestReg == X86::ESP || DestReg == X86::RSP))
    389     return false;
    390 
    391   // LEA64_32 has 64-bit operands but 32-bit result.
    392   if (MI.getOpcode() == X86::LEA64_32r) {
    393     if (BaseReg != 0)
    394       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    395     if (IndexReg != 0)
    396       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
    397   }
    398 
    399   MachineInstr *NewMI = nullptr;
    400 
    401   // Look for lea(%reg1, %reg2), %reg1 or lea(%reg2, %reg1), %reg1
    402   // which can be turned into add %reg2, %reg1
    403   if (BaseReg != 0 && IndexReg != 0 && Disp.getImm() == 0 &&
    404       (DestReg == BaseReg || DestReg == IndexReg)) {
    405     unsigned NewOpcode = getADDrrFromLEA(MI.getOpcode());
    406     if (DestReg != BaseReg)
    407       std::swap(BaseReg, IndexReg);
    408 
    409     if (MI.getOpcode() == X86::LEA64_32r) {
    410       // TODO: Do we need the super register implicit use?
    411       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
    412         .addReg(BaseReg).addReg(IndexReg)
    413         .addReg(Base.getReg(), RegState::Implicit)
    414         .addReg(Index.getReg(), RegState::Implicit);
    415     } else {
    416       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
    417         .addReg(BaseReg).addReg(IndexReg);
    418     }
    419   } else if (DestReg == BaseReg && IndexReg == 0) {
    420     // This is an LEA with only a base register and a displacement,
    421     // We can use ADDri or INC/DEC.
    422 
    423     // Does this LEA have one these forms:
    424     // lea  %reg, 1(%reg)
    425     // lea  %reg, -1(%reg)
    426     if (OptIncDec && (Disp.getImm() == 1 || Disp.getImm() == -1)) {
    427       bool IsINC = Disp.getImm() == 1;
    428       unsigned NewOpcode = getINCDECFromLEA(MI.getOpcode(), IsINC);
    429 
    430       if (MI.getOpcode() == X86::LEA64_32r) {
    431         // TODO: Do we need the super register implicit use?
    432         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
    433           .addReg(BaseReg).addReg(Base.getReg(), RegState::Implicit);
    434       } else {
    435         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
    436           .addReg(BaseReg);
    437       }
    438     } else {
    439       unsigned NewOpcode = getADDriFromLEA(MI.getOpcode(), Disp);
    440       if (MI.getOpcode() == X86::LEA64_32r) {
    441         // TODO: Do we need the super register implicit use?
    442         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
    443           .addReg(BaseReg).addImm(Disp.getImm())
    444           .addReg(Base.getReg(), RegState::Implicit);
    445       } else {
    446         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpcode), DestReg)
    447           .addReg(BaseReg).addImm(Disp.getImm());
    448       }
    449     }
    450   } else
    451     return false;
    452 
    453   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    454   MBB.erase(I);
    455   I = NewMI;
    456   return true;
    457 }
    458 
    459 void FixupLEAPass::processInstruction(MachineBasicBlock::iterator &I,
    460                                       MachineBasicBlock &MBB) {
    461   // Process a load, store, or LEA instruction.
    462   MachineInstr &MI = *I;
    463   const MCInstrDesc &Desc = MI.getDesc();
    464   int AddrOffset = X86II::getMemoryOperandNo(Desc.TSFlags);
    465   if (AddrOffset >= 0) {
    466     AddrOffset += X86II::getOperandBias(Desc);
    467     MachineOperand &p = MI.getOperand(AddrOffset + X86::AddrBaseReg);
    468     if (p.isReg() && p.getReg() != X86::ESP) {
    469       seekLEAFixup(p, I, MBB);
    470     }
    471     MachineOperand &q = MI.getOperand(AddrOffset + X86::AddrIndexReg);
    472     if (q.isReg() && q.getReg() != X86::ESP) {
    473       seekLEAFixup(q, I, MBB);
    474     }
    475   }
    476 }
    477 
    478 void FixupLEAPass::seekLEAFixup(MachineOperand &p,
    479                                 MachineBasicBlock::iterator &I,
    480                                 MachineBasicBlock &MBB) {
    481   MachineBasicBlock::iterator MBI = searchBackwards(p, I, MBB);
    482   if (MBI != MachineBasicBlock::iterator()) {
    483     MachineInstr *NewMI = postRAConvertToLEA(MBB, MBI);
    484     if (NewMI) {
    485       ++NumLEAs;
    486       LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MBI->dump(););
    487       // now to replace with an equivalent LEA...
    488       LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: "; NewMI->dump(););
    489       MBB.getParent()->substituteDebugValuesForInst(*MBI, *NewMI, 1);
    490       MBB.erase(MBI);
    491       MachineBasicBlock::iterator J =
    492           static_cast<MachineBasicBlock::iterator>(NewMI);
    493       processInstruction(J, MBB);
    494     }
    495   }
    496 }
    497 
    498 void FixupLEAPass::processInstructionForSlowLEA(MachineBasicBlock::iterator &I,
    499                                                 MachineBasicBlock &MBB) {
    500   MachineInstr &MI = *I;
    501   const unsigned Opcode = MI.getOpcode();
    502 
    503   const MachineOperand &Dst =     MI.getOperand(0);
    504   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
    505   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
    506   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
    507   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
    508   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
    509 
    510   if (Segment.getReg() != 0 || !Offset.isImm() ||
    511       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
    512           MachineBasicBlock::LQR_Dead)
    513     return;
    514   const Register DstR = Dst.getReg();
    515   const Register SrcR1 = Base.getReg();
    516   const Register SrcR2 = Index.getReg();
    517   if ((SrcR1 == 0 || SrcR1 != DstR) && (SrcR2 == 0 || SrcR2 != DstR))
    518     return;
    519   if (Scale.getImm() > 1)
    520     return;
    521   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; I->dump(););
    522   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
    523   MachineInstr *NewMI = nullptr;
    524   // Make ADD instruction for two registers writing to LEA's destination
    525   if (SrcR1 != 0 && SrcR2 != 0) {
    526     const MCInstrDesc &ADDrr = TII->get(getADDrrFromLEA(Opcode));
    527     const MachineOperand &Src = SrcR1 == DstR ? Index : Base;
    528     NewMI =
    529         BuildMI(MBB, I, MI.getDebugLoc(), ADDrr, DstR).addReg(DstR).add(Src);
    530     LLVM_DEBUG(NewMI->dump(););
    531   }
    532   // Make ADD instruction for immediate
    533   if (Offset.getImm() != 0) {
    534     const MCInstrDesc &ADDri =
    535         TII->get(getADDriFromLEA(Opcode, Offset));
    536     const MachineOperand &SrcR = SrcR1 == DstR ? Base : Index;
    537     NewMI = BuildMI(MBB, I, MI.getDebugLoc(), ADDri, DstR)
    538                 .add(SrcR)
    539                 .addImm(Offset.getImm());
    540     LLVM_DEBUG(NewMI->dump(););
    541   }
    542   if (NewMI) {
    543     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    544     MBB.erase(I);
    545     I = NewMI;
    546   }
    547 }
    548 
    549 void FixupLEAPass::processInstrForSlow3OpLEA(MachineBasicBlock::iterator &I,
    550                                              MachineBasicBlock &MBB,
    551                                              bool OptIncDec) {
    552   MachineInstr &MI = *I;
    553   const unsigned LEAOpcode = MI.getOpcode();
    554 
    555   const MachineOperand &Dest =    MI.getOperand(0);
    556   const MachineOperand &Base =    MI.getOperand(1 + X86::AddrBaseReg);
    557   const MachineOperand &Scale =   MI.getOperand(1 + X86::AddrScaleAmt);
    558   const MachineOperand &Index =   MI.getOperand(1 + X86::AddrIndexReg);
    559   const MachineOperand &Offset =  MI.getOperand(1 + X86::AddrDisp);
    560   const MachineOperand &Segment = MI.getOperand(1 + X86::AddrSegmentReg);
    561 
    562   if (!(TII->isThreeOperandsLEA(MI) || hasInefficientLEABaseReg(Base, Index)) ||
    563       MBB.computeRegisterLiveness(TRI, X86::EFLAGS, I, 4) !=
    564           MachineBasicBlock::LQR_Dead ||
    565       Segment.getReg() != X86::NoRegister)
    566     return;
    567 
    568   Register DestReg = Dest.getReg();
    569   Register BaseReg = Base.getReg();
    570   Register IndexReg = Index.getReg();
    571 
    572   if (MI.getOpcode() == X86::LEA64_32r) {
    573     if (BaseReg != 0)
    574       BaseReg = TRI->getSubReg(BaseReg, X86::sub_32bit);
    575     if (IndexReg != 0)
    576       IndexReg = TRI->getSubReg(IndexReg, X86::sub_32bit);
    577   }
    578 
    579   bool IsScale1 = Scale.getImm() == 1;
    580   bool IsInefficientBase = isInefficientLEAReg(BaseReg);
    581   bool IsInefficientIndex = isInefficientLEAReg(IndexReg);
    582 
    583   // Skip these cases since it takes more than 2 instructions
    584   // to replace the LEA instruction.
    585   if (IsInefficientBase && DestReg == BaseReg && !IsScale1)
    586     return;
    587 
    588   LLVM_DEBUG(dbgs() << "FixLEA: Candidate to replace:"; MI.dump(););
    589   LLVM_DEBUG(dbgs() << "FixLEA: Replaced by: ";);
    590 
    591   MachineInstr *NewMI = nullptr;
    592 
    593   // First try to replace LEA with one or two (for the 3-op LEA case)
    594   // add instructions:
    595   // 1.lea (%base,%index,1), %base => add %index,%base
    596   // 2.lea (%base,%index,1), %index => add %base,%index
    597   if (IsScale1 && (DestReg == BaseReg || DestReg == IndexReg)) {
    598     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    599     if (DestReg != BaseReg)
    600       std::swap(BaseReg, IndexReg);
    601 
    602     if (MI.getOpcode() == X86::LEA64_32r) {
    603       // TODO: Do we need the super register implicit use?
    604       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
    605                   .addReg(BaseReg)
    606                   .addReg(IndexReg)
    607                   .addReg(Base.getReg(), RegState::Implicit)
    608                   .addReg(Index.getReg(), RegState::Implicit);
    609     } else {
    610       NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
    611                   .addReg(BaseReg)
    612                   .addReg(IndexReg);
    613     }
    614   } else if (!IsInefficientBase || (!IsInefficientIndex && IsScale1)) {
    615     // If the base is inefficient try switching the index and base operands,
    616     // otherwise just break the 3-Ops LEA inst into 2-Ops LEA + ADD instruction:
    617     // lea offset(%base,%index,scale),%dst =>
    618     // lea (%base,%index,scale); add offset,%dst
    619     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
    620                 .add(Dest)
    621                 .add(IsInefficientBase ? Index : Base)
    622                 .add(Scale)
    623                 .add(IsInefficientBase ? Base : Index)
    624                 .addImm(0)
    625                 .add(Segment);
    626     LLVM_DEBUG(NewMI->dump(););
    627   }
    628 
    629   // If either replacement succeeded above, add the offset if needed, then
    630   // replace the instruction.
    631   if (NewMI) {
    632     // Create ADD instruction for the Offset in case of 3-Ops LEA.
    633     if (hasLEAOffset(Offset)) {
    634       if (OptIncDec && Offset.isImm() &&
    635           (Offset.getImm() == 1 || Offset.getImm() == -1)) {
    636         unsigned NewOpc =
    637             getINCDECFromLEA(MI.getOpcode(), Offset.getImm() == 1);
    638         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
    639                     .addReg(DestReg);
    640         LLVM_DEBUG(NewMI->dump(););
    641       } else {
    642         unsigned NewOpc = getADDriFromLEA(MI.getOpcode(), Offset);
    643         NewMI = BuildMI(MBB, I, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
    644                     .addReg(DestReg)
    645                     .add(Offset);
    646         LLVM_DEBUG(NewMI->dump(););
    647       }
    648     }
    649 
    650     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    651     MBB.erase(I);
    652     I = NewMI;
    653     return;
    654   }
    655 
    656   // Handle the rest of the cases with inefficient base register:
    657   assert(DestReg != BaseReg && "DestReg == BaseReg should be handled already!");
    658   assert(IsInefficientBase && "efficient base should be handled already!");
    659 
    660   // FIXME: Handle LEA64_32r.
    661   if (LEAOpcode == X86::LEA64_32r)
    662     return;
    663 
    664   // lea (%base,%index,1), %dst => mov %base,%dst; add %index,%dst
    665   if (IsScale1 && !hasLEAOffset(Offset)) {
    666     bool BIK = Base.isKill() && BaseReg != IndexReg;
    667     TII->copyPhysReg(MBB, MI, MI.getDebugLoc(), DestReg, BaseReg, BIK);
    668     LLVM_DEBUG(MI.getPrevNode()->dump(););
    669 
    670     unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    671     NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
    672                 .addReg(DestReg)
    673                 .add(Index);
    674     LLVM_DEBUG(NewMI->dump(););
    675 
    676     MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    677     MBB.erase(I);
    678     I = NewMI;
    679     return;
    680   }
    681 
    682   // lea offset(%base,%index,scale), %dst =>
    683   // lea offset( ,%index,scale), %dst; add %base,%dst
    684   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(LEAOpcode))
    685               .add(Dest)
    686               .addReg(0)
    687               .add(Scale)
    688               .add(Index)
    689               .add(Offset)
    690               .add(Segment);
    691   LLVM_DEBUG(NewMI->dump(););
    692 
    693   unsigned NewOpc = getADDrrFromLEA(MI.getOpcode());
    694   NewMI = BuildMI(MBB, MI, MI.getDebugLoc(), TII->get(NewOpc), DestReg)
    695               .addReg(DestReg)
    696               .add(Base);
    697   LLVM_DEBUG(NewMI->dump(););
    698 
    699   MBB.getParent()->substituteDebugValuesForInst(*I, *NewMI, 1);
    700   MBB.erase(I);
    701   I = NewMI;
    702 }
    703