Home | History | Annotate | Line # | Download | only in AArch64
      1 //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file contains the AArch64 implementation of the TargetInstrInfo class.
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #include "AArch64InstrInfo.h"
     14 #include "AArch64MachineFunctionInfo.h"
     15 #include "AArch64Subtarget.h"
     16 #include "MCTargetDesc/AArch64AddressingModes.h"
     17 #include "Utils/AArch64BaseInfo.h"
     18 #include "llvm/ADT/ArrayRef.h"
     19 #include "llvm/ADT/STLExtras.h"
     20 #include "llvm/ADT/SmallVector.h"
     21 #include "llvm/CodeGen/MachineBasicBlock.h"
     22 #include "llvm/CodeGen/MachineFrameInfo.h"
     23 #include "llvm/CodeGen/MachineFunction.h"
     24 #include "llvm/CodeGen/MachineInstr.h"
     25 #include "llvm/CodeGen/MachineInstrBuilder.h"
     26 #include "llvm/CodeGen/MachineMemOperand.h"
     27 #include "llvm/CodeGen/MachineModuleInfo.h"
     28 #include "llvm/CodeGen/MachineOperand.h"
     29 #include "llvm/CodeGen/MachineRegisterInfo.h"
     30 #include "llvm/CodeGen/StackMaps.h"
     31 #include "llvm/CodeGen/TargetRegisterInfo.h"
     32 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     33 #include "llvm/IR/DebugInfoMetadata.h"
     34 #include "llvm/IR/DebugLoc.h"
     35 #include "llvm/IR/GlobalValue.h"
     36 #include "llvm/MC/MCAsmInfo.h"
     37 #include "llvm/MC/MCInst.h"
     38 #include "llvm/MC/MCInstBuilder.h"
     39 #include "llvm/MC/MCInstrDesc.h"
     40 #include "llvm/Support/Casting.h"
     41 #include "llvm/Support/CodeGen.h"
     42 #include "llvm/Support/CommandLine.h"
     43 #include "llvm/Support/Compiler.h"
     44 #include "llvm/Support/ErrorHandling.h"
     45 #include "llvm/Support/MathExtras.h"
     46 #include "llvm/Target/TargetMachine.h"
     47 #include "llvm/Target/TargetOptions.h"
     48 #include <cassert>
     49 #include <cstdint>
     50 #include <iterator>
     51 #include <utility>
     52 
     53 using namespace llvm;
     54 
     55 #define GET_INSTRINFO_CTOR_DTOR
     56 #include "AArch64GenInstrInfo.inc"
     57 
     58 static cl::opt<unsigned> TBZDisplacementBits(
     59     "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
     60     cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
     61 
     62 static cl::opt<unsigned> CBZDisplacementBits(
     63     "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
     64     cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
     65 
     66 static cl::opt<unsigned>
     67     BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
     68                         cl::desc("Restrict range of Bcc instructions (DEBUG)"));
     69 
     70 AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
     71     : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
     72                           AArch64::CATCHRET),
     73       RI(STI.getTargetTriple()), Subtarget(STI) {}
     74 
     75 /// GetInstSize - Return the number of bytes of code the specified
     76 /// instruction may be.  This returns the maximum number of bytes.
     77 unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
     78   const MachineBasicBlock &MBB = *MI.getParent();
     79   const MachineFunction *MF = MBB.getParent();
     80   const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
     81 
     82   {
     83     auto Op = MI.getOpcode();
     84     if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
     85       return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
     86   }
     87 
     88   // Meta-instructions emit no code.
     89   if (MI.isMetaInstruction())
     90     return 0;
     91 
     92   // FIXME: We currently only handle pseudoinstructions that don't get expanded
     93   //        before the assembly printer.
     94   unsigned NumBytes = 0;
     95   const MCInstrDesc &Desc = MI.getDesc();
     96   switch (Desc.getOpcode()) {
     97   default:
     98     // Anything not explicitly designated otherwise is a normal 4-byte insn.
     99     NumBytes = 4;
    100     break;
    101   case TargetOpcode::STACKMAP:
    102     // The upper bound for a stackmap intrinsic is the full length of its shadow
    103     NumBytes = StackMapOpers(&MI).getNumPatchBytes();
    104     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    105     break;
    106   case TargetOpcode::PATCHPOINT:
    107     // The size of the patchpoint intrinsic is the number of bytes requested
    108     NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
    109     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    110     break;
    111   case TargetOpcode::STATEPOINT:
    112     NumBytes = StatepointOpers(&MI).getNumPatchBytes();
    113     assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
    114     // No patch bytes means a normal call inst is emitted
    115     if (NumBytes == 0)
    116       NumBytes = 4;
    117     break;
    118   case AArch64::TLSDESC_CALLSEQ:
    119     // This gets lowered to an instruction sequence which takes 16 bytes
    120     NumBytes = 16;
    121     break;
    122   case AArch64::SpeculationBarrierISBDSBEndBB:
    123     // This gets lowered to 2 4-byte instructions.
    124     NumBytes = 8;
    125     break;
    126   case AArch64::SpeculationBarrierSBEndBB:
    127     // This gets lowered to 1 4-byte instructions.
    128     NumBytes = 4;
    129     break;
    130   case AArch64::JumpTableDest32:
    131   case AArch64::JumpTableDest16:
    132   case AArch64::JumpTableDest8:
    133     NumBytes = 12;
    134     break;
    135   case AArch64::SPACE:
    136     NumBytes = MI.getOperand(1).getImm();
    137     break;
    138   case AArch64::StoreSwiftAsyncContext:
    139     NumBytes = 20;
    140     break;
    141   case TargetOpcode::BUNDLE:
    142     NumBytes = getInstBundleLength(MI);
    143     break;
    144   }
    145 
    146   return NumBytes;
    147 }
    148 
    149 unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
    150   unsigned Size = 0;
    151   MachineBasicBlock::const_instr_iterator I = MI.getIterator();
    152   MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
    153   while (++I != E && I->isInsideBundle()) {
    154     assert(!I->isBundle() && "No nested bundle!");
    155     Size += getInstSizeInBytes(*I);
    156   }
    157   return Size;
    158 }
    159 
    160 static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
    161                             SmallVectorImpl<MachineOperand> &Cond) {
    162   // Block ends with fall-through condbranch.
    163   switch (LastInst->getOpcode()) {
    164   default:
    165     llvm_unreachable("Unknown branch instruction?");
    166   case AArch64::Bcc:
    167     Target = LastInst->getOperand(1).getMBB();
    168     Cond.push_back(LastInst->getOperand(0));
    169     break;
    170   case AArch64::CBZW:
    171   case AArch64::CBZX:
    172   case AArch64::CBNZW:
    173   case AArch64::CBNZX:
    174     Target = LastInst->getOperand(1).getMBB();
    175     Cond.push_back(MachineOperand::CreateImm(-1));
    176     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    177     Cond.push_back(LastInst->getOperand(0));
    178     break;
    179   case AArch64::TBZW:
    180   case AArch64::TBZX:
    181   case AArch64::TBNZW:
    182   case AArch64::TBNZX:
    183     Target = LastInst->getOperand(2).getMBB();
    184     Cond.push_back(MachineOperand::CreateImm(-1));
    185     Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
    186     Cond.push_back(LastInst->getOperand(0));
    187     Cond.push_back(LastInst->getOperand(1));
    188   }
    189 }
    190 
    191 static unsigned getBranchDisplacementBits(unsigned Opc) {
    192   switch (Opc) {
    193   default:
    194     llvm_unreachable("unexpected opcode!");
    195   case AArch64::B:
    196     return 64;
    197   case AArch64::TBNZW:
    198   case AArch64::TBZW:
    199   case AArch64::TBNZX:
    200   case AArch64::TBZX:
    201     return TBZDisplacementBits;
    202   case AArch64::CBNZW:
    203   case AArch64::CBZW:
    204   case AArch64::CBNZX:
    205   case AArch64::CBZX:
    206     return CBZDisplacementBits;
    207   case AArch64::Bcc:
    208     return BCCDisplacementBits;
    209   }
    210 }
    211 
    212 bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
    213                                              int64_t BrOffset) const {
    214   unsigned Bits = getBranchDisplacementBits(BranchOp);
    215   assert(Bits >= 3 && "max branch displacement must be enough to jump"
    216                       "over conditional branch expansion");
    217   return isIntN(Bits, BrOffset / 4);
    218 }
    219 
    220 MachineBasicBlock *
    221 AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
    222   switch (MI.getOpcode()) {
    223   default:
    224     llvm_unreachable("unexpected opcode!");
    225   case AArch64::B:
    226     return MI.getOperand(0).getMBB();
    227   case AArch64::TBZW:
    228   case AArch64::TBNZW:
    229   case AArch64::TBZX:
    230   case AArch64::TBNZX:
    231     return MI.getOperand(2).getMBB();
    232   case AArch64::CBZW:
    233   case AArch64::CBNZW:
    234   case AArch64::CBZX:
    235   case AArch64::CBNZX:
    236   case AArch64::Bcc:
    237     return MI.getOperand(1).getMBB();
    238   }
    239 }
    240 
// Branch analysis.
//
// Inspect the terminators at the end of MBB. Returns false when the sequence
// was understood and true when it cannot be handled. On a false return:
//   - no terminator at the end of the block: TBB, FBB and Cond are untouched;
//   - a single unconditional branch: TBB is its destination;
//   - a single conditional branch: TBB is its destination and Cond holds the
//     condition as encoded by parseCondBranch();
//   - a conditional branch followed by an unconditional one: TBB/Cond come
//     from the conditional branch and FBB is the unconditional destination;
//   - two unconditional branches: TBB is the destination of the first one.
// When AllowModify is true, redundant unconditional branches may be erased
// from the block while analyzing.
bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
                                     MachineBasicBlock *&TBB,
                                     MachineBasicBlock *&FBB,
                                     SmallVectorImpl<MachineOperand> &Cond,
                                     bool AllowModify) const {
  // If the block has no terminators, it just falls into the block after it.
  MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
  if (I == MBB.end())
    return false;

  // Skip over SpeculationBarrierEndBB terminators
  // NOTE(review): this steps back unconditionally, so it assumes a barrier is
  // never the first instruction of the block -- confirm.
  if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
      I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    --I;
  }

  if (!isUnpredicatedTerminator(*I))
    return false;

  // Get the last instruction in the block.
  MachineInstr *LastInst = &*I;

  // If there is only one terminator instruction, process it.
  // (The second half of the condition also moves I to the previous
  // instruction when there is one.)
  unsigned LastOpc = LastInst->getOpcode();
  if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
    if (isUncondBranchOpcode(LastOpc)) {
      TBB = LastInst->getOperand(0).getMBB();
      return false;
    }
    if (isCondBranchOpcode(LastOpc)) {
      // Block ends with fall-through condbranch.
      parseCondBranch(LastInst, TBB, Cond);
      return false;
    }
    return true; // Can't handle indirect branch.
  }

  // Get the instruction before it if it is a terminator.
  MachineInstr *SecondLastInst = &*I;
  unsigned SecondLastOpc = SecondLastInst->getOpcode();

  // If AllowModify is true and the block ends with two or more unconditional
  // branches, delete all but the first unconditional branch.
  if (AllowModify && isUncondBranchOpcode(LastOpc)) {
    while (isUncondBranchOpcode(SecondLastOpc)) {
      LastInst->eraseFromParent();
      LastInst = SecondLastInst;
      LastOpc = LastInst->getOpcode();
      if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
        // Return now the only terminator is an unconditional branch.
        TBB = LastInst->getOperand(0).getMBB();
        return false;
      } else {
        SecondLastInst = &*I;
        SecondLastOpc = SecondLastInst->getOpcode();
      }
    }
  }

  // If we're allowed to modify and the block ends in a unconditional branch
  // which could simply fallthrough, remove the branch.  (Note: This case only
  // matters when we can't understand the whole sequence, otherwise it's also
  // handled by BranchFolding.cpp.)
  if (AllowModify && isUncondBranchOpcode(LastOpc) &&
      MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
    LastInst->eraseFromParent();
    LastInst = SecondLastInst;
    LastOpc = LastInst->getOpcode();
    if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
      assert(!isUncondBranchOpcode(LastOpc) &&
             "unreachable unconditional branches removed above");

      if (isCondBranchOpcode(LastOpc)) {
        // Block ends with fall-through condbranch.
        parseCondBranch(LastInst, TBB, Cond);
        return false;
      }
      return true; // Can't handle indirect branch.
    } else {
      SecondLastInst = &*I;
      SecondLastOpc = SecondLastInst->getOpcode();
    }
  }

  // If there are three terminators, we don't know what sort of block this is.
  if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
    return true;

  // If the block ends with a conditional branch (Bcc, CB(N)Z or TB(N)Z)
  // followed by an unconditional branch, handle it.
  if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    parseCondBranch(SecondLastInst, TBB, Cond);
    FBB = LastInst->getOperand(0).getMBB();
    return false;
  }

  // If the block ends with two unconditional branches, handle it.  The second
  // one is not executed, so remove it.
  if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    TBB = SecondLastInst->getOperand(0).getMBB();
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return false;
  }

  // ...likewise if it ends with an indirect branch followed by an unconditional
  // branch.
  // The trailing branch is removed when allowed, but the block is still
  // reported as not analyzable.
  if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
    I = LastInst;
    if (AllowModify)
      I->eraseFromParent();
    return true;
  }

  // Otherwise, can't handle this.
  return true;
}
    359 
    360 bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
    361                                               MachineBranchPredicate &MBP,
    362                                               bool AllowModify) const {
    363   // For the moment, handle only a block which ends with a cb(n)zx followed by
    364   // a fallthrough.  Why this?  Because it is a common form.
    365   // TODO: Should we handle b.cc?
    366 
    367   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
    368   if (I == MBB.end())
    369     return true;
    370 
    371   // Skip over SpeculationBarrierEndBB terminators
    372   if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
    373       I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
    374     --I;
    375   }
    376 
    377   if (!isUnpredicatedTerminator(*I))
    378     return true;
    379 
    380   // Get the last instruction in the block.
    381   MachineInstr *LastInst = &*I;
    382   unsigned LastOpc = LastInst->getOpcode();
    383   if (!isCondBranchOpcode(LastOpc))
    384     return true;
    385 
    386   switch (LastOpc) {
    387   default:
    388     return true;
    389   case AArch64::CBZW:
    390   case AArch64::CBZX:
    391   case AArch64::CBNZW:
    392   case AArch64::CBNZX:
    393     break;
    394   };
    395 
    396   MBP.TrueDest = LastInst->getOperand(1).getMBB();
    397   assert(MBP.TrueDest && "expected!");
    398   MBP.FalseDest = MBB.getNextNode();
    399 
    400   MBP.ConditionDef = nullptr;
    401   MBP.SingleUseCondition = false;
    402 
    403   MBP.LHS = LastInst->getOperand(0);
    404   MBP.RHS = MachineOperand::CreateImm(0);
    405   MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
    406                                             : MachineBranchPredicate::PRED_EQ;
    407   return false;
    408 }
    409 
    410 bool AArch64InstrInfo::reverseBranchCondition(
    411     SmallVectorImpl<MachineOperand> &Cond) const {
    412   if (Cond[0].getImm() != -1) {
    413     // Regular Bcc
    414     AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
    415     Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
    416   } else {
    417     // Folded compare-and-branch
    418     switch (Cond[1].getImm()) {
    419     default:
    420       llvm_unreachable("Unknown conditional branch!");
    421     case AArch64::CBZW:
    422       Cond[1].setImm(AArch64::CBNZW);
    423       break;
    424     case AArch64::CBNZW:
    425       Cond[1].setImm(AArch64::CBZW);
    426       break;
    427     case AArch64::CBZX:
    428       Cond[1].setImm(AArch64::CBNZX);
    429       break;
    430     case AArch64::CBNZX:
    431       Cond[1].setImm(AArch64::CBZX);
    432       break;
    433     case AArch64::TBZW:
    434       Cond[1].setImm(AArch64::TBNZW);
    435       break;
    436     case AArch64::TBNZW:
    437       Cond[1].setImm(AArch64::TBZW);
    438       break;
    439     case AArch64::TBZX:
    440       Cond[1].setImm(AArch64::TBNZX);
    441       break;
    442     case AArch64::TBNZX:
    443       Cond[1].setImm(AArch64::TBZX);
    444       break;
    445     }
    446   }
    447 
    448   return false;
    449 }
    450 
    451 unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
    452                                         int *BytesRemoved) const {
    453   MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
    454   if (I == MBB.end())
    455     return 0;
    456 
    457   if (!isUncondBranchOpcode(I->getOpcode()) &&
    458       !isCondBranchOpcode(I->getOpcode()))
    459     return 0;
    460 
    461   // Remove the branch.
    462   I->eraseFromParent();
    463 
    464   I = MBB.end();
    465 
    466   if (I == MBB.begin()) {
    467     if (BytesRemoved)
    468       *BytesRemoved = 4;
    469     return 1;
    470   }
    471   --I;
    472   if (!isCondBranchOpcode(I->getOpcode())) {
    473     if (BytesRemoved)
    474       *BytesRemoved = 4;
    475     return 1;
    476   }
    477 
    478   // Remove the branch.
    479   I->eraseFromParent();
    480   if (BytesRemoved)
    481     *BytesRemoved = 8;
    482 
    483   return 2;
    484 }
    485 
    486 void AArch64InstrInfo::instantiateCondBranch(
    487     MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
    488     ArrayRef<MachineOperand> Cond) const {
    489   if (Cond[0].getImm() != -1) {
    490     // Regular Bcc
    491     BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
    492   } else {
    493     // Folded compare-and-branch
    494     // Note that we use addOperand instead of addReg to keep the flags.
    495     const MachineInstrBuilder MIB =
    496         BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
    497     if (Cond.size() > 3)
    498       MIB.addImm(Cond[3].getImm());
    499     MIB.addMBB(TBB);
    500   }
    501 }
    502 
    503 unsigned AArch64InstrInfo::insertBranch(
    504     MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
    505     ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
    506   // Shouldn't be a fall through.
    507   assert(TBB && "insertBranch must not be told to insert a fallthrough");
    508 
    509   if (!FBB) {
    510     if (Cond.empty()) // Unconditional branch?
    511       BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
    512     else
    513       instantiateCondBranch(MBB, DL, TBB, Cond);
    514 
    515     if (BytesAdded)
    516       *BytesAdded = 4;
    517 
    518     return 1;
    519   }
    520 
    521   // Two-way conditional branch.
    522   instantiateCondBranch(MBB, DL, TBB, Cond);
    523   BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
    524 
    525   if (BytesAdded)
    526     *BytesAdded = 8;
    527 
    528   return 2;
    529 }
    530 
    531 // Find the original register that VReg is copied from.
    532 static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
    533   while (Register::isVirtualRegister(VReg)) {
    534     const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    535     if (!DefMI->isFullCopy())
    536       return VReg;
    537     VReg = DefMI->getOperand(1).getReg();
    538   }
    539   return VReg;
    540 }
    541 
    542 // Determine if VReg is defined by an instruction that can be folded into a
    543 // csel instruction. If so, return the folded opcode, and the replacement
    544 // register.
    545 static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
    546                                 unsigned *NewVReg = nullptr) {
    547   VReg = removeCopies(MRI, VReg);
    548   if (!Register::isVirtualRegister(VReg))
    549     return 0;
    550 
    551   bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
    552   const MachineInstr *DefMI = MRI.getVRegDef(VReg);
    553   unsigned Opc = 0;
    554   unsigned SrcOpNum = 0;
    555   switch (DefMI->getOpcode()) {
    556   case AArch64::ADDSXri:
    557   case AArch64::ADDSWri:
    558     // if NZCV is used, do not fold.
    559     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    560       return 0;
    561     // fall-through to ADDXri and ADDWri.
    562     LLVM_FALLTHROUGH;
    563   case AArch64::ADDXri:
    564   case AArch64::ADDWri:
    565     // add x, 1 -> csinc.
    566     if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
    567         DefMI->getOperand(3).getImm() != 0)
    568       return 0;
    569     SrcOpNum = 1;
    570     Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
    571     break;
    572 
    573   case AArch64::ORNXrr:
    574   case AArch64::ORNWrr: {
    575     // not x -> csinv, represented as orn dst, xzr, src.
    576     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    577     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    578       return 0;
    579     SrcOpNum = 2;
    580     Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
    581     break;
    582   }
    583 
    584   case AArch64::SUBSXrr:
    585   case AArch64::SUBSWrr:
    586     // if NZCV is used, do not fold.
    587     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
    588       return 0;
    589     // fall-through to SUBXrr and SUBWrr.
    590     LLVM_FALLTHROUGH;
    591   case AArch64::SUBXrr:
    592   case AArch64::SUBWrr: {
    593     // neg x -> csneg, represented as sub dst, xzr, src.
    594     unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
    595     if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
    596       return 0;
    597     SrcOpNum = 2;
    598     Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
    599     break;
    600   }
    601   default:
    602     return 0;
    603   }
    604   assert(Opc && SrcOpNum && "Missing parameters");
    605 
    606   if (NewVReg)
    607     *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
    608   return Opc;
    609 }
    610 
    611 bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
    612                                        ArrayRef<MachineOperand> Cond,
    613                                        Register DstReg, Register TrueReg,
    614                                        Register FalseReg, int &CondCycles,
    615                                        int &TrueCycles,
    616                                        int &FalseCycles) const {
    617   // Check register classes.
    618   const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
    619   const TargetRegisterClass *RC =
    620       RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
    621   if (!RC)
    622     return false;
    623 
    624   // Also need to check the dest regclass, in case we're trying to optimize
    625   // something like:
    626   // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
    627   if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
    628     return false;
    629 
    630   // Expanding cbz/tbz requires an extra cycle of latency on the condition.
    631   unsigned ExtraCondLat = Cond.size() != 1;
    632 
    633   // GPRs are handled by csel.
    634   // FIXME: Fold in x+1, -x, and ~x when applicable.
    635   if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
    636       AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
    637     // Single-cycle csel, csinc, csinv, and csneg.
    638     CondCycles = 1 + ExtraCondLat;
    639     TrueCycles = FalseCycles = 1;
    640     if (canFoldIntoCSel(MRI, TrueReg))
    641       TrueCycles = 0;
    642     else if (canFoldIntoCSel(MRI, FalseReg))
    643       FalseCycles = 0;
    644     return true;
    645   }
    646 
    647   // Scalar floating point is handled by fcsel.
    648   // FIXME: Form fabs, fmin, and fmax when applicable.
    649   if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
    650       AArch64::FPR32RegClass.hasSubClassEq(RC)) {
    651     CondCycles = 5 + ExtraCondLat;
    652     TrueCycles = FalseCycles = 2;
    653     return true;
    654   }
    655 
    656   // Can't do vectors.
    657   return false;
    658 }
    659 
/// Insert, before \p I in \p MBB, a conditional select that defines
/// \p DstReg from \p TrueReg and \p FalseReg under the condition \p Cond.
///
/// \p Cond uses the encoding produced by parseCondBranch(). For the folded
/// compare-and-branch forms a flag-setting compare (cbz/cbnz) or test
/// (tbz/tbnz) is emitted first. The select itself is csel or fcsel depending
/// on the register class \p DstReg can be constrained to; for the GPR case an
/// add-1 / not / neg feeding one of the inputs may be folded into
/// csinc / csinv / csneg (see canFoldIntoCSel()).
void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator I,
                                    const DebugLoc &DL, Register DstReg,
                                    ArrayRef<MachineOperand> Cond,
                                    Register TrueReg, Register FalseReg) const {
  MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();

  // Parse the condition code, see parseCondBranch() above.
  AArch64CC::CondCode CC;
  switch (Cond.size()) {
  default:
    llvm_unreachable("Unknown condition opcode in Cond");
  case 1: // b.cc
    CC = AArch64CC::CondCode(Cond[0].getImm());
    break;
  case 3: { // cbz/cbnz
    // We must insert a compare against 0.
    bool Is64Bit;
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::CBZW:
      Is64Bit = false;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBZX:
      Is64Bit = true;
      CC = AArch64CC::EQ;
      break;
    case AArch64::CBNZW:
      Is64Bit = false;
      CC = AArch64CC::NE;
      break;
    case AArch64::CBNZX:
      Is64Bit = true;
      CC = AArch64CC::NE;
      break;
    }
    Register SrcReg = Cond[2].getReg();
    if (Is64Bit) {
      // cmp reg, #0 is actually subs xzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    } else {
      // 32-bit form of the same compare: subs wzr, reg, #0.
      MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
      BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
          .addReg(SrcReg)
          .addImm(0)
          .addImm(0);
    }
    break;
  }
  case 4: { // tbz/tbnz
    // We must insert a tst instruction.
    switch (Cond[1].getImm()) {
    default:
      llvm_unreachable("Unknown branch opcode in Cond");
    case AArch64::TBZW:
    case AArch64::TBZX:
      CC = AArch64CC::EQ;
      break;
    case AArch64::TBNZW:
    case AArch64::TBNZX:
      CC = AArch64CC::NE;
      break;
    }
    // tst reg, #(1 << foo) is actually ands xzr, reg, #(1 << foo), where foo
    // is the bit number held in Cond[3].
    if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
      BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
    else
      BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
          .addReg(Cond[2].getReg())
          .addImm(
              AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
    break;
  }
  }

  // Pick the select opcode from the first register class DstReg can be
  // constrained to. Folding is only attempted for the GPR (csel) cases.
  unsigned Opc = 0;
  const TargetRegisterClass *RC = nullptr;
  bool TryFold = false;
  if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
    RC = &AArch64::GPR64RegClass;
    Opc = AArch64::CSELXr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
    RC = &AArch64::GPR32RegClass;
    Opc = AArch64::CSELWr;
    TryFold = true;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
    RC = &AArch64::FPR64RegClass;
    Opc = AArch64::FCSELDrrr;
  } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
    RC = &AArch64::FPR32RegClass;
    Opc = AArch64::FCSELSrrr;
  }
  assert(RC && "Unsupported regclass");

  // Try folding simple instructions into the csel.
  if (TryFold) {
    unsigned NewVReg = 0;
    unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
    if (FoldedOpc) {
      // The folded opcodes csinc, csinv and csneg apply the operation to
      // FalseReg, so we need to invert the condition.
      CC = AArch64CC::getInvertedCondCode(CC);
      TrueReg = FalseReg;
    } else
      FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);

    // Fold the operation. Leave any dead instructions for DCE to clean up.
    if (FoldedOpc) {
      FalseReg = NewVReg;
      Opc = FoldedOpc;
      // This extends the live range of NewVReg.
      MRI.clearKillFlags(NewVReg);
    }
  }

  // Pull all virtual register into the appropriate class.
  MRI.constrainRegClass(TrueReg, RC);
  MRI.constrainRegClass(FalseReg, RC);

  // Insert the csel.
  BuildMI(MBB, I, DL, get(Opc), DstReg)
      .addReg(TrueReg)
      .addReg(FalseReg)
      .addImm(CC);
}
    795 
    796 /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an  ORRxx.
    797 static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
    798   uint64_t Imm = MI.getOperand(1).getImm();
    799   uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
    800   uint64_t Encoding;
    801   return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
    802 }
    803 
    804 // FIXME: this implementation should be micro-architecture dependent, so a
    805 // micro-architecture target hook should be introduced here in future.
    806 bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
    807   if (!Subtarget.hasCustomCheapAsMoveHandling())
    808     return MI.isAsCheapAsAMove();
    809 
    810   const unsigned Opcode = MI.getOpcode();
    811 
    812   // Firstly, check cases gated by features.
    813 
    814   if (Subtarget.hasZeroCycleZeroingFP()) {
    815     if (Opcode == AArch64::FMOVH0 ||
    816         Opcode == AArch64::FMOVS0 ||
    817         Opcode == AArch64::FMOVD0)
    818       return true;
    819   }
    820 
    821   if (Subtarget.hasZeroCycleZeroingGP()) {
    822     if (Opcode == TargetOpcode::COPY &&
    823         (MI.getOperand(1).getReg() == AArch64::WZR ||
    824          MI.getOperand(1).getReg() == AArch64::XZR))
    825       return true;
    826   }
    827 
    828   // Secondly, check cases specific to sub-targets.
    829 
    830   if (Subtarget.hasExynosCheapAsMoveHandling()) {
    831     if (isExynosCheapAsMove(MI))
    832       return true;
    833 
    834     return MI.isAsCheapAsAMove();
    835   }
    836 
    837   // Finally, check generic cases.
    838 
    839   switch (Opcode) {
    840   default:
    841     return false;
    842 
    843   // add/sub on register without shift
    844   case AArch64::ADDWri:
    845   case AArch64::ADDXri:
    846   case AArch64::SUBWri:
    847   case AArch64::SUBXri:
    848     return (MI.getOperand(3).getImm() == 0);
    849 
    850   // logical ops on immediate
    851   case AArch64::ANDWri:
    852   case AArch64::ANDXri:
    853   case AArch64::EORWri:
    854   case AArch64::EORXri:
    855   case AArch64::ORRWri:
    856   case AArch64::ORRXri:
    857     return true;
    858 
    859   // logical ops on register without shift
    860   case AArch64::ANDWrr:
    861   case AArch64::ANDXrr:
    862   case AArch64::BICWrr:
    863   case AArch64::BICXrr:
    864   case AArch64::EONWrr:
    865   case AArch64::EONXrr:
    866   case AArch64::EORWrr:
    867   case AArch64::EORXrr:
    868   case AArch64::ORNWrr:
    869   case AArch64::ORNXrr:
    870   case AArch64::ORRWrr:
    871   case AArch64::ORRXrr:
    872     return true;
    873 
    874   // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
    875   // ORRXri, it is as cheap as MOV
    876   case AArch64::MOVi32imm:
    877     return canBeExpandedToORR(MI, 32);
    878   case AArch64::MOVi64imm:
    879     return canBeExpandedToORR(MI, 64);
    880   }
    881 
    882   llvm_unreachable("Unknown opcode to check as cheap as a move!");
    883 }
    884 
    885 bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
    886   switch (MI.getOpcode()) {
    887   default:
    888     return false;
    889 
    890   case AArch64::ADDWrs:
    891   case AArch64::ADDXrs:
    892   case AArch64::ADDSWrs:
    893   case AArch64::ADDSXrs: {
    894     unsigned Imm = MI.getOperand(3).getImm();
    895     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    896     if (ShiftVal == 0)
    897       return true;
    898     return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
    899   }
    900 
    901   case AArch64::ADDWrx:
    902   case AArch64::ADDXrx:
    903   case AArch64::ADDXrx64:
    904   case AArch64::ADDSWrx:
    905   case AArch64::ADDSXrx:
    906   case AArch64::ADDSXrx64: {
    907     unsigned Imm = MI.getOperand(3).getImm();
    908     switch (AArch64_AM::getArithExtendType(Imm)) {
    909     default:
    910       return false;
    911     case AArch64_AM::UXTB:
    912     case AArch64_AM::UXTH:
    913     case AArch64_AM::UXTW:
    914     case AArch64_AM::UXTX:
    915       return AArch64_AM::getArithShiftValue(Imm) <= 4;
    916     }
    917   }
    918 
    919   case AArch64::SUBWrs:
    920   case AArch64::SUBSWrs: {
    921     unsigned Imm = MI.getOperand(3).getImm();
    922     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    923     return ShiftVal == 0 ||
    924            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
    925   }
    926 
    927   case AArch64::SUBXrs:
    928   case AArch64::SUBSXrs: {
    929     unsigned Imm = MI.getOperand(3).getImm();
    930     unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
    931     return ShiftVal == 0 ||
    932            (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
    933   }
    934 
    935   case AArch64::SUBWrx:
    936   case AArch64::SUBXrx:
    937   case AArch64::SUBXrx64:
    938   case AArch64::SUBSWrx:
    939   case AArch64::SUBSXrx:
    940   case AArch64::SUBSXrx64: {
    941     unsigned Imm = MI.getOperand(3).getImm();
    942     switch (AArch64_AM::getArithExtendType(Imm)) {
    943     default:
    944       return false;
    945     case AArch64_AM::UXTB:
    946     case AArch64_AM::UXTH:
    947     case AArch64_AM::UXTW:
    948     case AArch64_AM::UXTX:
    949       return AArch64_AM::getArithShiftValue(Imm) == 0;
    950     }
    951   }
    952 
    953   case AArch64::LDRBBroW:
    954   case AArch64::LDRBBroX:
    955   case AArch64::LDRBroW:
    956   case AArch64::LDRBroX:
    957   case AArch64::LDRDroW:
    958   case AArch64::LDRDroX:
    959   case AArch64::LDRHHroW:
    960   case AArch64::LDRHHroX:
    961   case AArch64::LDRHroW:
    962   case AArch64::LDRHroX:
    963   case AArch64::LDRQroW:
    964   case AArch64::LDRQroX:
    965   case AArch64::LDRSBWroW:
    966   case AArch64::LDRSBWroX:
    967   case AArch64::LDRSBXroW:
    968   case AArch64::LDRSBXroX:
    969   case AArch64::LDRSHWroW:
    970   case AArch64::LDRSHWroX:
    971   case AArch64::LDRSHXroW:
    972   case AArch64::LDRSHXroX:
    973   case AArch64::LDRSWroW:
    974   case AArch64::LDRSWroX:
    975   case AArch64::LDRSroW:
    976   case AArch64::LDRSroX:
    977   case AArch64::LDRWroW:
    978   case AArch64::LDRWroX:
    979   case AArch64::LDRXroW:
    980   case AArch64::LDRXroX:
    981   case AArch64::PRFMroW:
    982   case AArch64::PRFMroX:
    983   case AArch64::STRBBroW:
    984   case AArch64::STRBBroX:
    985   case AArch64::STRBroW:
    986   case AArch64::STRBroX:
    987   case AArch64::STRDroW:
    988   case AArch64::STRDroX:
    989   case AArch64::STRHHroW:
    990   case AArch64::STRHHroX:
    991   case AArch64::STRHroW:
    992   case AArch64::STRHroX:
    993   case AArch64::STRQroW:
    994   case AArch64::STRQroX:
    995   case AArch64::STRSroW:
    996   case AArch64::STRSroX:
    997   case AArch64::STRWroW:
    998   case AArch64::STRWroX:
    999   case AArch64::STRXroW:
   1000   case AArch64::STRXroX: {
   1001     unsigned IsSigned = MI.getOperand(3).getImm();
   1002     return !IsSigned;
   1003   }
   1004   }
   1005 }
   1006 
   1007 bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
   1008   unsigned Opc = MI.getOpcode();
   1009   switch (Opc) {
   1010     default:
   1011       return false;
   1012     case AArch64::SEH_StackAlloc:
   1013     case AArch64::SEH_SaveFPLR:
   1014     case AArch64::SEH_SaveFPLR_X:
   1015     case AArch64::SEH_SaveReg:
   1016     case AArch64::SEH_SaveReg_X:
   1017     case AArch64::SEH_SaveRegP:
   1018     case AArch64::SEH_SaveRegP_X:
   1019     case AArch64::SEH_SaveFReg:
   1020     case AArch64::SEH_SaveFReg_X:
   1021     case AArch64::SEH_SaveFRegP:
   1022     case AArch64::SEH_SaveFRegP_X:
   1023     case AArch64::SEH_SetFP:
   1024     case AArch64::SEH_AddFP:
   1025     case AArch64::SEH_Nop:
   1026     case AArch64::SEH_PrologEnd:
   1027     case AArch64::SEH_EpilogStart:
   1028     case AArch64::SEH_EpilogEnd:
   1029       return true;
   1030   }
   1031 }
   1032 
   1033 bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
   1034                                              Register &SrcReg, Register &DstReg,
   1035                                              unsigned &SubIdx) const {
   1036   switch (MI.getOpcode()) {
   1037   default:
   1038     return false;
   1039   case AArch64::SBFMXri: // aka sxtw
   1040   case AArch64::UBFMXri: // aka uxtw
   1041     // Check for the 32 -> 64 bit extension case, these instructions can do
   1042     // much more.
   1043     if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
   1044       return false;
   1045     // This is a signed or unsigned 32 -> 64 bit extension.
   1046     SrcReg = MI.getOperand(1).getReg();
   1047     DstReg = MI.getOperand(0).getReg();
   1048     SubIdx = AArch64::sub_32;
   1049     return true;
   1050   }
   1051 }
   1052 
   1053 bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
   1054     const MachineInstr &MIa, const MachineInstr &MIb) const {
   1055   const TargetRegisterInfo *TRI = &getRegisterInfo();
   1056   const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
   1057   int64_t OffsetA = 0, OffsetB = 0;
   1058   unsigned WidthA = 0, WidthB = 0;
   1059   bool OffsetAIsScalable = false, OffsetBIsScalable = false;
   1060 
   1061   assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
   1062   assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
   1063 
   1064   if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
   1065       MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
   1066     return false;
   1067 
   1068   // Retrieve the base, offset from the base and width. Width
   1069   // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8).  If
   1070   // base are identical, and the offset of a lower memory access +
   1071   // the width doesn't overlap the offset of a higher memory access,
   1072   // then the memory accesses are different.
   1073   // If OffsetAIsScalable and OffsetBIsScalable are both true, they
   1074   // are assumed to have the same scale (vscale).
   1075   if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
   1076                                    WidthA, TRI) &&
   1077       getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
   1078                                    WidthB, TRI)) {
   1079     if (BaseOpA->isIdenticalTo(*BaseOpB) &&
   1080         OffsetAIsScalable == OffsetBIsScalable) {
   1081       int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
   1082       int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
   1083       int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
   1084       if (LowOffset + LowWidth <= HighOffset)
   1085         return true;
   1086     }
   1087   }
   1088   return false;
   1089 }
   1090 
   1091 bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
   1092                                             const MachineBasicBlock *MBB,
   1093                                             const MachineFunction &MF) const {
   1094   if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
   1095     return true;
   1096   switch (MI.getOpcode()) {
   1097   case AArch64::HINT:
   1098     // CSDB hints are scheduling barriers.
   1099     if (MI.getOperand(0).getImm() == 0x14)
   1100       return true;
   1101     break;
   1102   case AArch64::DSB:
   1103   case AArch64::ISB:
   1104     // DSB and ISB also are scheduling barriers.
   1105     return true;
   1106   default:;
   1107   }
   1108   return isSEHInstruction(MI);
   1109 }
   1110 
   1111 /// analyzeCompare - For a comparison instruction, return the source registers
   1112 /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
   1113 /// Return true if the comparison instruction can be analyzed.
   1114 bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
   1115                                       Register &SrcReg2, int &CmpMask,
   1116                                       int &CmpValue) const {
   1117   // The first operand can be a frame index where we'd normally expect a
   1118   // register.
   1119   assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
   1120   if (!MI.getOperand(1).isReg())
   1121     return false;
   1122 
   1123   switch (MI.getOpcode()) {
   1124   default:
   1125     break;
   1126   case AArch64::PTEST_PP:
   1127     SrcReg = MI.getOperand(0).getReg();
   1128     SrcReg2 = MI.getOperand(1).getReg();
   1129     // Not sure about the mask and value for now...
   1130     CmpMask = ~0;
   1131     CmpValue = 0;
   1132     return true;
   1133   case AArch64::SUBSWrr:
   1134   case AArch64::SUBSWrs:
   1135   case AArch64::SUBSWrx:
   1136   case AArch64::SUBSXrr:
   1137   case AArch64::SUBSXrs:
   1138   case AArch64::SUBSXrx:
   1139   case AArch64::ADDSWrr:
   1140   case AArch64::ADDSWrs:
   1141   case AArch64::ADDSWrx:
   1142   case AArch64::ADDSXrr:
   1143   case AArch64::ADDSXrs:
   1144   case AArch64::ADDSXrx:
   1145     // Replace SUBSWrr with SUBWrr if NZCV is not used.
   1146     SrcReg = MI.getOperand(1).getReg();
   1147     SrcReg2 = MI.getOperand(2).getReg();
   1148     CmpMask = ~0;
   1149     CmpValue = 0;
   1150     return true;
   1151   case AArch64::SUBSWri:
   1152   case AArch64::ADDSWri:
   1153   case AArch64::SUBSXri:
   1154   case AArch64::ADDSXri:
   1155     SrcReg = MI.getOperand(1).getReg();
   1156     SrcReg2 = 0;
   1157     CmpMask = ~0;
   1158     // FIXME: In order to convert CmpValue to 0 or 1
   1159     CmpValue = MI.getOperand(2).getImm() != 0;
   1160     return true;
   1161   case AArch64::ANDSWri:
   1162   case AArch64::ANDSXri:
   1163     // ANDS does not use the same encoding scheme as the others xxxS
   1164     // instructions.
   1165     SrcReg = MI.getOperand(1).getReg();
   1166     SrcReg2 = 0;
   1167     CmpMask = ~0;
   1168     // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
   1169     // while the type of CmpValue is int. When converting uint64_t to int,
   1170     // the high 32 bits of uint64_t will be lost.
   1171     // In fact it causes a bug in spec2006-483.xalancbmk
   1172     // CmpValue is only used to compare with zero in OptimizeCompareInstr
   1173     CmpValue = AArch64_AM::decodeLogicalImmediate(
   1174                    MI.getOperand(2).getImm(),
   1175                    MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
   1176     return true;
   1177   }
   1178 
   1179   return false;
   1180 }
   1181 
   1182 static bool UpdateOperandRegClass(MachineInstr &Instr) {
   1183   MachineBasicBlock *MBB = Instr.getParent();
   1184   assert(MBB && "Can't get MachineBasicBlock here");
   1185   MachineFunction *MF = MBB->getParent();
   1186   assert(MF && "Can't get MachineFunction here");
   1187   const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
   1188   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   1189   MachineRegisterInfo *MRI = &MF->getRegInfo();
   1190 
   1191   for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
   1192        ++OpIdx) {
   1193     MachineOperand &MO = Instr.getOperand(OpIdx);
   1194     const TargetRegisterClass *OpRegCstraints =
   1195         Instr.getRegClassConstraint(OpIdx, TII, TRI);
   1196 
   1197     // If there's no constraint, there's nothing to do.
   1198     if (!OpRegCstraints)
   1199       continue;
   1200     // If the operand is a frame index, there's nothing to do here.
   1201     // A frame index operand will resolve correctly during PEI.
   1202     if (MO.isFI())
   1203       continue;
   1204 
   1205     assert(MO.isReg() &&
   1206            "Operand has register constraints without being a register!");
   1207 
   1208     Register Reg = MO.getReg();
   1209     if (Register::isPhysicalRegister(Reg)) {
   1210       if (!OpRegCstraints->contains(Reg))
   1211         return false;
   1212     } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
   1213                !MRI->constrainRegClass(Reg, OpRegCstraints))
   1214       return false;
   1215   }
   1216 
   1217   return true;
   1218 }
   1219 
   1220 /// Return the opcode that does not set flags when possible - otherwise
   1221 /// return the original opcode. The caller is responsible to do the actual
   1222 /// substitution and legality checking.
   1223 static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
   1224   // Don't convert all compare instructions, because for some the zero register
   1225   // encoding becomes the sp register.
   1226   bool MIDefinesZeroReg = false;
   1227   if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
   1228     MIDefinesZeroReg = true;
   1229 
   1230   switch (MI.getOpcode()) {
   1231   default:
   1232     return MI.getOpcode();
   1233   case AArch64::ADDSWrr:
   1234     return AArch64::ADDWrr;
   1235   case AArch64::ADDSWri:
   1236     return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
   1237   case AArch64::ADDSWrs:
   1238     return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
   1239   case AArch64::ADDSWrx:
   1240     return AArch64::ADDWrx;
   1241   case AArch64::ADDSXrr:
   1242     return AArch64::ADDXrr;
   1243   case AArch64::ADDSXri:
   1244     return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
   1245   case AArch64::ADDSXrs:
   1246     return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
   1247   case AArch64::ADDSXrx:
   1248     return AArch64::ADDXrx;
   1249   case AArch64::SUBSWrr:
   1250     return AArch64::SUBWrr;
   1251   case AArch64::SUBSWri:
   1252     return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
   1253   case AArch64::SUBSWrs:
   1254     return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
   1255   case AArch64::SUBSWrx:
   1256     return AArch64::SUBWrx;
   1257   case AArch64::SUBSXrr:
   1258     return AArch64::SUBXrr;
   1259   case AArch64::SUBSXri:
   1260     return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
   1261   case AArch64::SUBSXrs:
   1262     return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
   1263   case AArch64::SUBSXrx:
   1264     return AArch64::SUBXrx;
   1265   }
   1266 }
   1267 
   1268 enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
   1269 
   1270 /// True when condition flags are accessed (either by writing or reading)
   1271 /// on the instruction trace starting at From and ending at To.
   1272 ///
   1273 /// Note: If From and To are from different blocks it's assumed CC are accessed
   1274 ///       on the path.
   1275 static bool areCFlagsAccessedBetweenInstrs(
   1276     MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
   1277     const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
   1278   // Early exit if To is at the beginning of the BB.
   1279   if (To == To->getParent()->begin())
   1280     return true;
   1281 
   1282   // Check whether the instructions are in the same basic block
   1283   // If not, assume the condition flags might get modified somewhere.
   1284   if (To->getParent() != From->getParent())
   1285     return true;
   1286 
   1287   // From must be above To.
   1288   assert(std::any_of(
   1289       ++To.getReverse(), To->getParent()->rend(),
   1290       [From](MachineInstr &MI) { return MI.getIterator() == From; }));
   1291 
   1292   // We iterate backward starting at \p To until we hit \p From.
   1293   for (const MachineInstr &Instr :
   1294        instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
   1295     if (((AccessToCheck & AK_Write) &&
   1296          Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
   1297         ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
   1298       return true;
   1299   }
   1300   return false;
   1301 }
   1302 
   1303 /// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
   1304 /// operation which could set the flags in an identical manner
   1305 bool AArch64InstrInfo::optimizePTestInstr(
   1306     MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
   1307     const MachineRegisterInfo *MRI) const {
   1308   auto *Mask = MRI->getUniqueVRegDef(MaskReg);
   1309   auto *Pred = MRI->getUniqueVRegDef(PredReg);
   1310   auto NewOp = Pred->getOpcode();
   1311   bool OpChanged = false;
   1312 
   1313   unsigned MaskOpcode = Mask->getOpcode();
   1314   unsigned PredOpcode = Pred->getOpcode();
   1315   bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
   1316   bool PredIsWhileLike = isWhileOpcode(PredOpcode);
   1317 
   1318   if (isPTrueOpcode(MaskOpcode) && (PredIsPTestLike || PredIsWhileLike)) {
   1319     // For PTEST(PTRUE, OTHER_INST), PTEST is redundant when PTRUE doesn't
   1320     // deactivate any lanes OTHER_INST might set.
   1321     uint64_t MaskElementSize = getElementSizeForOpcode(MaskOpcode);
   1322     uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
   1323 
   1324     // Must be an all active predicate of matching element size.
   1325     if ((PredElementSize != MaskElementSize) ||
   1326         (Mask->getOperand(1).getImm() != 31))
   1327       return false;
   1328 
   1329     // Fallthough to simply remove the PTEST.
   1330   } else if ((Mask == Pred) && (PredIsPTestLike || PredIsWhileLike)) {
   1331     // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
   1332     // instruction that sets the flags as PTEST would.
   1333 
   1334     // Fallthough to simply remove the PTEST.
   1335   } else if (PredIsPTestLike) {
   1336     // For PTEST(PG_1, PTEST_LIKE(PG2, ...)), PTEST is redundant when both
   1337     // instructions use the same predicate.
   1338     auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
   1339     if (Mask != PTestLikeMask)
   1340       return false;
   1341 
   1342     // Fallthough to simply remove the PTEST.
   1343   } else {
   1344     switch (Pred->getOpcode()) {
   1345     case AArch64::BRKB_PPzP:
   1346     case AArch64::BRKPB_PPzPP: {
   1347       // Op 0 is chain, 1 is the mask, 2 the previous predicate to
   1348       // propagate, 3 the new predicate.
   1349 
   1350       // Check to see if our mask is the same as the brkpb's. If
   1351       // not the resulting flag bits may be different and we
   1352       // can't remove the ptest.
   1353       auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
   1354       if (Mask != PredMask)
   1355         return false;
   1356 
   1357       // Switch to the new opcode
   1358       NewOp = Pred->getOpcode() == AArch64::BRKB_PPzP ? AArch64::BRKBS_PPzP
   1359                                                       : AArch64::BRKPBS_PPzPP;
   1360       OpChanged = true;
   1361       break;
   1362     }
   1363     case AArch64::BRKN_PPzP: {
   1364       auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
   1365       if (Mask != PredMask)
   1366         return false;
   1367 
   1368       NewOp = AArch64::BRKNS_PPzP;
   1369       OpChanged = true;
   1370       break;
   1371     }
   1372     case AArch64::RDFFR_PPz: {
   1373       // rdffr   p1.b, PredMask=p0/z <--- Definition of Pred
   1374       // ptest   Mask=p0, Pred=p1.b  <--- If equal masks, remove this and use
   1375       //                                  `rdffrs p1.b, p0/z` above.
   1376       auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
   1377       if (Mask != PredMask)
   1378         return false;
   1379 
   1380       NewOp = AArch64::RDFFRS_PPz;
   1381       OpChanged = true;
   1382       break;
   1383     }
   1384     default:
   1385       // Bail out if we don't recognize the input
   1386       return false;
   1387     }
   1388   }
   1389 
   1390   const TargetRegisterInfo *TRI = &getRegisterInfo();
   1391 
   1392   // If another instruction between Pred and PTest accesses flags, don't remove
   1393   // the ptest or update the earlier instruction to modify them.
   1394   if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
   1395     return false;
   1396 
   1397   // If we pass all the checks, it's safe to remove the PTEST and use the flags
   1398   // as they are prior to PTEST. Sometimes this requires the tested PTEST
   1399   // operand to be replaced with an equivalent instruction that also sets the
   1400   // flags.
   1401   Pred->setDesc(get(NewOp));
   1402   PTest->eraseFromParent();
   1403   if (OpChanged) {
   1404     bool succeeded = UpdateOperandRegClass(*Pred);
   1405     (void)succeeded;
   1406     assert(succeeded && "Operands have incompatible register classes!");
   1407     Pred->addRegisterDefined(AArch64::NZCV, TRI);
   1408   }
   1409 
   1410   // Ensure that the flags def is live.
   1411   if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
   1412     unsigned i = 0, e = Pred->getNumOperands();
   1413     for (; i != e; ++i) {
   1414       MachineOperand &MO = Pred->getOperand(i);
   1415       if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
   1416         MO.setIsDead(false);
   1417         break;
   1418       }
   1419     }
   1420   }
   1421   return true;
   1422 }
   1423 
   1424 /// Try to optimize a compare instruction. A compare instruction is an
   1425 /// instruction which produces AArch64::NZCV. It can be truly compare
   1426 /// instruction
   1427 /// when there are no uses of its destination register.
   1428 ///
   1429 /// The following steps are tried in order:
   1430 /// 1. Convert CmpInstr into an unconditional version.
   1431 /// 2. Remove CmpInstr if above there is an instruction producing a needed
   1432 ///    condition code or an instruction which can be converted into such an
   1433 ///    instruction.
   1434 ///    Only comparison with zero is supported.
   1435 bool AArch64InstrInfo::optimizeCompareInstr(
   1436     MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int CmpMask,
   1437     int CmpValue, const MachineRegisterInfo *MRI) const {
   1438   assert(CmpInstr.getParent());
   1439   assert(MRI);
   1440 
   1441   // Replace SUBSWrr with SUBWrr if NZCV is not used.
   1442   int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
   1443   if (DeadNZCVIdx != -1) {
   1444     if (CmpInstr.definesRegister(AArch64::WZR) ||
   1445         CmpInstr.definesRegister(AArch64::XZR)) {
   1446       CmpInstr.eraseFromParent();
   1447       return true;
   1448     }
   1449     unsigned Opc = CmpInstr.getOpcode();
   1450     unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
   1451     if (NewOpc == Opc)
   1452       return false;
   1453     const MCInstrDesc &MCID = get(NewOpc);
   1454     CmpInstr.setDesc(MCID);
   1455     CmpInstr.RemoveOperand(DeadNZCVIdx);
   1456     bool succeeded = UpdateOperandRegClass(CmpInstr);
   1457     (void)succeeded;
   1458     assert(succeeded && "Some operands reg class are incompatible!");
   1459     return true;
   1460   }
   1461 
   1462   if (CmpInstr.getOpcode() == AArch64::PTEST_PP)
   1463     return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
   1464 
   1465   // Continue only if we have a "ri" where immediate is zero.
   1466   // FIXME:CmpValue has already been converted to 0 or 1 in analyzeCompare
   1467   // function.
   1468   assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
   1469   if (SrcReg2 != 0)
   1470     return false;
   1471 
   1472   // CmpInstr is a Compare instruction if destination register is not used.
   1473   if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
   1474     return false;
   1475 
   1476   if (!CmpValue && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
   1477     return true;
   1478   return removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
   1479 }
   1480 
   1481 /// Get opcode of S version of Instr.
   1482 /// If Instr is S version its opcode is returned.
   1483 /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
   1484 /// or we are not interested in it.
   1485 static unsigned sForm(MachineInstr &Instr) {
   1486   switch (Instr.getOpcode()) {
   1487   default:
   1488     return AArch64::INSTRUCTION_LIST_END;
   1489 
   1490   case AArch64::ADDSWrr:
   1491   case AArch64::ADDSWri:
   1492   case AArch64::ADDSXrr:
   1493   case AArch64::ADDSXri:
   1494   case AArch64::SUBSWrr:
   1495   case AArch64::SUBSWri:
   1496   case AArch64::SUBSXrr:
   1497   case AArch64::SUBSXri:
   1498     return Instr.getOpcode();
   1499 
   1500   case AArch64::ADDWrr:
   1501     return AArch64::ADDSWrr;
   1502   case AArch64::ADDWri:
   1503     return AArch64::ADDSWri;
   1504   case AArch64::ADDXrr:
   1505     return AArch64::ADDSXrr;
   1506   case AArch64::ADDXri:
   1507     return AArch64::ADDSXri;
   1508   case AArch64::ADCWr:
   1509     return AArch64::ADCSWr;
   1510   case AArch64::ADCXr:
   1511     return AArch64::ADCSXr;
   1512   case AArch64::SUBWrr:
   1513     return AArch64::SUBSWrr;
   1514   case AArch64::SUBWri:
   1515     return AArch64::SUBSWri;
   1516   case AArch64::SUBXrr:
   1517     return AArch64::SUBSXrr;
   1518   case AArch64::SUBXri:
   1519     return AArch64::SUBSXri;
   1520   case AArch64::SBCWr:
   1521     return AArch64::SBCSWr;
   1522   case AArch64::SBCXr:
   1523     return AArch64::SBCSXr;
   1524   case AArch64::ANDWri:
   1525     return AArch64::ANDSWri;
   1526   case AArch64::ANDXri:
   1527     return AArch64::ANDSXri;
   1528   }
   1529 }
   1530 
   1531 /// Check if AArch64::NZCV should be alive in successors of MBB.
   1532 static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
   1533   for (auto *BB : MBB->successors())
   1534     if (BB->isLiveIn(AArch64::NZCV))
   1535       return true;
   1536   return false;
   1537 }
   1538 
   1539 /// \returns The condition code operand index for \p Instr if it is a branch
   1540 /// or select and -1 otherwise.
   1541 static int
   1542 findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
   1543   switch (Instr.getOpcode()) {
   1544   default:
   1545     return -1;
   1546 
   1547   case AArch64::Bcc: {
   1548     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
   1549     assert(Idx >= 2);
   1550     return Idx - 2;
   1551   }
   1552 
   1553   case AArch64::CSINVWr:
   1554   case AArch64::CSINVXr:
   1555   case AArch64::CSINCWr:
   1556   case AArch64::CSINCXr:
   1557   case AArch64::CSELWr:
   1558   case AArch64::CSELXr:
   1559   case AArch64::CSNEGWr:
   1560   case AArch64::CSNEGXr:
   1561   case AArch64::FCSELSrrr:
   1562   case AArch64::FCSELDrrr: {
   1563     int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
   1564     assert(Idx >= 1);
   1565     return Idx - 1;
   1566   }
   1567   }
   1568 }
   1569 
namespace {

/// Set of NZCV condition flags, one boolean per flag. All flags start cleared.
struct UsedNZCV {
  bool N = false;
  bool Z = false;
  bool C = false;
  bool V = false;

  UsedNZCV() = default;

  /// Add every flag that is set in \p UsedFlags to this set.
  /// \returns A reference to this object.
  UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
    N = N || UsedFlags.N;
    Z = Z || UsedFlags.Z;
    C = C || UsedFlags.C;
    V = V || UsedFlags.V;
    return *this;
  }
};

} // end anonymous namespace
   1590 
   1591 /// Find a condition code used by the instruction.
   1592 /// Returns AArch64CC::Invalid if either the instruction does not use condition
   1593 /// codes or we don't optimize CmpInstr in the presence of such instructions.
   1594 static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
   1595   int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
   1596   return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
   1597                           Instr.getOperand(CCIdx).getImm())
   1598                     : AArch64CC::Invalid;
   1599 }
   1600 
   1601 static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
   1602   assert(CC != AArch64CC::Invalid);
   1603   UsedNZCV UsedFlags;
   1604   switch (CC) {
   1605   default:
   1606     break;
   1607 
   1608   case AArch64CC::EQ: // Z set
   1609   case AArch64CC::NE: // Z clear
   1610     UsedFlags.Z = true;
   1611     break;
   1612 
   1613   case AArch64CC::HI: // Z clear and C set
   1614   case AArch64CC::LS: // Z set   or  C clear
   1615     UsedFlags.Z = true;
   1616     LLVM_FALLTHROUGH;
   1617   case AArch64CC::HS: // C set
   1618   case AArch64CC::LO: // C clear
   1619     UsedFlags.C = true;
   1620     break;
   1621 
   1622   case AArch64CC::MI: // N set
   1623   case AArch64CC::PL: // N clear
   1624     UsedFlags.N = true;
   1625     break;
   1626 
   1627   case AArch64CC::VS: // V set
   1628   case AArch64CC::VC: // V clear
   1629     UsedFlags.V = true;
   1630     break;
   1631 
   1632   case AArch64CC::GT: // Z clear, N and V the same
   1633   case AArch64CC::LE: // Z set,   N and V differ
   1634     UsedFlags.Z = true;
   1635     LLVM_FALLTHROUGH;
   1636   case AArch64CC::GE: // N and V the same
   1637   case AArch64CC::LT: // N and V differ
   1638     UsedFlags.N = true;
   1639     UsedFlags.V = true;
   1640     break;
   1641   }
   1642   return UsedFlags;
   1643 }
   1644 
   1645 /// \returns Conditions flags used after \p CmpInstr in its MachineBB if they
   1646 /// are not containing C or V flags and NZCV flags are not alive in successors
   1647 /// of the same \p CmpInstr and \p MI parent. \returns None otherwise.
   1648 ///
   1649 /// Collect instructions using that flags in \p CCUseInstrs if provided.
   1650 static Optional<UsedNZCV>
   1651 examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
   1652                  const TargetRegisterInfo &TRI,
   1653                  SmallVectorImpl<MachineInstr *> *CCUseInstrs = nullptr) {
   1654   MachineBasicBlock *CmpParent = CmpInstr.getParent();
   1655   if (MI.getParent() != CmpParent)
   1656     return None;
   1657 
   1658   if (areCFlagsAliveInSuccessors(CmpParent))
   1659     return None;
   1660 
   1661   UsedNZCV NZCVUsedAfterCmp;
   1662   for (MachineInstr &Instr : instructionsWithoutDebug(
   1663            std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
   1664     if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
   1665       AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
   1666       if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
   1667         return None;
   1668       NZCVUsedAfterCmp |= getUsedNZCV(CC);
   1669       if (CCUseInstrs)
   1670         CCUseInstrs->push_back(&Instr);
   1671     }
   1672     if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
   1673       break;
   1674   }
   1675   if (NZCVUsedAfterCmp.C || NZCVUsedAfterCmp.V)
   1676     return None;
   1677   return NZCVUsedAfterCmp;
   1678 }
   1679 
   1680 static bool isADDSRegImm(unsigned Opcode) {
   1681   return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
   1682 }
   1683 
   1684 static bool isSUBSRegImm(unsigned Opcode) {
   1685   return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
   1686 }
   1687 
   1688 /// Check if CmpInstr can be substituted by MI.
   1689 ///
   1690 /// CmpInstr can be substituted:
   1691 /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
   1692 /// - and, MI and CmpInstr are from the same MachineBB
   1693 /// - and, condition flags are not alive in successors of the CmpInstr parent
   1694 /// - and, if MI opcode is the S form there must be no defs of flags between
   1695 ///        MI and CmpInstr
   1696 ///        or if MI opcode is not the S form there must be neither defs of flags
   1697 ///        nor uses of flags between MI and CmpInstr.
   1698 /// - and  C/V flags are not used after CmpInstr
   1699 static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
   1700                                        const TargetRegisterInfo &TRI) {
   1701   assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
   1702 
   1703   const unsigned CmpOpcode = CmpInstr.getOpcode();
   1704   if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
   1705     return false;
   1706 
   1707   if (!examineCFlagsUse(MI, CmpInstr, TRI))
   1708     return false;
   1709 
   1710   AccessKind AccessToCheck = AK_Write;
   1711   if (sForm(MI) != MI.getOpcode())
   1712     AccessToCheck = AK_All;
   1713   return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
   1714 }
   1715 
   1716 /// Substitute an instruction comparing to zero with another instruction
   1717 /// which produces needed condition flags.
   1718 ///
   1719 /// Return true on success.
   1720 bool AArch64InstrInfo::substituteCmpToZero(
   1721     MachineInstr &CmpInstr, unsigned SrcReg,
   1722     const MachineRegisterInfo &MRI) const {
   1723   // Get the unique definition of SrcReg.
   1724   MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
   1725   if (!MI)
   1726     return false;
   1727 
   1728   const TargetRegisterInfo &TRI = getRegisterInfo();
   1729 
   1730   unsigned NewOpc = sForm(*MI);
   1731   if (NewOpc == AArch64::INSTRUCTION_LIST_END)
   1732     return false;
   1733 
   1734   if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
   1735     return false;
   1736 
   1737   // Update the instruction to set NZCV.
   1738   MI->setDesc(get(NewOpc));
   1739   CmpInstr.eraseFromParent();
   1740   bool succeeded = UpdateOperandRegClass(*MI);
   1741   (void)succeeded;
   1742   assert(succeeded && "Some operands reg class are incompatible!");
   1743   MI->addRegisterDefined(AArch64::NZCV, &TRI);
   1744   return true;
   1745 }
   1746 
   1747 /// \returns True if \p CmpInstr can be removed.
   1748 ///
   1749 /// \p IsInvertCC is true if, after removing \p CmpInstr, condition
   1750 /// codes used in \p CCUseInstrs must be inverted.
   1751 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
   1752                                  int CmpValue, const TargetRegisterInfo &TRI,
   1753                                  SmallVectorImpl<MachineInstr *> &CCUseInstrs,
   1754                                  bool &IsInvertCC) {
   1755   assert((CmpValue == 0 || CmpValue == 1) &&
   1756          "Only comparisons to 0 or 1 considered for removal!");
   1757 
   1758   // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
   1759   unsigned MIOpc = MI.getOpcode();
   1760   if (MIOpc == AArch64::CSINCWr) {
   1761     if (MI.getOperand(1).getReg() != AArch64::WZR ||
   1762         MI.getOperand(2).getReg() != AArch64::WZR)
   1763       return false;
   1764   } else if (MIOpc == AArch64::CSINCXr) {
   1765     if (MI.getOperand(1).getReg() != AArch64::XZR ||
   1766         MI.getOperand(2).getReg() != AArch64::XZR)
   1767       return false;
   1768   } else {
   1769     return false;
   1770   }
   1771   AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
   1772   if (MICC == AArch64CC::Invalid)
   1773     return false;
   1774 
   1775   // NZCV needs to be defined
   1776   if (MI.findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
   1777     return false;
   1778 
   1779   // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
   1780   const unsigned CmpOpcode = CmpInstr.getOpcode();
   1781   bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
   1782   if (CmpValue && !IsSubsRegImm)
   1783     return false;
   1784   if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
   1785     return false;
   1786 
   1787   // MI conditions allowed: eq, ne, mi, pl
   1788   UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
   1789   if (MIUsedNZCV.C || MIUsedNZCV.V)
   1790     return false;
   1791 
   1792   Optional<UsedNZCV> NZCVUsedAfterCmp =
   1793       examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
   1794   // Condition flags are not used in CmpInstr basic block successors and only
   1795   // Z or N flags allowed to be used after CmpInstr within its basic block
   1796   if (!NZCVUsedAfterCmp)
   1797     return false;
   1798   // Z or N flag used after CmpInstr must correspond to the flag used in MI
   1799   if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
   1800       (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
   1801     return false;
   1802   // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
   1803   if (MIUsedNZCV.N && !CmpValue)
   1804     return false;
   1805 
   1806   // There must be no defs of flags between MI and CmpInstr
   1807   if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
   1808     return false;
   1809 
   1810   // Condition code is inverted in the following cases:
   1811   // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
   1812   // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
   1813   IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
   1814                (!CmpValue && MICC == AArch64CC::NE);
   1815   return true;
   1816 }
   1817 
   1818 /// Remove comparision in csinc-cmp sequence
   1819 ///
   1820 /// Examples:
   1821 /// 1. \code
   1822 ///   csinc w9, wzr, wzr, ne
   1823 ///   cmp   w9, #0
   1824 ///   b.eq
   1825 ///    \endcode
   1826 /// to
   1827 ///    \code
   1828 ///   csinc w9, wzr, wzr, ne
   1829 ///   b.ne
   1830 ///    \endcode
   1831 ///
   1832 /// 2. \code
   1833 ///   csinc x2, xzr, xzr, mi
   1834 ///   cmp   x2, #1
   1835 ///   b.pl
   1836 ///    \endcode
   1837 /// to
   1838 ///    \code
   1839 ///   csinc x2, xzr, xzr, mi
   1840 ///   b.pl
   1841 ///    \endcode
   1842 ///
   1843 /// \param  CmpInstr comparison instruction
   1844 /// \return True when comparison removed
   1845 bool AArch64InstrInfo::removeCmpToZeroOrOne(
   1846     MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
   1847     const MachineRegisterInfo &MRI) const {
   1848   MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
   1849   if (!MI)
   1850     return false;
   1851   const TargetRegisterInfo &TRI = getRegisterInfo();
   1852   SmallVector<MachineInstr *, 4> CCUseInstrs;
   1853   bool IsInvertCC = false;
   1854   if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
   1855                             IsInvertCC))
   1856     return false;
   1857   // Make transformation
   1858   CmpInstr.eraseFromParent();
   1859   if (IsInvertCC) {
   1860     // Invert condition codes in CmpInstr CC users
   1861     for (MachineInstr *CCUseInstr : CCUseInstrs) {
   1862       int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
   1863       assert(Idx >= 0 && "Unexpected instruction using CC.");
   1864       MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
   1865       AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
   1866           static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
   1867       CCOperand.setImm(CCUse);
   1868     }
   1869   }
   1870   return true;
   1871 }
   1872 
   1873 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
   1874   if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
   1875       MI.getOpcode() != AArch64::CATCHRET)
   1876     return false;
   1877 
   1878   MachineBasicBlock &MBB = *MI.getParent();
   1879   auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
   1880   auto TRI = Subtarget.getRegisterInfo();
   1881   DebugLoc DL = MI.getDebugLoc();
   1882 
   1883   if (MI.getOpcode() == AArch64::CATCHRET) {
   1884     // Skip to the first instruction before the epilog.
   1885     const TargetInstrInfo *TII =
   1886       MBB.getParent()->getSubtarget().getInstrInfo();
   1887     MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
   1888     auto MBBI = MachineBasicBlock::iterator(MI);
   1889     MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
   1890     while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
   1891            FirstEpilogSEH != MBB.begin())
   1892       FirstEpilogSEH = std::prev(FirstEpilogSEH);
   1893     if (FirstEpilogSEH != MBB.begin())
   1894       FirstEpilogSEH = std::next(FirstEpilogSEH);
   1895     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
   1896         .addReg(AArch64::X0, RegState::Define)
   1897         .addMBB(TargetMBB);
   1898     BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
   1899         .addReg(AArch64::X0, RegState::Define)
   1900         .addReg(AArch64::X0)
   1901         .addMBB(TargetMBB)
   1902         .addImm(0);
   1903     return true;
   1904   }
   1905 
   1906   Register Reg = MI.getOperand(0).getReg();
   1907   Module &M = *MBB.getParent()->getFunction().getParent();
   1908   if (M.getStackProtectorGuard() == "sysreg") {
   1909     const AArch64SysReg::SysReg *SrcReg =
   1910         AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
   1911     if (!SrcReg)
   1912       report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
   1913 
   1914     // mrs xN, sysreg
   1915     BuildMI(MBB, MI, DL, get(AArch64::MRS))
   1916         .addDef(Reg, RegState::Renamable)
   1917         .addImm(SrcReg->Encoding);
   1918     int Offset = M.getStackProtectorGuardOffset();
   1919     if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
   1920       // ldr xN, [xN, #offset]
   1921       BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
   1922           .addDef(Reg)
   1923           .addUse(Reg, RegState::Kill)
   1924           .addImm(Offset / 8);
   1925     } else if (Offset >= -256 && Offset <= 255) {
   1926       // ldur xN, [xN, #offset]
   1927       BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
   1928           .addDef(Reg)
   1929           .addUse(Reg, RegState::Kill)
   1930           .addImm(Offset);
   1931     } else if (Offset >= -4095 && Offset <= 4095) {
   1932       if (Offset > 0) {
   1933         // add xN, xN, #offset
   1934         BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
   1935             .addDef(Reg)
   1936             .addUse(Reg, RegState::Kill)
   1937             .addImm(Offset)
   1938             .addImm(0);
   1939       } else {
   1940         // sub xN, xN, #offset
   1941         BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
   1942             .addDef(Reg)
   1943             .addUse(Reg, RegState::Kill)
   1944             .addImm(-Offset)
   1945             .addImm(0);
   1946       }
   1947       // ldr xN, [xN]
   1948       BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
   1949           .addDef(Reg)
   1950           .addUse(Reg, RegState::Kill)
   1951           .addImm(0);
   1952     } else {
   1953       // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
   1954       // than 23760.
   1955       // It might be nice to use AArch64::MOVi32imm here, which would get
   1956       // expanded in PreSched2 after PostRA, but our lone scratch Reg already
   1957       // contains the MRS result. findScratchNonCalleeSaveRegister() in
   1958       // AArch64FrameLowering might help us find such a scratch register
   1959       // though. If we failed to find a scratch register, we could emit a
   1960       // stream of add instructions to build up the immediate. Or, we could try
   1961       // to insert a AArch64::MOVi32imm before register allocation so that we
   1962       // didn't need to scavenge for a scratch register.
   1963       report_fatal_error("Unable to encode Stack Protector Guard Offset");
   1964     }
   1965     MBB.erase(MI);
   1966     return true;
   1967   }
   1968 
   1969   const GlobalValue *GV =
   1970       cast<GlobalValue>((*MI.memoperands_begin())->getValue());
   1971   const TargetMachine &TM = MBB.getParent()->getTarget();
   1972   unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
   1973   const unsigned char MO_NC = AArch64II::MO_NC;
   1974 
   1975   if ((OpFlags & AArch64II::MO_GOT) != 0) {
   1976     BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
   1977         .addGlobalAddress(GV, 0, OpFlags);
   1978     if (Subtarget.isTargetILP32()) {
   1979       unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
   1980       BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
   1981           .addDef(Reg32, RegState::Dead)
   1982           .addUse(Reg, RegState::Kill)
   1983           .addImm(0)
   1984           .addMemOperand(*MI.memoperands_begin())
   1985           .addDef(Reg, RegState::Implicit);
   1986     } else {
   1987       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   1988           .addReg(Reg, RegState::Kill)
   1989           .addImm(0)
   1990           .addMemOperand(*MI.memoperands_begin());
   1991     }
   1992   } else if (TM.getCodeModel() == CodeModel::Large) {
   1993     assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
   1994     BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
   1995         .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
   1996         .addImm(0);
   1997     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   1998         .addReg(Reg, RegState::Kill)
   1999         .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
   2000         .addImm(16);
   2001     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   2002         .addReg(Reg, RegState::Kill)
   2003         .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
   2004         .addImm(32);
   2005     BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
   2006         .addReg(Reg, RegState::Kill)
   2007         .addGlobalAddress(GV, 0, AArch64II::MO_G3)
   2008         .addImm(48);
   2009     BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   2010         .addReg(Reg, RegState::Kill)
   2011         .addImm(0)
   2012         .addMemOperand(*MI.memoperands_begin());
   2013   } else if (TM.getCodeModel() == CodeModel::Tiny) {
   2014     BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
   2015         .addGlobalAddress(GV, 0, OpFlags);
   2016   } else {
   2017     BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
   2018         .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
   2019     unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
   2020     if (Subtarget.isTargetILP32()) {
   2021       unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
   2022       BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
   2023           .addDef(Reg32, RegState::Dead)
   2024           .addUse(Reg, RegState::Kill)
   2025           .addGlobalAddress(GV, 0, LoFlags)
   2026           .addMemOperand(*MI.memoperands_begin())
   2027           .addDef(Reg, RegState::Implicit);
   2028     } else {
   2029       BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
   2030           .addReg(Reg, RegState::Kill)
   2031           .addGlobalAddress(GV, 0, LoFlags)
   2032           .addMemOperand(*MI.memoperands_begin());
   2033     }
   2034   }
   2035 
   2036   MBB.erase(MI);
   2037 
   2038   return true;
   2039 }
   2040 
   2041 // Return true if this instruction simply sets its single destination register
   2042 // to zero. This is equivalent to a register rename of the zero-register.
   2043 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
   2044   switch (MI.getOpcode()) {
   2045   default:
   2046     break;
   2047   case AArch64::MOVZWi:
   2048   case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
   2049     if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
   2050       assert(MI.getDesc().getNumOperands() == 3 &&
   2051              MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
   2052       return true;
   2053     }
   2054     break;
   2055   case AArch64::ANDWri: // and Rd, Rzr, #imm
   2056     return MI.getOperand(1).getReg() == AArch64::WZR;
   2057   case AArch64::ANDXri:
   2058     return MI.getOperand(1).getReg() == AArch64::XZR;
   2059   case TargetOpcode::COPY:
   2060     return MI.getOperand(1).getReg() == AArch64::WZR;
   2061   }
   2062   return false;
   2063 }
   2064 
   2065 // Return true if this instruction simply renames a general register without
   2066 // modifying bits.
   2067 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
   2068   switch (MI.getOpcode()) {
   2069   default:
   2070     break;
   2071   case TargetOpcode::COPY: {
   2072     // GPR32 copies will by lowered to ORRXrs
   2073     Register DstReg = MI.getOperand(0).getReg();
   2074     return (AArch64::GPR32RegClass.contains(DstReg) ||
   2075             AArch64::GPR64RegClass.contains(DstReg));
   2076   }
   2077   case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
   2078     if (MI.getOperand(1).getReg() == AArch64::XZR) {
   2079       assert(MI.getDesc().getNumOperands() == 4 &&
   2080              MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
   2081       return true;
   2082     }
   2083     break;
   2084   case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
   2085     if (MI.getOperand(2).getImm() == 0) {
   2086       assert(MI.getDesc().getNumOperands() == 4 &&
   2087              MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
   2088       return true;
   2089     }
   2090     break;
   2091   }
   2092   return false;
   2093 }
   2094 
   2095 // Return true if this instruction simply renames a general register without
   2096 // modifying bits.
   2097 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
   2098   switch (MI.getOpcode()) {
   2099   default:
   2100     break;
   2101   case TargetOpcode::COPY: {
   2102     // FPR64 copies will by lowered to ORR.16b
   2103     Register DstReg = MI.getOperand(0).getReg();
   2104     return (AArch64::FPR64RegClass.contains(DstReg) ||
   2105             AArch64::FPR128RegClass.contains(DstReg));
   2106   }
   2107   case AArch64::ORRv16i8:
   2108     if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
   2109       assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
   2110              "invalid ORRv16i8 operands");
   2111       return true;
   2112     }
   2113     break;
   2114   }
   2115   return false;
   2116 }
   2117 
   2118 unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
   2119                                                int &FrameIndex) const {
   2120   switch (MI.getOpcode()) {
   2121   default:
   2122     break;
   2123   case AArch64::LDRWui:
   2124   case AArch64::LDRXui:
   2125   case AArch64::LDRBui:
   2126   case AArch64::LDRHui:
   2127   case AArch64::LDRSui:
   2128   case AArch64::LDRDui:
   2129   case AArch64::LDRQui:
   2130     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
   2131         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
   2132       FrameIndex = MI.getOperand(1).getIndex();
   2133       return MI.getOperand(0).getReg();
   2134     }
   2135     break;
   2136   }
   2137 
   2138   return 0;
   2139 }
   2140 
   2141 unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
   2142                                               int &FrameIndex) const {
   2143   switch (MI.getOpcode()) {
   2144   default:
   2145     break;
   2146   case AArch64::STRWui:
   2147   case AArch64::STRXui:
   2148   case AArch64::STRBui:
   2149   case AArch64::STRHui:
   2150   case AArch64::STRSui:
   2151   case AArch64::STRDui:
   2152   case AArch64::STRQui:
   2153   case AArch64::LDR_PXI:
   2154   case AArch64::STR_PXI:
   2155     if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
   2156         MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
   2157       FrameIndex = MI.getOperand(1).getIndex();
   2158       return MI.getOperand(0).getReg();
   2159     }
   2160     break;
   2161   }
   2162   return 0;
   2163 }
   2164 
   2165 /// Check all MachineMemOperands for a hint to suppress pairing.
   2166 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
   2167   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
   2168     return MMO->getFlags() & MOSuppressPair;
   2169   });
   2170 }
   2171 
   2172 /// Set a flag on the first MachineMemOperand to suppress pairing.
   2173 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
   2174   if (MI.memoperands_empty())
   2175     return;
   2176   (*MI.memoperands_begin())->setFlags(MOSuppressPair);
   2177 }
   2178 
   2179 /// Check all MachineMemOperands for a hint that the load/store is strided.
   2180 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
   2181   return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
   2182     return MMO->getFlags() & MOStridedAccess;
   2183   });
   2184 }
   2185 
   2186 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
   2187   switch (Opc) {
   2188   default:
   2189     return false;
   2190   case AArch64::STURSi:
   2191   case AArch64::STRSpre:
   2192   case AArch64::STURDi:
   2193   case AArch64::STRDpre:
   2194   case AArch64::STURQi:
   2195   case AArch64::STRQpre:
   2196   case AArch64::STURBBi:
   2197   case AArch64::STURHHi:
   2198   case AArch64::STURWi:
   2199   case AArch64::STRWpre:
   2200   case AArch64::STURXi:
   2201   case AArch64::STRXpre:
   2202   case AArch64::LDURSi:
   2203   case AArch64::LDRSpre:
   2204   case AArch64::LDURDi:
   2205   case AArch64::LDRDpre:
   2206   case AArch64::LDURQi:
   2207   case AArch64::LDRQpre:
   2208   case AArch64::LDURWi:
   2209   case AArch64::LDRWpre:
   2210   case AArch64::LDURXi:
   2211   case AArch64::LDRXpre:
   2212   case AArch64::LDURSWi:
   2213   case AArch64::LDURHHi:
   2214   case AArch64::LDURBBi:
   2215   case AArch64::LDURSBWi:
   2216   case AArch64::LDURSHWi:
   2217     return true;
   2218   }
   2219 }
   2220 
   2221 Optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
   2222   switch (Opc) {
   2223   default: return {};
   2224   case AArch64::PRFMui: return AArch64::PRFUMi;
   2225   case AArch64::LDRXui: return AArch64::LDURXi;
   2226   case AArch64::LDRWui: return AArch64::LDURWi;
   2227   case AArch64::LDRBui: return AArch64::LDURBi;
   2228   case AArch64::LDRHui: return AArch64::LDURHi;
   2229   case AArch64::LDRSui: return AArch64::LDURSi;
   2230   case AArch64::LDRDui: return AArch64::LDURDi;
   2231   case AArch64::LDRQui: return AArch64::LDURQi;
   2232   case AArch64::LDRBBui: return AArch64::LDURBBi;
   2233   case AArch64::LDRHHui: return AArch64::LDURHHi;
   2234   case AArch64::LDRSBXui: return AArch64::LDURSBXi;
   2235   case AArch64::LDRSBWui: return AArch64::LDURSBWi;
   2236   case AArch64::LDRSHXui: return AArch64::LDURSHXi;
   2237   case AArch64::LDRSHWui: return AArch64::LDURSHWi;
   2238   case AArch64::LDRSWui: return AArch64::LDURSWi;
   2239   case AArch64::STRXui: return AArch64::STURXi;
   2240   case AArch64::STRWui: return AArch64::STURWi;
   2241   case AArch64::STRBui: return AArch64::STURBi;
   2242   case AArch64::STRHui: return AArch64::STURHi;
   2243   case AArch64::STRSui: return AArch64::STURSi;
   2244   case AArch64::STRDui: return AArch64::STURDi;
   2245   case AArch64::STRQui: return AArch64::STURQi;
   2246   case AArch64::STRBBui: return AArch64::STURBBi;
   2247   case AArch64::STRHHui: return AArch64::STURHHi;
   2248   }
   2249 }
   2250 
   2251 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
   2252   switch (Opc) {
   2253   default:
   2254     return 2;
   2255   case AArch64::LDPXi:
   2256   case AArch64::LDPDi:
   2257   case AArch64::STPXi:
   2258   case AArch64::STPDi:
   2259   case AArch64::LDNPXi:
   2260   case AArch64::LDNPDi:
   2261   case AArch64::STNPXi:
   2262   case AArch64::STNPDi:
   2263   case AArch64::LDPQi:
   2264   case AArch64::STPQi:
   2265   case AArch64::LDNPQi:
   2266   case AArch64::STNPQi:
   2267   case AArch64::LDPWi:
   2268   case AArch64::LDPSi:
   2269   case AArch64::STPWi:
   2270   case AArch64::STPSi:
   2271   case AArch64::LDNPWi:
   2272   case AArch64::LDNPSi:
   2273   case AArch64::STNPWi:
   2274   case AArch64::STNPSi:
   2275   case AArch64::LDG:
   2276   case AArch64::STGPi:
   2277   case AArch64::LD1B_IMM:
   2278   case AArch64::LD1H_IMM:
   2279   case AArch64::LD1W_IMM:
   2280   case AArch64::LD1D_IMM:
   2281   case AArch64::ST1B_IMM:
   2282   case AArch64::ST1H_IMM:
   2283   case AArch64::ST1W_IMM:
   2284   case AArch64::ST1D_IMM:
   2285   case AArch64::LD1B_H_IMM:
   2286   case AArch64::LD1SB_H_IMM:
   2287   case AArch64::LD1H_S_IMM:
   2288   case AArch64::LD1SH_S_IMM:
   2289   case AArch64::LD1W_D_IMM:
   2290   case AArch64::LD1SW_D_IMM:
   2291   case AArch64::ST1B_H_IMM:
   2292   case AArch64::ST1H_S_IMM:
   2293   case AArch64::ST1W_D_IMM:
   2294   case AArch64::LD1B_S_IMM:
   2295   case AArch64::LD1SB_S_IMM:
   2296   case AArch64::LD1H_D_IMM:
   2297   case AArch64::LD1SH_D_IMM:
   2298   case AArch64::ST1B_S_IMM:
   2299   case AArch64::ST1H_D_IMM:
   2300   case AArch64::LD1B_D_IMM:
   2301   case AArch64::LD1SB_D_IMM:
   2302   case AArch64::ST1B_D_IMM:
   2303     return 3;
   2304   case AArch64::ADDG:
   2305   case AArch64::STGOffset:
   2306   case AArch64::LDR_PXI:
   2307   case AArch64::STR_PXI:
   2308     return 2;
   2309   }
   2310 }
   2311 
   2312 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
   2313   switch (MI.getOpcode()) {
   2314   default:
   2315     return false;
   2316   // Scaled instructions.
   2317   case AArch64::STRSui:
   2318   case AArch64::STRDui:
   2319   case AArch64::STRQui:
   2320   case AArch64::STRXui:
   2321   case AArch64::STRWui:
   2322   case AArch64::LDRSui:
   2323   case AArch64::LDRDui:
   2324   case AArch64::LDRQui:
   2325   case AArch64::LDRXui:
   2326   case AArch64::LDRWui:
   2327   case AArch64::LDRSWui:
   2328   // Unscaled instructions.
   2329   case AArch64::STURSi:
   2330   case AArch64::STRSpre:
   2331   case AArch64::STURDi:
   2332   case AArch64::STRDpre:
   2333   case AArch64::STURQi:
   2334   case AArch64::STRQpre:
   2335   case AArch64::STURWi:
   2336   case AArch64::STRWpre:
   2337   case AArch64::STURXi:
   2338   case AArch64::STRXpre:
   2339   case AArch64::LDURSi:
   2340   case AArch64::LDRSpre:
   2341   case AArch64::LDURDi:
   2342   case AArch64::LDRDpre:
   2343   case AArch64::LDURQi:
   2344   case AArch64::LDRQpre:
   2345   case AArch64::LDURWi:
   2346   case AArch64::LDRWpre:
   2347   case AArch64::LDURXi:
   2348   case AArch64::LDRXpre:
   2349   case AArch64::LDURSWi:
   2350     return true;
   2351   }
   2352 }
   2353 
   2354 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
   2355                                                    bool &Is64Bit) {
   2356   switch (Opc) {
   2357   default:
   2358     llvm_unreachable("Opcode has no flag setting equivalent!");
   2359   // 32-bit cases:
   2360   case AArch64::ADDWri:
   2361     Is64Bit = false;
   2362     return AArch64::ADDSWri;
   2363   case AArch64::ADDWrr:
   2364     Is64Bit = false;
   2365     return AArch64::ADDSWrr;
   2366   case AArch64::ADDWrs:
   2367     Is64Bit = false;
   2368     return AArch64::ADDSWrs;
   2369   case AArch64::ADDWrx:
   2370     Is64Bit = false;
   2371     return AArch64::ADDSWrx;
   2372   case AArch64::ANDWri:
   2373     Is64Bit = false;
   2374     return AArch64::ANDSWri;
   2375   case AArch64::ANDWrr:
   2376     Is64Bit = false;
   2377     return AArch64::ANDSWrr;
   2378   case AArch64::ANDWrs:
   2379     Is64Bit = false;
   2380     return AArch64::ANDSWrs;
   2381   case AArch64::BICWrr:
   2382     Is64Bit = false;
   2383     return AArch64::BICSWrr;
   2384   case AArch64::BICWrs:
   2385     Is64Bit = false;
   2386     return AArch64::BICSWrs;
   2387   case AArch64::SUBWri:
   2388     Is64Bit = false;
   2389     return AArch64::SUBSWri;
   2390   case AArch64::SUBWrr:
   2391     Is64Bit = false;
   2392     return AArch64::SUBSWrr;
   2393   case AArch64::SUBWrs:
   2394     Is64Bit = false;
   2395     return AArch64::SUBSWrs;
   2396   case AArch64::SUBWrx:
   2397     Is64Bit = false;
   2398     return AArch64::SUBSWrx;
   2399   // 64-bit cases:
   2400   case AArch64::ADDXri:
   2401     Is64Bit = true;
   2402     return AArch64::ADDSXri;
   2403   case AArch64::ADDXrr:
   2404     Is64Bit = true;
   2405     return AArch64::ADDSXrr;
   2406   case AArch64::ADDXrs:
   2407     Is64Bit = true;
   2408     return AArch64::ADDSXrs;
   2409   case AArch64::ADDXrx:
   2410     Is64Bit = true;
   2411     return AArch64::ADDSXrx;
   2412   case AArch64::ANDXri:
   2413     Is64Bit = true;
   2414     return AArch64::ANDSXri;
   2415   case AArch64::ANDXrr:
   2416     Is64Bit = true;
   2417     return AArch64::ANDSXrr;
   2418   case AArch64::ANDXrs:
   2419     Is64Bit = true;
   2420     return AArch64::ANDSXrs;
   2421   case AArch64::BICXrr:
   2422     Is64Bit = true;
   2423     return AArch64::BICSXrr;
   2424   case AArch64::BICXrs:
   2425     Is64Bit = true;
   2426     return AArch64::BICSXrs;
   2427   case AArch64::SUBXri:
   2428     Is64Bit = true;
   2429     return AArch64::SUBSXri;
   2430   case AArch64::SUBXrr:
   2431     Is64Bit = true;
   2432     return AArch64::SUBSXrr;
   2433   case AArch64::SUBXrs:
   2434     Is64Bit = true;
   2435     return AArch64::SUBSXrs;
   2436   case AArch64::SUBXrx:
   2437     Is64Bit = true;
   2438     return AArch64::SUBSXrx;
   2439   }
   2440 }
   2441 
   2442 // Is this a candidate for ld/st merging or pairing?  For example, we don't
   2443 // touch volatiles or load/stores that have a hint to avoid pair formation.
   2444 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
   2445 
   2446   bool IsPreLdSt = isPreLdSt(MI);
   2447 
   2448   // If this is a volatile load/store, don't mess with it.
   2449   if (MI.hasOrderedMemoryRef())
   2450     return false;
   2451 
   2452   // Make sure this is a reg/fi+imm (as opposed to an address reloc).
   2453   // For Pre-inc LD/ST, the operand is shifted by one.
   2454   assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
   2455           MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
   2456          "Expected a reg or frame index operand.");
   2457 
   2458   // For Pre-indexed addressing quadword instructions, the third operand is the
   2459   // immediate value.
   2460   bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
   2461 
   2462   if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
   2463     return false;
   2464 
   2465   // Can't merge/pair if the instruction modifies the base register.
   2466   // e.g., ldr x0, [x0]
   2467   // This case will never occur with an FI base.
   2468   // However, if the instruction is an LDR/STR<S,D,Q,W,X>pre, it can be merged.
   2469   // For example:
   2470   //   ldr q0, [x11, #32]!
   2471   //   ldr q1, [x11, #16]
   2472   //   to
   2473   //   ldp q0, q1, [x11, #32]!
   2474   if (MI.getOperand(1).isReg() && !IsPreLdSt) {
   2475     Register BaseReg = MI.getOperand(1).getReg();
   2476     const TargetRegisterInfo *TRI = &getRegisterInfo();
   2477     if (MI.modifiesRegister(BaseReg, TRI))
   2478       return false;
   2479   }
   2480 
   2481   // Check if this load/store has a hint to avoid pair formation.
   2482   // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
   2483   if (isLdStPairSuppressed(MI))
   2484     return false;
   2485 
   2486   // Do not pair any callee-save store/reload instructions in the
   2487   // prologue/epilogue if the CFI information encoded the operations as separate
   2488   // instructions, as that will cause the size of the actual prologue to mismatch
   2489   // with the prologue size recorded in the Windows CFI.
   2490   const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
   2491   bool NeedsWinCFI = MAI->usesWindowsCFI() &&
   2492                      MI.getMF()->getFunction().needsUnwindTableEntry();
   2493   if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
   2494                       MI.getFlag(MachineInstr::FrameDestroy)))
   2495     return false;
   2496 
   2497   // On some CPUs quad load/store pairs are slower than two single load/stores.
   2498   if (Subtarget.isPaired128Slow()) {
   2499     switch (MI.getOpcode()) {
   2500     default:
   2501       break;
   2502     case AArch64::LDURQi:
   2503     case AArch64::STURQi:
   2504     case AArch64::LDRQui:
   2505     case AArch64::STRQui:
   2506       return false;
   2507     }
   2508   }
   2509 
   2510   return true;
   2511 }
   2512 
   2513 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
   2514     const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
   2515     int64_t &Offset, bool &OffsetIsScalable, unsigned &Width,
   2516     const TargetRegisterInfo *TRI) const {
   2517   if (!LdSt.mayLoadOrStore())
   2518     return false;
   2519 
   2520   const MachineOperand *BaseOp;
   2521   if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
   2522                                     Width, TRI))
   2523     return false;
   2524   BaseOps.push_back(BaseOp);
   2525   return true;
   2526 }
   2527 
   2528 Optional<ExtAddrMode>
   2529 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
   2530                                           const TargetRegisterInfo *TRI) const {
   2531   const MachineOperand *Base; // Filled with the base operand of MI.
   2532   int64_t Offset;             // Filled with the offset of MI.
   2533   bool OffsetIsScalable;
   2534   if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
   2535     return None;
   2536 
   2537   if (!Base->isReg())
   2538     return None;
   2539   ExtAddrMode AM;
   2540   AM.BaseReg = Base->getReg();
   2541   AM.Displacement = Offset;
   2542   AM.ScaledReg = 0;
   2543   return AM;
   2544 }
   2545 
   2546 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
   2547     const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
   2548     bool &OffsetIsScalable, unsigned &Width,
   2549     const TargetRegisterInfo *TRI) const {
   2550   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
   2551   // Handle only loads/stores with base register followed by immediate offset.
   2552   if (LdSt.getNumExplicitOperands() == 3) {
   2553     // Non-paired instruction (e.g., ldr x1, [x0, #8]).
   2554     if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
   2555         !LdSt.getOperand(2).isImm())
   2556       return false;
   2557   } else if (LdSt.getNumExplicitOperands() == 4) {
   2558     // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
   2559     if (!LdSt.getOperand(1).isReg() ||
   2560         (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
   2561         !LdSt.getOperand(3).isImm())
   2562       return false;
   2563   } else
   2564     return false;
   2565 
   2566   // Get the scaling factor for the instruction and set the width for the
   2567   // instruction.
   2568   TypeSize Scale(0U, false);
   2569   int64_t Dummy1, Dummy2;
   2570 
   2571   // If this returns false, then it's an instruction we don't want to handle.
   2572   if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
   2573     return false;
   2574 
   2575   // Compute the offset. Offset is calculated as the immediate operand
   2576   // multiplied by the scaling factor. Unscaled instructions have scaling factor
   2577   // set to 1.
   2578   if (LdSt.getNumExplicitOperands() == 3) {
   2579     BaseOp = &LdSt.getOperand(1);
   2580     Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinSize();
   2581   } else {
   2582     assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
   2583     BaseOp = &LdSt.getOperand(2);
   2584     Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinSize();
   2585   }
   2586   OffsetIsScalable = Scale.isScalable();
   2587 
   2588   if (!BaseOp->isReg() && !BaseOp->isFI())
   2589     return false;
   2590 
   2591   return true;
   2592 }
   2593 
   2594 MachineOperand &
   2595 AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
   2596   assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
   2597   MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
   2598   assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
   2599   return OfsOp;
   2600 }
   2601 
   2602 bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
   2603                                     unsigned &Width, int64_t &MinOffset,
   2604                                     int64_t &MaxOffset) {
   2605   const unsigned SVEMaxBytesPerVector = AArch64::SVEMaxBitsPerVector / 8;
   2606   switch (Opcode) {
   2607   // Not a memory operation or something we want to handle.
   2608   default:
   2609     Scale = TypeSize::Fixed(0);
   2610     Width = 0;
   2611     MinOffset = MaxOffset = 0;
   2612     return false;
   2613   case AArch64::STRWpost:
   2614   case AArch64::LDRWpost:
   2615     Width = 32;
   2616     Scale = TypeSize::Fixed(4);
   2617     MinOffset = -256;
   2618     MaxOffset = 255;
   2619     break;
   2620   case AArch64::LDURQi:
   2621   case AArch64::STURQi:
   2622     Width = 16;
   2623     Scale = TypeSize::Fixed(1);
   2624     MinOffset = -256;
   2625     MaxOffset = 255;
   2626     break;
   2627   case AArch64::PRFUMi:
   2628   case AArch64::LDURXi:
   2629   case AArch64::LDURDi:
   2630   case AArch64::STURXi:
   2631   case AArch64::STURDi:
   2632     Width = 8;
   2633     Scale = TypeSize::Fixed(1);
   2634     MinOffset = -256;
   2635     MaxOffset = 255;
   2636     break;
   2637   case AArch64::LDURWi:
   2638   case AArch64::LDURSi:
   2639   case AArch64::LDURSWi:
   2640   case AArch64::STURWi:
   2641   case AArch64::STURSi:
   2642     Width = 4;
   2643     Scale = TypeSize::Fixed(1);
   2644     MinOffset = -256;
   2645     MaxOffset = 255;
   2646     break;
   2647   case AArch64::LDURHi:
   2648   case AArch64::LDURHHi:
   2649   case AArch64::LDURSHXi:
   2650   case AArch64::LDURSHWi:
   2651   case AArch64::STURHi:
   2652   case AArch64::STURHHi:
   2653     Width = 2;
   2654     Scale = TypeSize::Fixed(1);
   2655     MinOffset = -256;
   2656     MaxOffset = 255;
   2657     break;
   2658   case AArch64::LDURBi:
   2659   case AArch64::LDURBBi:
   2660   case AArch64::LDURSBXi:
   2661   case AArch64::LDURSBWi:
   2662   case AArch64::STURBi:
   2663   case AArch64::STURBBi:
   2664     Width = 1;
   2665     Scale = TypeSize::Fixed(1);
   2666     MinOffset = -256;
   2667     MaxOffset = 255;
   2668     break;
   2669   case AArch64::LDPQi:
   2670   case AArch64::LDNPQi:
   2671   case AArch64::STPQi:
   2672   case AArch64::STNPQi:
   2673     Scale = TypeSize::Fixed(16);
   2674     Width = 32;
   2675     MinOffset = -64;
   2676     MaxOffset = 63;
   2677     break;
   2678   case AArch64::LDRQui:
   2679   case AArch64::STRQui:
   2680     Scale = TypeSize::Fixed(16);
   2681     Width = 16;
   2682     MinOffset = 0;
   2683     MaxOffset = 4095;
   2684     break;
   2685   case AArch64::LDPXi:
   2686   case AArch64::LDPDi:
   2687   case AArch64::LDNPXi:
   2688   case AArch64::LDNPDi:
   2689   case AArch64::STPXi:
   2690   case AArch64::STPDi:
   2691   case AArch64::STNPXi:
   2692   case AArch64::STNPDi:
   2693     Scale = TypeSize::Fixed(8);
   2694     Width = 16;
   2695     MinOffset = -64;
   2696     MaxOffset = 63;
   2697     break;
   2698   case AArch64::PRFMui:
   2699   case AArch64::LDRXui:
   2700   case AArch64::LDRDui:
   2701   case AArch64::STRXui:
   2702   case AArch64::STRDui:
   2703     Scale = TypeSize::Fixed(8);
   2704     Width = 8;
   2705     MinOffset = 0;
   2706     MaxOffset = 4095;
   2707     break;
   2708   case AArch64::StoreSwiftAsyncContext:
   2709     // Store is an STRXui, but there might be an ADDXri in the expansion too.
   2710     Scale = TypeSize::Fixed(1);
   2711     Width = 8;
   2712     MinOffset = 0;
   2713     MaxOffset = 4095;
   2714     break;
   2715   case AArch64::LDPWi:
   2716   case AArch64::LDPSi:
   2717   case AArch64::LDNPWi:
   2718   case AArch64::LDNPSi:
   2719   case AArch64::STPWi:
   2720   case AArch64::STPSi:
   2721   case AArch64::STNPWi:
   2722   case AArch64::STNPSi:
   2723     Scale = TypeSize::Fixed(4);
   2724     Width = 8;
   2725     MinOffset = -64;
   2726     MaxOffset = 63;
   2727     break;
   2728   case AArch64::LDRWui:
   2729   case AArch64::LDRSui:
   2730   case AArch64::LDRSWui:
   2731   case AArch64::STRWui:
   2732   case AArch64::STRSui:
   2733     Scale = TypeSize::Fixed(4);
   2734     Width = 4;
   2735     MinOffset = 0;
   2736     MaxOffset = 4095;
   2737     break;
   2738   case AArch64::LDRHui:
   2739   case AArch64::LDRHHui:
   2740   case AArch64::LDRSHWui:
   2741   case AArch64::LDRSHXui:
   2742   case AArch64::STRHui:
   2743   case AArch64::STRHHui:
   2744     Scale = TypeSize::Fixed(2);
   2745     Width = 2;
   2746     MinOffset = 0;
   2747     MaxOffset = 4095;
   2748     break;
   2749   case AArch64::LDRBui:
   2750   case AArch64::LDRBBui:
   2751   case AArch64::LDRSBWui:
   2752   case AArch64::LDRSBXui:
   2753   case AArch64::STRBui:
   2754   case AArch64::STRBBui:
   2755     Scale = TypeSize::Fixed(1);
   2756     Width = 1;
   2757     MinOffset = 0;
   2758     MaxOffset = 4095;
   2759     break;
   2760   case AArch64::STPXpre:
   2761   case AArch64::LDPXpost:
   2762   case AArch64::STPDpre:
   2763   case AArch64::LDPDpost:
   2764     Scale = TypeSize::Fixed(8);
   2765     Width = 8;
   2766     MinOffset = -512;
   2767     MaxOffset = 504;
   2768     break;
   2769   case AArch64::STPQpre:
   2770   case AArch64::LDPQpost:
   2771     Scale = TypeSize::Fixed(16);
   2772     Width = 16;
   2773     MinOffset = -1024;
   2774     MaxOffset = 1008;
   2775     break;
   2776   case AArch64::STRXpre:
   2777   case AArch64::STRDpre:
   2778   case AArch64::LDRXpost:
   2779   case AArch64::LDRDpost:
   2780     Scale = TypeSize::Fixed(1);
   2781     Width = 8;
   2782     MinOffset = -256;
   2783     MaxOffset = 255;
   2784     break;
   2785   case AArch64::STRQpre:
   2786   case AArch64::LDRQpost:
   2787     Scale = TypeSize::Fixed(1);
   2788     Width = 16;
   2789     MinOffset = -256;
   2790     MaxOffset = 255;
   2791     break;
   2792   case AArch64::ADDG:
   2793     Scale = TypeSize::Fixed(16);
   2794     Width = 0;
   2795     MinOffset = 0;
   2796     MaxOffset = 63;
   2797     break;
   2798   case AArch64::TAGPstack:
   2799     Scale = TypeSize::Fixed(16);
   2800     Width = 0;
   2801     // TAGP with a negative offset turns into SUBP, which has a maximum offset
   2802     // of 63 (not 64!).
   2803     MinOffset = -63;
   2804     MaxOffset = 63;
   2805     break;
   2806   case AArch64::LDG:
   2807   case AArch64::STGOffset:
   2808   case AArch64::STZGOffset:
   2809     Scale = TypeSize::Fixed(16);
   2810     Width = 16;
   2811     MinOffset = -256;
   2812     MaxOffset = 255;
   2813     break;
   2814   case AArch64::STR_ZZZZXI:
   2815   case AArch64::LDR_ZZZZXI:
   2816     Scale = TypeSize::Scalable(16);
   2817     Width = SVEMaxBytesPerVector * 4;
   2818     MinOffset = -256;
   2819     MaxOffset = 252;
   2820     break;
   2821   case AArch64::STR_ZZZXI:
   2822   case AArch64::LDR_ZZZXI:
   2823     Scale = TypeSize::Scalable(16);
   2824     Width = SVEMaxBytesPerVector * 3;
   2825     MinOffset = -256;
   2826     MaxOffset = 253;
   2827     break;
   2828   case AArch64::STR_ZZXI:
   2829   case AArch64::LDR_ZZXI:
   2830     Scale = TypeSize::Scalable(16);
   2831     Width = SVEMaxBytesPerVector * 2;
   2832     MinOffset = -256;
   2833     MaxOffset = 254;
   2834     break;
   2835   case AArch64::LDR_PXI:
   2836   case AArch64::STR_PXI:
   2837     Scale = TypeSize::Scalable(2);
   2838     Width = SVEMaxBytesPerVector / 8;
   2839     MinOffset = -256;
   2840     MaxOffset = 255;
   2841     break;
   2842   case AArch64::LDR_ZXI:
   2843   case AArch64::STR_ZXI:
   2844     Scale = TypeSize::Scalable(16);
   2845     Width = SVEMaxBytesPerVector;
   2846     MinOffset = -256;
   2847     MaxOffset = 255;
   2848     break;
   2849   case AArch64::LD1B_IMM:
   2850   case AArch64::LD1H_IMM:
   2851   case AArch64::LD1W_IMM:
   2852   case AArch64::LD1D_IMM:
   2853   case AArch64::ST1B_IMM:
   2854   case AArch64::ST1H_IMM:
   2855   case AArch64::ST1W_IMM:
   2856   case AArch64::ST1D_IMM:
   2857     // A full vectors worth of data
   2858     // Width = mbytes * elements
   2859     Scale = TypeSize::Scalable(16);
   2860     Width = SVEMaxBytesPerVector;
   2861     MinOffset = -8;
   2862     MaxOffset = 7;
   2863     break;
   2864   case AArch64::LD1B_H_IMM:
   2865   case AArch64::LD1SB_H_IMM:
   2866   case AArch64::LD1H_S_IMM:
   2867   case AArch64::LD1SH_S_IMM:
   2868   case AArch64::LD1W_D_IMM:
   2869   case AArch64::LD1SW_D_IMM:
   2870   case AArch64::ST1B_H_IMM:
   2871   case AArch64::ST1H_S_IMM:
   2872   case AArch64::ST1W_D_IMM:
   2873     // A half vector worth of data
   2874     // Width = mbytes * elements
   2875     Scale = TypeSize::Scalable(8);
   2876     Width = SVEMaxBytesPerVector / 2;
   2877     MinOffset = -8;
   2878     MaxOffset = 7;
   2879     break;
   2880   case AArch64::LD1B_S_IMM:
   2881   case AArch64::LD1SB_S_IMM:
   2882   case AArch64::LD1H_D_IMM:
   2883   case AArch64::LD1SH_D_IMM:
   2884   case AArch64::ST1B_S_IMM:
   2885   case AArch64::ST1H_D_IMM:
   2886     // A quarter vector worth of data
   2887     // Width = mbytes * elements
   2888     Scale = TypeSize::Scalable(4);
   2889     Width = SVEMaxBytesPerVector / 4;
   2890     MinOffset = -8;
   2891     MaxOffset = 7;
   2892     break;
   2893   case AArch64::LD1B_D_IMM:
   2894   case AArch64::LD1SB_D_IMM:
   2895   case AArch64::ST1B_D_IMM:
   2896     // A eighth vector worth of data
   2897     // Width = mbytes * elements
   2898     Scale = TypeSize::Scalable(2);
   2899     Width = SVEMaxBytesPerVector / 8;
   2900     MinOffset = -8;
   2901     MaxOffset = 7;
   2902     break;
   2903   case AArch64::ST2GOffset:
   2904   case AArch64::STZ2GOffset:
   2905     Scale = TypeSize::Fixed(16);
   2906     Width = 32;
   2907     MinOffset = -256;
   2908     MaxOffset = 255;
   2909     break;
   2910   case AArch64::STGPi:
   2911     Scale = TypeSize::Fixed(16);
   2912     Width = 16;
   2913     MinOffset = -64;
   2914     MaxOffset = 63;
   2915     break;
   2916   }
   2917 
   2918   return true;
   2919 }
   2920 
   2921 // Scaling factor for unscaled load or store.
   2922 int AArch64InstrInfo::getMemScale(unsigned Opc) {
   2923   switch (Opc) {
   2924   default:
   2925     llvm_unreachable("Opcode has unknown scale!");
   2926   case AArch64::LDRBBui:
   2927   case AArch64::LDURBBi:
   2928   case AArch64::LDRSBWui:
   2929   case AArch64::LDURSBWi:
   2930   case AArch64::STRBBui:
   2931   case AArch64::STURBBi:
   2932     return 1;
   2933   case AArch64::LDRHHui:
   2934   case AArch64::LDURHHi:
   2935   case AArch64::LDRSHWui:
   2936   case AArch64::LDURSHWi:
   2937   case AArch64::STRHHui:
   2938   case AArch64::STURHHi:
   2939     return 2;
   2940   case AArch64::LDRSui:
   2941   case AArch64::LDURSi:
   2942   case AArch64::LDRSpre:
   2943   case AArch64::LDRSWui:
   2944   case AArch64::LDURSWi:
   2945   case AArch64::LDRWpre:
   2946   case AArch64::LDRWui:
   2947   case AArch64::LDURWi:
   2948   case AArch64::STRSui:
   2949   case AArch64::STURSi:
   2950   case AArch64::STRSpre:
   2951   case AArch64::STRWui:
   2952   case AArch64::STURWi:
   2953   case AArch64::STRWpre:
   2954   case AArch64::LDPSi:
   2955   case AArch64::LDPSWi:
   2956   case AArch64::LDPWi:
   2957   case AArch64::STPSi:
   2958   case AArch64::STPWi:
   2959     return 4;
   2960   case AArch64::LDRDui:
   2961   case AArch64::LDURDi:
   2962   case AArch64::LDRDpre:
   2963   case AArch64::LDRXui:
   2964   case AArch64::LDURXi:
   2965   case AArch64::LDRXpre:
   2966   case AArch64::STRDui:
   2967   case AArch64::STURDi:
   2968   case AArch64::STRDpre:
   2969   case AArch64::STRXui:
   2970   case AArch64::STURXi:
   2971   case AArch64::STRXpre:
   2972   case AArch64::LDPDi:
   2973   case AArch64::LDPXi:
   2974   case AArch64::STPDi:
   2975   case AArch64::STPXi:
   2976     return 8;
   2977   case AArch64::LDRQui:
   2978   case AArch64::LDURQi:
   2979   case AArch64::STRQui:
   2980   case AArch64::STURQi:
   2981   case AArch64::STRQpre:
   2982   case AArch64::LDPQi:
   2983   case AArch64::LDRQpre:
   2984   case AArch64::STPQi:
   2985   case AArch64::STGOffset:
   2986   case AArch64::STZGOffset:
   2987   case AArch64::ST2GOffset:
   2988   case AArch64::STZ2GOffset:
   2989   case AArch64::STGPi:
   2990     return 16;
   2991   }
   2992 }
   2993 
   2994 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
   2995   switch (MI.getOpcode()) {
   2996   default:
   2997     return false;
   2998   case AArch64::LDRWpre:
   2999   case AArch64::LDRXpre:
   3000   case AArch64::LDRSpre:
   3001   case AArch64::LDRDpre:
   3002   case AArch64::LDRQpre:
   3003     return true;
   3004   }
   3005 }
   3006 
   3007 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
   3008   switch (MI.getOpcode()) {
   3009   default:
   3010     return false;
   3011   case AArch64::STRWpre:
   3012   case AArch64::STRXpre:
   3013   case AArch64::STRSpre:
   3014   case AArch64::STRDpre:
   3015   case AArch64::STRQpre:
   3016     return true;
   3017   }
   3018 }
   3019 
   3020 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
   3021   return isPreLd(MI) || isPreSt(MI);
   3022 }
   3023 
   3024 // Scale the unscaled offsets.  Returns false if the unscaled offset can't be
   3025 // scaled.
   3026 static bool scaleOffset(unsigned Opc, int64_t &Offset) {
   3027   int Scale = AArch64InstrInfo::getMemScale(Opc);
   3028 
   3029   // If the byte-offset isn't a multiple of the stride, we can't scale this
   3030   // offset.
   3031   if (Offset % Scale != 0)
   3032     return false;
   3033 
   3034   // Convert the byte-offset used by unscaled into an "element" offset used
   3035   // by the scaled pair load/store instructions.
   3036   Offset /= Scale;
   3037   return true;
   3038 }
   3039 
   3040 static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
   3041   if (FirstOpc == SecondOpc)
   3042     return true;
   3043   // We can also pair sign-ext and zero-ext instructions.
   3044   switch (FirstOpc) {
   3045   default:
   3046     return false;
   3047   case AArch64::LDRWui:
   3048   case AArch64::LDURWi:
   3049     return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
   3050   case AArch64::LDRSWui:
   3051   case AArch64::LDURSWi:
   3052     return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
   3053   }
   3054   // These instructions can't be paired based on their opcodes.
   3055   return false;
   3056 }
   3057 
   3058 static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
   3059                             int64_t Offset1, unsigned Opcode1, int FI2,
   3060                             int64_t Offset2, unsigned Opcode2) {
   3061   // Accesses through fixed stack object frame indices may access a different
   3062   // fixed stack slot. Check that the object offsets + offsets match.
   3063   if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
   3064     int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
   3065     int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
   3066     assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
   3067     // Convert to scaled object offsets.
   3068     int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
   3069     if (ObjectOffset1 % Scale1 != 0)
   3070       return false;
   3071     ObjectOffset1 /= Scale1;
   3072     int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
   3073     if (ObjectOffset2 % Scale2 != 0)
   3074       return false;
   3075     ObjectOffset2 /= Scale2;
   3076     ObjectOffset1 += Offset1;
   3077     ObjectOffset2 += Offset2;
   3078     return ObjectOffset1 + 1 == ObjectOffset2;
   3079   }
   3080 
   3081   return FI1 == FI2;
   3082 }
   3083 
   3084 /// Detect opportunities for ldp/stp formation.
   3085 ///
   3086 /// Only called for LdSt for which getMemOperandWithOffset returns true.
   3087 bool AArch64InstrInfo::shouldClusterMemOps(
   3088     ArrayRef<const MachineOperand *> BaseOps1,
   3089     ArrayRef<const MachineOperand *> BaseOps2, unsigned NumLoads,
   3090     unsigned NumBytes) const {
   3091   assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
   3092   const MachineOperand &BaseOp1 = *BaseOps1.front();
   3093   const MachineOperand &BaseOp2 = *BaseOps2.front();
   3094   const MachineInstr &FirstLdSt = *BaseOp1.getParent();
   3095   const MachineInstr &SecondLdSt = *BaseOp2.getParent();
   3096   if (BaseOp1.getType() != BaseOp2.getType())
   3097     return false;
   3098 
   3099   assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
   3100          "Only base registers and frame indices are supported.");
   3101 
   3102   // Check for both base regs and base FI.
   3103   if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
   3104     return false;
   3105 
   3106   // Only cluster up to a single pair.
   3107   if (NumLoads > 2)
   3108     return false;
   3109 
   3110   if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
   3111     return false;
   3112 
   3113   // Can we pair these instructions based on their opcodes?
   3114   unsigned FirstOpc = FirstLdSt.getOpcode();
   3115   unsigned SecondOpc = SecondLdSt.getOpcode();
   3116   if (!canPairLdStOpc(FirstOpc, SecondOpc))
   3117     return false;
   3118 
   3119   // Can't merge volatiles or load/stores that have a hint to avoid pair
   3120   // formation, for example.
   3121   if (!isCandidateToMergeOrPair(FirstLdSt) ||
   3122       !isCandidateToMergeOrPair(SecondLdSt))
   3123     return false;
   3124 
   3125   // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
   3126   int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
   3127   if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
   3128     return false;
   3129 
   3130   int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
   3131   if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
   3132     return false;
   3133 
   3134   // Pairwise instructions have a 7-bit signed offset field.
   3135   if (Offset1 > 63 || Offset1 < -64)
   3136     return false;
   3137 
   3138   // The caller should already have ordered First/SecondLdSt by offset.
   3139   // Note: except for non-equal frame index bases
   3140   if (BaseOp1.isFI()) {
   3141     assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
   3142            "Caller should have ordered offsets.");
   3143 
   3144     const MachineFrameInfo &MFI =
   3145         FirstLdSt.getParent()->getParent()->getFrameInfo();
   3146     return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
   3147                            BaseOp2.getIndex(), Offset2, SecondOpc);
   3148   }
   3149 
   3150   assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
   3151 
   3152   return Offset1 + 1 == Offset2;
   3153 }
   3154 
   3155 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
   3156                                             unsigned Reg, unsigned SubIdx,
   3157                                             unsigned State,
   3158                                             const TargetRegisterInfo *TRI) {
   3159   if (!SubIdx)
   3160     return MIB.addReg(Reg, State);
   3161 
   3162   if (Register::isPhysicalRegister(Reg))
   3163     return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
   3164   return MIB.addReg(Reg, State, SubIdx);
   3165 }
   3166 
   3167 static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
   3168                                         unsigned NumRegs) {
   3169   // We really want the positive remainder mod 32 here, that happens to be
   3170   // easily obtainable with a mask.
   3171   return ((DestReg - SrcReg) & 0x1f) < NumRegs;
   3172 }
   3173 
   3174 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
   3175                                         MachineBasicBlock::iterator I,
   3176                                         const DebugLoc &DL, MCRegister DestReg,
   3177                                         MCRegister SrcReg, bool KillSrc,
   3178                                         unsigned Opcode,
   3179                                         ArrayRef<unsigned> Indices) const {
   3180   assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
   3181   const TargetRegisterInfo *TRI = &getRegisterInfo();
   3182   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
   3183   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
   3184   unsigned NumRegs = Indices.size();
   3185 
   3186   int SubReg = 0, End = NumRegs, Incr = 1;
   3187   if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
   3188     SubReg = NumRegs - 1;
   3189     End = -1;
   3190     Incr = -1;
   3191   }
   3192 
   3193   for (; SubReg != End; SubReg += Incr) {
   3194     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
   3195     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
   3196     AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
   3197     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
   3198   }
   3199 }
   3200 
   3201 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
   3202                                        MachineBasicBlock::iterator I,
   3203                                        DebugLoc DL, unsigned DestReg,
   3204                                        unsigned SrcReg, bool KillSrc,
   3205                                        unsigned Opcode, unsigned ZeroReg,
   3206                                        llvm::ArrayRef<unsigned> Indices) const {
   3207   const TargetRegisterInfo *TRI = &getRegisterInfo();
   3208   unsigned NumRegs = Indices.size();
   3209 
   3210 #ifndef NDEBUG
   3211   uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
   3212   uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
   3213   assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
   3214          "GPR reg sequences should not be able to overlap");
   3215 #endif
   3216 
   3217   for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
   3218     const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
   3219     AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
   3220     MIB.addReg(ZeroReg);
   3221     AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
   3222     MIB.addImm(0);
   3223   }
   3224 }
   3225 
   3226 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
   3227                                    MachineBasicBlock::iterator I,
   3228                                    const DebugLoc &DL, MCRegister DestReg,
   3229                                    MCRegister SrcReg, bool KillSrc) const {
   3230   if (AArch64::GPR32spRegClass.contains(DestReg) &&
   3231       (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
   3232     const TargetRegisterInfo *TRI = &getRegisterInfo();
   3233 
   3234     if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
   3235       // If either operand is WSP, expand to ADD #0.
   3236       if (Subtarget.hasZeroCycleRegMove()) {
   3237         // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
   3238         MCRegister DestRegX = TRI->getMatchingSuperReg(
   3239             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
   3240         MCRegister SrcRegX = TRI->getMatchingSuperReg(
   3241             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
   3242         // This instruction is reading and writing X registers.  This may upset
   3243         // the register scavenger and machine verifier, so we need to indicate
   3244         // that we are reading an undefined value from SrcRegX, but a proper
   3245         // value from SrcReg.
   3246         BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
   3247             .addReg(SrcRegX, RegState::Undef)
   3248             .addImm(0)
   3249             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
   3250             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   3251       } else {
   3252         BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
   3253             .addReg(SrcReg, getKillRegState(KillSrc))
   3254             .addImm(0)
   3255             .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   3256       }
   3257     } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
   3258       BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
   3259           .addImm(0)
   3260           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   3261     } else {
   3262       if (Subtarget.hasZeroCycleRegMove()) {
   3263         // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
   3264         MCRegister DestRegX = TRI->getMatchingSuperReg(
   3265             DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
   3266         MCRegister SrcRegX = TRI->getMatchingSuperReg(
   3267             SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
   3268         // This instruction is reading and writing X registers.  This may upset
   3269         // the register scavenger and machine verifier, so we need to indicate
   3270         // that we are reading an undefined value from SrcRegX, but a proper
   3271         // value from SrcReg.
   3272         BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
   3273             .addReg(AArch64::XZR)
   3274             .addReg(SrcRegX, RegState::Undef)
   3275             .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
   3276       } else {
   3277         // Otherwise, expand to ORR WZR.
   3278         BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
   3279             .addReg(AArch64::WZR)
   3280             .addReg(SrcReg, getKillRegState(KillSrc));
   3281       }
   3282     }
   3283     return;
   3284   }
   3285 
   3286   // Copy a Predicate register by ORRing with itself.
   3287   if (AArch64::PPRRegClass.contains(DestReg) &&
   3288       AArch64::PPRRegClass.contains(SrcReg)) {
   3289     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
   3290     BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
   3291       .addReg(SrcReg) // Pg
   3292       .addReg(SrcReg)
   3293       .addReg(SrcReg, getKillRegState(KillSrc));
   3294     return;
   3295   }
   3296 
   3297   // Copy a Z register by ORRing with itself.
   3298   if (AArch64::ZPRRegClass.contains(DestReg) &&
   3299       AArch64::ZPRRegClass.contains(SrcReg)) {
   3300     assert(Subtarget.hasSVE() && "Unexpected SVE register.");
   3301     BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
   3302       .addReg(SrcReg)
   3303       .addReg(SrcReg, getKillRegState(KillSrc));
   3304     return;
   3305   }
   3306 
   3307   // Copy a Z register pair by copying the individual sub-registers.
   3308   if (AArch64::ZPR2RegClass.contains(DestReg) &&
   3309       AArch64::ZPR2RegClass.contains(SrcReg)) {
   3310     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
   3311     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
   3312                      Indices);
   3313     return;
   3314   }
   3315 
   3316   // Copy a Z register triple by copying the individual sub-registers.
   3317   if (AArch64::ZPR3RegClass.contains(DestReg) &&
   3318       AArch64::ZPR3RegClass.contains(SrcReg)) {
   3319     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
   3320                                        AArch64::zsub2};
   3321     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
   3322                      Indices);
   3323     return;
   3324   }
   3325 
   3326   // Copy a Z register quad by copying the individual sub-registers.
   3327   if (AArch64::ZPR4RegClass.contains(DestReg) &&
   3328       AArch64::ZPR4RegClass.contains(SrcReg)) {
   3329     static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
   3330                                        AArch64::zsub2, AArch64::zsub3};
   3331     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
   3332                      Indices);
   3333     return;
   3334   }
   3335 
   3336   if (AArch64::GPR64spRegClass.contains(DestReg) &&
   3337       (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
   3338     if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
   3339       // If either operand is SP, expand to ADD #0.
   3340       BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
   3341           .addReg(SrcReg, getKillRegState(KillSrc))
   3342           .addImm(0)
   3343           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   3344     } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
   3345       BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
   3346           .addImm(0)
   3347           .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
   3348     } else {
   3349       // Otherwise, expand to ORR XZR.
   3350       BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
   3351           .addReg(AArch64::XZR)
   3352           .addReg(SrcReg, getKillRegState(KillSrc));
   3353     }
   3354     return;
   3355   }
   3356 
   3357   // Copy a DDDD register quad by copying the individual sub-registers.
   3358   if (AArch64::DDDDRegClass.contains(DestReg) &&
   3359       AArch64::DDDDRegClass.contains(SrcReg)) {
   3360     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
   3361                                        AArch64::dsub2, AArch64::dsub3};
   3362     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   3363                      Indices);
   3364     return;
   3365   }
   3366 
   3367   // Copy a DDD register triple by copying the individual sub-registers.
   3368   if (AArch64::DDDRegClass.contains(DestReg) &&
   3369       AArch64::DDDRegClass.contains(SrcReg)) {
   3370     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
   3371                                        AArch64::dsub2};
   3372     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   3373                      Indices);
   3374     return;
   3375   }
   3376 
   3377   // Copy a DD register pair by copying the individual sub-registers.
   3378   if (AArch64::DDRegClass.contains(DestReg) &&
   3379       AArch64::DDRegClass.contains(SrcReg)) {
   3380     static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
   3381     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
   3382                      Indices);
   3383     return;
   3384   }
   3385 
   3386   // Copy a QQQQ register quad by copying the individual sub-registers.
   3387   if (AArch64::QQQQRegClass.contains(DestReg) &&
   3388       AArch64::QQQQRegClass.contains(SrcReg)) {
   3389     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
   3390                                        AArch64::qsub2, AArch64::qsub3};
   3391     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   3392                      Indices);
   3393     return;
   3394   }
   3395 
   3396   // Copy a QQQ register triple by copying the individual sub-registers.
   3397   if (AArch64::QQQRegClass.contains(DestReg) &&
   3398       AArch64::QQQRegClass.contains(SrcReg)) {
   3399     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
   3400                                        AArch64::qsub2};
   3401     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   3402                      Indices);
   3403     return;
   3404   }
   3405 
   3406   // Copy a QQ register pair by copying the individual sub-registers.
   3407   if (AArch64::QQRegClass.contains(DestReg) &&
   3408       AArch64::QQRegClass.contains(SrcReg)) {
   3409     static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
   3410     copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
   3411                      Indices);
   3412     return;
   3413   }
   3414 
   3415   if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
   3416       AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
   3417     static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
   3418     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
   3419                     AArch64::XZR, Indices);
   3420     return;
   3421   }
   3422 
   3423   if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
   3424       AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
   3425     static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
   3426     copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
   3427                     AArch64::WZR, Indices);
   3428     return;
   3429   }
   3430 
   3431   if (AArch64::FPR128RegClass.contains(DestReg) &&
   3432       AArch64::FPR128RegClass.contains(SrcReg)) {
   3433     if (Subtarget.hasNEON()) {
   3434       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   3435           .addReg(SrcReg)
   3436           .addReg(SrcReg, getKillRegState(KillSrc));
   3437     } else {
   3438       BuildMI(MBB, I, DL, get(AArch64::STRQpre))
   3439           .addReg(AArch64::SP, RegState::Define)
   3440           .addReg(SrcReg, getKillRegState(KillSrc))
   3441           .addReg(AArch64::SP)
   3442           .addImm(-16);
   3443       BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
   3444           .addReg(AArch64::SP, RegState::Define)
   3445           .addReg(DestReg, RegState::Define)
   3446           .addReg(AArch64::SP)
   3447           .addImm(16);
   3448     }
   3449     return;
   3450   }
   3451 
   3452   if (AArch64::FPR64RegClass.contains(DestReg) &&
   3453       AArch64::FPR64RegClass.contains(SrcReg)) {
   3454     if (Subtarget.hasNEON()) {
   3455       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
   3456                                        &AArch64::FPR128RegClass);
   3457       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
   3458                                       &AArch64::FPR128RegClass);
   3459       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   3460           .addReg(SrcReg)
   3461           .addReg(SrcReg, getKillRegState(KillSrc));
   3462     } else {
   3463       BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
   3464           .addReg(SrcReg, getKillRegState(KillSrc));
   3465     }
   3466     return;
   3467   }
   3468 
   3469   if (AArch64::FPR32RegClass.contains(DestReg) &&
   3470       AArch64::FPR32RegClass.contains(SrcReg)) {
   3471     if (Subtarget.hasNEON()) {
   3472       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
   3473                                        &AArch64::FPR128RegClass);
   3474       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
   3475                                       &AArch64::FPR128RegClass);
   3476       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   3477           .addReg(SrcReg)
   3478           .addReg(SrcReg, getKillRegState(KillSrc));
   3479     } else {
   3480       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   3481           .addReg(SrcReg, getKillRegState(KillSrc));
   3482     }
   3483     return;
   3484   }
   3485 
   3486   if (AArch64::FPR16RegClass.contains(DestReg) &&
   3487       AArch64::FPR16RegClass.contains(SrcReg)) {
   3488     if (Subtarget.hasNEON()) {
   3489       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   3490                                        &AArch64::FPR128RegClass);
   3491       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   3492                                       &AArch64::FPR128RegClass);
   3493       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   3494           .addReg(SrcReg)
   3495           .addReg(SrcReg, getKillRegState(KillSrc));
   3496     } else {
   3497       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
   3498                                        &AArch64::FPR32RegClass);
   3499       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
   3500                                       &AArch64::FPR32RegClass);
   3501       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   3502           .addReg(SrcReg, getKillRegState(KillSrc));
   3503     }
   3504     return;
   3505   }
   3506 
   3507   if (AArch64::FPR8RegClass.contains(DestReg) &&
   3508       AArch64::FPR8RegClass.contains(SrcReg)) {
   3509     if (Subtarget.hasNEON()) {
   3510       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   3511                                        &AArch64::FPR128RegClass);
   3512       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   3513                                       &AArch64::FPR128RegClass);
   3514       BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
   3515           .addReg(SrcReg)
   3516           .addReg(SrcReg, getKillRegState(KillSrc));
   3517     } else {
   3518       DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
   3519                                        &AArch64::FPR32RegClass);
   3520       SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
   3521                                       &AArch64::FPR32RegClass);
   3522       BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
   3523           .addReg(SrcReg, getKillRegState(KillSrc));
   3524     }
   3525     return;
   3526   }
   3527 
   3528   // Copies between GPR64 and FPR64.
   3529   if (AArch64::FPR64RegClass.contains(DestReg) &&
   3530       AArch64::GPR64RegClass.contains(SrcReg)) {
   3531     BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
   3532         .addReg(SrcReg, getKillRegState(KillSrc));
   3533     return;
   3534   }
   3535   if (AArch64::GPR64RegClass.contains(DestReg) &&
   3536       AArch64::FPR64RegClass.contains(SrcReg)) {
   3537     BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
   3538         .addReg(SrcReg, getKillRegState(KillSrc));
   3539     return;
   3540   }
   3541   // Copies between GPR32 and FPR32.
   3542   if (AArch64::FPR32RegClass.contains(DestReg) &&
   3543       AArch64::GPR32RegClass.contains(SrcReg)) {
   3544     BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
   3545         .addReg(SrcReg, getKillRegState(KillSrc));
   3546     return;
   3547   }
   3548   if (AArch64::GPR32RegClass.contains(DestReg) &&
   3549       AArch64::FPR32RegClass.contains(SrcReg)) {
   3550     BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
   3551         .addReg(SrcReg, getKillRegState(KillSrc));
   3552     return;
   3553   }
   3554 
   3555   if (DestReg == AArch64::NZCV) {
   3556     assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
   3557     BuildMI(MBB, I, DL, get(AArch64::MSR))
   3558         .addImm(AArch64SysReg::NZCV)
   3559         .addReg(SrcReg, getKillRegState(KillSrc))
   3560         .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
   3561     return;
   3562   }
   3563 
   3564   if (SrcReg == AArch64::NZCV) {
   3565     assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
   3566     BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
   3567         .addImm(AArch64SysReg::NZCV)
   3568         .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
   3569     return;
   3570   }
   3571 
   3572   llvm_unreachable("unimplemented reg-to-reg copy");
   3573 }
   3574 
   3575 static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
   3576                                     MachineBasicBlock &MBB,
   3577                                     MachineBasicBlock::iterator InsertBefore,
   3578                                     const MCInstrDesc &MCID,
   3579                                     Register SrcReg, bool IsKill,
   3580                                     unsigned SubIdx0, unsigned SubIdx1, int FI,
   3581                                     MachineMemOperand *MMO) {
   3582   Register SrcReg0 = SrcReg;
   3583   Register SrcReg1 = SrcReg;
   3584   if (Register::isPhysicalRegister(SrcReg)) {
   3585     SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
   3586     SubIdx0 = 0;
   3587     SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
   3588     SubIdx1 = 0;
   3589   }
   3590   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
   3591       .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
   3592       .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
   3593       .addFrameIndex(FI)
   3594       .addImm(0)
   3595       .addMemOperand(MMO);
   3596 }
   3597 
   3598 void AArch64InstrInfo::storeRegToStackSlot(
   3599     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg,
   3600     bool isKill, int FI, const TargetRegisterClass *RC,
   3601     const TargetRegisterInfo *TRI) const {
   3602   MachineFunction &MF = *MBB.getParent();
   3603   MachineFrameInfo &MFI = MF.getFrameInfo();
   3604 
   3605   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   3606   MachineMemOperand *MMO =
   3607       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
   3608                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
   3609   unsigned Opc = 0;
   3610   bool Offset = true;
   3611   unsigned StackID = TargetStackID::Default;
   3612   switch (TRI->getSpillSize(*RC)) {
   3613   case 1:
   3614     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   3615       Opc = AArch64::STRBui;
   3616     break;
   3617   case 2:
   3618     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   3619       Opc = AArch64::STRHui;
   3620     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
   3621       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
   3622       Opc = AArch64::STR_PXI;
   3623       StackID = TargetStackID::ScalableVector;
   3624     }
   3625     break;
   3626   case 4:
   3627     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   3628       Opc = AArch64::STRWui;
   3629       if (Register::isVirtualRegister(SrcReg))
   3630         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
   3631       else
   3632         assert(SrcReg != AArch64::WSP);
   3633     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   3634       Opc = AArch64::STRSui;
   3635     break;
   3636   case 8:
   3637     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   3638       Opc = AArch64::STRXui;
   3639       if (Register::isVirtualRegister(SrcReg))
   3640         MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   3641       else
   3642         assert(SrcReg != AArch64::SP);
   3643     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
   3644       Opc = AArch64::STRDui;
   3645     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
   3646       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
   3647                               get(AArch64::STPWi), SrcReg, isKill,
   3648                               AArch64::sube32, AArch64::subo32, FI, MMO);
   3649       return;
   3650     }
   3651     break;
   3652   case 16:
   3653     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   3654       Opc = AArch64::STRQui;
   3655     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   3656       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   3657       Opc = AArch64::ST1Twov1d;
   3658       Offset = false;
   3659     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
   3660       storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
   3661                               get(AArch64::STPXi), SrcReg, isKill,
   3662                               AArch64::sube64, AArch64::subo64, FI, MMO);
   3663       return;
   3664     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
   3665       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
   3666       Opc = AArch64::STR_ZXI;
   3667       StackID = TargetStackID::ScalableVector;
   3668     }
   3669     break;
   3670   case 24:
   3671     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   3672       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   3673       Opc = AArch64::ST1Threev1d;
   3674       Offset = false;
   3675     }
   3676     break;
   3677   case 32:
   3678     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   3679       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   3680       Opc = AArch64::ST1Fourv1d;
   3681       Offset = false;
   3682     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   3683       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   3684       Opc = AArch64::ST1Twov2d;
   3685       Offset = false;
   3686     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
   3687       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
   3688       Opc = AArch64::STR_ZZXI;
   3689       StackID = TargetStackID::ScalableVector;
   3690     }
   3691     break;
   3692   case 48:
   3693     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   3694       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   3695       Opc = AArch64::ST1Threev2d;
   3696       Offset = false;
   3697     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
   3698       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
   3699       Opc = AArch64::STR_ZZZXI;
   3700       StackID = TargetStackID::ScalableVector;
   3701     }
   3702     break;
   3703   case 64:
   3704     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   3705       assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
   3706       Opc = AArch64::ST1Fourv2d;
   3707       Offset = false;
   3708     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
   3709       assert(Subtarget.hasSVE() && "Unexpected register store without SVE");
   3710       Opc = AArch64::STR_ZZZZXI;
   3711       StackID = TargetStackID::ScalableVector;
   3712     }
   3713     break;
   3714   }
   3715   assert(Opc && "Unknown register class");
   3716   MFI.setStackID(FI, StackID);
   3717 
   3718   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
   3719                                      .addReg(SrcReg, getKillRegState(isKill))
   3720                                      .addFrameIndex(FI);
   3721 
   3722   if (Offset)
   3723     MI.addImm(0);
   3724   MI.addMemOperand(MMO);
   3725 }
   3726 
   3727 static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
   3728                                      MachineBasicBlock &MBB,
   3729                                      MachineBasicBlock::iterator InsertBefore,
   3730                                      const MCInstrDesc &MCID,
   3731                                      Register DestReg, unsigned SubIdx0,
   3732                                      unsigned SubIdx1, int FI,
   3733                                      MachineMemOperand *MMO) {
   3734   Register DestReg0 = DestReg;
   3735   Register DestReg1 = DestReg;
   3736   bool IsUndef = true;
   3737   if (Register::isPhysicalRegister(DestReg)) {
   3738     DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
   3739     SubIdx0 = 0;
   3740     DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
   3741     SubIdx1 = 0;
   3742     IsUndef = false;
   3743   }
   3744   BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
   3745       .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
   3746       .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
   3747       .addFrameIndex(FI)
   3748       .addImm(0)
   3749       .addMemOperand(MMO);
   3750 }
   3751 
   3752 void AArch64InstrInfo::loadRegFromStackSlot(
   3753     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg,
   3754     int FI, const TargetRegisterClass *RC,
   3755     const TargetRegisterInfo *TRI) const {
   3756   MachineFunction &MF = *MBB.getParent();
   3757   MachineFrameInfo &MFI = MF.getFrameInfo();
   3758   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
   3759   MachineMemOperand *MMO =
   3760       MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOLoad,
   3761                               MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
   3762 
   3763   unsigned Opc = 0;
   3764   bool Offset = true;
   3765   unsigned StackID = TargetStackID::Default;
   3766   switch (TRI->getSpillSize(*RC)) {
   3767   case 1:
   3768     if (AArch64::FPR8RegClass.hasSubClassEq(RC))
   3769       Opc = AArch64::LDRBui;
   3770     break;
   3771   case 2:
   3772     if (AArch64::FPR16RegClass.hasSubClassEq(RC))
   3773       Opc = AArch64::LDRHui;
   3774     else if (AArch64::PPRRegClass.hasSubClassEq(RC)) {
   3775       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
   3776       Opc = AArch64::LDR_PXI;
   3777       StackID = TargetStackID::ScalableVector;
   3778     }
   3779     break;
   3780   case 4:
   3781     if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
   3782       Opc = AArch64::LDRWui;
   3783       if (Register::isVirtualRegister(DestReg))
   3784         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
   3785       else
   3786         assert(DestReg != AArch64::WSP);
   3787     } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
   3788       Opc = AArch64::LDRSui;
   3789     break;
   3790   case 8:
   3791     if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
   3792       Opc = AArch64::LDRXui;
   3793       if (Register::isVirtualRegister(DestReg))
   3794         MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
   3795       else
   3796         assert(DestReg != AArch64::SP);
   3797     } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
   3798       Opc = AArch64::LDRDui;
   3799     } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
   3800       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
   3801                                get(AArch64::LDPWi), DestReg, AArch64::sube32,
   3802                                AArch64::subo32, FI, MMO);
   3803       return;
   3804     }
   3805     break;
   3806   case 16:
   3807     if (AArch64::FPR128RegClass.hasSubClassEq(RC))
   3808       Opc = AArch64::LDRQui;
   3809     else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
   3810       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   3811       Opc = AArch64::LD1Twov1d;
   3812       Offset = false;
   3813     } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
   3814       loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
   3815                                get(AArch64::LDPXi), DestReg, AArch64::sube64,
   3816                                AArch64::subo64, FI, MMO);
   3817       return;
   3818     } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
   3819       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
   3820       Opc = AArch64::LDR_ZXI;
   3821       StackID = TargetStackID::ScalableVector;
   3822     }
   3823     break;
   3824   case 24:
   3825     if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
   3826       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   3827       Opc = AArch64::LD1Threev1d;
   3828       Offset = false;
   3829     }
   3830     break;
   3831   case 32:
   3832     if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
   3833       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   3834       Opc = AArch64::LD1Fourv1d;
   3835       Offset = false;
   3836     } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
   3837       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   3838       Opc = AArch64::LD1Twov2d;
   3839       Offset = false;
   3840     } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
   3841       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
   3842       Opc = AArch64::LDR_ZZXI;
   3843       StackID = TargetStackID::ScalableVector;
   3844     }
   3845     break;
   3846   case 48:
   3847     if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
   3848       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   3849       Opc = AArch64::LD1Threev2d;
   3850       Offset = false;
   3851     } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
   3852       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
   3853       Opc = AArch64::LDR_ZZZXI;
   3854       StackID = TargetStackID::ScalableVector;
   3855     }
   3856     break;
   3857   case 64:
   3858     if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
   3859       assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
   3860       Opc = AArch64::LD1Fourv2d;
   3861       Offset = false;
   3862     } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
   3863       assert(Subtarget.hasSVE() && "Unexpected register load without SVE");
   3864       Opc = AArch64::LDR_ZZZZXI;
   3865       StackID = TargetStackID::ScalableVector;
   3866     }
   3867     break;
   3868   }
   3869 
   3870   assert(Opc && "Unknown register class");
   3871   MFI.setStackID(FI, StackID);
   3872 
   3873   const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
   3874                                      .addReg(DestReg, getDefRegState(true))
   3875                                      .addFrameIndex(FI);
   3876   if (Offset)
   3877     MI.addImm(0);
   3878   MI.addMemOperand(MMO);
   3879 }
   3880 
   3881 bool llvm::isNZCVTouchedInInstructionRange(const MachineInstr &DefMI,
   3882                                            const MachineInstr &UseMI,
   3883                                            const TargetRegisterInfo *TRI) {
   3884   return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
   3885                                          UseMI.getIterator()),
   3886                 [TRI](const MachineInstr &I) {
   3887                   return I.modifiesRegister(AArch64::NZCV, TRI) ||
   3888                          I.readsRegister(AArch64::NZCV, TRI);
   3889                 });
   3890 }
   3891 
   3892 void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
   3893     const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
   3894   // The smallest scalable element supported by scaled SVE addressing
   3895   // modes are predicates, which are 2 scalable bytes in size. So the scalable
   3896   // byte offset must always be a multiple of 2.
   3897   assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
   3898 
   3899   // VGSized offsets are divided by '2', because the VG register is the
   3900   // the number of 64bit granules as opposed to 128bit vector chunks,
   3901   // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
   3902   // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
   3903   // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
   3904   ByteSized = Offset.getFixed();
   3905   VGSized = Offset.getScalable() / 2;
   3906 }
   3907 
   3908 /// Returns the offset in parts to which this frame offset can be
   3909 /// decomposed for the purpose of describing a frame offset.
   3910 /// For non-scalable offsets this is simply its byte size.
   3911 void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
   3912     const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
   3913     int64_t &NumDataVectors) {
   3914   // The smallest scalable element supported by scaled SVE addressing
   3915   // modes are predicates, which are 2 scalable bytes in size. So the scalable
   3916   // byte offset must always be a multiple of 2.
   3917   assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
   3918 
   3919   NumBytes = Offset.getFixed();
   3920   NumDataVectors = 0;
   3921   NumPredicateVectors = Offset.getScalable() / 2;
   3922   // This method is used to get the offsets to adjust the frame offset.
   3923   // If the function requires ADDPL to be used and needs more than two ADDPL
   3924   // instructions, part of the offset is folded into NumDataVectors so that it
   3925   // uses ADDVL for part of it, reducing the number of ADDPL instructions.
   3926   if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
   3927       NumPredicateVectors > 62) {
   3928     NumDataVectors = NumPredicateVectors / 8;
   3929     NumPredicateVectors -= NumDataVectors * 8;
   3930   }
   3931 }
   3932 
   3933 // Helper function to emit a frame offset adjustment from a given
   3934 // pointer (SrcReg), stored into DestReg. This function is explicit
   3935 // in that it requires the opcode.
   3936 static void emitFrameOffsetAdj(MachineBasicBlock &MBB,
   3937                                MachineBasicBlock::iterator MBBI,
   3938                                const DebugLoc &DL, unsigned DestReg,
   3939                                unsigned SrcReg, int64_t Offset, unsigned Opc,
   3940                                const TargetInstrInfo *TII,
   3941                                MachineInstr::MIFlag Flag, bool NeedsWinCFI,
   3942                                bool *HasWinCFI) {
   3943   int Sign = 1;
   3944   unsigned MaxEncoding, ShiftSize;
   3945   switch (Opc) {
   3946   case AArch64::ADDXri:
   3947   case AArch64::ADDSXri:
   3948   case AArch64::SUBXri:
   3949   case AArch64::SUBSXri:
   3950     MaxEncoding = 0xfff;
   3951     ShiftSize = 12;
   3952     break;
   3953   case AArch64::ADDVL_XXI:
   3954   case AArch64::ADDPL_XXI:
   3955     MaxEncoding = 31;
   3956     ShiftSize = 0;
   3957     if (Offset < 0) {
   3958       MaxEncoding = 32;
   3959       Sign = -1;
   3960       Offset = -Offset;
   3961     }
   3962     break;
   3963   default:
   3964     llvm_unreachable("Unsupported opcode");
   3965   }
   3966 
   3967   // FIXME: If the offset won't fit in 24-bits, compute the offset into a
   3968   // scratch register.  If DestReg is a virtual register, use it as the
   3969   // scratch register; otherwise, create a new virtual register (to be
   3970   // replaced by the scavenger at the end of PEI).  That case can be optimized
   3971   // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
   3972   // register can be loaded with offset%8 and the add/sub can use an extending
   3973   // instruction with LSL#3.
   3974   // Currently the function handles any offsets but generates a poor sequence
   3975   // of code.
   3976   //  assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
   3977 
   3978   const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
   3979   Register TmpReg = DestReg;
   3980   if (TmpReg == AArch64::XZR)
   3981     TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
   3982         &AArch64::GPR64RegClass);
   3983   do {
   3984     uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
   3985     unsigned LocalShiftSize = 0;
   3986     if (ThisVal > MaxEncoding) {
   3987       ThisVal = ThisVal >> ShiftSize;
   3988       LocalShiftSize = ShiftSize;
   3989     }
   3990     assert((ThisVal >> ShiftSize) <= MaxEncoding &&
   3991            "Encoding cannot handle value that big");
   3992 
   3993     Offset -= ThisVal << LocalShiftSize;
   3994     if (Offset == 0)
   3995       TmpReg = DestReg;
   3996     auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
   3997                    .addReg(SrcReg)
   3998                    .addImm(Sign * (int)ThisVal);
   3999     if (ShiftSize)
   4000       MBI = MBI.addImm(
   4001           AArch64_AM::getShifterImm(AArch64_AM::LSL, LocalShiftSize));
   4002     MBI = MBI.setMIFlag(Flag);
   4003 
   4004     if (NeedsWinCFI) {
   4005       assert(Sign == 1 && "SEH directives should always have a positive sign");
   4006       int Imm = (int)(ThisVal << LocalShiftSize);
   4007       if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
   4008           (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
   4009         if (HasWinCFI)
   4010           *HasWinCFI = true;
   4011         if (Imm == 0)
   4012           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
   4013         else
   4014           BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
   4015               .addImm(Imm)
   4016               .setMIFlag(Flag);
   4017         assert(Offset == 0 && "Expected remaining offset to be zero to "
   4018                               "emit a single SEH directive");
   4019       } else if (DestReg == AArch64::SP) {
   4020         if (HasWinCFI)
   4021           *HasWinCFI = true;
   4022         assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
   4023         BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
   4024             .addImm(Imm)
   4025             .setMIFlag(Flag);
   4026       }
   4027       if (HasWinCFI)
   4028         *HasWinCFI = true;
   4029     }
   4030 
   4031     SrcReg = TmpReg;
   4032   } while (Offset);
   4033 }
   4034 
   4035 void llvm::emitFrameOffset(MachineBasicBlock &MBB,
   4036                            MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
   4037                            unsigned DestReg, unsigned SrcReg,
   4038                            StackOffset Offset, const TargetInstrInfo *TII,
   4039                            MachineInstr::MIFlag Flag, bool SetNZCV,
   4040                            bool NeedsWinCFI, bool *HasWinCFI) {
   4041   int64_t Bytes, NumPredicateVectors, NumDataVectors;
   4042   AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
   4043       Offset, Bytes, NumPredicateVectors, NumDataVectors);
   4044 
   4045   // First emit non-scalable frame offsets, or a simple 'mov'.
   4046   if (Bytes || (!Offset && SrcReg != DestReg)) {
   4047     assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
   4048            "SP increment/decrement not 8-byte aligned");
   4049     unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
   4050     if (Bytes < 0) {
   4051       Bytes = -Bytes;
   4052       Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
   4053     }
   4054     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
   4055                        NeedsWinCFI, HasWinCFI);
   4056     SrcReg = DestReg;
   4057   }
   4058 
   4059   assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
   4060          "SetNZCV not supported with SVE vectors");
   4061   assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
   4062          "WinCFI not supported with SVE vectors");
   4063 
   4064   if (NumDataVectors) {
   4065     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
   4066                        AArch64::ADDVL_XXI, TII, Flag, NeedsWinCFI, nullptr);
   4067     SrcReg = DestReg;
   4068   }
   4069 
   4070   if (NumPredicateVectors) {
   4071     assert(DestReg != AArch64::SP && "Unaligned access to SP");
   4072     emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
   4073                        AArch64::ADDPL_XXI, TII, Flag, NeedsWinCFI, nullptr);
   4074   }
   4075 }
   4076 
   4077 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
   4078     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
   4079     MachineBasicBlock::iterator InsertPt, int FrameIndex,
   4080     LiveIntervals *LIS, VirtRegMap *VRM) const {
   4081   // This is a bit of a hack. Consider this instruction:
   4082   //
   4083   //   %0 = COPY %sp; GPR64all:%0
   4084   //
   4085   // We explicitly chose GPR64all for the virtual register so such a copy might
   4086   // be eliminated by RegisterCoalescer. However, that may not be possible, and
   4087   // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
   4088   // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
   4089   //
   4090   // To prevent that, we are going to constrain the %0 register class here.
   4091   //
   4092   // <rdar://problem/11522048>
   4093   //
   4094   if (MI.isFullCopy()) {
   4095     Register DstReg = MI.getOperand(0).getReg();
   4096     Register SrcReg = MI.getOperand(1).getReg();
   4097     if (SrcReg == AArch64::SP && Register::isVirtualRegister(DstReg)) {
   4098       MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
   4099       return nullptr;
   4100     }
   4101     if (DstReg == AArch64::SP && Register::isVirtualRegister(SrcReg)) {
   4102       MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
   4103       return nullptr;
   4104     }
   4105   }
   4106 
   4107   // Handle the case where a copy is being spilled or filled but the source
   4108   // and destination register class don't match.  For example:
   4109   //
   4110   //   %0 = COPY %xzr; GPR64common:%0
   4111   //
   4112   // In this case we can still safely fold away the COPY and generate the
   4113   // following spill code:
   4114   //
   4115   //   STRXui %xzr, %stack.0
   4116   //
   4117   // This also eliminates spilled cross register class COPYs (e.g. between x and
   4118   // d regs) of the same size.  For example:
   4119   //
   4120   //   %0 = COPY %1; GPR64:%0, FPR64:%1
   4121   //
   4122   // will be filled as
   4123   //
   4124   //   LDRDui %0, fi<#0>
   4125   //
   4126   // instead of
   4127   //
   4128   //   LDRXui %Temp, fi<#0>
   4129   //   %0 = FMOV %Temp
   4130   //
   4131   if (MI.isCopy() && Ops.size() == 1 &&
   4132       // Make sure we're only folding the explicit COPY defs/uses.
   4133       (Ops[0] == 0 || Ops[0] == 1)) {
   4134     bool IsSpill = Ops[0] == 0;
   4135     bool IsFill = !IsSpill;
   4136     const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
   4137     const MachineRegisterInfo &MRI = MF.getRegInfo();
   4138     MachineBasicBlock &MBB = *MI.getParent();
   4139     const MachineOperand &DstMO = MI.getOperand(0);
   4140     const MachineOperand &SrcMO = MI.getOperand(1);
   4141     Register DstReg = DstMO.getReg();
   4142     Register SrcReg = SrcMO.getReg();
   4143     // This is slightly expensive to compute for physical regs since
   4144     // getMinimalPhysRegClass is slow.
   4145     auto getRegClass = [&](unsigned Reg) {
   4146       return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
   4147                                               : TRI.getMinimalPhysRegClass(Reg);
   4148     };
   4149 
   4150     if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
   4151       assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
   4152                  TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
   4153              "Mismatched register size in non subreg COPY");
   4154       if (IsSpill)
   4155         storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
   4156                             getRegClass(SrcReg), &TRI);
   4157       else
   4158         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
   4159                              getRegClass(DstReg), &TRI);
   4160       return &*--InsertPt;
   4161     }
   4162 
   4163     // Handle cases like spilling def of:
   4164     //
   4165     //   %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
   4166     //
   4167     // where the physical register source can be widened and stored to the full
   4168     // virtual reg destination stack slot, in this case producing:
   4169     //
   4170     //   STRXui %xzr, %stack.0
   4171     //
   4172     if (IsSpill && DstMO.isUndef() && Register::isPhysicalRegister(SrcReg)) {
   4173       assert(SrcMO.getSubReg() == 0 &&
   4174              "Unexpected subreg on physical register");
   4175       const TargetRegisterClass *SpillRC;
   4176       unsigned SpillSubreg;
   4177       switch (DstMO.getSubReg()) {
   4178       default:
   4179         SpillRC = nullptr;
   4180         break;
   4181       case AArch64::sub_32:
   4182       case AArch64::ssub:
   4183         if (AArch64::GPR32RegClass.contains(SrcReg)) {
   4184           SpillRC = &AArch64::GPR64RegClass;
   4185           SpillSubreg = AArch64::sub_32;
   4186         } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
   4187           SpillRC = &AArch64::FPR64RegClass;
   4188           SpillSubreg = AArch64::ssub;
   4189         } else
   4190           SpillRC = nullptr;
   4191         break;
   4192       case AArch64::dsub:
   4193         if (AArch64::FPR64RegClass.contains(SrcReg)) {
   4194           SpillRC = &AArch64::FPR128RegClass;
   4195           SpillSubreg = AArch64::dsub;
   4196         } else
   4197           SpillRC = nullptr;
   4198         break;
   4199       }
   4200 
   4201       if (SpillRC)
   4202         if (unsigned WidenedSrcReg =
   4203                 TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
   4204           storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
   4205                               FrameIndex, SpillRC, &TRI);
   4206           return &*--InsertPt;
   4207         }
   4208     }
   4209 
   4210     // Handle cases like filling use of:
   4211     //
   4212     //   %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
   4213     //
   4214     // where we can load the full virtual reg source stack slot, into the subreg
   4215     // destination, in this case producing:
   4216     //
   4217     //   LDRWui %0:sub_32<def,read-undef>, %stack.0
   4218     //
   4219     if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
   4220       const TargetRegisterClass *FillRC;
   4221       switch (DstMO.getSubReg()) {
   4222       default:
   4223         FillRC = nullptr;
   4224         break;
   4225       case AArch64::sub_32:
   4226         FillRC = &AArch64::GPR32RegClass;
   4227         break;
   4228       case AArch64::ssub:
   4229         FillRC = &AArch64::FPR32RegClass;
   4230         break;
   4231       case AArch64::dsub:
   4232         FillRC = &AArch64::FPR64RegClass;
   4233         break;
   4234       }
   4235 
   4236       if (FillRC) {
   4237         assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
   4238                    TRI.getRegSizeInBits(*FillRC) &&
   4239                "Mismatched regclass size on folded subreg COPY");
   4240         loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
   4241         MachineInstr &LoadMI = *--InsertPt;
   4242         MachineOperand &LoadDst = LoadMI.getOperand(0);
   4243         assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
   4244         LoadDst.setSubReg(DstMO.getSubReg());
   4245         LoadDst.setIsUndef();
   4246         return &LoadMI;
   4247       }
   4248     }
   4249   }
   4250 
   4251   // Cannot fold.
   4252   return nullptr;
   4253 }
   4254 
   4255 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
   4256                                     StackOffset &SOffset,
   4257                                     bool *OutUseUnscaledOp,
   4258                                     unsigned *OutUnscaledOp,
   4259                                     int64_t *EmittableOffset) {
   4260   // Set output values in case of early exit.
   4261   if (EmittableOffset)
   4262     *EmittableOffset = 0;
   4263   if (OutUseUnscaledOp)
   4264     *OutUseUnscaledOp = false;
   4265   if (OutUnscaledOp)
   4266     *OutUnscaledOp = 0;
   4267 
   4268   // Exit early for structured vector spills/fills as they can't take an
   4269   // immediate offset.
   4270   switch (MI.getOpcode()) {
   4271   default:
   4272     break;
   4273   case AArch64::LD1Twov2d:
   4274   case AArch64::LD1Threev2d:
   4275   case AArch64::LD1Fourv2d:
   4276   case AArch64::LD1Twov1d:
   4277   case AArch64::LD1Threev1d:
   4278   case AArch64::LD1Fourv1d:
   4279   case AArch64::ST1Twov2d:
   4280   case AArch64::ST1Threev2d:
   4281   case AArch64::ST1Fourv2d:
   4282   case AArch64::ST1Twov1d:
   4283   case AArch64::ST1Threev1d:
   4284   case AArch64::ST1Fourv1d:
   4285   case AArch64::IRG:
   4286   case AArch64::IRGstack:
   4287   case AArch64::STGloop:
   4288   case AArch64::STZGloop:
   4289     return AArch64FrameOffsetCannotUpdate;
   4290   }
   4291 
   4292   // Get the min/max offset and the scale.
   4293   TypeSize ScaleValue(0U, false);
   4294   unsigned Width;
   4295   int64_t MinOff, MaxOff;
   4296   if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
   4297                                       MaxOff))
   4298     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
   4299 
   4300   // Construct the complete offset.
   4301   bool IsMulVL = ScaleValue.isScalable();
   4302   unsigned Scale = ScaleValue.getKnownMinSize();
   4303   int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
   4304 
   4305   const MachineOperand &ImmOpnd =
   4306       MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
   4307   Offset += ImmOpnd.getImm() * Scale;
   4308 
   4309   // If the offset doesn't match the scale, we rewrite the instruction to
   4310   // use the unscaled instruction instead. Likewise, if we have a negative
   4311   // offset and there is an unscaled op to use.
   4312   Optional<unsigned> UnscaledOp =
   4313       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
   4314   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
   4315   if (useUnscaledOp &&
   4316       !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
   4317                                       MaxOff))
   4318     llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
   4319 
   4320   Scale = ScaleValue.getKnownMinSize();
   4321   assert(IsMulVL == ScaleValue.isScalable() &&
   4322          "Unscaled opcode has different value for scalable");
   4323 
   4324   int64_t Remainder = Offset % Scale;
   4325   assert(!(Remainder && useUnscaledOp) &&
   4326          "Cannot have remainder when using unscaled op");
   4327 
   4328   assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
   4329   int64_t NewOffset = Offset / Scale;
   4330   if (MinOff <= NewOffset && NewOffset <= MaxOff)
   4331     Offset = Remainder;
   4332   else {
   4333     NewOffset = NewOffset < 0 ? MinOff : MaxOff;
   4334     Offset = Offset - NewOffset * Scale + Remainder;
   4335   }
   4336 
   4337   if (EmittableOffset)
   4338     *EmittableOffset = NewOffset;
   4339   if (OutUseUnscaledOp)
   4340     *OutUseUnscaledOp = useUnscaledOp;
   4341   if (OutUnscaledOp && UnscaledOp)
   4342     *OutUnscaledOp = *UnscaledOp;
   4343 
   4344   if (IsMulVL)
   4345     SOffset = StackOffset::get(SOffset.getFixed(), Offset);
   4346   else
   4347     SOffset = StackOffset::get(Offset, SOffset.getScalable());
   4348   return AArch64FrameOffsetCanUpdate |
   4349          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
   4350 }
   4351 
   4352 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
   4353                                     unsigned FrameReg, StackOffset &Offset,
   4354                                     const AArch64InstrInfo *TII) {
   4355   unsigned Opcode = MI.getOpcode();
   4356   unsigned ImmIdx = FrameRegIdx + 1;
   4357 
   4358   if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
   4359     Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
   4360     emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
   4361                     MI.getOperand(0).getReg(), FrameReg, Offset, TII,
   4362                     MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
   4363     MI.eraseFromParent();
   4364     Offset = StackOffset();
   4365     return true;
   4366   }
   4367 
   4368   int64_t NewOffset;
   4369   unsigned UnscaledOp;
   4370   bool UseUnscaledOp;
   4371   int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
   4372                                          &UnscaledOp, &NewOffset);
   4373   if (Status & AArch64FrameOffsetCanUpdate) {
   4374     if (Status & AArch64FrameOffsetIsLegal)
   4375       // Replace the FrameIndex with FrameReg.
   4376       MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
   4377     if (UseUnscaledOp)
   4378       MI.setDesc(TII->get(UnscaledOp));
   4379 
   4380     MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
   4381     return !Offset;
   4382   }
   4383 
   4384   return false;
   4385 }
   4386 
   4387 MCInst AArch64InstrInfo::getNop() const {
   4388   return MCInstBuilder(AArch64::HINT).addImm(0);
   4389 }
   4390 
   4391 // AArch64 supports MachineCombiner.
   4392 bool AArch64InstrInfo::useMachineCombiner() const { return true; }
   4393 
   4394 // True when Opc sets flag
   4395 static bool isCombineInstrSettingFlag(unsigned Opc) {
   4396   switch (Opc) {
   4397   case AArch64::ADDSWrr:
   4398   case AArch64::ADDSWri:
   4399   case AArch64::ADDSXrr:
   4400   case AArch64::ADDSXri:
   4401   case AArch64::SUBSWrr:
   4402   case AArch64::SUBSXrr:
   4403   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   4404   case AArch64::SUBSWri:
   4405   case AArch64::SUBSXri:
   4406     return true;
   4407   default:
   4408     break;
   4409   }
   4410   return false;
   4411 }
   4412 
   4413 // 32b Opcodes that can be combined with a MUL
   4414 static bool isCombineInstrCandidate32(unsigned Opc) {
   4415   switch (Opc) {
   4416   case AArch64::ADDWrr:
   4417   case AArch64::ADDWri:
   4418   case AArch64::SUBWrr:
   4419   case AArch64::ADDSWrr:
   4420   case AArch64::ADDSWri:
   4421   case AArch64::SUBSWrr:
   4422   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   4423   case AArch64::SUBWri:
   4424   case AArch64::SUBSWri:
   4425     return true;
   4426   default:
   4427     break;
   4428   }
   4429   return false;
   4430 }
   4431 
   4432 // 64b Opcodes that can be combined with a MUL
   4433 static bool isCombineInstrCandidate64(unsigned Opc) {
   4434   switch (Opc) {
   4435   case AArch64::ADDXrr:
   4436   case AArch64::ADDXri:
   4437   case AArch64::SUBXrr:
   4438   case AArch64::ADDSXrr:
   4439   case AArch64::ADDSXri:
   4440   case AArch64::SUBSXrr:
   4441   // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
   4442   case AArch64::SUBXri:
   4443   case AArch64::SUBSXri:
   4444   case AArch64::ADDv8i8:
   4445   case AArch64::ADDv16i8:
   4446   case AArch64::ADDv4i16:
   4447   case AArch64::ADDv8i16:
   4448   case AArch64::ADDv2i32:
   4449   case AArch64::ADDv4i32:
   4450   case AArch64::SUBv8i8:
   4451   case AArch64::SUBv16i8:
   4452   case AArch64::SUBv4i16:
   4453   case AArch64::SUBv8i16:
   4454   case AArch64::SUBv2i32:
   4455   case AArch64::SUBv4i32:
   4456     return true;
   4457   default:
   4458     break;
   4459   }
   4460   return false;
   4461 }
   4462 
   4463 // FP Opcodes that can be combined with a FMUL.
   4464 static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
   4465   switch (Inst.getOpcode()) {
   4466   default:
   4467     break;
   4468   case AArch64::FADDHrr:
   4469   case AArch64::FADDSrr:
   4470   case AArch64::FADDDrr:
   4471   case AArch64::FADDv4f16:
   4472   case AArch64::FADDv8f16:
   4473   case AArch64::FADDv2f32:
   4474   case AArch64::FADDv2f64:
   4475   case AArch64::FADDv4f32:
   4476   case AArch64::FSUBHrr:
   4477   case AArch64::FSUBSrr:
   4478   case AArch64::FSUBDrr:
   4479   case AArch64::FSUBv4f16:
   4480   case AArch64::FSUBv8f16:
   4481   case AArch64::FSUBv2f32:
   4482   case AArch64::FSUBv2f64:
   4483   case AArch64::FSUBv4f32:
   4484     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
   4485     // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
   4486     // the target options or if FADD/FSUB has the contract fast-math flag.
   4487     return Options.UnsafeFPMath ||
   4488            Options.AllowFPOpFusion == FPOpFusion::Fast ||
   4489            Inst.getFlag(MachineInstr::FmContract);
   4490     return true;
   4491   }
   4492   return false;
   4493 }
   4494 
   4495 // Opcodes that can be combined with a MUL
   4496 static bool isCombineInstrCandidate(unsigned Opc) {
   4497   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
   4498 }
   4499 
   4500 //
   4501 // Utility routine that checks if \param MO is defined by an
   4502 // \param CombineOpc instruction in the basic block \param MBB
   4503 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
   4504                        unsigned CombineOpc, unsigned ZeroReg = 0,
   4505                        bool CheckZeroReg = false) {
   4506   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   4507   MachineInstr *MI = nullptr;
   4508 
   4509   if (MO.isReg() && Register::isVirtualRegister(MO.getReg()))
   4510     MI = MRI.getUniqueVRegDef(MO.getReg());
   4511   // And it needs to be in the trace (otherwise, it won't have a depth).
   4512   if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
   4513     return false;
   4514   // Must only used by the user we combine with.
   4515   if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
   4516     return false;
   4517 
   4518   if (CheckZeroReg) {
   4519     assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
   4520            MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
   4521            MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
   4522     // The third input reg must be zero.
   4523     if (MI->getOperand(3).getReg() != ZeroReg)
   4524       return false;
   4525   }
   4526 
   4527   return true;
   4528 }
   4529 
   4530 //
   4531 // Is \param MO defined by an integer multiply and can be combined?
   4532 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
   4533                               unsigned MulOpc, unsigned ZeroReg) {
   4534   return canCombine(MBB, MO, MulOpc, ZeroReg, true);
   4535 }
   4536 
   4537 //
   4538 // Is \param MO defined by a floating-point multiply and can be combined?
   4539 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
   4540                                unsigned MulOpc) {
   4541   return canCombine(MBB, MO, MulOpc);
   4542 }
   4543 
   4544 // TODO: There are many more machine instruction opcodes to match:
   4545 //       1. Other data types (integer, vectors)
   4546 //       2. Other math / logic operations (xor, or)
   4547 //       3. Other forms of the same operation (intrinsics and other variants)
   4548 bool AArch64InstrInfo::isAssociativeAndCommutative(
   4549     const MachineInstr &Inst) const {
   4550   switch (Inst.getOpcode()) {
   4551   case AArch64::FADDDrr:
   4552   case AArch64::FADDSrr:
   4553   case AArch64::FADDv2f32:
   4554   case AArch64::FADDv2f64:
   4555   case AArch64::FADDv4f32:
   4556   case AArch64::FMULDrr:
   4557   case AArch64::FMULSrr:
   4558   case AArch64::FMULX32:
   4559   case AArch64::FMULX64:
   4560   case AArch64::FMULXv2f32:
   4561   case AArch64::FMULXv2f64:
   4562   case AArch64::FMULXv4f32:
   4563   case AArch64::FMULv2f32:
   4564   case AArch64::FMULv2f64:
   4565   case AArch64::FMULv4f32:
   4566     return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
   4567   default:
   4568     return false;
   4569   }
   4570 }
   4571 
   4572 /// Find instructions that can be turned into madd.
   4573 static bool getMaddPatterns(MachineInstr &Root,
   4574                             SmallVectorImpl<MachineCombinerPattern> &Patterns) {
   4575   unsigned Opc = Root.getOpcode();
   4576   MachineBasicBlock &MBB = *Root.getParent();
   4577   bool Found = false;
   4578 
   4579   if (!isCombineInstrCandidate(Opc))
   4580     return false;
   4581   if (isCombineInstrSettingFlag(Opc)) {
   4582     int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
   4583     // When NZCV is live bail out.
   4584     if (Cmp_NZCV == -1)
   4585       return false;
   4586     unsigned NewOpc = convertToNonFlagSettingOpc(Root);
   4587     // When opcode can't change bail out.
   4588     // CHECKME: do we miss any cases for opcode conversion?
   4589     if (NewOpc == Opc)
   4590       return false;
   4591     Opc = NewOpc;
   4592   }
   4593 
   4594   auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
   4595                       MachineCombinerPattern Pattern) {
   4596     if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
   4597       Patterns.push_back(Pattern);
   4598       Found = true;
   4599     }
   4600   };
   4601 
   4602   auto setVFound = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) {
   4603     if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
   4604       Patterns.push_back(Pattern);
   4605       Found = true;
   4606     }
   4607   };
   4608 
   4609   typedef MachineCombinerPattern MCP;
   4610 
   4611   switch (Opc) {
   4612   default:
   4613     break;
   4614   case AArch64::ADDWrr:
   4615     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
   4616            "ADDWrr does not have register operands");
   4617     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
   4618     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
   4619     break;
   4620   case AArch64::ADDXrr:
   4621     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
   4622     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
   4623     break;
   4624   case AArch64::SUBWrr:
   4625     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
   4626     setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
   4627     break;
   4628   case AArch64::SUBXrr:
   4629     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
   4630     setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
   4631     break;
   4632   case AArch64::ADDWri:
   4633     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
   4634     break;
   4635   case AArch64::ADDXri:
   4636     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
   4637     break;
   4638   case AArch64::SUBWri:
   4639     setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
   4640     break;
   4641   case AArch64::SUBXri:
   4642     setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
   4643     break;
   4644   case AArch64::ADDv8i8:
   4645     setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
   4646     setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
   4647     break;
   4648   case AArch64::ADDv16i8:
   4649     setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
   4650     setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
   4651     break;
   4652   case AArch64::ADDv4i16:
   4653     setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
   4654     setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
   4655     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
   4656     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
   4657     break;
   4658   case AArch64::ADDv8i16:
   4659     setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
   4660     setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
   4661     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
   4662     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
   4663     break;
   4664   case AArch64::ADDv2i32:
   4665     setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
   4666     setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
   4667     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
   4668     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
   4669     break;
   4670   case AArch64::ADDv4i32:
   4671     setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
   4672     setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
   4673     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
   4674     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
   4675     break;
   4676   case AArch64::SUBv8i8:
   4677     setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
   4678     setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
   4679     break;
   4680   case AArch64::SUBv16i8:
   4681     setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
   4682     setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
   4683     break;
   4684   case AArch64::SUBv4i16:
   4685     setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
   4686     setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
   4687     setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
   4688     setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
   4689     break;
   4690   case AArch64::SUBv8i16:
   4691     setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
   4692     setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
   4693     setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
   4694     setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
   4695     break;
   4696   case AArch64::SUBv2i32:
   4697     setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
   4698     setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
   4699     setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
   4700     setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
   4701     break;
   4702   case AArch64::SUBv4i32:
   4703     setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
   4704     setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
   4705     setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
   4706     setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
   4707     break;
   4708   }
   4709   return Found;
   4710 }
   4711 /// Floating-Point Support
   4712 
   4713 /// Find instructions that can be turned into madd.
   4714 static bool getFMAPatterns(MachineInstr &Root,
   4715                            SmallVectorImpl<MachineCombinerPattern> &Patterns) {
   4716 
   4717   if (!isCombineInstrCandidateFP(Root))
   4718     return false;
   4719 
   4720   MachineBasicBlock &MBB = *Root.getParent();
   4721   bool Found = false;
   4722 
   4723   auto Match = [&](int Opcode, int Operand,
   4724                    MachineCombinerPattern Pattern) -> bool {
   4725     if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
   4726       Patterns.push_back(Pattern);
   4727       return true;
   4728     }
   4729     return false;
   4730   };
   4731 
   4732   typedef MachineCombinerPattern MCP;
   4733 
   4734   switch (Root.getOpcode()) {
   4735   default:
   4736     assert(false && "Unsupported FP instruction in combiner\n");
   4737     break;
   4738   case AArch64::FADDHrr:
   4739     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
   4740            "FADDHrr does not have register operands");
   4741 
   4742     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
   4743     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
   4744     break;
   4745   case AArch64::FADDSrr:
   4746     assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
   4747            "FADDSrr does not have register operands");
   4748 
   4749     Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
   4750              Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
   4751 
   4752     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
   4753              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
   4754     break;
   4755   case AArch64::FADDDrr:
   4756     Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
   4757              Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
   4758 
   4759     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
   4760              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
   4761     break;
   4762   case AArch64::FADDv4f16:
   4763     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
   4764              Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
   4765 
   4766     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
   4767              Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
   4768     break;
   4769   case AArch64::FADDv8f16:
   4770     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
   4771              Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
   4772 
   4773     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
   4774              Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
   4775     break;
   4776   case AArch64::FADDv2f32:
   4777     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
   4778              Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
   4779 
   4780     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
   4781              Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
   4782     break;
   4783   case AArch64::FADDv2f64:
   4784     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
   4785              Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
   4786 
   4787     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
   4788              Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
   4789     break;
   4790   case AArch64::FADDv4f32:
   4791     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
   4792              Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
   4793 
   4794     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
   4795              Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
   4796     break;
   4797   case AArch64::FSUBHrr:
   4798     Found  = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
   4799     Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
   4800     Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
   4801     break;
   4802   case AArch64::FSUBSrr:
   4803     Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
   4804 
   4805     Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
   4806              Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
   4807 
   4808     Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
   4809     break;
   4810   case AArch64::FSUBDrr:
   4811     Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
   4812 
   4813     Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
   4814              Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
   4815 
   4816     Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
   4817     break;
   4818   case AArch64::FSUBv4f16:
   4819     Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
   4820              Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
   4821 
   4822     Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
   4823              Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
   4824     break;
   4825   case AArch64::FSUBv8f16:
   4826     Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
   4827              Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
   4828 
   4829     Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
   4830              Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
   4831     break;
   4832   case AArch64::FSUBv2f32:
   4833     Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
   4834              Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
   4835 
   4836     Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
   4837              Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
   4838     break;
   4839   case AArch64::FSUBv2f64:
   4840     Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
   4841              Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
   4842 
   4843     Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
   4844              Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
   4845     break;
   4846   case AArch64::FSUBv4f32:
   4847     Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
   4848              Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
   4849 
   4850     Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
   4851              Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
   4852     break;
   4853   }
   4854   return Found;
   4855 }
   4856 
   4857 /// Return true when a code sequence can improve throughput. It
   4858 /// should be called only for instructions in loops.
   4859 /// \param Pattern - combiner pattern
   4860 bool AArch64InstrInfo::isThroughputPattern(
   4861     MachineCombinerPattern Pattern) const {
   4862   switch (Pattern) {
   4863   default:
   4864     break;
   4865   case MachineCombinerPattern::FMULADDH_OP1:
   4866   case MachineCombinerPattern::FMULADDH_OP2:
   4867   case MachineCombinerPattern::FMULSUBH_OP1:
   4868   case MachineCombinerPattern::FMULSUBH_OP2:
   4869   case MachineCombinerPattern::FMULADDS_OP1:
   4870   case MachineCombinerPattern::FMULADDS_OP2:
   4871   case MachineCombinerPattern::FMULSUBS_OP1:
   4872   case MachineCombinerPattern::FMULSUBS_OP2:
   4873   case MachineCombinerPattern::FMULADDD_OP1:
   4874   case MachineCombinerPattern::FMULADDD_OP2:
   4875   case MachineCombinerPattern::FMULSUBD_OP1:
   4876   case MachineCombinerPattern::FMULSUBD_OP2:
   4877   case MachineCombinerPattern::FNMULSUBH_OP1:
   4878   case MachineCombinerPattern::FNMULSUBS_OP1:
   4879   case MachineCombinerPattern::FNMULSUBD_OP1:
   4880   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
   4881   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
   4882   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
   4883   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
   4884   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
   4885   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
   4886   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
   4887   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
   4888   case MachineCombinerPattern::FMLAv4f16_OP2:
   4889   case MachineCombinerPattern::FMLAv4f16_OP1:
   4890   case MachineCombinerPattern::FMLAv8f16_OP1:
   4891   case MachineCombinerPattern::FMLAv8f16_OP2:
   4892   case MachineCombinerPattern::FMLAv2f32_OP2:
   4893   case MachineCombinerPattern::FMLAv2f32_OP1:
   4894   case MachineCombinerPattern::FMLAv2f64_OP1:
   4895   case MachineCombinerPattern::FMLAv2f64_OP2:
   4896   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
   4897   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
   4898   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
   4899   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
   4900   case MachineCombinerPattern::FMLAv4f32_OP1:
   4901   case MachineCombinerPattern::FMLAv4f32_OP2:
   4902   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
   4903   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
   4904   case MachineCombinerPattern::FMLSv4i16_indexed_OP1:
   4905   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
   4906   case MachineCombinerPattern::FMLSv8i16_indexed_OP1:
   4907   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
   4908   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
   4909   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
   4910   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
   4911   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
   4912   case MachineCombinerPattern::FMLSv4f16_OP1:
   4913   case MachineCombinerPattern::FMLSv4f16_OP2:
   4914   case MachineCombinerPattern::FMLSv8f16_OP1:
   4915   case MachineCombinerPattern::FMLSv8f16_OP2:
   4916   case MachineCombinerPattern::FMLSv2f32_OP2:
   4917   case MachineCombinerPattern::FMLSv2f64_OP2:
   4918   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
   4919   case MachineCombinerPattern::FMLSv4f32_OP2:
   4920   case MachineCombinerPattern::MULADDv8i8_OP1:
   4921   case MachineCombinerPattern::MULADDv8i8_OP2:
   4922   case MachineCombinerPattern::MULADDv16i8_OP1:
   4923   case MachineCombinerPattern::MULADDv16i8_OP2:
   4924   case MachineCombinerPattern::MULADDv4i16_OP1:
   4925   case MachineCombinerPattern::MULADDv4i16_OP2:
   4926   case MachineCombinerPattern::MULADDv8i16_OP1:
   4927   case MachineCombinerPattern::MULADDv8i16_OP2:
   4928   case MachineCombinerPattern::MULADDv2i32_OP1:
   4929   case MachineCombinerPattern::MULADDv2i32_OP2:
   4930   case MachineCombinerPattern::MULADDv4i32_OP1:
   4931   case MachineCombinerPattern::MULADDv4i32_OP2:
   4932   case MachineCombinerPattern::MULSUBv8i8_OP1:
   4933   case MachineCombinerPattern::MULSUBv8i8_OP2:
   4934   case MachineCombinerPattern::MULSUBv16i8_OP1:
   4935   case MachineCombinerPattern::MULSUBv16i8_OP2:
   4936   case MachineCombinerPattern::MULSUBv4i16_OP1:
   4937   case MachineCombinerPattern::MULSUBv4i16_OP2:
   4938   case MachineCombinerPattern::MULSUBv8i16_OP1:
   4939   case MachineCombinerPattern::MULSUBv8i16_OP2:
   4940   case MachineCombinerPattern::MULSUBv2i32_OP1:
   4941   case MachineCombinerPattern::MULSUBv2i32_OP2:
   4942   case MachineCombinerPattern::MULSUBv4i32_OP1:
   4943   case MachineCombinerPattern::MULSUBv4i32_OP2:
   4944   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
   4945   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
   4946   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
   4947   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
   4948   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
   4949   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
   4950   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
   4951   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
   4952   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
   4953   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
   4954   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
   4955   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
   4956   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
   4957   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
   4958   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
   4959   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
   4960     return true;
   4961   } // end switch (Pattern)
   4962   return false;
   4963 }
   4964 /// Return true when there is potentially a faster code sequence for an
   4965 /// instruction chain ending in \p Root. All potential patterns are listed in
   4966 /// the \p Pattern vector. Pattern should be sorted in priority order since the
   4967 /// pattern evaluator stops checking as soon as it finds a faster sequence.
   4968 
   4969 bool AArch64InstrInfo::getMachineCombinerPatterns(
   4970     MachineInstr &Root, SmallVectorImpl<MachineCombinerPattern> &Patterns,
   4971     bool DoRegPressureReduce) const {
   4972   // Integer patterns
   4973   if (getMaddPatterns(Root, Patterns))
   4974     return true;
   4975   // Floating point patterns
   4976   if (getFMAPatterns(Root, Patterns))
   4977     return true;
   4978 
   4979   return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
   4980                                                      DoRegPressureReduce);
   4981 }
   4982 
/// Operand layout used by genFusedMultiply() when it builds the fused
/// instruction.
enum class FMAInstKind {
  /// Both multiply sources first, then the addend.
  Default,
  /// Addend first, then both multiply sources, then the immediate copied from
  /// operand 3 of the multiply.
  Indexed,
  /// Addend first, then both multiply sources.
  Accumulator
};
   4984 /// genFusedMultiply - Generate fused multiply instructions.
   4985 /// This function supports both integer and floating point instructions.
   4986 /// A typical example:
   4987 ///  F|MUL I=A,B,0
   4988 ///  F|ADD R,I,C
   4989 ///  ==> F|MADD R,A,B,C
   4990 /// \param MF Containing MachineFunction
   4991 /// \param MRI Register information
   4992 /// \param TII Target information
   4993 /// \param Root is the F|ADD instruction
   4994 /// \param [out] InsInstrs is a vector of machine instructions and will
   4995 /// contain the generated madd instruction
   4996 /// \param IdxMulOpd is index of operand in Root that is the result of
   4997 /// the F|MUL. In the example above IdxMulOpd is 1.
   4998 /// \param MaddOpc the opcode fo the f|madd instruction
   4999 /// \param RC Register class of operands
   5000 /// \param kind of fma instruction (addressing mode) to be generated
   5001 /// \param ReplacedAddend is the result register from the instruction
   5002 /// replacing the non-combined operand, if any.
   5003 static MachineInstr *
   5004 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
   5005                  const TargetInstrInfo *TII, MachineInstr &Root,
   5006                  SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
   5007                  unsigned MaddOpc, const TargetRegisterClass *RC,
   5008                  FMAInstKind kind = FMAInstKind::Default,
   5009                  const Register *ReplacedAddend = nullptr) {
   5010   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   5011 
   5012   unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
   5013   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   5014   Register ResultReg = Root.getOperand(0).getReg();
   5015   Register SrcReg0 = MUL->getOperand(1).getReg();
   5016   bool Src0IsKill = MUL->getOperand(1).isKill();
   5017   Register SrcReg1 = MUL->getOperand(2).getReg();
   5018   bool Src1IsKill = MUL->getOperand(2).isKill();
   5019 
   5020   unsigned SrcReg2;
   5021   bool Src2IsKill;
   5022   if (ReplacedAddend) {
   5023     // If we just generated a new addend, we must be it's only use.
   5024     SrcReg2 = *ReplacedAddend;
   5025     Src2IsKill = true;
   5026   } else {
   5027     SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
   5028     Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
   5029   }
   5030 
   5031   if (Register::isVirtualRegister(ResultReg))
   5032     MRI.constrainRegClass(ResultReg, RC);
   5033   if (Register::isVirtualRegister(SrcReg0))
   5034     MRI.constrainRegClass(SrcReg0, RC);
   5035   if (Register::isVirtualRegister(SrcReg1))
   5036     MRI.constrainRegClass(SrcReg1, RC);
   5037   if (Register::isVirtualRegister(SrcReg2))
   5038     MRI.constrainRegClass(SrcReg2, RC);
   5039 
   5040   MachineInstrBuilder MIB;
   5041   if (kind == FMAInstKind::Default)
   5042     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   5043               .addReg(SrcReg0, getKillRegState(Src0IsKill))
   5044               .addReg(SrcReg1, getKillRegState(Src1IsKill))
   5045               .addReg(SrcReg2, getKillRegState(Src2IsKill));
   5046   else if (kind == FMAInstKind::Indexed)
   5047     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   5048               .addReg(SrcReg2, getKillRegState(Src2IsKill))
   5049               .addReg(SrcReg0, getKillRegState(Src0IsKill))
   5050               .addReg(SrcReg1, getKillRegState(Src1IsKill))
   5051               .addImm(MUL->getOperand(3).getImm());
   5052   else if (kind == FMAInstKind::Accumulator)
   5053     MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   5054               .addReg(SrcReg2, getKillRegState(Src2IsKill))
   5055               .addReg(SrcReg0, getKillRegState(Src0IsKill))
   5056               .addReg(SrcReg1, getKillRegState(Src1IsKill));
   5057   else
   5058     assert(false && "Invalid FMA instruction kind \n");
   5059   // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
   5060   InsInstrs.push_back(MIB);
   5061   return MUL;
   5062 }
   5063 
   5064 /// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
   5065 /// instructions.
   5066 ///
   5067 /// \see genFusedMultiply
   5068 static MachineInstr *genFusedMultiplyAcc(
   5069     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
   5070     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
   5071     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
   5072   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
   5073                           FMAInstKind::Accumulator);
   5074 }
   5075 
   5076 /// genNeg - Helper to generate an intermediate negation of the second operand
   5077 /// of Root
   5078 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
   5079                        const TargetInstrInfo *TII, MachineInstr &Root,
   5080                        SmallVectorImpl<MachineInstr *> &InsInstrs,
   5081                        DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
   5082                        unsigned MnegOpc, const TargetRegisterClass *RC) {
   5083   Register NewVR = MRI.createVirtualRegister(RC);
   5084   MachineInstrBuilder MIB =
   5085       BuildMI(MF, Root.getDebugLoc(), TII->get(MnegOpc), NewVR)
   5086           .add(Root.getOperand(2));
   5087   InsInstrs.push_back(MIB);
   5088 
   5089   assert(InstrIdxForVirtReg.empty());
   5090   InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5091 
   5092   return NewVR;
   5093 }
   5094 
   5095 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
   5096 /// instructions with an additional negation of the accumulator
   5097 static MachineInstr *genFusedMultiplyAccNeg(
   5098     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
   5099     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
   5100     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
   5101     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
   5102   assert(IdxMulOpd == 1);
   5103 
   5104   Register NewVR =
   5105       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
   5106   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
   5107                           FMAInstKind::Accumulator, &NewVR);
   5108 }
   5109 
   5110 /// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
   5111 /// instructions.
   5112 ///
   5113 /// \see genFusedMultiply
   5114 static MachineInstr *genFusedMultiplyIdx(
   5115     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
   5116     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
   5117     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
   5118   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
   5119                           FMAInstKind::Indexed);
   5120 }
   5121 
   5122 /// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
   5123 /// instructions with an additional negation of the accumulator
   5124 static MachineInstr *genFusedMultiplyIdxNeg(
   5125     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
   5126     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
   5127     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
   5128     unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
   5129   assert(IdxMulOpd == 1);
   5130 
   5131   Register NewVR =
   5132       genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
   5133 
   5134   return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
   5135                           FMAInstKind::Indexed, &NewVR);
   5136 }
   5137 
   5138 /// genMaddR - Generate madd instruction and combine mul and add using
   5139 /// an extra virtual register
   5140 /// Example - an ADD intermediate needs to be stored in a register:
   5141 ///   MUL I=A,B,0
   5142 ///   ADD R,I,Imm
   5143 ///   ==> ORR  V, ZR, Imm
   5144 ///   ==> MADD R,A,B,V
   5145 /// \param MF Containing MachineFunction
   5146 /// \param MRI Register information
   5147 /// \param TII Target information
   5148 /// \param Root is the ADD instruction
   5149 /// \param [out] InsInstrs is a vector of machine instructions and will
   5150 /// contain the generated madd instruction
   5151 /// \param IdxMulOpd is index of operand in Root that is the result of
   5152 /// the MUL. In the example above IdxMulOpd is 1.
   5153 /// \param MaddOpc the opcode fo the madd instruction
   5154 /// \param VR is a virtual register that holds the value of an ADD operand
   5155 /// (V in the example above).
   5156 /// \param RC Register class of operands
   5157 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
   5158                               const TargetInstrInfo *TII, MachineInstr &Root,
   5159                               SmallVectorImpl<MachineInstr *> &InsInstrs,
   5160                               unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
   5161                               const TargetRegisterClass *RC) {
   5162   assert(IdxMulOpd == 1 || IdxMulOpd == 2);
   5163 
   5164   MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
   5165   Register ResultReg = Root.getOperand(0).getReg();
   5166   Register SrcReg0 = MUL->getOperand(1).getReg();
   5167   bool Src0IsKill = MUL->getOperand(1).isKill();
   5168   Register SrcReg1 = MUL->getOperand(2).getReg();
   5169   bool Src1IsKill = MUL->getOperand(2).isKill();
   5170 
   5171   if (Register::isVirtualRegister(ResultReg))
   5172     MRI.constrainRegClass(ResultReg, RC);
   5173   if (Register::isVirtualRegister(SrcReg0))
   5174     MRI.constrainRegClass(SrcReg0, RC);
   5175   if (Register::isVirtualRegister(SrcReg1))
   5176     MRI.constrainRegClass(SrcReg1, RC);
   5177   if (Register::isVirtualRegister(VR))
   5178     MRI.constrainRegClass(VR, RC);
   5179 
   5180   MachineInstrBuilder MIB =
   5181       BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
   5182           .addReg(SrcReg0, getKillRegState(Src0IsKill))
   5183           .addReg(SrcReg1, getKillRegState(Src1IsKill))
   5184           .addReg(VR);
   5185   // Insert the MADD
   5186   InsInstrs.push_back(MIB);
   5187   return MUL;
   5188 }
   5189 
   5190 /// When getMachineCombinerPatterns() finds potential patterns,
   5191 /// this function generates the instructions that could replace the
   5192 /// original code sequence
   5193 void AArch64InstrInfo::genAlternativeCodeSequence(
   5194     MachineInstr &Root, MachineCombinerPattern Pattern,
   5195     SmallVectorImpl<MachineInstr *> &InsInstrs,
   5196     SmallVectorImpl<MachineInstr *> &DelInstrs,
   5197     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
   5198   MachineBasicBlock &MBB = *Root.getParent();
   5199   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
   5200   MachineFunction &MF = *MBB.getParent();
   5201   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   5202 
   5203   MachineInstr *MUL = nullptr;
   5204   const TargetRegisterClass *RC;
   5205   unsigned Opc;
   5206   switch (Pattern) {
   5207   default:
   5208     // Reassociate instructions.
   5209     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
   5210                                                 DelInstrs, InstrIdxForVirtReg);
   5211     return;
   5212   case MachineCombinerPattern::MULADDW_OP1:
   5213   case MachineCombinerPattern::MULADDX_OP1:
   5214     // MUL I=A,B,0
   5215     // ADD R,I,C
   5216     // ==> MADD R,A,B,C
   5217     // --- Create(MADD);
   5218     if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
   5219       Opc = AArch64::MADDWrrr;
   5220       RC = &AArch64::GPR32RegClass;
   5221     } else {
   5222       Opc = AArch64::MADDXrrr;
   5223       RC = &AArch64::GPR64RegClass;
   5224     }
   5225     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5226     break;
   5227   case MachineCombinerPattern::MULADDW_OP2:
   5228   case MachineCombinerPattern::MULADDX_OP2:
   5229     // MUL I=A,B,0
   5230     // ADD R,C,I
   5231     // ==> MADD R,A,B,C
   5232     // --- Create(MADD);
   5233     if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
   5234       Opc = AArch64::MADDWrrr;
   5235       RC = &AArch64::GPR32RegClass;
   5236     } else {
   5237       Opc = AArch64::MADDXrrr;
   5238       RC = &AArch64::GPR64RegClass;
   5239     }
   5240     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5241     break;
   5242   case MachineCombinerPattern::MULADDWI_OP1:
   5243   case MachineCombinerPattern::MULADDXI_OP1: {
   5244     // MUL I=A,B,0
   5245     // ADD R,I,Imm
   5246     // ==> ORR  V, ZR, Imm
   5247     // ==> MADD R,A,B,V
   5248     // --- Create(MADD);
   5249     const TargetRegisterClass *OrrRC;
   5250     unsigned BitSize, OrrOpc, ZeroReg;
   5251     if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
   5252       OrrOpc = AArch64::ORRWri;
   5253       OrrRC = &AArch64::GPR32spRegClass;
   5254       BitSize = 32;
   5255       ZeroReg = AArch64::WZR;
   5256       Opc = AArch64::MADDWrrr;
   5257       RC = &AArch64::GPR32RegClass;
   5258     } else {
   5259       OrrOpc = AArch64::ORRXri;
   5260       OrrRC = &AArch64::GPR64spRegClass;
   5261       BitSize = 64;
   5262       ZeroReg = AArch64::XZR;
   5263       Opc = AArch64::MADDXrrr;
   5264       RC = &AArch64::GPR64RegClass;
   5265     }
   5266     Register NewVR = MRI.createVirtualRegister(OrrRC);
   5267     uint64_t Imm = Root.getOperand(2).getImm();
   5268 
   5269     if (Root.getOperand(3).isImm()) {
   5270       unsigned Val = Root.getOperand(3).getImm();
   5271       Imm = Imm << Val;
   5272     }
   5273     uint64_t UImm = SignExtend64(Imm, BitSize);
   5274     uint64_t Encoding;
   5275     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   5276       MachineInstrBuilder MIB1 =
   5277           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   5278               .addReg(ZeroReg)
   5279               .addImm(Encoding);
   5280       InsInstrs.push_back(MIB1);
   5281       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5282       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   5283     }
   5284     break;
   5285   }
   5286   case MachineCombinerPattern::MULSUBW_OP1:
   5287   case MachineCombinerPattern::MULSUBX_OP1: {
   5288     // MUL I=A,B,0
   5289     // SUB R,I, C
   5290     // ==> SUB  V, 0, C
   5291     // ==> MADD R,A,B,V // = -C + A*B
   5292     // --- Create(MADD);
   5293     const TargetRegisterClass *SubRC;
   5294     unsigned SubOpc, ZeroReg;
   5295     if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
   5296       SubOpc = AArch64::SUBWrr;
   5297       SubRC = &AArch64::GPR32spRegClass;
   5298       ZeroReg = AArch64::WZR;
   5299       Opc = AArch64::MADDWrrr;
   5300       RC = &AArch64::GPR32RegClass;
   5301     } else {
   5302       SubOpc = AArch64::SUBXrr;
   5303       SubRC = &AArch64::GPR64spRegClass;
   5304       ZeroReg = AArch64::XZR;
   5305       Opc = AArch64::MADDXrrr;
   5306       RC = &AArch64::GPR64RegClass;
   5307     }
   5308     Register NewVR = MRI.createVirtualRegister(SubRC);
   5309     // SUB NewVR, 0, C
   5310     MachineInstrBuilder MIB1 =
   5311         BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
   5312             .addReg(ZeroReg)
   5313             .add(Root.getOperand(2));
   5314     InsInstrs.push_back(MIB1);
   5315     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5316     MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   5317     break;
   5318   }
   5319   case MachineCombinerPattern::MULSUBW_OP2:
   5320   case MachineCombinerPattern::MULSUBX_OP2:
   5321     // MUL I=A,B,0
   5322     // SUB R,C,I
   5323     // ==> MSUB R,A,B,C (computes C - A*B)
   5324     // --- Create(MSUB);
   5325     if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
   5326       Opc = AArch64::MSUBWrrr;
   5327       RC = &AArch64::GPR32RegClass;
   5328     } else {
   5329       Opc = AArch64::MSUBXrrr;
   5330       RC = &AArch64::GPR64RegClass;
   5331     }
   5332     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5333     break;
   5334   case MachineCombinerPattern::MULSUBWI_OP1:
   5335   case MachineCombinerPattern::MULSUBXI_OP1: {
   5336     // MUL I=A,B,0
   5337     // SUB R,I, Imm
   5338     // ==> ORR  V, ZR, -Imm
   5339     // ==> MADD R,A,B,V // = -Imm + A*B
   5340     // --- Create(MADD);
   5341     const TargetRegisterClass *OrrRC;
   5342     unsigned BitSize, OrrOpc, ZeroReg;
   5343     if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
   5344       OrrOpc = AArch64::ORRWri;
   5345       OrrRC = &AArch64::GPR32spRegClass;
   5346       BitSize = 32;
   5347       ZeroReg = AArch64::WZR;
   5348       Opc = AArch64::MADDWrrr;
   5349       RC = &AArch64::GPR32RegClass;
   5350     } else {
   5351       OrrOpc = AArch64::ORRXri;
   5352       OrrRC = &AArch64::GPR64spRegClass;
   5353       BitSize = 64;
   5354       ZeroReg = AArch64::XZR;
   5355       Opc = AArch64::MADDXrrr;
   5356       RC = &AArch64::GPR64RegClass;
   5357     }
   5358     Register NewVR = MRI.createVirtualRegister(OrrRC);
   5359     uint64_t Imm = Root.getOperand(2).getImm();
   5360     if (Root.getOperand(3).isImm()) {
   5361       unsigned Val = Root.getOperand(3).getImm();
   5362       Imm = Imm << Val;
   5363     }
   5364     uint64_t UImm = SignExtend64(-Imm, BitSize);
   5365     uint64_t Encoding;
   5366     if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
   5367       MachineInstrBuilder MIB1 =
   5368           BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
   5369               .addReg(ZeroReg)
   5370               .addImm(Encoding);
   5371       InsInstrs.push_back(MIB1);
   5372       InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5373       MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
   5374     }
   5375     break;
   5376   }
   5377 
   5378   case MachineCombinerPattern::MULADDv8i8_OP1:
   5379     Opc = AArch64::MLAv8i8;
   5380     RC = &AArch64::FPR64RegClass;
   5381     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5382     break;
   5383   case MachineCombinerPattern::MULADDv8i8_OP2:
   5384     Opc = AArch64::MLAv8i8;
   5385     RC = &AArch64::FPR64RegClass;
   5386     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5387     break;
   5388   case MachineCombinerPattern::MULADDv16i8_OP1:
   5389     Opc = AArch64::MLAv16i8;
   5390     RC = &AArch64::FPR128RegClass;
   5391     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5392     break;
   5393   case MachineCombinerPattern::MULADDv16i8_OP2:
   5394     Opc = AArch64::MLAv16i8;
   5395     RC = &AArch64::FPR128RegClass;
   5396     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5397     break;
   5398   case MachineCombinerPattern::MULADDv4i16_OP1:
   5399     Opc = AArch64::MLAv4i16;
   5400     RC = &AArch64::FPR64RegClass;
   5401     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5402     break;
   5403   case MachineCombinerPattern::MULADDv4i16_OP2:
   5404     Opc = AArch64::MLAv4i16;
   5405     RC = &AArch64::FPR64RegClass;
   5406     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5407     break;
   5408   case MachineCombinerPattern::MULADDv8i16_OP1:
   5409     Opc = AArch64::MLAv8i16;
   5410     RC = &AArch64::FPR128RegClass;
   5411     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5412     break;
   5413   case MachineCombinerPattern::MULADDv8i16_OP2:
   5414     Opc = AArch64::MLAv8i16;
   5415     RC = &AArch64::FPR128RegClass;
   5416     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5417     break;
   5418   case MachineCombinerPattern::MULADDv2i32_OP1:
   5419     Opc = AArch64::MLAv2i32;
   5420     RC = &AArch64::FPR64RegClass;
   5421     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5422     break;
   5423   case MachineCombinerPattern::MULADDv2i32_OP2:
   5424     Opc = AArch64::MLAv2i32;
   5425     RC = &AArch64::FPR64RegClass;
   5426     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5427     break;
   5428   case MachineCombinerPattern::MULADDv4i32_OP1:
   5429     Opc = AArch64::MLAv4i32;
   5430     RC = &AArch64::FPR128RegClass;
   5431     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5432     break;
   5433   case MachineCombinerPattern::MULADDv4i32_OP2:
   5434     Opc = AArch64::MLAv4i32;
   5435     RC = &AArch64::FPR128RegClass;
   5436     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5437     break;
   5438 
   5439   case MachineCombinerPattern::MULSUBv8i8_OP1:
   5440     Opc = AArch64::MLAv8i8;
   5441     RC = &AArch64::FPR64RegClass;
   5442     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
   5443                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
   5444                                  RC);
   5445     break;
   5446   case MachineCombinerPattern::MULSUBv8i8_OP2:
   5447     Opc = AArch64::MLSv8i8;
   5448     RC = &AArch64::FPR64RegClass;
   5449     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5450     break;
   5451   case MachineCombinerPattern::MULSUBv16i8_OP1:
   5452     Opc = AArch64::MLAv16i8;
   5453     RC = &AArch64::FPR128RegClass;
   5454     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
   5455                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
   5456                                  RC);
   5457     break;
   5458   case MachineCombinerPattern::MULSUBv16i8_OP2:
   5459     Opc = AArch64::MLSv16i8;
   5460     RC = &AArch64::FPR128RegClass;
   5461     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5462     break;
   5463   case MachineCombinerPattern::MULSUBv4i16_OP1:
   5464     Opc = AArch64::MLAv4i16;
   5465     RC = &AArch64::FPR64RegClass;
   5466     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
   5467                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
   5468                                  RC);
   5469     break;
   5470   case MachineCombinerPattern::MULSUBv4i16_OP2:
   5471     Opc = AArch64::MLSv4i16;
   5472     RC = &AArch64::FPR64RegClass;
   5473     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5474     break;
   5475   case MachineCombinerPattern::MULSUBv8i16_OP1:
   5476     Opc = AArch64::MLAv8i16;
   5477     RC = &AArch64::FPR128RegClass;
   5478     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
   5479                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
   5480                                  RC);
   5481     break;
   5482   case MachineCombinerPattern::MULSUBv8i16_OP2:
   5483     Opc = AArch64::MLSv8i16;
   5484     RC = &AArch64::FPR128RegClass;
   5485     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5486     break;
   5487   case MachineCombinerPattern::MULSUBv2i32_OP1:
   5488     Opc = AArch64::MLAv2i32;
   5489     RC = &AArch64::FPR64RegClass;
   5490     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
   5491                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
   5492                                  RC);
   5493     break;
   5494   case MachineCombinerPattern::MULSUBv2i32_OP2:
   5495     Opc = AArch64::MLSv2i32;
   5496     RC = &AArch64::FPR64RegClass;
   5497     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5498     break;
   5499   case MachineCombinerPattern::MULSUBv4i32_OP1:
   5500     Opc = AArch64::MLAv4i32;
   5501     RC = &AArch64::FPR128RegClass;
   5502     MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
   5503                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
   5504                                  RC);
   5505     break;
   5506   case MachineCombinerPattern::MULSUBv4i32_OP2:
   5507     Opc = AArch64::MLSv4i32;
   5508     RC = &AArch64::FPR128RegClass;
   5509     MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5510     break;
   5511 
   5512   case MachineCombinerPattern::MULADDv4i16_indexed_OP1:
   5513     Opc = AArch64::MLAv4i16_indexed;
   5514     RC = &AArch64::FPR64RegClass;
   5515     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5516     break;
   5517   case MachineCombinerPattern::MULADDv4i16_indexed_OP2:
   5518     Opc = AArch64::MLAv4i16_indexed;
   5519     RC = &AArch64::FPR64RegClass;
   5520     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5521     break;
   5522   case MachineCombinerPattern::MULADDv8i16_indexed_OP1:
   5523     Opc = AArch64::MLAv8i16_indexed;
   5524     RC = &AArch64::FPR128RegClass;
   5525     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5526     break;
   5527   case MachineCombinerPattern::MULADDv8i16_indexed_OP2:
   5528     Opc = AArch64::MLAv8i16_indexed;
   5529     RC = &AArch64::FPR128RegClass;
   5530     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5531     break;
   5532   case MachineCombinerPattern::MULADDv2i32_indexed_OP1:
   5533     Opc = AArch64::MLAv2i32_indexed;
   5534     RC = &AArch64::FPR64RegClass;
   5535     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5536     break;
   5537   case MachineCombinerPattern::MULADDv2i32_indexed_OP2:
   5538     Opc = AArch64::MLAv2i32_indexed;
   5539     RC = &AArch64::FPR64RegClass;
   5540     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5541     break;
   5542   case MachineCombinerPattern::MULADDv4i32_indexed_OP1:
   5543     Opc = AArch64::MLAv4i32_indexed;
   5544     RC = &AArch64::FPR128RegClass;
   5545     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5546     break;
   5547   case MachineCombinerPattern::MULADDv4i32_indexed_OP2:
   5548     Opc = AArch64::MLAv4i32_indexed;
   5549     RC = &AArch64::FPR128RegClass;
   5550     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5551     break;
   5552 
   5553   case MachineCombinerPattern::MULSUBv4i16_indexed_OP1:
   5554     Opc = AArch64::MLAv4i16_indexed;
   5555     RC = &AArch64::FPR64RegClass;
   5556     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
   5557                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
   5558                                  RC);
   5559     break;
   5560   case MachineCombinerPattern::MULSUBv4i16_indexed_OP2:
   5561     Opc = AArch64::MLSv4i16_indexed;
   5562     RC = &AArch64::FPR64RegClass;
   5563     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5564     break;
   5565   case MachineCombinerPattern::MULSUBv8i16_indexed_OP1:
   5566     Opc = AArch64::MLAv8i16_indexed;
   5567     RC = &AArch64::FPR128RegClass;
   5568     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
   5569                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
   5570                                  RC);
   5571     break;
   5572   case MachineCombinerPattern::MULSUBv8i16_indexed_OP2:
   5573     Opc = AArch64::MLSv8i16_indexed;
   5574     RC = &AArch64::FPR128RegClass;
   5575     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5576     break;
   5577   case MachineCombinerPattern::MULSUBv2i32_indexed_OP1:
   5578     Opc = AArch64::MLAv2i32_indexed;
   5579     RC = &AArch64::FPR64RegClass;
   5580     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
   5581                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
   5582                                  RC);
   5583     break;
   5584   case MachineCombinerPattern::MULSUBv2i32_indexed_OP2:
   5585     Opc = AArch64::MLSv2i32_indexed;
   5586     RC = &AArch64::FPR64RegClass;
   5587     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5588     break;
   5589   case MachineCombinerPattern::MULSUBv4i32_indexed_OP1:
   5590     Opc = AArch64::MLAv4i32_indexed;
   5591     RC = &AArch64::FPR128RegClass;
   5592     MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
   5593                                  InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
   5594                                  RC);
   5595     break;
   5596   case MachineCombinerPattern::MULSUBv4i32_indexed_OP2:
   5597     Opc = AArch64::MLSv4i32_indexed;
   5598     RC = &AArch64::FPR128RegClass;
   5599     MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5600     break;
   5601 
   5602   // Floating Point Support
   5603   case MachineCombinerPattern::FMULADDH_OP1:
   5604     Opc = AArch64::FMADDHrrr;
   5605     RC = &AArch64::FPR16RegClass;
   5606     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5607     break;
   5608   case MachineCombinerPattern::FMULADDS_OP1:
   5609     Opc = AArch64::FMADDSrrr;
   5610     RC = &AArch64::FPR32RegClass;
   5611     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5612     break;
   5613   case MachineCombinerPattern::FMULADDD_OP1:
   5614     Opc = AArch64::FMADDDrrr;
   5615     RC = &AArch64::FPR64RegClass;
   5616     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5617     break;
   5618 
   5619   case MachineCombinerPattern::FMULADDH_OP2:
   5620     Opc = AArch64::FMADDHrrr;
   5621     RC = &AArch64::FPR16RegClass;
   5622     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5623     break;
   5624   case MachineCombinerPattern::FMULADDS_OP2:
   5625     Opc = AArch64::FMADDSrrr;
   5626     RC = &AArch64::FPR32RegClass;
   5627     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5628     break;
   5629   case MachineCombinerPattern::FMULADDD_OP2:
   5630     Opc = AArch64::FMADDDrrr;
   5631     RC = &AArch64::FPR64RegClass;
   5632     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5633     break;
   5634 
   5635   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
   5636     Opc = AArch64::FMLAv1i32_indexed;
   5637     RC = &AArch64::FPR32RegClass;
   5638     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5639                            FMAInstKind::Indexed);
   5640     break;
   5641   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
   5642     Opc = AArch64::FMLAv1i32_indexed;
   5643     RC = &AArch64::FPR32RegClass;
   5644     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5645                            FMAInstKind::Indexed);
   5646     break;
   5647 
   5648   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
   5649     Opc = AArch64::FMLAv1i64_indexed;
   5650     RC = &AArch64::FPR64RegClass;
   5651     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5652                            FMAInstKind::Indexed);
   5653     break;
   5654   case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
   5655     Opc = AArch64::FMLAv1i64_indexed;
   5656     RC = &AArch64::FPR64RegClass;
   5657     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5658                            FMAInstKind::Indexed);
   5659     break;
   5660 
   5661   case MachineCombinerPattern::FMLAv4i16_indexed_OP1:
   5662     RC = &AArch64::FPR64RegClass;
   5663     Opc = AArch64::FMLAv4i16_indexed;
   5664     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5665                            FMAInstKind::Indexed);
   5666     break;
   5667   case MachineCombinerPattern::FMLAv4f16_OP1:
   5668     RC = &AArch64::FPR64RegClass;
   5669     Opc = AArch64::FMLAv4f16;
   5670     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5671                            FMAInstKind::Accumulator);
   5672     break;
   5673   case MachineCombinerPattern::FMLAv4i16_indexed_OP2:
   5674     RC = &AArch64::FPR64RegClass;
   5675     Opc = AArch64::FMLAv4i16_indexed;
   5676     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5677                            FMAInstKind::Indexed);
   5678     break;
   5679   case MachineCombinerPattern::FMLAv4f16_OP2:
   5680     RC = &AArch64::FPR64RegClass;
   5681     Opc = AArch64::FMLAv4f16;
   5682     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5683                            FMAInstKind::Accumulator);
   5684     break;
   5685 
   5686   case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
   5687   case MachineCombinerPattern::FMLAv2f32_OP1:
   5688     RC = &AArch64::FPR64RegClass;
   5689     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
   5690       Opc = AArch64::FMLAv2i32_indexed;
   5691       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5692                              FMAInstKind::Indexed);
   5693     } else {
   5694       Opc = AArch64::FMLAv2f32;
   5695       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5696                              FMAInstKind::Accumulator);
   5697     }
   5698     break;
   5699   case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
   5700   case MachineCombinerPattern::FMLAv2f32_OP2:
   5701     RC = &AArch64::FPR64RegClass;
   5702     if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
   5703       Opc = AArch64::FMLAv2i32_indexed;
   5704       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5705                              FMAInstKind::Indexed);
   5706     } else {
   5707       Opc = AArch64::FMLAv2f32;
   5708       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5709                              FMAInstKind::Accumulator);
   5710     }
   5711     break;
   5712 
   5713   case MachineCombinerPattern::FMLAv8i16_indexed_OP1:
   5714     RC = &AArch64::FPR128RegClass;
   5715     Opc = AArch64::FMLAv8i16_indexed;
   5716     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5717                            FMAInstKind::Indexed);
   5718     break;
   5719   case MachineCombinerPattern::FMLAv8f16_OP1:
   5720     RC = &AArch64::FPR128RegClass;
   5721     Opc = AArch64::FMLAv8f16;
   5722     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5723                            FMAInstKind::Accumulator);
   5724     break;
   5725   case MachineCombinerPattern::FMLAv8i16_indexed_OP2:
   5726     RC = &AArch64::FPR128RegClass;
   5727     Opc = AArch64::FMLAv8i16_indexed;
   5728     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5729                            FMAInstKind::Indexed);
   5730     break;
   5731   case MachineCombinerPattern::FMLAv8f16_OP2:
   5732     RC = &AArch64::FPR128RegClass;
   5733     Opc = AArch64::FMLAv8f16;
   5734     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5735                            FMAInstKind::Accumulator);
   5736     break;
   5737 
   5738   case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
   5739   case MachineCombinerPattern::FMLAv2f64_OP1:
   5740     RC = &AArch64::FPR128RegClass;
   5741     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
   5742       Opc = AArch64::FMLAv2i64_indexed;
   5743       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5744                              FMAInstKind::Indexed);
   5745     } else {
   5746       Opc = AArch64::FMLAv2f64;
   5747       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5748                              FMAInstKind::Accumulator);
   5749     }
   5750     break;
   5751   case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
   5752   case MachineCombinerPattern::FMLAv2f64_OP2:
   5753     RC = &AArch64::FPR128RegClass;
   5754     if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
   5755       Opc = AArch64::FMLAv2i64_indexed;
   5756       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5757                              FMAInstKind::Indexed);
   5758     } else {
   5759       Opc = AArch64::FMLAv2f64;
   5760       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5761                              FMAInstKind::Accumulator);
   5762     }
   5763     break;
   5764 
   5765   case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
   5766   case MachineCombinerPattern::FMLAv4f32_OP1:
   5767     RC = &AArch64::FPR128RegClass;
   5768     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
   5769       Opc = AArch64::FMLAv4i32_indexed;
   5770       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5771                              FMAInstKind::Indexed);
   5772     } else {
   5773       Opc = AArch64::FMLAv4f32;
   5774       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5775                              FMAInstKind::Accumulator);
   5776     }
   5777     break;
   5778 
   5779   case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
   5780   case MachineCombinerPattern::FMLAv4f32_OP2:
   5781     RC = &AArch64::FPR128RegClass;
   5782     if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
   5783       Opc = AArch64::FMLAv4i32_indexed;
   5784       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5785                              FMAInstKind::Indexed);
   5786     } else {
   5787       Opc = AArch64::FMLAv4f32;
   5788       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5789                              FMAInstKind::Accumulator);
   5790     }
   5791     break;
   5792 
   5793   case MachineCombinerPattern::FMULSUBH_OP1:
   5794     Opc = AArch64::FNMSUBHrrr;
   5795     RC = &AArch64::FPR16RegClass;
   5796     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5797     break;
   5798   case MachineCombinerPattern::FMULSUBS_OP1:
   5799     Opc = AArch64::FNMSUBSrrr;
   5800     RC = &AArch64::FPR32RegClass;
   5801     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5802     break;
   5803   case MachineCombinerPattern::FMULSUBD_OP1:
   5804     Opc = AArch64::FNMSUBDrrr;
   5805     RC = &AArch64::FPR64RegClass;
   5806     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5807     break;
   5808 
   5809   case MachineCombinerPattern::FNMULSUBH_OP1:
   5810     Opc = AArch64::FNMADDHrrr;
   5811     RC = &AArch64::FPR16RegClass;
   5812     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5813     break;
   5814   case MachineCombinerPattern::FNMULSUBS_OP1:
   5815     Opc = AArch64::FNMADDSrrr;
   5816     RC = &AArch64::FPR32RegClass;
   5817     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5818     break;
   5819   case MachineCombinerPattern::FNMULSUBD_OP1:
   5820     Opc = AArch64::FNMADDDrrr;
   5821     RC = &AArch64::FPR64RegClass;
   5822     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
   5823     break;
   5824 
   5825   case MachineCombinerPattern::FMULSUBH_OP2:
   5826     Opc = AArch64::FMSUBHrrr;
   5827     RC = &AArch64::FPR16RegClass;
   5828     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5829     break;
   5830   case MachineCombinerPattern::FMULSUBS_OP2:
   5831     Opc = AArch64::FMSUBSrrr;
   5832     RC = &AArch64::FPR32RegClass;
   5833     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5834     break;
   5835   case MachineCombinerPattern::FMULSUBD_OP2:
   5836     Opc = AArch64::FMSUBDrrr;
   5837     RC = &AArch64::FPR64RegClass;
   5838     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
   5839     break;
   5840 
   5841   case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
   5842     Opc = AArch64::FMLSv1i32_indexed;
   5843     RC = &AArch64::FPR32RegClass;
   5844     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5845                            FMAInstKind::Indexed);
   5846     break;
   5847 
   5848   case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
   5849     Opc = AArch64::FMLSv1i64_indexed;
   5850     RC = &AArch64::FPR64RegClass;
   5851     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5852                            FMAInstKind::Indexed);
   5853     break;
   5854 
   5855   case MachineCombinerPattern::FMLSv4f16_OP1:
   5856   case MachineCombinerPattern::FMLSv4i16_indexed_OP1: {
   5857     RC = &AArch64::FPR64RegClass;
   5858     Register NewVR = MRI.createVirtualRegister(RC);
   5859     MachineInstrBuilder MIB1 =
   5860         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f16), NewVR)
   5861             .add(Root.getOperand(2));
   5862     InsInstrs.push_back(MIB1);
   5863     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5864     if (Pattern == MachineCombinerPattern::FMLSv4f16_OP1) {
   5865       Opc = AArch64::FMLAv4f16;
   5866       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5867                              FMAInstKind::Accumulator, &NewVR);
   5868     } else {
   5869       Opc = AArch64::FMLAv4i16_indexed;
   5870       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5871                              FMAInstKind::Indexed, &NewVR);
   5872     }
   5873     break;
   5874   }
   5875   case MachineCombinerPattern::FMLSv4f16_OP2:
   5876     RC = &AArch64::FPR64RegClass;
   5877     Opc = AArch64::FMLSv4f16;
   5878     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5879                            FMAInstKind::Accumulator);
   5880     break;
   5881   case MachineCombinerPattern::FMLSv4i16_indexed_OP2:
   5882     RC = &AArch64::FPR64RegClass;
   5883     Opc = AArch64::FMLSv4i16_indexed;
   5884     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5885                            FMAInstKind::Indexed);
   5886     break;
   5887 
   5888   case MachineCombinerPattern::FMLSv2f32_OP2:
   5889   case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
   5890     RC = &AArch64::FPR64RegClass;
   5891     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
   5892       Opc = AArch64::FMLSv2i32_indexed;
   5893       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5894                              FMAInstKind::Indexed);
   5895     } else {
   5896       Opc = AArch64::FMLSv2f32;
   5897       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5898                              FMAInstKind::Accumulator);
   5899     }
   5900     break;
   5901 
   5902   case MachineCombinerPattern::FMLSv8f16_OP1:
   5903   case MachineCombinerPattern::FMLSv8i16_indexed_OP1: {
   5904     RC = &AArch64::FPR128RegClass;
   5905     Register NewVR = MRI.createVirtualRegister(RC);
   5906     MachineInstrBuilder MIB1 =
   5907         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv8f16), NewVR)
   5908             .add(Root.getOperand(2));
   5909     InsInstrs.push_back(MIB1);
   5910     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5911     if (Pattern == MachineCombinerPattern::FMLSv8f16_OP1) {
   5912       Opc = AArch64::FMLAv8f16;
   5913       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5914                              FMAInstKind::Accumulator, &NewVR);
   5915     } else {
   5916       Opc = AArch64::FMLAv8i16_indexed;
   5917       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5918                              FMAInstKind::Indexed, &NewVR);
   5919     }
   5920     break;
   5921   }
   5922   case MachineCombinerPattern::FMLSv8f16_OP2:
   5923     RC = &AArch64::FPR128RegClass;
   5924     Opc = AArch64::FMLSv8f16;
   5925     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5926                            FMAInstKind::Accumulator);
   5927     break;
   5928   case MachineCombinerPattern::FMLSv8i16_indexed_OP2:
   5929     RC = &AArch64::FPR128RegClass;
   5930     Opc = AArch64::FMLSv8i16_indexed;
   5931     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5932                            FMAInstKind::Indexed);
   5933     break;
   5934 
   5935   case MachineCombinerPattern::FMLSv2f64_OP2:
   5936   case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
   5937     RC = &AArch64::FPR128RegClass;
   5938     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
   5939       Opc = AArch64::FMLSv2i64_indexed;
   5940       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5941                              FMAInstKind::Indexed);
   5942     } else {
   5943       Opc = AArch64::FMLSv2f64;
   5944       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5945                              FMAInstKind::Accumulator);
   5946     }
   5947     break;
   5948 
   5949   case MachineCombinerPattern::FMLSv4f32_OP2:
   5950   case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
   5951     RC = &AArch64::FPR128RegClass;
   5952     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
   5953       Opc = AArch64::FMLSv4i32_indexed;
   5954       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5955                              FMAInstKind::Indexed);
   5956     } else {
   5957       Opc = AArch64::FMLSv4f32;
   5958       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
   5959                              FMAInstKind::Accumulator);
   5960     }
   5961     break;
   5962   case MachineCombinerPattern::FMLSv2f32_OP1:
   5963   case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
   5964     RC = &AArch64::FPR64RegClass;
   5965     Register NewVR = MRI.createVirtualRegister(RC);
   5966     MachineInstrBuilder MIB1 =
   5967         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
   5968             .add(Root.getOperand(2));
   5969     InsInstrs.push_back(MIB1);
   5970     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5971     if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
   5972       Opc = AArch64::FMLAv2i32_indexed;
   5973       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5974                              FMAInstKind::Indexed, &NewVR);
   5975     } else {
   5976       Opc = AArch64::FMLAv2f32;
   5977       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5978                              FMAInstKind::Accumulator, &NewVR);
   5979     }
   5980     break;
   5981   }
   5982   case MachineCombinerPattern::FMLSv4f32_OP1:
   5983   case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
   5984     RC = &AArch64::FPR128RegClass;
   5985     Register NewVR = MRI.createVirtualRegister(RC);
   5986     MachineInstrBuilder MIB1 =
   5987         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
   5988             .add(Root.getOperand(2));
   5989     InsInstrs.push_back(MIB1);
   5990     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   5991     if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
   5992       Opc = AArch64::FMLAv4i32_indexed;
   5993       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5994                              FMAInstKind::Indexed, &NewVR);
   5995     } else {
   5996       Opc = AArch64::FMLAv4f32;
   5997       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   5998                              FMAInstKind::Accumulator, &NewVR);
   5999     }
   6000     break;
   6001   }
   6002   case MachineCombinerPattern::FMLSv2f64_OP1:
   6003   case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
   6004     RC = &AArch64::FPR128RegClass;
   6005     Register NewVR = MRI.createVirtualRegister(RC);
   6006     MachineInstrBuilder MIB1 =
   6007         BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
   6008             .add(Root.getOperand(2));
   6009     InsInstrs.push_back(MIB1);
   6010     InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
   6011     if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
   6012       Opc = AArch64::FMLAv2i64_indexed;
   6013       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   6014                              FMAInstKind::Indexed, &NewVR);
   6015     } else {
   6016       Opc = AArch64::FMLAv2f64;
   6017       MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
   6018                              FMAInstKind::Accumulator, &NewVR);
   6019     }
   6020     break;
   6021   }
   6022   } // end switch (Pattern)
   6023   // Record MUL and ADD/SUB for deletion
   6024   // FIXME: This assertion fails in CodeGen/AArch64/tailmerging_in_mbp.ll and
   6025   // CodeGen/AArch64/urem-seteq-nonzero.ll.
   6026   // assert(MUL && "MUL was never set");
   6027   DelInstrs.push_back(MUL);
   6028   DelInstrs.push_back(&Root);
   6029 }
   6030 
   6031 /// Replace csinc-branch sequence by a simple conditional branch
   6032 ///
   6033 /// Examples:
   6034 /// 1. \code
   6035 ///   csinc  w9, wzr, wzr, <condition code>
   6036 ///   tbnz   w9, #0, 0x44
   6037 ///    \endcode
   6038 /// to
   6039 ///    \code
   6040 ///   b.<inverted condition code>
   6041 ///    \endcode
   6042 ///
   6043 /// 2. \code
   6044 ///   csinc w9, wzr, wzr, <condition code>
   6045 ///   tbz   w9, #0, 0x44
   6046 ///    \endcode
   6047 /// to
   6048 ///    \code
   6049 ///   b.<condition code>
   6050 ///    \endcode
   6051 ///
   6052 /// Replace compare and branch sequence by TBZ/TBNZ instruction when the
   6053 /// compare's constant operand is power of 2.
   6054 ///
   6055 /// Examples:
   6056 ///    \code
   6057 ///   and  w8, w8, #0x400
   6058 ///   cbnz w8, L1
   6059 ///    \endcode
   6060 /// to
   6061 ///    \code
   6062 ///   tbnz w8, #10, L1
   6063 ///    \endcode
   6064 ///
   6065 /// \param  MI Conditional Branch
   6066 /// \return True when the simple conditional branch is generated
   6067 ///
   6068 bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
   6069   bool IsNegativeBranch = false;
   6070   bool IsTestAndBranch = false;
   6071   unsigned TargetBBInMI = 0;
   6072   switch (MI.getOpcode()) {
   6073   default:
   6074     llvm_unreachable("Unknown branch instruction?");
   6075   case AArch64::Bcc:
   6076     return false;
   6077   case AArch64::CBZW:
   6078   case AArch64::CBZX:
   6079     TargetBBInMI = 1;
   6080     break;
   6081   case AArch64::CBNZW:
   6082   case AArch64::CBNZX:
   6083     TargetBBInMI = 1;
   6084     IsNegativeBranch = true;
   6085     break;
   6086   case AArch64::TBZW:
   6087   case AArch64::TBZX:
   6088     TargetBBInMI = 2;
   6089     IsTestAndBranch = true;
   6090     break;
   6091   case AArch64::TBNZW:
   6092   case AArch64::TBNZX:
   6093     TargetBBInMI = 2;
   6094     IsNegativeBranch = true;
   6095     IsTestAndBranch = true;
   6096     break;
   6097   }
   6098   // So we increment a zero register and test for bits other
   6099   // than bit 0? Conservatively bail out in case the verifier
   6100   // missed this case.
   6101   if (IsTestAndBranch && MI.getOperand(1).getImm())
   6102     return false;
   6103 
   6104   // Find Definition.
   6105   assert(MI.getParent() && "Incomplete machine instruciton\n");
   6106   MachineBasicBlock *MBB = MI.getParent();
   6107   MachineFunction *MF = MBB->getParent();
   6108   MachineRegisterInfo *MRI = &MF->getRegInfo();
   6109   Register VReg = MI.getOperand(0).getReg();
   6110   if (!Register::isVirtualRegister(VReg))
   6111     return false;
   6112 
   6113   MachineInstr *DefMI = MRI->getVRegDef(VReg);
   6114 
   6115   // Look through COPY instructions to find definition.
   6116   while (DefMI->isCopy()) {
   6117     Register CopyVReg = DefMI->getOperand(1).getReg();
   6118     if (!MRI->hasOneNonDBGUse(CopyVReg))
   6119       return false;
   6120     if (!MRI->hasOneDef(CopyVReg))
   6121       return false;
   6122     DefMI = MRI->getVRegDef(CopyVReg);
   6123   }
   6124 
   6125   switch (DefMI->getOpcode()) {
   6126   default:
   6127     return false;
   6128   // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
   6129   case AArch64::ANDWri:
   6130   case AArch64::ANDXri: {
   6131     if (IsTestAndBranch)
   6132       return false;
   6133     if (DefMI->getParent() != MBB)
   6134       return false;
   6135     if (!MRI->hasOneNonDBGUse(VReg))
   6136       return false;
   6137 
   6138     bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
   6139     uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
   6140         DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
   6141     if (!isPowerOf2_64(Mask))
   6142       return false;
   6143 
   6144     MachineOperand &MO = DefMI->getOperand(1);
   6145     Register NewReg = MO.getReg();
   6146     if (!Register::isVirtualRegister(NewReg))
   6147       return false;
   6148 
   6149     assert(!MRI->def_empty(NewReg) && "Register must be defined.");
   6150 
   6151     MachineBasicBlock &RefToMBB = *MBB;
   6152     MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
   6153     DebugLoc DL = MI.getDebugLoc();
   6154     unsigned Imm = Log2_64(Mask);
   6155     unsigned Opc = (Imm < 32)
   6156                        ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
   6157                        : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
   6158     MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
   6159                               .addReg(NewReg)
   6160                               .addImm(Imm)
   6161                               .addMBB(TBB);
   6162     // Register lives on to the CBZ now.
   6163     MO.setIsKill(false);
   6164 
   6165     // For immediate smaller than 32, we need to use the 32-bit
   6166     // variant (W) in all cases. Indeed the 64-bit variant does not
   6167     // allow to encode them.
   6168     // Therefore, if the input register is 64-bit, we need to take the
   6169     // 32-bit sub-part.
   6170     if (!Is32Bit && Imm < 32)
   6171       NewMI->getOperand(0).setSubReg(AArch64::sub_32);
   6172     MI.eraseFromParent();
   6173     return true;
   6174   }
   6175   // Look for CSINC
   6176   case AArch64::CSINCWr:
   6177   case AArch64::CSINCXr: {
   6178     if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
   6179           DefMI->getOperand(2).getReg() == AArch64::WZR) &&
   6180         !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
   6181           DefMI->getOperand(2).getReg() == AArch64::XZR))
   6182       return false;
   6183 
   6184     if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
   6185       return false;
   6186 
   6187     AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
   6188     // Convert only when the condition code is not modified between
   6189     // the CSINC and the branch. The CC may be used by other
   6190     // instructions in between.
   6191     if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
   6192       return false;
   6193     MachineBasicBlock &RefToMBB = *MBB;
   6194     MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
   6195     DebugLoc DL = MI.getDebugLoc();
   6196     if (IsNegativeBranch)
   6197       CC = AArch64CC::getInvertedCondCode(CC);
   6198     BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
   6199     MI.eraseFromParent();
   6200     return true;
   6201   }
   6202   }
   6203 }
   6204 
   6205 std::pair<unsigned, unsigned>
   6206 AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
   6207   const unsigned Mask = AArch64II::MO_FRAGMENT;
   6208   return std::make_pair(TF & Mask, TF & ~Mask);
   6209 }
   6210 
   6211 ArrayRef<std::pair<unsigned, const char *>>
   6212 AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
   6213   using namespace AArch64II;
   6214 
   6215   static const std::pair<unsigned, const char *> TargetFlags[] = {
   6216       {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
   6217       {MO_G3, "aarch64-g3"},     {MO_G2, "aarch64-g2"},
   6218       {MO_G1, "aarch64-g1"},     {MO_G0, "aarch64-g0"},
   6219       {MO_HI12, "aarch64-hi12"}};
   6220   return makeArrayRef(TargetFlags);
   6221 }
   6222 
   6223 ArrayRef<std::pair<unsigned, const char *>>
   6224 AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
   6225   using namespace AArch64II;
   6226 
   6227   static const std::pair<unsigned, const char *> TargetFlags[] = {
   6228       {MO_COFFSTUB, "aarch64-coffstub"},
   6229       {MO_GOT, "aarch64-got"},
   6230       {MO_NC, "aarch64-nc"},
   6231       {MO_S, "aarch64-s"},
   6232       {MO_TLS, "aarch64-tls"},
   6233       {MO_DLLIMPORT, "aarch64-dllimport"},
   6234       {MO_PREL, "aarch64-prel"},
   6235       {MO_TAGGED, "aarch64-tagged"}};
   6236   return makeArrayRef(TargetFlags);
   6237 }
   6238 
   6239 ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
   6240 AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
   6241   static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
   6242       {{MOSuppressPair, "aarch64-suppress-pair"},
   6243        {MOStridedAccess, "aarch64-strided-access"}};
   6244   return makeArrayRef(TargetFlags);
   6245 }
   6246 
   6247 /// Constants defining how certain sequences should be outlined.
   6248 /// This encompasses how an outlined function should be called, and what kind of
   6249 /// frame should be emitted for that outlined function.
   6250 ///
   6251 /// \p MachineOutlinerDefault implies that the function should be called with
   6252 /// a save and restore of LR to the stack.
   6253 ///
   6254 /// That is,
   6255 ///
   6256 /// I1     Save LR                    OUTLINED_FUNCTION:
   6257 /// I2 --> BL OUTLINED_FUNCTION       I1
   6258 /// I3     Restore LR                 I2
   6259 ///                                   I3
   6260 ///                                   RET
   6261 ///
   6262 /// * Call construction overhead: 3 (save + BL + restore)
   6263 /// * Frame construction overhead: 1 (ret)
   6264 /// * Requires stack fixups? Yes
   6265 ///
   6266 /// \p MachineOutlinerTailCall implies that the function is being created from
   6267 /// a sequence of instructions ending in a return.
   6268 ///
   6269 /// That is,
   6270 ///
   6271 /// I1                             OUTLINED_FUNCTION:
   6272 /// I2 --> B OUTLINED_FUNCTION     I1
   6273 /// RET                            I2
   6274 ///                                RET
   6275 ///
   6276 /// * Call construction overhead: 1 (B)
   6277 /// * Frame construction overhead: 0 (Return included in sequence)
   6278 /// * Requires stack fixups? No
   6279 ///
   6280 /// \p MachineOutlinerNoLRSave implies that the function should be called using
   6281 /// a BL instruction, but doesn't require LR to be saved and restored. This
   6282 /// happens when LR is known to be dead.
   6283 ///
   6284 /// That is,
   6285 ///
   6286 /// I1                                OUTLINED_FUNCTION:
   6287 /// I2 --> BL OUTLINED_FUNCTION       I1
   6288 /// I3                                I2
   6289 ///                                   I3
   6290 ///                                   RET
   6291 ///
   6292 /// * Call construction overhead: 1 (BL)
   6293 /// * Frame construction overhead: 1 (RET)
   6294 /// * Requires stack fixups? No
   6295 ///
   6296 /// \p MachineOutlinerThunk implies that the function is being created from
   6297 /// a sequence of instructions ending in a call. The outlined function is
   6298 /// called with a BL instruction, and the outlined function tail-calls the
   6299 /// original call destination.
   6300 ///
   6301 /// That is,
   6302 ///
   6303 /// I1                                OUTLINED_FUNCTION:
   6304 /// I2 --> BL OUTLINED_FUNCTION       I1
   6305 /// BL f                              I2
   6306 ///                                   B f
   6307 /// * Call construction overhead: 1 (BL)
   6308 /// * Frame construction overhead: 0
   6309 /// * Requires stack fixups? No
   6310 ///
   6311 /// \p MachineOutlinerRegSave implies that the function should be called with a
   6312 /// save and restore of LR to an available register. This allows us to avoid
   6313 /// stack fixups. Note that this outlining variant is compatible with the
   6314 /// NoLRSave case.
   6315 ///
   6316 /// That is,
   6317 ///
   6318 /// I1     Save LR                    OUTLINED_FUNCTION:
   6319 /// I2 --> BL OUTLINED_FUNCTION       I1
   6320 /// I3     Restore LR                 I2
   6321 ///                                   I3
   6322 ///                                   RET
   6323 ///
   6324 /// * Call construction overhead: 3 (save + BL + restore)
   6325 /// * Frame construction overhead: 1 (ret)
   6326 /// * Requires stack fixups? No
enum MachineOutlinerClass {
  MachineOutlinerDefault = 0,  ///< Emit a save, restore, call, and return.
  MachineOutlinerTailCall = 1, ///< Only emit a branch.
  MachineOutlinerNoLRSave = 2, ///< Emit a call and return.
  MachineOutlinerThunk = 3,    ///< Emit a call and tail-call.
  MachineOutlinerRegSave = 4   ///< Same as default, but save to a register.
};
   6334 
/// Bit flags that are tested against and AND-ed together from
/// outliner::Candidate::Flags.
enum MachineOutlinerMBBFlags {
  /// NOTE(review): presumably LR is not free at some point in the block --
  /// confirm against the code that sets this flag.
  LRUnavailableSomewhere = 1 << 1,
  /// NOTE(review): presumably the block contains call instructions --
  /// confirm against the code that sets this flag.
  HasCalls = 1 << 2,
  /// The unsafe registers are all dead in the block, so candidate filtering
  /// can skip computing liveness for it.
  UnsafeRegsDead = 1 << 3
};
   6340 
   6341 unsigned
   6342 AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
   6343   assert(C.LRUWasSet && "LRU wasn't set?");
   6344   MachineFunction *MF = C.getMF();
   6345   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
   6346       MF->getSubtarget().getRegisterInfo());
   6347 
   6348   // Check if there is an available register across the sequence that we can
   6349   // use.
   6350   for (unsigned Reg : AArch64::GPR64RegClass) {
   6351     if (!ARI->isReservedReg(*MF, Reg) &&
   6352         Reg != AArch64::LR &&  // LR is not reserved, but don't use it.
   6353         Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
   6354         Reg != AArch64::X17 && // Ditto for X17.
   6355         C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
   6356       return Reg;
   6357   }
   6358 
   6359   // No suitable register. Return 0.
   6360   return 0u;
   6361 }
   6362 
   6363 static bool
   6364 outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
   6365                                          const outliner::Candidate &b) {
   6366   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
   6367   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
   6368 
   6369   return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
   6370          MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
   6371 }
   6372 
   6373 static bool
   6374 outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
   6375                                        const outliner::Candidate &b) {
   6376   const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
   6377   const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
   6378 
   6379   return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
   6380 }
   6381 
   6382 static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
   6383                                                 const outliner::Candidate &b) {
   6384   const AArch64Subtarget &SubtargetA =
   6385       a.getMF()->getSubtarget<AArch64Subtarget>();
   6386   const AArch64Subtarget &SubtargetB =
   6387       b.getMF()->getSubtarget<AArch64Subtarget>();
   6388   return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
   6389 }
   6390 
   6391 outliner::OutlinedFunction AArch64InstrInfo::getOutliningCandidateInfo(
   6392     std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
   6393   outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
   6394   unsigned SequenceSize =
   6395       std::accumulate(FirstCand.front(), std::next(FirstCand.back()), 0,
   6396                       [this](unsigned Sum, const MachineInstr &MI) {
   6397                         return Sum + getInstSizeInBytes(MI);
   6398                       });
   6399   unsigned NumBytesToCreateFrame = 0;
   6400 
   6401   // We only allow outlining for functions having exactly matching return
   6402   // address signing attributes, i.e., all share the same value for the
   6403   // attribute "sign-return-address" and all share the same type of key they
   6404   // are signed with.
   6405   // Additionally we require all functions to simultaneously either support
   6406   // v8.3a features or not. Otherwise an outlined function could get signed
   6407   // using dedicated v8.3 instructions and a call from a function that doesn't
   6408   // support v8.3 instructions would therefore be invalid.
   6409   if (std::adjacent_find(
   6410           RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
   6411           [](const outliner::Candidate &a, const outliner::Candidate &b) {
   6412             // Return true if a and b are non-equal w.r.t. return address
   6413             // signing or support of v8.3a features
   6414             if (outliningCandidatesSigningScopeConsensus(a, b) &&
   6415                 outliningCandidatesSigningKeyConsensus(a, b) &&
   6416                 outliningCandidatesV8_3OpsConsensus(a, b)) {
   6417               return false;
   6418             }
   6419             return true;
   6420           }) != RepeatedSequenceLocs.end()) {
   6421     return outliner::OutlinedFunction();
   6422   }
   6423 
   6424   // Since at this point all candidates agree on their return address signing
   6425   // picking just one is fine. If the candidate functions potentially sign their
   6426   // return addresses, the outlined function should do the same. Note that in
   6427   // the case of "sign-return-address"="non-leaf" this is an assumption: It is
   6428   // not certainly true that the outlined function will have to sign its return
   6429   // address but this decision is made later, when the decision to outline
   6430   // has already been made.
   6431   // The same holds for the number of additional instructions we need: On
   6432   // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
   6433   // necessary. However, at this point we don't know if the outlined function
   6434   // will have a RET instruction so we assume the worst.
   6435   const TargetRegisterInfo &TRI = getRegisterInfo();
   6436   if (FirstCand.getMF()
   6437           ->getInfo<AArch64FunctionInfo>()
   6438           ->shouldSignReturnAddress(true)) {
   6439     // One PAC and one AUT instructions
   6440     NumBytesToCreateFrame += 8;
   6441 
   6442     // We have to check if sp modifying instructions would get outlined.
   6443     // If so we only allow outlining if sp is unchanged overall, so matching
   6444     // sub and add instructions are okay to outline, all other sp modifications
   6445     // are not
   6446     auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
   6447       int SPValue = 0;
   6448       MachineBasicBlock::iterator MBBI = C.front();
   6449       for (;;) {
   6450         if (MBBI->modifiesRegister(AArch64::SP, &TRI)) {
   6451           switch (MBBI->getOpcode()) {
   6452           case AArch64::ADDXri:
   6453           case AArch64::ADDWri:
   6454             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
   6455             assert(MBBI->getOperand(2).isImm() &&
   6456                    "Expected operand to be immediate");
   6457             assert(MBBI->getOperand(1).isReg() &&
   6458                    "Expected operand to be a register");
   6459             // Check if the add just increments sp. If so, we search for
   6460             // matching sub instructions that decrement sp. If not, the
   6461             // modification is illegal
   6462             if (MBBI->getOperand(1).getReg() == AArch64::SP)
   6463               SPValue += MBBI->getOperand(2).getImm();
   6464             else
   6465               return true;
   6466             break;
   6467           case AArch64::SUBXri:
   6468           case AArch64::SUBWri:
   6469             assert(MBBI->getNumOperands() == 4 && "Wrong number of operands");
   6470             assert(MBBI->getOperand(2).isImm() &&
   6471                    "Expected operand to be immediate");
   6472             assert(MBBI->getOperand(1).isReg() &&
   6473                    "Expected operand to be a register");
   6474             // Check if the sub just decrements sp. If so, we search for
   6475             // matching add instructions that increment sp. If not, the
   6476             // modification is illegal
   6477             if (MBBI->getOperand(1).getReg() == AArch64::SP)
   6478               SPValue -= MBBI->getOperand(2).getImm();
   6479             else
   6480               return true;
   6481             break;
   6482           default:
   6483             return true;
   6484           }
   6485         }
   6486         if (MBBI == C.back())
   6487           break;
   6488         ++MBBI;
   6489       }
   6490       if (SPValue)
   6491         return true;
   6492       return false;
   6493     };
   6494     // Remove candidates with illegal stack modifying instructions
   6495     llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
   6496 
   6497     // If the sequence doesn't have enough candidates left, then we're done.
   6498     if (RepeatedSequenceLocs.size() < 2)
   6499       return outliner::OutlinedFunction();
   6500   }
   6501 
   6502   // Properties about candidate MBBs that hold for all of them.
   6503   unsigned FlagsSetInAll = 0xF;
   6504 
   6505   // Compute liveness information for each candidate, and set FlagsSetInAll.
   6506   std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
   6507                 [&FlagsSetInAll](outliner::Candidate &C) {
   6508                   FlagsSetInAll &= C.Flags;
   6509                 });
   6510 
   6511   // According to the AArch64 Procedure Call Standard, the following are
   6512   // undefined on entry/exit from a function call:
   6513   //
   6514   // * Registers x16, x17, (and thus w16, w17)
   6515   // * Condition codes (and thus the NZCV register)
   6516   //
   6517   // Because of this, we can't outline any sequence of instructions where
   6518   // one
   6519   // of these registers is live into/across it. Thus, we need to delete
   6520   // those
   6521   // candidates.
   6522   auto CantGuaranteeValueAcrossCall = [&TRI](outliner::Candidate &C) {
   6523     // If the unsafe registers in this block are all dead, then we don't need
   6524     // to compute liveness here.
   6525     if (C.Flags & UnsafeRegsDead)
   6526       return false;
   6527     C.initLRU(TRI);
   6528     LiveRegUnits LRU = C.LRU;
   6529     return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
   6530             !LRU.available(AArch64::NZCV));
   6531   };
   6532 
   6533   // Are there any candidates where those registers are live?
   6534   if (!(FlagsSetInAll & UnsafeRegsDead)) {
   6535     // Erase every candidate that violates the restrictions above. (It could be
   6536     // true that we have viable candidates, so it's not worth bailing out in
   6537     // the case that, say, 1 out of 20 candidates violate the restrictions.)
   6538     llvm::erase_if(RepeatedSequenceLocs, CantGuaranteeValueAcrossCall);
   6539 
   6540     // If the sequence doesn't have enough candidates left, then we're done.
   6541     if (RepeatedSequenceLocs.size() < 2)
   6542       return outliner::OutlinedFunction();
   6543   }
   6544 
   6545   // At this point, we have only "safe" candidates to outline. Figure out
   6546   // frame + call instruction information.
   6547 
   6548   unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
   6549 
   6550   // Helper lambda which sets call information for every candidate.
   6551   auto SetCandidateCallInfo =
   6552       [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
   6553         for (outliner::Candidate &C : RepeatedSequenceLocs)
   6554           C.setCallInfo(CallID, NumBytesForCall);
   6555       };
   6556 
   6557   unsigned FrameID = MachineOutlinerDefault;
   6558   NumBytesToCreateFrame += 4;
   6559 
   6560   bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
   6561     return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
   6562   });
   6563 
   6564   // We check to see if CFI Instructions are present, and if they are
   6565   // we find the number of CFI Instructions in the candidates.
   6566   unsigned CFICount = 0;
   6567   MachineBasicBlock::iterator MBBI = RepeatedSequenceLocs[0].front();
   6568   for (unsigned Loc = RepeatedSequenceLocs[0].getStartIdx();
   6569        Loc < RepeatedSequenceLocs[0].getEndIdx() + 1; Loc++) {
   6570     const std::vector<MCCFIInstruction> &CFIInstructions =
   6571         RepeatedSequenceLocs[0].getMF()->getFrameInstructions();
   6572     if (MBBI->isCFIInstruction()) {
   6573       unsigned CFIIndex = MBBI->getOperand(0).getCFIIndex();
   6574       MCCFIInstruction CFI = CFIInstructions[CFIIndex];
   6575       CFICount++;
   6576     }
   6577     MBBI++;
   6578   }
   6579 
   6580   // We compare the number of found CFI Instructions to  the number of CFI
   6581   // instructions in the parent function for each candidate.  We must check this
   6582   // since if we outline one of the CFI instructions in a function, we have to
   6583   // outline them all for correctness. If we do not, the address offsets will be
   6584   // incorrect between the two sections of the program.
   6585   for (outliner::Candidate &C : RepeatedSequenceLocs) {
   6586     std::vector<MCCFIInstruction> CFIInstructions =
   6587         C.getMF()->getFrameInstructions();
   6588 
   6589     if (CFICount > 0 && CFICount != CFIInstructions.size())
   6590       return outliner::OutlinedFunction();
   6591   }
   6592 
   6593   // Returns true if an instructions is safe to fix up, false otherwise.
   6594   auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
   6595     if (MI.isCall())
   6596       return true;
   6597 
   6598     if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
   6599         !MI.readsRegister(AArch64::SP, &TRI))
   6600       return true;
   6601 
   6602     // Any modification of SP will break our code to save/restore LR.
   6603     // FIXME: We could handle some instructions which add a constant
   6604     // offset to SP, with a bit more work.
   6605     if (MI.modifiesRegister(AArch64::SP, &TRI))
   6606       return false;
   6607 
   6608     // At this point, we have a stack instruction that we might need to
   6609     // fix up. We'll handle it if it's a load or store.
   6610     if (MI.mayLoadOrStore()) {
   6611       const MachineOperand *Base; // Filled with the base operand of MI.
   6612       int64_t Offset;             // Filled with the offset of MI.
   6613       bool OffsetIsScalable;
   6614 
   6615       // Does it allow us to offset the base operand and is the base the
   6616       // register SP?
   6617       if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
   6618           !Base->isReg() || Base->getReg() != AArch64::SP)
   6619         return false;
   6620 
   6621       // Fix-up code below assumes bytes.
   6622       if (OffsetIsScalable)
   6623         return false;
   6624 
   6625       // Find the minimum/maximum offset for this instruction and check
   6626       // if fixing it up would be in range.
   6627       int64_t MinOffset,
   6628           MaxOffset;  // Unscaled offsets for the instruction.
   6629       TypeSize Scale(0U, false); // The scale to multiply the offsets by.
   6630       unsigned DummyWidth;
   6631       getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
   6632 
   6633       Offset += 16; // Update the offset to what it would be if we outlined.
   6634       if (Offset < MinOffset * (int64_t)Scale.getFixedSize() ||
   6635           Offset > MaxOffset * (int64_t)Scale.getFixedSize())
   6636         return false;
   6637 
   6638       // It's in range, so we can outline it.
   6639       return true;
   6640     }
   6641 
   6642     // FIXME: Add handling for instructions like "add x0, sp, #8".
   6643 
   6644     // We can't fix it up, so don't outline it.
   6645     return false;
   6646   };
   6647 
   6648   // True if it's possible to fix up each stack instruction in this sequence.
   6649   // Important for frames/call variants that modify the stack.
   6650   bool AllStackInstrsSafe = std::all_of(
   6651       FirstCand.front(), std::next(FirstCand.back()), IsSafeToFixup);
   6652 
   6653   // If the last instruction in any candidate is a terminator, then we should
   6654   // tail call all of the candidates.
   6655   if (RepeatedSequenceLocs[0].back()->isTerminator()) {
   6656     FrameID = MachineOutlinerTailCall;
   6657     NumBytesToCreateFrame = 0;
   6658     SetCandidateCallInfo(MachineOutlinerTailCall, 4);
   6659   }
   6660 
   6661   else if (LastInstrOpcode == AArch64::BL ||
   6662            ((LastInstrOpcode == AArch64::BLR ||
   6663              LastInstrOpcode == AArch64::BLRNoIP) &&
   6664             !HasBTI)) {
   6665     // FIXME: Do we need to check if the code after this uses the value of LR?
   6666     FrameID = MachineOutlinerThunk;
   6667     NumBytesToCreateFrame = 0;
   6668     SetCandidateCallInfo(MachineOutlinerThunk, 4);
   6669   }
   6670 
   6671   else {
   6672     // We need to decide how to emit calls + frames. We can always emit the same
   6673     // frame if we don't need to save to the stack. If we have to save to the
   6674     // stack, then we need a different frame.
   6675     unsigned NumBytesNoStackCalls = 0;
   6676     std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
   6677 
   6678     // Check if we have to save LR.
   6679     for (outliner::Candidate &C : RepeatedSequenceLocs) {
   6680       C.initLRU(TRI);
   6681 
   6682       // If we have a noreturn caller, then we're going to be conservative and
   6683       // say that we have to save LR. If we don't have a ret at the end of the
   6684       // block, then we can't reason about liveness accurately.
   6685       //
   6686       // FIXME: We can probably do better than always disabling this in
   6687       // noreturn functions by fixing up the liveness info.
   6688       bool IsNoReturn =
   6689           C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
   6690 
   6691       // Is LR available? If so, we don't need a save.
   6692       if (C.LRU.available(AArch64::LR) && !IsNoReturn) {
   6693         NumBytesNoStackCalls += 4;
   6694         C.setCallInfo(MachineOutlinerNoLRSave, 4);
   6695         CandidatesWithoutStackFixups.push_back(C);
   6696       }
   6697 
   6698       // Is an unused register available? If so, we won't modify the stack, so
   6699       // we can outline with the same frame type as those that don't save LR.
   6700       else if (findRegisterToSaveLRTo(C)) {
   6701         NumBytesNoStackCalls += 12;
   6702         C.setCallInfo(MachineOutlinerRegSave, 12);
   6703         CandidatesWithoutStackFixups.push_back(C);
   6704       }
   6705 
   6706       // Is SP used in the sequence at all? If not, we don't have to modify
   6707       // the stack, so we are guaranteed to get the same frame.
   6708       else if (C.UsedInSequence.available(AArch64::SP)) {
   6709         NumBytesNoStackCalls += 12;
   6710         C.setCallInfo(MachineOutlinerDefault, 12);
   6711         CandidatesWithoutStackFixups.push_back(C);
   6712       }
   6713 
   6714       // If we outline this, we need to modify the stack. Pretend we don't
   6715       // outline this by saving all of its bytes.
   6716       else {
   6717         NumBytesNoStackCalls += SequenceSize;
   6718       }
   6719     }
   6720 
   6721     // If there are no places where we have to save LR, then note that we
   6722     // don't have to update the stack. Otherwise, give every candidate the
   6723     // default call type, as long as it's safe to do so.
   6724     if (!AllStackInstrsSafe ||
   6725         NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
   6726       RepeatedSequenceLocs = CandidatesWithoutStackFixups;
   6727       FrameID = MachineOutlinerNoLRSave;
   6728     } else {
   6729       SetCandidateCallInfo(MachineOutlinerDefault, 12);
   6730 
   6731       // Bugzilla ID: 46767
   6732       // TODO: Check if fixing up the stack more than once is safe so we can
   6733       // outline these.
   6734       //
   6735       // An outline resulting in a caller that requires stack fixups at the
   6736       // callsite to a callee that also requires stack fixups can happen when
   6737       // there are no available registers at the candidate callsite for a
   6738       // candidate that itself also has calls.
   6739       //
   6740       // In other words if function_containing_sequence in the following pseudo
   6741       // assembly requires that we save LR at the point of the call, but there
   6742       // are no available registers: in this case we save using SP and as a
   6743       // result the SP offsets requires stack fixups by multiples of 16.
   6744       //
   6745       // function_containing_sequence:
   6746       //   ...
   6747       //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
   6748       //   call OUTLINED_FUNCTION_N
   6749       //   restore LR from SP
   6750       //   ...
   6751       //
   6752       // OUTLINED_FUNCTION_N:
   6753       //   save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
   6754       //   ...
   6755       //   bl foo
   6756       //   restore LR from SP
   6757       //   ret
   6758       //
   6759       // Because the code to handle more than one stack fixup does not
   6760       // currently have the proper checks for legality, these cases will assert
   6761       // in the AArch64 MachineOutliner. This is because the code to do this
   6762       // needs more hardening, testing, better checks that generated code is
   6763       // legal, etc and because it is only verified to handle a single pass of
   6764       // stack fixup.
   6765       //
   6766       // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
   6767       // these cases until they are known to be handled. Bugzilla 46767 is
   6768       // referenced in comments at the assert site.
   6769       //
   6770       // To avoid asserting (or generating non-legal code on noassert builds)
   6771       // we remove all candidates which would need more than one stack fixup by
   6772       // pruning the cases where the candidate has calls while also having no
   6773       // available LR and having no available general purpose registers to copy
   6774       // LR to (ie one extra stack save/restore).
   6775       //
   6776       if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
   6777         erase_if(RepeatedSequenceLocs, [this](outliner::Candidate &C) {
   6778           return (std::any_of(
   6779                      C.front(), std::next(C.back()),
   6780                      [](const MachineInstr &MI) { return MI.isCall(); })) &&
   6781                  (!C.LRU.available(AArch64::LR) || !findRegisterToSaveLRTo(C));
   6782         });
   6783       }
   6784     }
   6785 
   6786     // If we dropped all of the candidates, bail out here.
   6787     if (RepeatedSequenceLocs.size() < 2) {
   6788       RepeatedSequenceLocs.clear();
   6789       return outliner::OutlinedFunction();
   6790     }
   6791   }
   6792 
   6793   // Does every candidate's MBB contain a call? If so, then we might have a call
   6794   // in the range.
   6795   if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
   6796     // Check if the range contains a call. These require a save + restore of the
   6797     // link register.
   6798     bool ModStackToSaveLR = false;
   6799     if (std::any_of(FirstCand.front(), FirstCand.back(),
   6800                     [](const MachineInstr &MI) { return MI.isCall(); }))
   6801       ModStackToSaveLR = true;
   6802 
   6803     // Handle the last instruction separately. If this is a tail call, then the
   6804     // last instruction is a call. We don't want to save + restore in this case.
   6805     // However, it could be possible that the last instruction is a call without
   6806     // it being valid to tail call this sequence. We should consider this as
   6807     // well.
   6808     else if (FrameID != MachineOutlinerThunk &&
   6809              FrameID != MachineOutlinerTailCall && FirstCand.back()->isCall())
   6810       ModStackToSaveLR = true;
   6811 
   6812     if (ModStackToSaveLR) {
   6813       // We can't fix up the stack. Bail out.
   6814       if (!AllStackInstrsSafe) {
   6815         RepeatedSequenceLocs.clear();
   6816         return outliner::OutlinedFunction();
   6817       }
   6818 
   6819       // Save + restore LR.
   6820       NumBytesToCreateFrame += 8;
   6821     }
   6822   }
   6823 
   6824   // If we have CFI instructions, we can only outline if the outlined section
   6825   // can be a tail call
   6826   if (FrameID != MachineOutlinerTailCall && CFICount > 0)
   6827     return outliner::OutlinedFunction();
   6828 
   6829   return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
   6830                                     NumBytesToCreateFrame, FrameID);
   6831 }
   6832 
   6833 bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
   6834     MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
   6835   const Function &F = MF.getFunction();
   6836 
   6837   // Can F be deduplicated by the linker? If it can, don't outline from it.
   6838   if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
   6839     return false;
   6840 
   6841   // Don't outline from functions with section markings; the program could
   6842   // expect that all the code is in the named section.
   6843   // FIXME: Allow outlining from multiple functions with the same section
   6844   // marking.
   6845   if (F.hasSection())
   6846     return false;
   6847 
   6848   // Outlining from functions with redzones is unsafe since the outliner may
   6849   // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
   6850   // outline from it.
   6851   AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
   6852   if (!AFI || AFI->hasRedZone().getValueOr(true))
   6853     return false;
   6854 
   6855   // FIXME: Teach the outliner to generate/handle Windows unwind info.
   6856   if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
   6857     return false;
   6858 
   6859   // It's safe to outline from MF.
   6860   return true;
   6861 }
   6862 
   6863 bool AArch64InstrInfo::isMBBSafeToOutlineFrom(MachineBasicBlock &MBB,
   6864                                               unsigned &Flags) const {
   6865   // Check if LR is available through all of the MBB. If it's not, then set
   6866   // a flag.
   6867   assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
   6868          "Suitable Machine Function for outlining must track liveness");
   6869   LiveRegUnits LRU(getRegisterInfo());
   6870 
   6871   std::for_each(MBB.rbegin(), MBB.rend(),
   6872                 [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
   6873 
   6874   // Check if each of the unsafe registers are available...
   6875   bool W16AvailableInBlock = LRU.available(AArch64::W16);
   6876   bool W17AvailableInBlock = LRU.available(AArch64::W17);
   6877   bool NZCVAvailableInBlock = LRU.available(AArch64::NZCV);
   6878 
   6879   // If all of these are dead (and not live out), we know we don't have to check
   6880   // them later.
   6881   if (W16AvailableInBlock && W17AvailableInBlock && NZCVAvailableInBlock)
   6882     Flags |= MachineOutlinerMBBFlags::UnsafeRegsDead;
   6883 
   6884   // Now, add the live outs to the set.
   6885   LRU.addLiveOuts(MBB);
   6886 
   6887   // If any of these registers is available in the MBB, but also a live out of
   6888   // the block, then we know outlining is unsafe.
   6889   if (W16AvailableInBlock && !LRU.available(AArch64::W16))
   6890     return false;
   6891   if (W17AvailableInBlock && !LRU.available(AArch64::W17))
   6892     return false;
   6893   if (NZCVAvailableInBlock && !LRU.available(AArch64::NZCV))
   6894     return false;
   6895 
   6896   // Check if there's a call inside this MachineBasicBlock. If there is, then
   6897   // set a flag.
   6898   if (any_of(MBB, [](MachineInstr &MI) { return MI.isCall(); }))
   6899     Flags |= MachineOutlinerMBBFlags::HasCalls;
   6900 
   6901   MachineFunction *MF = MBB.getParent();
   6902 
   6903   // In the event that we outline, we may have to save LR. If there is an
   6904   // available register in the MBB, then we'll always save LR there. Check if
   6905   // this is true.
   6906   bool CanSaveLR = false;
   6907   const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
   6908       MF->getSubtarget().getRegisterInfo());
   6909 
   6910   // Check if there is an available register across the sequence that we can
   6911   // use.
   6912   for (unsigned Reg : AArch64::GPR64RegClass) {
   6913     if (!ARI->isReservedReg(*MF, Reg) && Reg != AArch64::LR &&
   6914         Reg != AArch64::X16 && Reg != AArch64::X17 && LRU.available(Reg)) {
   6915       CanSaveLR = true;
   6916       break;
   6917     }
   6918   }
   6919 
   6920   // Check if we have a register we can save LR to, and if LR was used
   6921   // somewhere. If both of those things are true, then we need to evaluate the
   6922   // safety of outlining stack instructions later.
   6923   if (!CanSaveLR && !LRU.available(AArch64::LR))
   6924     Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
   6925 
   6926   return true;
   6927 }
   6928 
   6929 outliner::InstrType
   6930 AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
   6931                                    unsigned Flags) const {
   6932   MachineInstr &MI = *MIT;
   6933   MachineBasicBlock *MBB = MI.getParent();
   6934   MachineFunction *MF = MBB->getParent();
   6935   AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
   6936 
   6937   // Don't outline anything used for return address signing. The outlined
   6938   // function will get signed later if needed
   6939   switch (MI.getOpcode()) {
   6940   case AArch64::PACIASP:
   6941   case AArch64::PACIBSP:
   6942   case AArch64::AUTIASP:
   6943   case AArch64::AUTIBSP:
   6944   case AArch64::RETAA:
   6945   case AArch64::RETAB:
   6946   case AArch64::EMITBKEY:
   6947     return outliner::InstrType::Illegal;
   6948   }
   6949 
   6950   // Don't outline LOHs.
   6951   if (FuncInfo->getLOHRelated().count(&MI))
   6952     return outliner::InstrType::Illegal;
   6953 
   6954   // We can only outline these if we will tail call the outlined function, or
   6955   // fix up the CFI offsets. Currently, CFI instructions are outlined only if
   6956   // in a tail call.
   6957   //
   6958   // FIXME: If the proper fixups for the offset are implemented, this should be
   6959   // possible.
   6960   if (MI.isCFIInstruction())
   6961     return outliner::InstrType::Legal;
   6962 
   6963   // Don't allow debug values to impact outlining type.
   6964   if (MI.isDebugInstr() || MI.isIndirectDebugValue())
   6965     return outliner::InstrType::Invisible;
   6966 
   6967   // At this point, KILL instructions don't really tell us much so we can go
   6968   // ahead and skip over them.
   6969   if (MI.isKill())
   6970     return outliner::InstrType::Invisible;
   6971 
   6972   // Is this a terminator for a basic block?
   6973   if (MI.isTerminator()) {
   6974 
   6975     // Is this the end of a function?
   6976     if (MI.getParent()->succ_empty())
   6977       return outliner::InstrType::Legal;
   6978 
   6979     // It's not, so don't outline it.
   6980     return outliner::InstrType::Illegal;
   6981   }
   6982 
   6983   // Make sure none of the operands are un-outlinable.
   6984   for (const MachineOperand &MOP : MI.operands()) {
   6985     if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
   6986         MOP.isTargetIndex())
   6987       return outliner::InstrType::Illegal;
   6988 
   6989     // If it uses LR or W30 explicitly, then don't touch it.
   6990     if (MOP.isReg() && !MOP.isImplicit() &&
   6991         (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
   6992       return outliner::InstrType::Illegal;
   6993   }
   6994 
   6995   // Special cases for instructions that can always be outlined, but will fail
   6996   // the later tests. e.g, ADRPs, which are PC-relative use LR, but can always
   6997   // be outlined because they don't require a *specific* value to be in LR.
   6998   if (MI.getOpcode() == AArch64::ADRP)
   6999     return outliner::InstrType::Legal;
   7000 
   7001   // If MI is a call we might be able to outline it. We don't want to outline
   7002   // any calls that rely on the position of items on the stack. When we outline
   7003   // something containing a call, we have to emit a save and restore of LR in
   7004   // the outlined function. Currently, this always happens by saving LR to the
   7005   // stack. Thus, if we outline, say, half the parameters for a function call
   7006   // plus the call, then we'll break the callee's expectations for the layout
   7007   // of the stack.
   7008   //
   7009   // FIXME: Allow calls to functions which construct a stack frame, as long
   7010   // as they don't access arguments on the stack.
   7011   // FIXME: Figure out some way to analyze functions defined in other modules.
   7012   // We should be able to compute the memory usage based on the IR calling
   7013   // convention, even if we can't see the definition.
   7014   if (MI.isCall()) {
   7015     // Get the function associated with the call. Look at each operand and find
   7016     // the one that represents the callee and get its name.
   7017     const Function *Callee = nullptr;
   7018     for (const MachineOperand &MOP : MI.operands()) {
   7019       if (MOP.isGlobal()) {
   7020         Callee = dyn_cast<Function>(MOP.getGlobal());
   7021         break;
   7022       }
   7023     }
   7024 
   7025     // Never outline calls to mcount.  There isn't any rule that would require
   7026     // this, but the Linux kernel's "ftrace" feature depends on it.
   7027     if (Callee && Callee->getName() == "\01_mcount")
   7028       return outliner::InstrType::Illegal;
   7029 
   7030     // If we don't know anything about the callee, assume it depends on the
   7031     // stack layout of the caller. In that case, it's only legal to outline
   7032     // as a tail-call. Explicitly list the call instructions we know about so we
   7033     // don't get unexpected results with call pseudo-instructions.
   7034     auto UnknownCallOutlineType = outliner::InstrType::Illegal;
   7035     if (MI.getOpcode() == AArch64::BLR ||
   7036         MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
   7037       UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
   7038 
   7039     if (!Callee)
   7040       return UnknownCallOutlineType;
   7041 
   7042     // We have a function we have information about. Check it if it's something
   7043     // can safely outline.
   7044     MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
   7045 
   7046     // We don't know what's going on with the callee at all. Don't touch it.
   7047     if (!CalleeMF)
   7048       return UnknownCallOutlineType;
   7049 
   7050     // Check if we know anything about the callee saves on the function. If we
   7051     // don't, then don't touch it, since that implies that we haven't
   7052     // computed anything about its stack frame yet.
   7053     MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
   7054     if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
   7055         MFI.getNumObjects() > 0)
   7056       return UnknownCallOutlineType;
   7057 
   7058     // At this point, we can say that CalleeMF ought to not pass anything on the
   7059     // stack. Therefore, we can outline it.
   7060     return outliner::InstrType::Legal;
   7061   }
   7062 
   7063   // Don't outline positions.
   7064   if (MI.isPosition())
   7065     return outliner::InstrType::Illegal;
   7066 
   7067   // Don't touch the link register or W30.
   7068   if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
   7069       MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
   7070     return outliner::InstrType::Illegal;
   7071 
   7072   // Don't outline BTI instructions, because that will prevent the outlining
   7073   // site from being indirectly callable.
   7074   if (MI.getOpcode() == AArch64::HINT) {
   7075     int64_t Imm = MI.getOperand(0).getImm();
   7076     if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
   7077       return outliner::InstrType::Illegal;
   7078   }
   7079 
   7080   return outliner::InstrType::Legal;
   7081 }
   7082 
   7083 void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
   7084   for (MachineInstr &MI : MBB) {
   7085     const MachineOperand *Base;
   7086     unsigned Width;
   7087     int64_t Offset;
   7088     bool OffsetIsScalable;
   7089 
   7090     // Is this a load or store with an immediate offset with SP as the base?
   7091     if (!MI.mayLoadOrStore() ||
   7092         !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
   7093                                       &RI) ||
   7094         (Base->isReg() && Base->getReg() != AArch64::SP))
   7095       continue;
   7096 
   7097     // It is, so we have to fix it up.
   7098     TypeSize Scale(0U, false);
   7099     int64_t Dummy1, Dummy2;
   7100 
   7101     MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
   7102     assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
   7103     getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
   7104     assert(Scale != 0 && "Unexpected opcode!");
   7105     assert(!OffsetIsScalable && "Expected offset to be a byte offset");
   7106 
   7107     // We've pushed the return address to the stack, so add 16 to the offset.
   7108     // This is safe, since we already checked if it would overflow when we
   7109     // checked if this instruction was legal to outline.
   7110     int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedSize();
   7111     StackOffsetOperand.setImm(NewImm);
   7112   }
   7113 }
   7114 
   7115 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
   7116                                  bool ShouldSignReturnAddr,
   7117                                  bool ShouldSignReturnAddrWithAKey) {
   7118   if (ShouldSignReturnAddr) {
   7119     MachineBasicBlock::iterator MBBPAC = MBB.begin();
   7120     MachineBasicBlock::iterator MBBAUT = MBB.getFirstTerminator();
   7121     const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
   7122     const TargetInstrInfo *TII = Subtarget.getInstrInfo();
   7123     DebugLoc DL;
   7124 
   7125     if (MBBAUT != MBB.end())
   7126       DL = MBBAUT->getDebugLoc();
   7127 
   7128     // At the very beginning of the basic block we insert the following
   7129     // depending on the key type
   7130     //
   7131     // a_key:                   b_key:
   7132     //    PACIASP                   EMITBKEY
   7133     //    CFI_INSTRUCTION           PACIBSP
   7134     //                              CFI_INSTRUCTION
   7135     if (ShouldSignReturnAddrWithAKey) {
   7136       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIASP))
   7137           .setMIFlag(MachineInstr::FrameSetup);
   7138     } else {
   7139       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::EMITBKEY))
   7140           .setMIFlag(MachineInstr::FrameSetup);
   7141       BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::PACIBSP))
   7142           .setMIFlag(MachineInstr::FrameSetup);
   7143     }
   7144     unsigned CFIIndex =
   7145         MF.addFrameInst(MCCFIInstruction::createNegateRAState(nullptr));
   7146     BuildMI(MBB, MBBPAC, DebugLoc(), TII->get(AArch64::CFI_INSTRUCTION))
   7147         .addCFIIndex(CFIIndex)
   7148         .setMIFlags(MachineInstr::FrameSetup);
   7149 
   7150     // If v8.3a features are available we can replace a RET instruction by
   7151     // RETAA or RETAB and omit the AUT instructions
   7152     if (Subtarget.hasPAuth() && MBBAUT != MBB.end() &&
   7153         MBBAUT->getOpcode() == AArch64::RET) {
   7154       BuildMI(MBB, MBBAUT, DL,
   7155               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::RETAA
   7156                                                     : AArch64::RETAB))
   7157           .copyImplicitOps(*MBBAUT);
   7158       MBB.erase(MBBAUT);
   7159     } else {
   7160       BuildMI(MBB, MBBAUT, DL,
   7161               TII->get(ShouldSignReturnAddrWithAKey ? AArch64::AUTIASP
   7162                                                     : AArch64::AUTIBSP))
   7163           .setMIFlag(MachineInstr::FrameDestroy);
   7164     }
   7165   }
   7166 }
   7167 
   7168 void AArch64InstrInfo::buildOutlinedFrame(
   7169     MachineBasicBlock &MBB, MachineFunction &MF,
   7170     const outliner::OutlinedFunction &OF) const {
   7171 
   7172   AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
   7173 
   7174   if (OF.FrameConstructionID == MachineOutlinerTailCall)
   7175     FI->setOutliningStyle("Tail Call");
   7176   else if (OF.FrameConstructionID == MachineOutlinerThunk) {
   7177     // For thunk outlining, rewrite the last instruction from a call to a
   7178     // tail-call.
   7179     MachineInstr *Call = &*--MBB.instr_end();
   7180     unsigned TailOpcode;
   7181     if (Call->getOpcode() == AArch64::BL) {
   7182       TailOpcode = AArch64::TCRETURNdi;
   7183     } else {
   7184       assert(Call->getOpcode() == AArch64::BLR ||
   7185              Call->getOpcode() == AArch64::BLRNoIP);
   7186       TailOpcode = AArch64::TCRETURNriALL;
   7187     }
   7188     MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
   7189                            .add(Call->getOperand(0))
   7190                            .addImm(0);
   7191     MBB.insert(MBB.end(), TC);
   7192     Call->eraseFromParent();
   7193 
   7194     FI->setOutliningStyle("Thunk");
   7195   }
   7196 
   7197   bool IsLeafFunction = true;
   7198 
   7199   // Is there a call in the outlined range?
   7200   auto IsNonTailCall = [](const MachineInstr &MI) {
   7201     return MI.isCall() && !MI.isReturn();
   7202   };
   7203 
   7204   if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
   7205     // Fix up the instructions in the range, since we're going to modify the
   7206     // stack.
   7207 
   7208     // Bugzilla ID: 46767
   7209     // TODO: Check if fixing up twice is safe so we can outline these.
   7210     assert(OF.FrameConstructionID != MachineOutlinerDefault &&
   7211            "Can only fix up stack references once");
   7212     fixupPostOutline(MBB);
   7213 
   7214     IsLeafFunction = false;
   7215 
   7216     // LR has to be a live in so that we can save it.
   7217     if (!MBB.isLiveIn(AArch64::LR))
   7218       MBB.addLiveIn(AArch64::LR);
   7219 
   7220     MachineBasicBlock::iterator It = MBB.begin();
   7221     MachineBasicBlock::iterator Et = MBB.end();
   7222 
   7223     if (OF.FrameConstructionID == MachineOutlinerTailCall ||
   7224         OF.FrameConstructionID == MachineOutlinerThunk)
   7225       Et = std::prev(MBB.end());
   7226 
   7227     // Insert a save before the outlined region
   7228     MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
   7229                                 .addReg(AArch64::SP, RegState::Define)
   7230                                 .addReg(AArch64::LR)
   7231                                 .addReg(AArch64::SP)
   7232                                 .addImm(-16);
   7233     It = MBB.insert(It, STRXpre);
   7234 
   7235     const TargetSubtargetInfo &STI = MF.getSubtarget();
   7236     const MCRegisterInfo *MRI = STI.getRegisterInfo();
   7237     unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
   7238 
   7239     // Add a CFI saying the stack was moved 16 B down.
   7240     int64_t StackPosEntry =
   7241         MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
   7242     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
   7243         .addCFIIndex(StackPosEntry)
   7244         .setMIFlags(MachineInstr::FrameSetup);
   7245 
   7246     // Add a CFI saying that the LR that we want to find is now 16 B higher than
   7247     // before.
   7248     int64_t LRPosEntry =
   7249         MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
   7250     BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
   7251         .addCFIIndex(LRPosEntry)
   7252         .setMIFlags(MachineInstr::FrameSetup);
   7253 
   7254     // Insert a restore before the terminator for the function.
   7255     MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
   7256                                  .addReg(AArch64::SP, RegState::Define)
   7257                                  .addReg(AArch64::LR, RegState::Define)
   7258                                  .addReg(AArch64::SP)
   7259                                  .addImm(16);
   7260     Et = MBB.insert(Et, LDRXpost);
   7261   }
   7262 
   7263   // If a bunch of candidates reach this point they must agree on their return
   7264   // address signing. It is therefore enough to just consider the signing
   7265   // behaviour of one of them
   7266   const auto &MFI = *OF.Candidates.front().getMF()->getInfo<AArch64FunctionInfo>();
   7267   bool ShouldSignReturnAddr = MFI.shouldSignReturnAddress(!IsLeafFunction);
   7268 
   7269   // a_key is the default
   7270   bool ShouldSignReturnAddrWithAKey = !MFI.shouldSignWithBKey();
   7271 
   7272   // If this is a tail call outlined function, then there's already a return.
   7273   if (OF.FrameConstructionID == MachineOutlinerTailCall ||
   7274       OF.FrameConstructionID == MachineOutlinerThunk) {
   7275     signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
   7276                          ShouldSignReturnAddrWithAKey);
   7277     return;
   7278   }
   7279 
   7280   // It's not a tail call, so we have to insert the return ourselves.
   7281 
   7282   // LR has to be a live in so that we can return to it.
   7283   if (!MBB.isLiveIn(AArch64::LR))
   7284     MBB.addLiveIn(AArch64::LR);
   7285 
   7286   MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
   7287                           .addReg(AArch64::LR);
   7288   MBB.insert(MBB.end(), ret);
   7289 
   7290   signOutlinedFunction(MF, MBB, ShouldSignReturnAddr,
   7291                        ShouldSignReturnAddrWithAKey);
   7292 
   7293   FI->setOutliningStyle("Function");
   7294 
   7295   // Did we have to modify the stack by saving the link register?
   7296   if (OF.FrameConstructionID != MachineOutlinerDefault)
   7297     return;
   7298 
   7299   // We modified the stack.
   7300   // Walk over the basic block and fix up all the stack accesses.
   7301   fixupPostOutline(MBB);
   7302 }
   7303 
   7304 MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
   7305     Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
   7306     MachineFunction &MF, const outliner::Candidate &C) const {
   7307 
   7308   // Are we tail calling?
   7309   if (C.CallConstructionID == MachineOutlinerTailCall) {
   7310     // If yes, then we can just branch to the label.
   7311     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
   7312                             .addGlobalAddress(M.getNamedValue(MF.getName()))
   7313                             .addImm(0));
   7314     return It;
   7315   }
   7316 
   7317   // Are we saving the link register?
   7318   if (C.CallConstructionID == MachineOutlinerNoLRSave ||
   7319       C.CallConstructionID == MachineOutlinerThunk) {
   7320     // No, so just insert the call.
   7321     It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
   7322                             .addGlobalAddress(M.getNamedValue(MF.getName())));
   7323     return It;
   7324   }
   7325 
   7326   // We want to return the spot where we inserted the call.
   7327   MachineBasicBlock::iterator CallPt;
   7328 
   7329   // Instructions for saving and restoring LR around the call instruction we're
   7330   // going to insert.
   7331   MachineInstr *Save;
   7332   MachineInstr *Restore;
   7333   // Can we save to a register?
   7334   if (C.CallConstructionID == MachineOutlinerRegSave) {
   7335     // FIXME: This logic should be sunk into a target-specific interface so that
   7336     // we don't have to recompute the register.
   7337     unsigned Reg = findRegisterToSaveLRTo(C);
   7338     assert(Reg != 0 && "No callee-saved register available?");
   7339 
   7340     // Save and restore LR from that register.
   7341     Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
   7342                .addReg(AArch64::XZR)
   7343                .addReg(AArch64::LR)
   7344                .addImm(0);
   7345     Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
   7346                 .addReg(AArch64::XZR)
   7347                 .addReg(Reg)
   7348                 .addImm(0);
   7349   } else {
   7350     // We have the default case. Save and restore from SP.
   7351     Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
   7352                .addReg(AArch64::SP, RegState::Define)
   7353                .addReg(AArch64::LR)
   7354                .addReg(AArch64::SP)
   7355                .addImm(-16);
   7356     Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
   7357                   .addReg(AArch64::SP, RegState::Define)
   7358                   .addReg(AArch64::LR, RegState::Define)
   7359                   .addReg(AArch64::SP)
   7360                   .addImm(16);
   7361   }
   7362 
   7363   It = MBB.insert(It, Save);
   7364   It++;
   7365 
   7366   // Insert the call.
   7367   It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
   7368                           .addGlobalAddress(M.getNamedValue(MF.getName())));
   7369   CallPt = It;
   7370   It++;
   7371 
   7372   It = MBB.insert(It, Restore);
   7373   return CallPt;
   7374 }
   7375 
   7376 bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
   7377   MachineFunction &MF) const {
   7378   return MF.getFunction().hasMinSize();
   7379 }
   7380 
   7381 Optional<DestSourcePair>
   7382 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
   7383 
   7384   // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
   7385   // and zero immediate operands used as an alias for mov instruction.
   7386   if (MI.getOpcode() == AArch64::ORRWrs &&
   7387       MI.getOperand(1).getReg() == AArch64::WZR &&
   7388       MI.getOperand(3).getImm() == 0x0) {
   7389     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
   7390   }
   7391 
   7392   if (MI.getOpcode() == AArch64::ORRXrs &&
   7393       MI.getOperand(1).getReg() == AArch64::XZR &&
   7394       MI.getOperand(3).getImm() == 0x0) {
   7395     return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
   7396   }
   7397 
   7398   return None;
   7399 }
   7400 
   7401 Optional<RegImmPair> AArch64InstrInfo::isAddImmediate(const MachineInstr &MI,
   7402                                                       Register Reg) const {
   7403   int Sign = 1;
   7404   int64_t Offset = 0;
   7405 
   7406   // TODO: Handle cases where Reg is a super- or sub-register of the
   7407   // destination register.
   7408   const MachineOperand &Op0 = MI.getOperand(0);
   7409   if (!Op0.isReg() || Reg != Op0.getReg())
   7410     return None;
   7411 
   7412   switch (MI.getOpcode()) {
   7413   default:
   7414     return None;
   7415   case AArch64::SUBWri:
   7416   case AArch64::SUBXri:
   7417   case AArch64::SUBSWri:
   7418   case AArch64::SUBSXri:
   7419     Sign *= -1;
   7420     LLVM_FALLTHROUGH;
   7421   case AArch64::ADDSWri:
   7422   case AArch64::ADDSXri:
   7423   case AArch64::ADDWri:
   7424   case AArch64::ADDXri: {
   7425     // TODO: Third operand can be global address (usually some string).
   7426     if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
   7427         !MI.getOperand(2).isImm())
   7428       return None;
   7429     int Shift = MI.getOperand(3).getImm();
   7430     assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
   7431     Offset = Sign * (MI.getOperand(2).getImm() << Shift);
   7432   }
   7433   }
   7434   return RegImmPair{MI.getOperand(1).getReg(), Offset};
   7435 }
   7436 
   7437 /// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
   7438 /// the destination register then, if possible, describe the value in terms of
   7439 /// the source register.
   7440 static Optional<ParamLoadedValue>
   7441 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
   7442                        const TargetInstrInfo *TII,
   7443                        const TargetRegisterInfo *TRI) {
   7444   auto DestSrc = TII->isCopyInstr(MI);
   7445   if (!DestSrc)
   7446     return None;
   7447 
   7448   Register DestReg = DestSrc->Destination->getReg();
   7449   Register SrcReg = DestSrc->Source->getReg();
   7450 
   7451   auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
   7452 
   7453   // If the described register is the destination, just return the source.
   7454   if (DestReg == DescribedReg)
   7455     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
   7456 
   7457   // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
   7458   if (MI.getOpcode() == AArch64::ORRWrs &&
   7459       TRI->isSuperRegister(DestReg, DescribedReg))
   7460     return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
   7461 
   7462   // We may need to describe the lower part of a ORRXrs move.
   7463   if (MI.getOpcode() == AArch64::ORRXrs &&
   7464       TRI->isSubRegister(DestReg, DescribedReg)) {
   7465     Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
   7466     return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
   7467   }
   7468 
   7469   assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
   7470          "Unhandled ORR[XW]rs copy case");
   7471 
   7472   return None;
   7473 }
   7474 
   7475 Optional<ParamLoadedValue>
   7476 AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
   7477                                       Register Reg) const {
   7478   const MachineFunction *MF = MI.getMF();
   7479   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
   7480   switch (MI.getOpcode()) {
   7481   case AArch64::MOVZWi:
   7482   case AArch64::MOVZXi: {
   7483     // MOVZWi may be used for producing zero-extended 32-bit immediates in
   7484     // 64-bit parameters, so we need to consider super-registers.
   7485     if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
   7486       return None;
   7487 
   7488     if (!MI.getOperand(1).isImm())
   7489       return None;
   7490     int64_t Immediate = MI.getOperand(1).getImm();
   7491     int Shift = MI.getOperand(2).getImm();
   7492     return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
   7493                             nullptr);
   7494   }
   7495   case AArch64::ORRWrs:
   7496   case AArch64::ORRXrs:
   7497     return describeORRLoadedValue(MI, Reg, this, TRI);
   7498   }
   7499 
   7500   return TargetInstrInfo::describeLoadedValue(MI, Reg);
   7501 }
   7502 
   7503 bool AArch64InstrInfo::isExtendLikelyToBeFolded(
   7504     MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
   7505   assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
   7506          ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
   7507          ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
   7508 
   7509   // Anyexts are nops.
   7510   if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
   7511     return true;
   7512 
   7513   Register DefReg = ExtMI.getOperand(0).getReg();
   7514   if (!MRI.hasOneNonDBGUse(DefReg))
   7515     return false;
   7516 
   7517   // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
   7518   // addressing mode.
   7519   auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
   7520   return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
   7521 }
   7522 
   7523 uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
   7524   return get(Opc).TSFlags & AArch64::ElementSizeMask;
   7525 }
   7526 
   7527 bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
   7528   return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
   7529 }
   7530 
   7531 bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
   7532   return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
   7533 }
   7534 
   7535 unsigned int
   7536 AArch64InstrInfo::getTailDuplicateSize(CodeGenOpt::Level OptLevel) const {
   7537   return OptLevel >= CodeGenOpt::Aggressive ? 6 : 2;
   7538 }
   7539 
   7540 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
   7541   if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
   7542     return AArch64::BLRNoIP;
   7543   else
   7544     return AArch64::BLR;
   7545 }
   7546 
   7547 #define GET_INSTRINFO_HELPERS
   7548 #define GET_INSTRMAP_INFO
   7549 #include "AArch64GenInstrInfo.inc"
   7550