Home | History | Annotate | Line # | Download | only in ARM
      1 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file contains a pass that expands pseudo instructions into target
     10 // instructions to allow proper scheduling, if-conversion, and other late
     11 // optimizations. This pass should be run after register allocation but before
     12 // the post-regalloc scheduling pass.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "ARM.h"
     17 #include "ARMBaseInstrInfo.h"
     18 #include "ARMBaseRegisterInfo.h"
     19 #include "ARMConstantPoolValue.h"
     20 #include "ARMMachineFunctionInfo.h"
     21 #include "ARMSubtarget.h"
     22 #include "MCTargetDesc/ARMAddressingModes.h"
     23 #include "llvm/CodeGen/LivePhysRegs.h"
     24 #include "llvm/CodeGen/MachineFrameInfo.h"
     25 #include "llvm/CodeGen/MachineFunctionPass.h"
     26 #include "llvm/Support/Debug.h"
     27 
     28 using namespace llvm;
     29 
     30 #define DEBUG_TYPE "arm-pseudo"
     31 
     32 static cl::opt<bool>
     33 VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
     34                 cl::desc("Verify machine code after expanding ARM pseudos"));
     35 
     36 #define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass"
     37 
     38 namespace {
     39   class ARMExpandPseudo : public MachineFunctionPass {
     40   public:
     41     static char ID;
     42     ARMExpandPseudo() : MachineFunctionPass(ID) {}
     43 
     44     const ARMBaseInstrInfo *TII;
     45     const TargetRegisterInfo *TRI;
     46     const ARMSubtarget *STI;
     47     ARMFunctionInfo *AFI;
     48 
     49     bool runOnMachineFunction(MachineFunction &Fn) override;
     50 
     51     MachineFunctionProperties getRequiredProperties() const override {
     52       return MachineFunctionProperties().set(
     53           MachineFunctionProperties::Property::NoVRegs);
     54     }
     55 
     56     StringRef getPassName() const override {
     57       return ARM_EXPAND_PSEUDO_NAME;
     58     }
     59 
     60   private:
     61     void TransferImpOps(MachineInstr &OldMI,
     62                         MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
     63     bool ExpandMI(MachineBasicBlock &MBB,
     64                   MachineBasicBlock::iterator MBBI,
     65                   MachineBasicBlock::iterator &NextMBBI);
     66     bool ExpandMBB(MachineBasicBlock &MBB);
     67     void ExpandVLD(MachineBasicBlock::iterator &MBBI);
     68     void ExpandVST(MachineBasicBlock::iterator &MBBI);
     69     void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
     70     void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
     71                     unsigned Opc, bool IsExt);
     72     void ExpandMOV32BitImm(MachineBasicBlock &MBB,
     73                            MachineBasicBlock::iterator &MBBI);
     74     void CMSEClearGPRegs(MachineBasicBlock &MBB,
     75                          MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
     76                          const SmallVectorImpl<unsigned> &ClearRegs,
     77                          unsigned ClobberReg);
     78     MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
     79                                        MachineBasicBlock::iterator MBBI);
     80     MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
     81                                          MachineBasicBlock::iterator MBBI,
     82                                          const BitVector &ClearRegs);
     83     MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
     84                                           MachineBasicBlock::iterator MBBI,
     85                                           const BitVector &ClearRegs);
     86     void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
     87                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
     88                              const LivePhysRegs &LiveRegs,
     89                              SmallVectorImpl<unsigned> &AvailableRegs);
     90     void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
     91                                MachineBasicBlock::iterator MBBI, DebugLoc &DL,
     92                                const LivePhysRegs &LiveRegs,
     93                                SmallVectorImpl<unsigned> &ScratchRegs);
     94     void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
     95                                 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
     96                                 const LivePhysRegs &LiveRegs);
     97     void CMSERestoreFPRegs(MachineBasicBlock &MBB,
     98                            MachineBasicBlock::iterator MBBI, DebugLoc &DL,
     99                            SmallVectorImpl<unsigned> &AvailableRegs);
    100     void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
    101                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    102                              SmallVectorImpl<unsigned> &AvailableRegs);
    103     void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
    104                               MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    105                               SmallVectorImpl<unsigned> &AvailableRegs);
    106     bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
    107                         MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
    108                         unsigned StrexOp, unsigned UxtOp,
    109                         MachineBasicBlock::iterator &NextMBBI);
    110 
    111     bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
    112                            MachineBasicBlock::iterator MBBI,
    113                            MachineBasicBlock::iterator &NextMBBI);
    114   };
    115   char ARMExpandPseudo::ID = 0;
    116 }
    117 
// Pass registration: DEBUG_TYPE is the pass argument and
// ARM_EXPAND_PSEUDO_NAME the descriptive name.
INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME,
                /*cfg=*/false, /*analysis=*/false)
    120 
    121 /// TransferImpOps - Transfer implicit operands on the pseudo instruction to
    122 /// the instructions created from the expansion.
    123 void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
    124                                      MachineInstrBuilder &UseMI,
    125                                      MachineInstrBuilder &DefMI) {
    126   const MCInstrDesc &Desc = OldMI.getDesc();
    127   for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
    128        i != e; ++i) {
    129     const MachineOperand &MO = OldMI.getOperand(i);
    130     assert(MO.isReg() && MO.getReg());
    131     if (MO.isUse())
    132       UseMI.add(MO);
    133     else
    134       DefMI.add(MO);
    135   }
    136 }
    137 
    138 namespace {
    139   // Constants for register spacing in NEON load/store instructions.
    140   // For quad-register load-lane and store-lane pseudo instructors, the
    141   // spacing is initially assumed to be EvenDblSpc, and that is changed to
    142   // OddDblSpc depending on the lane number operand.
    143   enum NEONRegSpacing {
    144     SingleSpc,
    145     SingleLowSpc ,  // Single spacing, low registers, three and four vectors.
    146     SingleHighQSpc, // Single spacing, high registers, four vectors.
    147     SingleHighTSpc, // Single spacing, high registers, three vectors.
    148     EvenDblSpc,
    149     OddDblSpc
    150   };
    151 
    152   // Entries for NEON load/store information table.  The table is sorted by
    153   // PseudoOpc for fast binary-search lookups.
    154   struct NEONLdStTableEntry {
    155     uint16_t PseudoOpc;
    156     uint16_t RealOpc;
    157     bool IsLoad;
    158     bool isUpdating;
    159     bool hasWritebackOperand;
    160     uint8_t RegSpacing; // One of type NEONRegSpacing
    161     uint8_t NumRegs; // D registers loaded or stored
    162     uint8_t RegElts; // elements per D register; used for lane ops
    163     // FIXME: Temporary flag to denote whether the real instruction takes
    164     // a single register (like the encoding) or all of the registers in
    165     // the list (like the asm syntax and the isel DAG). When all definitions
    166     // are converted to take only the single encoded register, this will
    167     // go away.
    168     bool copyAllListRegs;
    169 
    170     // Comparison methods for binary search of the table.
    171     bool operator<(const NEONLdStTableEntry &TE) const {
    172       return PseudoOpc < TE.PseudoOpc;
    173     }
    174     friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
    175       return TE.PseudoOpc < PseudoOpc;
    176     }
    177     friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
    178                                                 const NEONLdStTableEntry &TE) {
    179       return PseudoOpc < TE.PseudoOpc;
    180     }
    181   };
    182 }
    183 
    184 static const NEONLdStTableEntry NEONLdStTable[] = {
    185 { ARM::VLD1LNq16Pseudo,     ARM::VLD1LNd16,     true, false, false, EvenDblSpc, 1, 4 ,true},
    186 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true,  EvenDblSpc, 1, 4 ,true},
    187 { ARM::VLD1LNq32Pseudo,     ARM::VLD1LNd32,     true, false, false, EvenDblSpc, 1, 2 ,true},
    188 { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true,  EvenDblSpc, 1, 2 ,true},
    189 { ARM::VLD1LNq8Pseudo,      ARM::VLD1LNd8,      true, false, false, EvenDblSpc, 1, 8 ,true},
    190 { ARM::VLD1LNq8Pseudo_UPD,  ARM::VLD1LNd8_UPD, true, true, true,  EvenDblSpc, 1, 8 ,true},
    191 
    192 { ARM::VLD1d16QPseudo,      ARM::VLD1d16Q,     true,  false, false, SingleSpc,  4, 4 ,false},
    193 { ARM::VLD1d16TPseudo,      ARM::VLD1d16T,     true,  false, false, SingleSpc,  3, 4 ,false},
    194 { ARM::VLD1d32QPseudo,      ARM::VLD1d32Q,     true,  false, false, SingleSpc,  4, 2 ,false},
    195 { ARM::VLD1d32TPseudo,      ARM::VLD1d32T,     true,  false, false, SingleSpc,  3, 2 ,false},
    196 { ARM::VLD1d64QPseudo,      ARM::VLD1d64Q,     true,  false, false, SingleSpc,  4, 1 ,false},
    197 { ARM::VLD1d64QPseudoWB_fixed,  ARM::VLD1d64Qwb_fixed,   true,  true, false, SingleSpc,  4, 1 ,false},
    198 { ARM::VLD1d64QPseudoWB_register,  ARM::VLD1d64Qwb_register,   true,  true, true, SingleSpc,  4, 1 ,false},
    199 { ARM::VLD1d64TPseudo,      ARM::VLD1d64T,     true,  false, false, SingleSpc,  3, 1 ,false},
    200 { ARM::VLD1d64TPseudoWB_fixed,  ARM::VLD1d64Twb_fixed,   true,  true, false, SingleSpc,  3, 1 ,false},
    201 { ARM::VLD1d64TPseudoWB_register,  ARM::VLD1d64Twb_register, true, true, true,  SingleSpc,  3, 1 ,false},
    202 { ARM::VLD1d8QPseudo,       ARM::VLD1d8Q,      true,  false, false, SingleSpc,  4, 8 ,false},
    203 { ARM::VLD1d8TPseudo,       ARM::VLD1d8T,      true,  false, false, SingleSpc,  3, 8 ,false},
    204 { ARM::VLD1q16HighQPseudo,  ARM::VLD1d16Q,     true,  false, false, SingleHighQSpc,  4, 4 ,false},
    205 { ARM::VLD1q16HighTPseudo,  ARM::VLD1d16T,     true,  false, false, SingleHighTSpc,  3, 4 ,false},
    206 { ARM::VLD1q16LowQPseudo_UPD,  ARM::VLD1d16Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 4 ,false},
    207 { ARM::VLD1q16LowTPseudo_UPD,  ARM::VLD1d16Twb_fixed,   true,  true, true, SingleLowSpc,  3, 4 ,false},
    208 { ARM::VLD1q32HighQPseudo,  ARM::VLD1d32Q,     true,  false, false, SingleHighQSpc,  4, 2 ,false},
    209 { ARM::VLD1q32HighTPseudo,  ARM::VLD1d32T,     true,  false, false, SingleHighTSpc,  3, 2 ,false},
    210 { ARM::VLD1q32LowQPseudo_UPD,  ARM::VLD1d32Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 2 ,false},
    211 { ARM::VLD1q32LowTPseudo_UPD,  ARM::VLD1d32Twb_fixed,   true,  true, true, SingleLowSpc,  3, 2 ,false},
    212 { ARM::VLD1q64HighQPseudo,  ARM::VLD1d64Q,     true,  false, false, SingleHighQSpc,  4, 1 ,false},
    213 { ARM::VLD1q64HighTPseudo,  ARM::VLD1d64T,     true,  false, false, SingleHighTSpc,  3, 1 ,false},
    214 { ARM::VLD1q64LowQPseudo_UPD,  ARM::VLD1d64Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 1 ,false},
    215 { ARM::VLD1q64LowTPseudo_UPD,  ARM::VLD1d64Twb_fixed,   true,  true, true, SingleLowSpc,  3, 1 ,false},
    216 { ARM::VLD1q8HighQPseudo,   ARM::VLD1d8Q,     true,  false, false, SingleHighQSpc,  4, 8 ,false},
    217 { ARM::VLD1q8HighTPseudo,   ARM::VLD1d8T,     true,  false, false, SingleHighTSpc,  3, 8 ,false},
    218 { ARM::VLD1q8LowQPseudo_UPD,  ARM::VLD1d8Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 8 ,false},
    219 { ARM::VLD1q8LowTPseudo_UPD,  ARM::VLD1d8Twb_fixed,   true,  true, true, SingleLowSpc,  3, 8 ,false},
    220 
    221 { ARM::VLD2DUPq16EvenPseudo,  ARM::VLD2DUPd16x2,  true, false, false, EvenDblSpc, 2, 4 ,false},
    222 { ARM::VLD2DUPq16OddPseudo,   ARM::VLD2DUPd16x2,  true, false, false, OddDblSpc,  2, 4 ,false},
    223 { ARM::VLD2DUPq32EvenPseudo,  ARM::VLD2DUPd32x2,  true, false, false, EvenDblSpc, 2, 2 ,false},
    224 { ARM::VLD2DUPq32OddPseudo,   ARM::VLD2DUPd32x2,  true, false, false, OddDblSpc,  2, 2 ,false},
    225 { ARM::VLD2DUPq8EvenPseudo,   ARM::VLD2DUPd8x2,   true, false, false, EvenDblSpc, 2, 8 ,false},
    226 { ARM::VLD2DUPq8OddPseudo,    ARM::VLD2DUPd8x2,   true, false, false, OddDblSpc,  2, 8 ,false},
    227 
    228 { ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, false, SingleSpc,  2, 4 ,true},
    229 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true,  SingleSpc,  2, 4 ,true},
    230 { ARM::VLD2LNd32Pseudo,     ARM::VLD2LNd32,     true, false, false, SingleSpc,  2, 2 ,true},
    231 { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true,  SingleSpc,  2, 2 ,true},
    232 { ARM::VLD2LNd8Pseudo,      ARM::VLD2LNd8,      true, false, false, SingleSpc,  2, 8 ,true},
    233 { ARM::VLD2LNd8Pseudo_UPD,  ARM::VLD2LNd8_UPD, true, true, true,  SingleSpc,  2, 8 ,true},
    234 { ARM::VLD2LNq16Pseudo,     ARM::VLD2LNq16,     true, false, false, EvenDblSpc, 2, 4 ,true},
    235 { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true,  EvenDblSpc, 2, 4 ,true},
    236 { ARM::VLD2LNq32Pseudo,     ARM::VLD2LNq32,     true, false, false, EvenDblSpc, 2, 2 ,true},
    237 { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true,  EvenDblSpc, 2, 2 ,true},
    238 
    239 { ARM::VLD2q16Pseudo,       ARM::VLD2q16,      true,  false, false, SingleSpc,  4, 4 ,false},
    240 { ARM::VLD2q16PseudoWB_fixed,   ARM::VLD2q16wb_fixed, true, true, false,  SingleSpc,  4, 4 ,false},
    241 { ARM::VLD2q16PseudoWB_register,   ARM::VLD2q16wb_register, true, true, true,  SingleSpc,  4, 4 ,false},
    242 { ARM::VLD2q32Pseudo,       ARM::VLD2q32,      true,  false, false, SingleSpc,  4, 2 ,false},
    243 { ARM::VLD2q32PseudoWB_fixed,   ARM::VLD2q32wb_fixed, true, true, false,  SingleSpc,  4, 2 ,false},
    244 { ARM::VLD2q32PseudoWB_register,   ARM::VLD2q32wb_register, true, true, true,  SingleSpc,  4, 2 ,false},
    245 { ARM::VLD2q8Pseudo,        ARM::VLD2q8,       true,  false, false, SingleSpc,  4, 8 ,false},
    246 { ARM::VLD2q8PseudoWB_fixed,    ARM::VLD2q8wb_fixed, true, true, false,  SingleSpc,  4, 8 ,false},
    247 { ARM::VLD2q8PseudoWB_register,    ARM::VLD2q8wb_register, true, true, true,  SingleSpc,  4, 8 ,false},
    248 
    249 { ARM::VLD3DUPd16Pseudo,     ARM::VLD3DUPd16,     true, false, false, SingleSpc, 3, 4,true},
    250 { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true,  SingleSpc, 3, 4,true},
    251 { ARM::VLD3DUPd32Pseudo,     ARM::VLD3DUPd32,     true, false, false, SingleSpc, 3, 2,true},
    252 { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true,  SingleSpc, 3, 2,true},
    253 { ARM::VLD3DUPd8Pseudo,      ARM::VLD3DUPd8,      true, false, false, SingleSpc, 3, 8,true},
    254 { ARM::VLD3DUPd8Pseudo_UPD,  ARM::VLD3DUPd8_UPD, true, true, true,  SingleSpc, 3, 8,true},
    255 { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16,     true, false, false, EvenDblSpc, 3, 4 ,true},
    256 { ARM::VLD3DUPq16OddPseudo,  ARM::VLD3DUPq16,     true, false, false, OddDblSpc,  3, 4 ,true},
    257 { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32,     true, false, false, EvenDblSpc, 3, 2 ,true},
    258 { ARM::VLD3DUPq32OddPseudo,  ARM::VLD3DUPq32,     true, false, false, OddDblSpc,  3, 2 ,true},
    259 { ARM::VLD3DUPq8EvenPseudo,  ARM::VLD3DUPq8,      true, false, false, EvenDblSpc, 3, 8 ,true},
    260 { ARM::VLD3DUPq8OddPseudo,   ARM::VLD3DUPq8,      true, false, false, OddDblSpc,  3, 8 ,true},
    261 
    262 { ARM::VLD3LNd16Pseudo,     ARM::VLD3LNd16,     true, false, false, SingleSpc,  3, 4 ,true},
    263 { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true,  SingleSpc,  3, 4 ,true},
    264 { ARM::VLD3LNd32Pseudo,     ARM::VLD3LNd32,     true, false, false, SingleSpc,  3, 2 ,true},
    265 { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true,  SingleSpc,  3, 2 ,true},
    266 { ARM::VLD3LNd8Pseudo,      ARM::VLD3LNd8,      true, false, false, SingleSpc,  3, 8 ,true},
    267 { ARM::VLD3LNd8Pseudo_UPD,  ARM::VLD3LNd8_UPD, true, true, true,  SingleSpc,  3, 8 ,true},
    268 { ARM::VLD3LNq16Pseudo,     ARM::VLD3LNq16,     true, false, false, EvenDblSpc, 3, 4 ,true},
    269 { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true,  EvenDblSpc, 3, 4 ,true},
    270 { ARM::VLD3LNq32Pseudo,     ARM::VLD3LNq32,     true, false, false, EvenDblSpc, 3, 2 ,true},
    271 { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true,  EvenDblSpc, 3, 2 ,true},
    272 
    273 { ARM::VLD3d16Pseudo,       ARM::VLD3d16,      true,  false, false, SingleSpc,  3, 4 ,true},
    274 { ARM::VLD3d16Pseudo_UPD,   ARM::VLD3d16_UPD, true, true, true,  SingleSpc,  3, 4 ,true},
    275 { ARM::VLD3d32Pseudo,       ARM::VLD3d32,      true,  false, false, SingleSpc,  3, 2 ,true},
    276 { ARM::VLD3d32Pseudo_UPD,   ARM::VLD3d32_UPD, true, true, true,  SingleSpc,  3, 2 ,true},
    277 { ARM::VLD3d8Pseudo,        ARM::VLD3d8,       true,  false, false, SingleSpc,  3, 8 ,true},
    278 { ARM::VLD3d8Pseudo_UPD,    ARM::VLD3d8_UPD, true, true, true,  SingleSpc,  3, 8 ,true},
    279 
    280 { ARM::VLD3q16Pseudo_UPD,    ARM::VLD3q16_UPD, true, true, true,  EvenDblSpc, 3, 4 ,true},
    281 { ARM::VLD3q16oddPseudo,     ARM::VLD3q16,     true,  false, false, OddDblSpc,  3, 4 ,true},
    282 { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true,  OddDblSpc,  3, 4 ,true},
    283 { ARM::VLD3q32Pseudo_UPD,    ARM::VLD3q32_UPD, true, true, true,  EvenDblSpc, 3, 2 ,true},
    284 { ARM::VLD3q32oddPseudo,     ARM::VLD3q32,     true,  false, false, OddDblSpc,  3, 2 ,true},
    285 { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true,  OddDblSpc,  3, 2 ,true},
    286 { ARM::VLD3q8Pseudo_UPD,     ARM::VLD3q8_UPD, true, true, true,  EvenDblSpc, 3, 8 ,true},
    287 { ARM::VLD3q8oddPseudo,      ARM::VLD3q8,      true,  false, false, OddDblSpc,  3, 8 ,true},
    288 { ARM::VLD3q8oddPseudo_UPD,  ARM::VLD3q8_UPD, true, true, true,  OddDblSpc,  3, 8 ,true},
    289 
    290 { ARM::VLD4DUPd16Pseudo,     ARM::VLD4DUPd16,     true, false, false, SingleSpc, 4, 4,true},
    291 { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true,  SingleSpc, 4, 4,true},
    292 { ARM::VLD4DUPd32Pseudo,     ARM::VLD4DUPd32,     true, false, false, SingleSpc, 4, 2,true},
    293 { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true,  SingleSpc, 4, 2,true},
    294 { ARM::VLD4DUPd8Pseudo,      ARM::VLD4DUPd8,      true, false, false, SingleSpc, 4, 8,true},
    295 { ARM::VLD4DUPd8Pseudo_UPD,  ARM::VLD4DUPd8_UPD, true, true, true,  SingleSpc, 4, 8,true},
    296 { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16,     true, false, false, EvenDblSpc, 4, 4 ,true},
    297 { ARM::VLD4DUPq16OddPseudo,  ARM::VLD4DUPq16,     true, false, false, OddDblSpc,  4, 4 ,true},
    298 { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32,     true, false, false, EvenDblSpc, 4, 2 ,true},
    299 { ARM::VLD4DUPq32OddPseudo,  ARM::VLD4DUPq32,     true, false, false, OddDblSpc,  4, 2 ,true},
    300 { ARM::VLD4DUPq8EvenPseudo,  ARM::VLD4DUPq8,      true, false, false, EvenDblSpc, 4, 8 ,true},
    301 { ARM::VLD4DUPq8OddPseudo,   ARM::VLD4DUPq8,      true, false, false, OddDblSpc,  4, 8 ,true},
    302 
    303 { ARM::VLD4LNd16Pseudo,     ARM::VLD4LNd16,     true, false, false, SingleSpc,  4, 4 ,true},
    304 { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true,  SingleSpc,  4, 4 ,true},
    305 { ARM::VLD4LNd32Pseudo,     ARM::VLD4LNd32,     true, false, false, SingleSpc,  4, 2 ,true},
    306 { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true,  SingleSpc,  4, 2 ,true},
    307 { ARM::VLD4LNd8Pseudo,      ARM::VLD4LNd8,      true, false, false, SingleSpc,  4, 8 ,true},
    308 { ARM::VLD4LNd8Pseudo_UPD,  ARM::VLD4LNd8_UPD, true, true, true,  SingleSpc,  4, 8 ,true},
    309 { ARM::VLD4LNq16Pseudo,     ARM::VLD4LNq16,     true, false, false, EvenDblSpc, 4, 4 ,true},
    310 { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true,  EvenDblSpc, 4, 4 ,true},
    311 { ARM::VLD4LNq32Pseudo,     ARM::VLD4LNq32,     true, false, false, EvenDblSpc, 4, 2 ,true},
    312 { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true,  EvenDblSpc, 4, 2 ,true},
    313 
    314 { ARM::VLD4d16Pseudo,       ARM::VLD4d16,      true,  false, false, SingleSpc,  4, 4 ,true},
    315 { ARM::VLD4d16Pseudo_UPD,   ARM::VLD4d16_UPD, true, true, true,  SingleSpc,  4, 4 ,true},
    316 { ARM::VLD4d32Pseudo,       ARM::VLD4d32,      true,  false, false, SingleSpc,  4, 2 ,true},
    317 { ARM::VLD4d32Pseudo_UPD,   ARM::VLD4d32_UPD, true, true, true,  SingleSpc,  4, 2 ,true},
    318 { ARM::VLD4d8Pseudo,        ARM::VLD4d8,       true,  false, false, SingleSpc,  4, 8 ,true},
    319 { ARM::VLD4d8Pseudo_UPD,    ARM::VLD4d8_UPD, true, true, true,  SingleSpc,  4, 8 ,true},
    320 
    321 { ARM::VLD4q16Pseudo_UPD,    ARM::VLD4q16_UPD, true, true, true,  EvenDblSpc, 4, 4 ,true},
    322 { ARM::VLD4q16oddPseudo,     ARM::VLD4q16,     true,  false, false, OddDblSpc,  4, 4 ,true},
    323 { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true,  OddDblSpc,  4, 4 ,true},
    324 { ARM::VLD4q32Pseudo_UPD,    ARM::VLD4q32_UPD, true, true, true,  EvenDblSpc, 4, 2 ,true},
    325 { ARM::VLD4q32oddPseudo,     ARM::VLD4q32,     true,  false, false, OddDblSpc,  4, 2 ,true},
    326 { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true,  OddDblSpc,  4, 2 ,true},
    327 { ARM::VLD4q8Pseudo_UPD,     ARM::VLD4q8_UPD, true, true, true,  EvenDblSpc, 4, 8 ,true},
    328 { ARM::VLD4q8oddPseudo,      ARM::VLD4q8,      true,  false, false, OddDblSpc,  4, 8 ,true},
    329 { ARM::VLD4q8oddPseudo_UPD,  ARM::VLD4q8_UPD, true, true, true,  OddDblSpc,  4, 8 ,true},
    330 
    331 { ARM::VST1LNq16Pseudo,     ARM::VST1LNd16,    false, false, false, EvenDblSpc, 1, 4 ,true},
    332 { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true,  EvenDblSpc, 1, 4 ,true},
    333 { ARM::VST1LNq32Pseudo,     ARM::VST1LNd32,    false, false, false, EvenDblSpc, 1, 2 ,true},
    334 { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true,  EvenDblSpc, 1, 2 ,true},
    335 { ARM::VST1LNq8Pseudo,      ARM::VST1LNd8,     false, false, false, EvenDblSpc, 1, 8 ,true},
    336 { ARM::VST1LNq8Pseudo_UPD,  ARM::VST1LNd8_UPD, false, true, true,  EvenDblSpc, 1, 8 ,true},
    337 
    338 { ARM::VST1d16QPseudo,      ARM::VST1d16Q,     false, false, false, SingleSpc,  4, 4 ,false},
    339 { ARM::VST1d16QPseudoWB_fixed,  ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc,  4, 4 ,false},
    340 { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc,  4, 4 ,false},
    341 { ARM::VST1d16TPseudo,      ARM::VST1d16T,     false, false, false, SingleSpc,  3, 4 ,false},
    342 { ARM::VST1d16TPseudoWB_fixed,  ARM::VST1d16Twb_fixed, false, true, false, SingleSpc,  3, 4 ,false},
    343 { ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc,  3, 4 ,false},
    344 
    345 { ARM::VST1d32QPseudo,      ARM::VST1d32Q,     false, false, false, SingleSpc,  4, 2 ,false},
    346 { ARM::VST1d32QPseudoWB_fixed,  ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc,  4, 2 ,false},
    347 { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc,  4, 2 ,false},
    348 { ARM::VST1d32TPseudo,      ARM::VST1d32T,     false, false, false, SingleSpc,  3, 2 ,false},
    349 { ARM::VST1d32TPseudoWB_fixed,  ARM::VST1d32Twb_fixed, false, true, false, SingleSpc,  3, 2 ,false},
    350 { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc,  3, 2 ,false},
    351 
    352 { ARM::VST1d64QPseudo,      ARM::VST1d64Q,     false, false, false, SingleSpc,  4, 1 ,false},
    353 { ARM::VST1d64QPseudoWB_fixed,  ARM::VST1d64Qwb_fixed, false, true, false,  SingleSpc,  4, 1 ,false},
    354 { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true,  SingleSpc,  4, 1 ,false},
    355 { ARM::VST1d64TPseudo,      ARM::VST1d64T,     false, false, false, SingleSpc,  3, 1 ,false},
    356 { ARM::VST1d64TPseudoWB_fixed,  ARM::VST1d64Twb_fixed, false, true, false,  SingleSpc,  3, 1 ,false},
    357 { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true,  SingleSpc,  3, 1 ,false},
    358 
    359 { ARM::VST1d8QPseudo,       ARM::VST1d8Q,      false, false, false, SingleSpc,  4, 8 ,false},
    360 { ARM::VST1d8QPseudoWB_fixed,   ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc,  4, 8 ,false},
    361 { ARM::VST1d8QPseudoWB_register,  ARM::VST1d8Qwb_register, false, true, true, SingleSpc,  4, 8 ,false},
    362 { ARM::VST1d8TPseudo,       ARM::VST1d8T,      false, false, false, SingleSpc,  3, 8 ,false},
    363 { ARM::VST1d8TPseudoWB_fixed,   ARM::VST1d8Twb_fixed, false, true, false, SingleSpc,  3, 8 ,false},
    364 { ARM::VST1d8TPseudoWB_register,  ARM::VST1d8Twb_register, false, true, true, SingleSpc,  3, 8 ,false},
    365 
    366 { ARM::VST1q16HighQPseudo,  ARM::VST1d16Q,     false, false, false, SingleHighQSpc,   4, 4 ,false},
    367 { ARM::VST1q16HighQPseudo_UPD,  ARM::VST1d16Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
    368 { ARM::VST1q16HighTPseudo,  ARM::VST1d16T,     false, false, false, SingleHighTSpc,   3, 4 ,false},
    369 { ARM::VST1q16HighTPseudo_UPD,  ARM::VST1d16Twb_fixed,  false, true, true, SingleHighTSpc,   3, 4 ,false},
    370 { ARM::VST1q16LowQPseudo_UPD,   ARM::VST1d16Qwb_fixed,  false, true, true, SingleLowSpc,   4, 4 ,false},
    371 { ARM::VST1q16LowTPseudo_UPD,   ARM::VST1d16Twb_fixed,  false, true, true, SingleLowSpc,   3, 4 ,false},
    372 
    373 { ARM::VST1q32HighQPseudo,  ARM::VST1d32Q,     false, false, false, SingleHighQSpc,   4, 2 ,false},
    374 { ARM::VST1q32HighQPseudo_UPD,  ARM::VST1d32Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
    375 { ARM::VST1q32HighTPseudo,  ARM::VST1d32T,     false, false, false, SingleHighTSpc,   3, 2 ,false},
    376 { ARM::VST1q32HighTPseudo_UPD,  ARM::VST1d32Twb_fixed,  false, true, true, SingleHighTSpc,   3, 2 ,false},
    377 { ARM::VST1q32LowQPseudo_UPD,   ARM::VST1d32Qwb_fixed,  false, true, true, SingleLowSpc,   4, 2 ,false},
    378 { ARM::VST1q32LowTPseudo_UPD,   ARM::VST1d32Twb_fixed,  false, true, true, SingleLowSpc,   3, 2 ,false},
    379 
    380 { ARM::VST1q64HighQPseudo,  ARM::VST1d64Q,     false, false, false, SingleHighQSpc,   4, 1 ,false},
    381 { ARM::VST1q64HighQPseudo_UPD,  ARM::VST1d64Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
    382 { ARM::VST1q64HighTPseudo,  ARM::VST1d64T,     false, false, false, SingleHighTSpc,   3, 1 ,false},
    383 { ARM::VST1q64HighTPseudo_UPD,  ARM::VST1d64Twb_fixed,  false, true, true, SingleHighTSpc,   3, 1 ,false},
    384 { ARM::VST1q64LowQPseudo_UPD,   ARM::VST1d64Qwb_fixed,  false, true, true, SingleLowSpc,   4, 1 ,false},
    385 { ARM::VST1q64LowTPseudo_UPD,   ARM::VST1d64Twb_fixed,  false, true, true, SingleLowSpc,   3, 1 ,false},
    386 
    387 { ARM::VST1q8HighQPseudo,   ARM::VST1d8Q,      false, false, false, SingleHighQSpc,   4, 8 ,false},
    388 { ARM::VST1q8HighQPseudo_UPD,  ARM::VST1d8Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
    389 { ARM::VST1q8HighTPseudo,   ARM::VST1d8T,      false, false, false, SingleHighTSpc,   3, 8 ,false},
    390 { ARM::VST1q8HighTPseudo_UPD,  ARM::VST1d8Twb_fixed,  false, true, true, SingleHighTSpc,   3, 8 ,false},
    391 { ARM::VST1q8LowQPseudo_UPD,   ARM::VST1d8Qwb_fixed,  false, true, true, SingleLowSpc,   4, 8 ,false},
    392 { ARM::VST1q8LowTPseudo_UPD,   ARM::VST1d8Twb_fixed,  false, true, true, SingleLowSpc,   3, 8 ,false},
    393 
    394 { ARM::VST2LNd16Pseudo,     ARM::VST2LNd16,     false, false, false, SingleSpc, 2, 4 ,true},
    395 { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true,  SingleSpc, 2, 4 ,true},
    396 { ARM::VST2LNd32Pseudo,     ARM::VST2LNd32,     false, false, false, SingleSpc, 2, 2 ,true},
    397 { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true,  SingleSpc, 2, 2 ,true},
    398 { ARM::VST2LNd8Pseudo,      ARM::VST2LNd8,      false, false, false, SingleSpc, 2, 8 ,true},
    399 { ARM::VST2LNd8Pseudo_UPD,  ARM::VST2LNd8_UPD, false, true, true,  SingleSpc, 2, 8 ,true},
    400 { ARM::VST2LNq16Pseudo,     ARM::VST2LNq16,     false, false, false, EvenDblSpc, 2, 4,true},
    401 { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true,  EvenDblSpc, 2, 4,true},
    402 { ARM::VST2LNq32Pseudo,     ARM::VST2LNq32,     false, false, false, EvenDblSpc, 2, 2,true},
    403 { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true,  EvenDblSpc, 2, 2,true},
    404 
    405 { ARM::VST2q16Pseudo,       ARM::VST2q16,      false, false, false, SingleSpc,  4, 4 ,false},
    406 { ARM::VST2q16PseudoWB_fixed,   ARM::VST2q16wb_fixed, false, true, false,  SingleSpc,  4, 4 ,false},
    407 { ARM::VST2q16PseudoWB_register,   ARM::VST2q16wb_register, false, true, true,  SingleSpc,  4, 4 ,false},
    408 { ARM::VST2q32Pseudo,       ARM::VST2q32,      false, false, false, SingleSpc,  4, 2 ,false},
    409 { ARM::VST2q32PseudoWB_fixed,   ARM::VST2q32wb_fixed, false, true, false,  SingleSpc,  4, 2 ,false},
    410 { ARM::VST2q32PseudoWB_register,   ARM::VST2q32wb_register, false, true, true,  SingleSpc,  4, 2 ,false},
    411 { ARM::VST2q8Pseudo,        ARM::VST2q8,       false, false, false, SingleSpc,  4, 8 ,false},
    412 { ARM::VST2q8PseudoWB_fixed,    ARM::VST2q8wb_fixed, false, true, false,  SingleSpc,  4, 8 ,false},
    413 { ARM::VST2q8PseudoWB_register,    ARM::VST2q8wb_register, false, true, true,  SingleSpc,  4, 8 ,false},
    414 
    415 { ARM::VST3LNd16Pseudo,     ARM::VST3LNd16,     false, false, false, SingleSpc, 3, 4 ,true},
    416 { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true,  SingleSpc, 3, 4 ,true},
    417 { ARM::VST3LNd32Pseudo,     ARM::VST3LNd32,     false, false, false, SingleSpc, 3, 2 ,true},
    418 { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true,  SingleSpc, 3, 2 ,true},
    419 { ARM::VST3LNd8Pseudo,      ARM::VST3LNd8,      false, false, false, SingleSpc, 3, 8 ,true},
    420 { ARM::VST3LNd8Pseudo_UPD,  ARM::VST3LNd8_UPD, false, true, true,  SingleSpc, 3, 8 ,true},
    421 { ARM::VST3LNq16Pseudo,     ARM::VST3LNq16,     false, false, false, EvenDblSpc, 3, 4,true},
    422 { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true,  EvenDblSpc, 3, 4,true},
    423 { ARM::VST3LNq32Pseudo,     ARM::VST3LNq32,     false, false, false, EvenDblSpc, 3, 2,true},
    424 { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true,  EvenDblSpc, 3, 2,true},
    425 
    426 { ARM::VST3d16Pseudo,       ARM::VST3d16,      false, false, false, SingleSpc,  3, 4 ,true},
    427 { ARM::VST3d16Pseudo_UPD,   ARM::VST3d16_UPD, false, true, true,  SingleSpc,  3, 4 ,true},
    428 { ARM::VST3d32Pseudo,       ARM::VST3d32,      false, false, false, SingleSpc,  3, 2 ,true},
    429 { ARM::VST3d32Pseudo_UPD,   ARM::VST3d32_UPD, false, true, true,  SingleSpc,  3, 2 ,true},
    430 { ARM::VST3d8Pseudo,        ARM::VST3d8,       false, false, false, SingleSpc,  3, 8 ,true},
    431 { ARM::VST3d8Pseudo_UPD,    ARM::VST3d8_UPD, false, true, true,  SingleSpc,  3, 8 ,true},
    432 
    433 { ARM::VST3q16Pseudo_UPD,    ARM::VST3q16_UPD, false, true, true,  EvenDblSpc, 3, 4 ,true},
    434 { ARM::VST3q16oddPseudo,     ARM::VST3q16,     false, false, false, OddDblSpc,  3, 4 ,true},
    435 { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true,  OddDblSpc,  3, 4 ,true},
    436 { ARM::VST3q32Pseudo_UPD,    ARM::VST3q32_UPD, false, true, true,  EvenDblSpc, 3, 2 ,true},
    437 { ARM::VST3q32oddPseudo,     ARM::VST3q32,     false, false, false, OddDblSpc,  3, 2 ,true},
    438 { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true,  OddDblSpc,  3, 2 ,true},
    439 { ARM::VST3q8Pseudo_UPD,     ARM::VST3q8_UPD, false, true, true,  EvenDblSpc, 3, 8 ,true},
    440 { ARM::VST3q8oddPseudo,      ARM::VST3q8,      false, false, false, OddDblSpc,  3, 8 ,true},
    441 { ARM::VST3q8oddPseudo_UPD,  ARM::VST3q8_UPD, false, true, true,  OddDblSpc,  3, 8 ,true},
    442 
    443 { ARM::VST4LNd16Pseudo,     ARM::VST4LNd16,     false, false, false, SingleSpc, 4, 4 ,true},
    444 { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true,  SingleSpc, 4, 4 ,true},
    445 { ARM::VST4LNd32Pseudo,     ARM::VST4LNd32,     false, false, false, SingleSpc, 4, 2 ,true},
    446 { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true,  SingleSpc, 4, 2 ,true},
    447 { ARM::VST4LNd8Pseudo,      ARM::VST4LNd8,      false, false, false, SingleSpc, 4, 8 ,true},
    448 { ARM::VST4LNd8Pseudo_UPD,  ARM::VST4LNd8_UPD, false, true, true,  SingleSpc, 4, 8 ,true},
    449 { ARM::VST4LNq16Pseudo,     ARM::VST4LNq16,     false, false, false, EvenDblSpc, 4, 4,true},
    450 { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true,  EvenDblSpc, 4, 4,true},
    451 { ARM::VST4LNq32Pseudo,     ARM::VST4LNq32,     false, false, false, EvenDblSpc, 4, 2,true},
    452 { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true,  EvenDblSpc, 4, 2,true},
    453 
    454 { ARM::VST4d16Pseudo,       ARM::VST4d16,      false, false, false, SingleSpc,  4, 4 ,true},
    455 { ARM::VST4d16Pseudo_UPD,   ARM::VST4d16_UPD, false, true, true,  SingleSpc,  4, 4 ,true},
    456 { ARM::VST4d32Pseudo,       ARM::VST4d32,      false, false, false, SingleSpc,  4, 2 ,true},
    457 { ARM::VST4d32Pseudo_UPD,   ARM::VST4d32_UPD, false, true, true,  SingleSpc,  4, 2 ,true},
    458 { ARM::VST4d8Pseudo,        ARM::VST4d8,       false, false, false, SingleSpc,  4, 8 ,true},
    459 { ARM::VST4d8Pseudo_UPD,    ARM::VST4d8_UPD, false, true, true,  SingleSpc,  4, 8 ,true},
    460 
    461 { ARM::VST4q16Pseudo_UPD,    ARM::VST4q16_UPD, false, true, true,  EvenDblSpc, 4, 4 ,true},
    462 { ARM::VST4q16oddPseudo,     ARM::VST4q16,     false, false, false, OddDblSpc,  4, 4 ,true},
    463 { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true,  OddDblSpc,  4, 4 ,true},
    464 { ARM::VST4q32Pseudo_UPD,    ARM::VST4q32_UPD, false, true, true,  EvenDblSpc, 4, 2 ,true},
    465 { ARM::VST4q32oddPseudo,     ARM::VST4q32,     false, false, false, OddDblSpc,  4, 2 ,true},
    466 { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true,  OddDblSpc,  4, 2 ,true},
    467 { ARM::VST4q8Pseudo_UPD,     ARM::VST4q8_UPD, false, true, true,  EvenDblSpc, 4, 8 ,true},
    468 { ARM::VST4q8oddPseudo,      ARM::VST4q8,      false, false, false, OddDblSpc,  4, 8 ,true},
    469 { ARM::VST4q8oddPseudo_UPD,  ARM::VST4q8_UPD, false, true, true,  OddDblSpc,  4, 8 ,true}
    470 };
    471 
    472 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
    473 /// load or store pseudo instruction.
    474 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
    475 #ifndef NDEBUG
    476   // Make sure the table is sorted.
    477   static std::atomic<bool> TableChecked(false);
    478   if (!TableChecked.load(std::memory_order_relaxed)) {
    479     assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
    480     TableChecked.store(true, std::memory_order_relaxed);
    481   }
    482 #endif
    483 
    484   auto I = llvm::lower_bound(NEONLdStTable, Opcode);
    485   if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
    486     return I;
    487   return nullptr;
    488 }
    489 
    490 /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
    491 /// corresponding to the specified register spacing.  Not all of the results
    492 /// are necessarily valid, e.g., a Q register only has 2 D subregisters.
    493 static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
    494                         const TargetRegisterInfo *TRI, unsigned &D0,
    495                         unsigned &D1, unsigned &D2, unsigned &D3) {
    496   if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) {
    497     D0 = TRI->getSubReg(Reg, ARM::dsub_0);
    498     D1 = TRI->getSubReg(Reg, ARM::dsub_1);
    499     D2 = TRI->getSubReg(Reg, ARM::dsub_2);
    500     D3 = TRI->getSubReg(Reg, ARM::dsub_3);
    501   } else if (RegSpc == SingleHighQSpc) {
    502     D0 = TRI->getSubReg(Reg, ARM::dsub_4);
    503     D1 = TRI->getSubReg(Reg, ARM::dsub_5);
    504     D2 = TRI->getSubReg(Reg, ARM::dsub_6);
    505     D3 = TRI->getSubReg(Reg, ARM::dsub_7);
    506   } else if (RegSpc == SingleHighTSpc) {
    507     D0 = TRI->getSubReg(Reg, ARM::dsub_3);
    508     D1 = TRI->getSubReg(Reg, ARM::dsub_4);
    509     D2 = TRI->getSubReg(Reg, ARM::dsub_5);
    510     D3 = TRI->getSubReg(Reg, ARM::dsub_6);
    511   } else if (RegSpc == EvenDblSpc) {
    512     D0 = TRI->getSubReg(Reg, ARM::dsub_0);
    513     D1 = TRI->getSubReg(Reg, ARM::dsub_2);
    514     D2 = TRI->getSubReg(Reg, ARM::dsub_4);
    515     D3 = TRI->getSubReg(Reg, ARM::dsub_6);
    516   } else {
    517     assert(RegSpc == OddDblSpc && "unknown register spacing");
    518     D0 = TRI->getSubReg(Reg, ARM::dsub_1);
    519     D1 = TRI->getSubReg(Reg, ARM::dsub_3);
    520     D2 = TRI->getSubReg(Reg, ARM::dsub_5);
    521     D3 = TRI->getSubReg(Reg, ARM::dsub_7);
    522   }
    523 }
    524 
    525 /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
    526 /// operands to real VLD instructions with D register operands.
    527 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
    528   MachineInstr &MI = *MBBI;
    529   MachineBasicBlock &MBB = *MI.getParent();
    530   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
    531 
    532   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
    533   assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
    534   NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
    535   unsigned NumRegs = TableEntry->NumRegs;
    536 
    537   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
    538                                     TII->get(TableEntry->RealOpc));
    539   unsigned OpIdx = 0;
    540 
    541   bool DstIsDead = MI.getOperand(OpIdx).isDead();
    542   Register DstReg = MI.getOperand(OpIdx++).getReg();
    543   if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
    544      TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
    545      TableEntry->RealOpc == ARM::VLD2DUPd32x2) {
    546     unsigned SubRegIndex;
    547     if (RegSpc == EvenDblSpc) {
    548       SubRegIndex = ARM::dsub_0;
    549     } else {
    550       assert(RegSpc == OddDblSpc && "Unexpected spacing!");
    551       SubRegIndex = ARM::dsub_1;
    552     }
    553     Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
    554     unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0,
    555                                                    &ARM::DPairSpcRegClass);
    556     MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
    557   } else {
    558     unsigned D0, D1, D2, D3;
    559     GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    560     MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    561     if (NumRegs > 1 && TableEntry->copyAllListRegs)
    562       MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    563     if (NumRegs > 2 && TableEntry->copyAllListRegs)
    564       MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    565     if (NumRegs > 3 && TableEntry->copyAllListRegs)
    566       MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
    567   }
    568 
    569   if (TableEntry->isUpdating)
    570     MIB.add(MI.getOperand(OpIdx++));
    571 
    572   // Copy the addrmode6 operands.
    573   MIB.add(MI.getOperand(OpIdx++));
    574   MIB.add(MI.getOperand(OpIdx++));
    575 
    576   // Copy the am6offset operand.
    577   if (TableEntry->hasWritebackOperand) {
    578     // TODO: The writing-back pseudo instructions we translate here are all
    579     // defined to take am6offset nodes that are capable to represent both fixed
    580     // and register forms. Some real instructions, however, do not rely on
    581     // am6offset and have separate definitions for such forms. When this is the
    582     // case, fixed forms do not take any offset nodes, so here we skip them for
    583     // such instructions. Once all real and pseudo writing-back instructions are
    584     // rewritten without use of am6offset nodes, this code will go away.
    585     const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    586     if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
    587         TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
    588         TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
    589         TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
    590         TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
    591         TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
    592         TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
    593         TableEntry->RealOpc == ARM::VLD1d64Twb_fixed) {
    594       assert(AM6Offset.getReg() == 0 &&
    595              "A fixed writing-back pseudo instruction provides an offset "
    596              "register!");
    597     } else {
    598       MIB.add(AM6Offset);
    599     }
    600   }
    601 
    602   // For an instruction writing double-spaced subregs, the pseudo instruction
    603   // has an extra operand that is a use of the super-register.  Record the
    604   // operand index and skip over it.
    605   unsigned SrcOpIdx = 0;
    606   if(TableEntry->RealOpc != ARM::VLD2DUPd8x2 &&
    607      TableEntry->RealOpc != ARM::VLD2DUPd16x2 &&
    608      TableEntry->RealOpc != ARM::VLD2DUPd32x2) {
    609     if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
    610         RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
    611         RegSpc == SingleHighTSpc)
    612       SrcOpIdx = OpIdx++;
    613   }
    614 
    615   // Copy the predicate operands.
    616   MIB.add(MI.getOperand(OpIdx++));
    617   MIB.add(MI.getOperand(OpIdx++));
    618 
    619   // Copy the super-register source operand used for double-spaced subregs over
    620   // to the new instruction as an implicit operand.
    621   if (SrcOpIdx != 0) {
    622     MachineOperand MO = MI.getOperand(SrcOpIdx);
    623     MO.setImplicit(true);
    624     MIB.add(MO);
    625   }
    626   // Add an implicit def for the super-register.
    627   MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
    628   TransferImpOps(MI, MIB, MIB);
    629 
    630   // Transfer memoperands.
    631   MIB.cloneMemRefs(MI);
    632   MI.eraseFromParent();
    633   LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
    634 }
    635 
    636 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
    637 /// operands to real VST instructions with D register operands.
    638 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
    639   MachineInstr &MI = *MBBI;
    640   MachineBasicBlock &MBB = *MI.getParent();
    641   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
    642 
    643   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
    644   assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
    645   NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
    646   unsigned NumRegs = TableEntry->NumRegs;
    647 
    648   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
    649                                     TII->get(TableEntry->RealOpc));
    650   unsigned OpIdx = 0;
    651   if (TableEntry->isUpdating)
    652     MIB.add(MI.getOperand(OpIdx++));
    653 
    654   // Copy the addrmode6 operands.
    655   MIB.add(MI.getOperand(OpIdx++));
    656   MIB.add(MI.getOperand(OpIdx++));
    657 
    658   if (TableEntry->hasWritebackOperand) {
    659     // TODO: The writing-back pseudo instructions we translate here are all
    660     // defined to take am6offset nodes that are capable to represent both fixed
    661     // and register forms. Some real instructions, however, do not rely on
    662     // am6offset and have separate definitions for such forms. When this is the
    663     // case, fixed forms do not take any offset nodes, so here we skip them for
    664     // such instructions. Once all real and pseudo writing-back instructions are
    665     // rewritten without use of am6offset nodes, this code will go away.
    666     const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    667     if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
    668         TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
    669         TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
    670         TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
    671         TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
    672         TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
    673         TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
    674         TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
    675       assert(AM6Offset.getReg() == 0 &&
    676              "A fixed writing-back pseudo instruction provides an offset "
    677              "register!");
    678     } else {
    679       MIB.add(AM6Offset);
    680     }
    681   }
    682 
    683   bool SrcIsKill = MI.getOperand(OpIdx).isKill();
    684   bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
    685   Register SrcReg = MI.getOperand(OpIdx++).getReg();
    686   unsigned D0, D1, D2, D3;
    687   GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
    688   MIB.addReg(D0, getUndefRegState(SrcIsUndef));
    689   if (NumRegs > 1 && TableEntry->copyAllListRegs)
    690     MIB.addReg(D1, getUndefRegState(SrcIsUndef));
    691   if (NumRegs > 2 && TableEntry->copyAllListRegs)
    692     MIB.addReg(D2, getUndefRegState(SrcIsUndef));
    693   if (NumRegs > 3 && TableEntry->copyAllListRegs)
    694     MIB.addReg(D3, getUndefRegState(SrcIsUndef));
    695 
    696   // Copy the predicate operands.
    697   MIB.add(MI.getOperand(OpIdx++));
    698   MIB.add(MI.getOperand(OpIdx++));
    699 
    700   if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
    701     MIB->addRegisterKilled(SrcReg, TRI, true);
    702   else if (!SrcIsUndef)
    703     MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
    704   TransferImpOps(MI, MIB, MIB);
    705 
    706   // Transfer memoperands.
    707   MIB.cloneMemRefs(MI);
    708   MI.eraseFromParent();
    709   LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
    710 }
    711 
    712 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
    713 /// register operands to real instructions with D register operands.
    714 void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
    715   MachineInstr &MI = *MBBI;
    716   MachineBasicBlock &MBB = *MI.getParent();
    717   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
    718 
    719   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
    720   assert(TableEntry && "NEONLdStTable lookup failed");
    721   NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
    722   unsigned NumRegs = TableEntry->NumRegs;
    723   unsigned RegElts = TableEntry->RegElts;
    724 
    725   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
    726                                     TII->get(TableEntry->RealOpc));
    727   unsigned OpIdx = 0;
    728   // The lane operand is always the 3rd from last operand, before the 2
    729   // predicate operands.
    730   unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
    731 
    732   // Adjust the lane and spacing as needed for Q registers.
    733   assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
    734   if (RegSpc == EvenDblSpc && Lane >= RegElts) {
    735     RegSpc = OddDblSpc;
    736     Lane -= RegElts;
    737   }
    738   assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
    739 
    740   unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
    741   unsigned DstReg = 0;
    742   bool DstIsDead = false;
    743   if (TableEntry->IsLoad) {
    744     DstIsDead = MI.getOperand(OpIdx).isDead();
    745     DstReg = MI.getOperand(OpIdx++).getReg();
    746     GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    747     MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    748     if (NumRegs > 1)
    749       MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    750     if (NumRegs > 2)
    751       MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    752     if (NumRegs > 3)
    753       MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
    754   }
    755 
    756   if (TableEntry->isUpdating)
    757     MIB.add(MI.getOperand(OpIdx++));
    758 
    759   // Copy the addrmode6 operands.
    760   MIB.add(MI.getOperand(OpIdx++));
    761   MIB.add(MI.getOperand(OpIdx++));
    762   // Copy the am6offset operand.
    763   if (TableEntry->hasWritebackOperand)
    764     MIB.add(MI.getOperand(OpIdx++));
    765 
    766   // Grab the super-register source.
    767   MachineOperand MO = MI.getOperand(OpIdx++);
    768   if (!TableEntry->IsLoad)
    769     GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
    770 
    771   // Add the subregs as sources of the new instruction.
    772   unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
    773                        getKillRegState(MO.isKill()));
    774   MIB.addReg(D0, SrcFlags);
    775   if (NumRegs > 1)
    776     MIB.addReg(D1, SrcFlags);
    777   if (NumRegs > 2)
    778     MIB.addReg(D2, SrcFlags);
    779   if (NumRegs > 3)
    780     MIB.addReg(D3, SrcFlags);
    781 
    782   // Add the lane number operand.
    783   MIB.addImm(Lane);
    784   OpIdx += 1;
    785 
    786   // Copy the predicate operands.
    787   MIB.add(MI.getOperand(OpIdx++));
    788   MIB.add(MI.getOperand(OpIdx++));
    789 
    790   // Copy the super-register source to be an implicit source.
    791   MO.setImplicit(true);
    792   MIB.add(MO);
    793   if (TableEntry->IsLoad)
    794     // Add an implicit def for the super-register.
    795     MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
    796   TransferImpOps(MI, MIB, MIB);
    797   // Transfer memoperands.
    798   MIB.cloneMemRefs(MI);
    799   MI.eraseFromParent();
    800 }
    801 
    802 /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
    803 /// register operands to real instructions with D register operands.
    804 void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
    805                                  unsigned Opc, bool IsExt) {
    806   MachineInstr &MI = *MBBI;
    807   MachineBasicBlock &MBB = *MI.getParent();
    808   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
    809 
    810   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
    811   unsigned OpIdx = 0;
    812 
    813   // Transfer the destination register operand.
    814   MIB.add(MI.getOperand(OpIdx++));
    815   if (IsExt) {
    816     MachineOperand VdSrc(MI.getOperand(OpIdx++));
    817     MIB.add(VdSrc);
    818   }
    819 
    820   bool SrcIsKill = MI.getOperand(OpIdx).isKill();
    821   Register SrcReg = MI.getOperand(OpIdx++).getReg();
    822   unsigned D0, D1, D2, D3;
    823   GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
    824   MIB.addReg(D0);
    825 
    826   // Copy the other source register operand.
    827   MachineOperand VmSrc(MI.getOperand(OpIdx++));
    828   MIB.add(VmSrc);
    829 
    830   // Copy the predicate operands.
    831   MIB.add(MI.getOperand(OpIdx++));
    832   MIB.add(MI.getOperand(OpIdx++));
    833 
    834   // Add an implicit kill and use for the super-reg.
    835   MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
    836   TransferImpOps(MI, MIB, MIB);
    837   MI.eraseFromParent();
    838   LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
    839 }
    840 
    841 static bool IsAnAddressOperand(const MachineOperand &MO) {
    842   // This check is overly conservative.  Unless we are certain that the machine
    843   // operand is not a symbol reference, we return that it is a symbol reference.
    844   // This is important as the load pair may not be split up Windows.
    845   switch (MO.getType()) {
    846   case MachineOperand::MO_Register:
    847   case MachineOperand::MO_Immediate:
    848   case MachineOperand::MO_CImmediate:
    849   case MachineOperand::MO_FPImmediate:
    850   case MachineOperand::MO_ShuffleMask:
    851     return false;
    852   case MachineOperand::MO_MachineBasicBlock:
    853     return true;
    854   case MachineOperand::MO_FrameIndex:
    855     return false;
    856   case MachineOperand::MO_ConstantPoolIndex:
    857   case MachineOperand::MO_TargetIndex:
    858   case MachineOperand::MO_JumpTableIndex:
    859   case MachineOperand::MO_ExternalSymbol:
    860   case MachineOperand::MO_GlobalAddress:
    861   case MachineOperand::MO_BlockAddress:
    862     return true;
    863   case MachineOperand::MO_RegisterMask:
    864   case MachineOperand::MO_RegisterLiveOut:
    865     return false;
    866   case MachineOperand::MO_Metadata:
    867   case MachineOperand::MO_MCSymbol:
    868     return true;
    869   case MachineOperand::MO_CFIIndex:
    870     return false;
    871   case MachineOperand::MO_IntrinsicID:
    872   case MachineOperand::MO_Predicate:
    873     llvm_unreachable("should not exist post-isel");
    874   }
    875   llvm_unreachable("unhandled machine operand type");
    876 }
    877 
    878 static MachineOperand makeImplicit(const MachineOperand &MO) {
    879   MachineOperand NewMO = MO;
    880   NewMO.setImplicit();
    881   return NewMO;
    882 }
    883 
    884 void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
    885                                         MachineBasicBlock::iterator &MBBI) {
    886   MachineInstr &MI = *MBBI;
    887   unsigned Opcode = MI.getOpcode();
    888   Register PredReg;
    889   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
    890   Register DstReg = MI.getOperand(0).getReg();
    891   bool DstIsDead = MI.getOperand(0).isDead();
    892   bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
    893   const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
    894   bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
    895   MachineInstrBuilder LO16, HI16;
    896   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
    897 
    898   if (!STI->hasV6T2Ops() &&
    899       (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
    900     // FIXME Windows CE supports older ARM CPUs
    901     assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
    902 
    903     assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
    904     unsigned ImmVal = (unsigned)MO.getImm();
    905     unsigned SOImmValV1 = 0, SOImmValV2 = 0;
    906 
    907     if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
    908       LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
    909       HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
    910           .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
    911           .addReg(DstReg);
    912       SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
    913       SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
    914     } else { // Expand into a mvn + sub.
    915       LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
    916       HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
    917           .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
    918           .addReg(DstReg);
    919       SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
    920       SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
    921       SOImmValV1 = ~(-SOImmValV1);
    922     }
    923 
    924     unsigned MIFlags = MI.getFlags();
    925     LO16 = LO16.addImm(SOImmValV1);
    926     HI16 = HI16.addImm(SOImmValV2);
    927     LO16.cloneMemRefs(MI);
    928     HI16.cloneMemRefs(MI);
    929     LO16.setMIFlags(MIFlags);
    930     HI16.setMIFlags(MIFlags);
    931     LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    932     HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    933     if (isCC)
    934       LO16.add(makeImplicit(MI.getOperand(1)));
    935     TransferImpOps(MI, LO16, HI16);
    936     MI.eraseFromParent();
    937     return;
    938   }
    939 
    940   unsigned LO16Opc = 0;
    941   unsigned HI16Opc = 0;
    942   unsigned MIFlags = MI.getFlags();
    943   if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
    944     LO16Opc = ARM::t2MOVi16;
    945     HI16Opc = ARM::t2MOVTi16;
    946   } else {
    947     LO16Opc = ARM::MOVi16;
    948     HI16Opc = ARM::MOVTi16;
    949   }
    950 
    951   LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
    952   HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
    953     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
    954     .addReg(DstReg);
    955 
    956   LO16.setMIFlags(MIFlags);
    957   HI16.setMIFlags(MIFlags);
    958 
    959   switch (MO.getType()) {
    960   case MachineOperand::MO_Immediate: {
    961     unsigned Imm = MO.getImm();
    962     unsigned Lo16 = Imm & 0xffff;
    963     unsigned Hi16 = (Imm >> 16) & 0xffff;
    964     LO16 = LO16.addImm(Lo16);
    965     HI16 = HI16.addImm(Hi16);
    966     break;
    967   }
    968   case MachineOperand::MO_ExternalSymbol: {
    969     const char *ES = MO.getSymbolName();
    970     unsigned TF = MO.getTargetFlags();
    971     LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
    972     HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
    973     break;
    974   }
    975   default: {
    976     const GlobalValue *GV = MO.getGlobal();
    977     unsigned TF = MO.getTargetFlags();
    978     LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
    979     HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
    980     break;
    981   }
    982   }
    983 
    984   LO16.cloneMemRefs(MI);
    985   HI16.cloneMemRefs(MI);
    986   LO16.addImm(Pred).addReg(PredReg);
    987   HI16.addImm(Pred).addReg(PredReg);
    988 
    989   if (RequiresBundling)
    990     finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
    991 
    992   if (isCC)
    993     LO16.add(makeImplicit(MI.getOperand(1)));
    994   TransferImpOps(MI, LO16, HI16);
    995   MI.eraseFromParent();
    996   LLVM_DEBUG(dbgs() << "To:        "; LO16.getInstr()->dump(););
    997   LLVM_DEBUG(dbgs() << "And:       "; HI16.getInstr()->dump(););
    998 }
    999 
// The size of the memory area accessed by VLSTM/VLLDM:
   1001 // S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
   1002 static const int CMSE_FP_SAVE_SIZE = 136;
   1003 
   1004 static void determineGPRegsToClear(const MachineInstr &MI,
   1005                                    const std::initializer_list<unsigned> &Regs,
   1006                                    SmallVectorImpl<unsigned> &ClearRegs) {
   1007   SmallVector<unsigned, 4> OpRegs;
   1008   for (const MachineOperand &Op : MI.operands()) {
   1009     if (!Op.isReg() || !Op.isUse())
   1010       continue;
   1011     OpRegs.push_back(Op.getReg());
   1012   }
   1013   llvm::sort(OpRegs);
   1014 
   1015   std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
   1016                       std::back_inserter(ClearRegs));
   1017 }
   1018 
   1019 void ARMExpandPseudo::CMSEClearGPRegs(
   1020     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
   1021     const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
   1022     unsigned ClobberReg) {
   1023 
   1024   if (STI->hasV8_1MMainlineOps()) {
   1025     // Clear the registers using the CLRM instruction.
   1026     MachineInstrBuilder CLRM =
   1027         BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
   1028     for (unsigned R : ClearRegs)
   1029       CLRM.addReg(R, RegState::Define);
   1030     CLRM.addReg(ARM::APSR, RegState::Define);
   1031     CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
   1032   } else {
   1033     // Clear the registers and flags by copying ClobberReg into them.
   1034     // (Baseline can't do a high register clear in one instruction).
   1035     for (unsigned Reg : ClearRegs) {
   1036       if (Reg == ClobberReg)
   1037         continue;
   1038       BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
   1039           .addReg(ClobberReg)
   1040           .add(predOps(ARMCC::AL));
   1041     }
   1042 
   1043     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
   1044         .addImm(STI->hasDSP() ? 0xc00 : 0x800)
   1045         .addReg(ClobberReg)
   1046         .add(predOps(ARMCC::AL));
   1047   }
   1048 }
   1049 
   1050 // Find which FP registers need to be cleared.  The parameter `ClearRegs` is
   1051 // initialised with all elements set to true, and this function resets all the
   1052 // bits, which correspond to register uses. Returns true if any floating point
   1053 // register is defined, false otherwise.
   1054 static bool determineFPRegsToClear(const MachineInstr &MI,
   1055                                    BitVector &ClearRegs) {
   1056   bool DefFP = false;
   1057   for (const MachineOperand &Op : MI.operands()) {
   1058     if (!Op.isReg())
   1059       continue;
   1060 
   1061     unsigned Reg = Op.getReg();
   1062     if (Op.isDef()) {
   1063       if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
   1064           (Reg >= ARM::D0 && Reg <= ARM::D15) ||
   1065           (Reg >= ARM::S0 && Reg <= ARM::S31))
   1066         DefFP = true;
   1067       continue;
   1068     }
   1069 
   1070     if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
   1071       int R = Reg - ARM::Q0;
   1072       ClearRegs.reset(R * 4, (R + 1) * 4);
   1073     } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
   1074       int R = Reg - ARM::D0;
   1075       ClearRegs.reset(R * 2, (R + 1) * 2);
   1076     } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
   1077       ClearRegs[Reg - ARM::S0] = false;
   1078     }
   1079   }
   1080   return DefFP;
   1081 }
   1082 
   1083 MachineBasicBlock &
   1084 ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
   1085                                  MachineBasicBlock::iterator MBBI) {
   1086   BitVector ClearRegs(16, true);
   1087   (void)determineFPRegsToClear(*MBBI, ClearRegs);
   1088 
   1089   if (STI->hasV8_1MMainlineOps())
   1090     return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
   1091   else
   1092     return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
   1093 }
   1094 
   1095 // Clear the FP registers for v8.0-M, by copying over the content
   1096 // of LR. Uses R12 as a scratch register.
   1097 MachineBasicBlock &
   1098 ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
   1099                                    MachineBasicBlock::iterator MBBI,
   1100                                    const BitVector &ClearRegs) {
   1101   if (!STI->hasFPRegs())
   1102     return MBB;
   1103 
   1104   auto &RetI = *MBBI;
   1105   const DebugLoc &DL = RetI.getDebugLoc();
   1106 
   1107   // If optimising for minimum size, clear FP registers unconditionally.
   1108   // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
   1109   // don't clear them if they belong to the non-secure state.
   1110   MachineBasicBlock *ClearBB, *DoneBB;
   1111   if (STI->hasMinSize()) {
   1112     ClearBB = DoneBB = &MBB;
   1113   } else {
   1114     MachineFunction *MF = MBB.getParent();
   1115     ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1116     DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1117 
   1118     MF->insert(++MBB.getIterator(), ClearBB);
   1119     MF->insert(++ClearBB->getIterator(), DoneBB);
   1120 
   1121     DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
   1122     DoneBB->transferSuccessors(&MBB);
   1123     MBB.addSuccessor(ClearBB);
   1124     MBB.addSuccessor(DoneBB);
   1125     ClearBB->addSuccessor(DoneBB);
   1126 
   1127     // At the new basic blocks we need to have live-in the registers, used
   1128     // for the return value as well as LR, used to clear registers.
   1129     for (const MachineOperand &Op : RetI.operands()) {
   1130       if (!Op.isReg())
   1131         continue;
   1132       Register Reg = Op.getReg();
   1133       if (Reg == ARM::NoRegister || Reg == ARM::LR)
   1134         continue;
   1135       assert(Register::isPhysicalRegister(Reg) && "Unallocated register");
   1136       ClearBB->addLiveIn(Reg);
   1137       DoneBB->addLiveIn(Reg);
   1138     }
   1139     ClearBB->addLiveIn(ARM::LR);
   1140     DoneBB->addLiveIn(ARM::LR);
   1141 
   1142     // Read the CONTROL register.
   1143     BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
   1144         .addImm(20)
   1145         .add(predOps(ARMCC::AL));
   1146     // Check bit 3 (SFPA).
   1147     BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
   1148         .addReg(ARM::R12)
   1149         .addImm(8)
   1150         .add(predOps(ARMCC::AL));
   1151     // If SFPA is clear, jump over ClearBB to DoneBB.
   1152     BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
   1153         .addMBB(DoneBB)
   1154         .addImm(ARMCC::EQ)
   1155         .addReg(ARM::CPSR, RegState::Kill);
   1156   }
   1157 
   1158   // Emit the clearing sequence
   1159   for (unsigned D = 0; D < 8; D++) {
   1160     // Attempt to clear as double
   1161     if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
   1162       unsigned Reg = ARM::D0 + D;
   1163       BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
   1164           .addReg(ARM::LR)
   1165           .addReg(ARM::LR)
   1166           .add(predOps(ARMCC::AL));
   1167     } else {
   1168       // Clear first part as single
   1169       if (ClearRegs[D * 2 + 0]) {
   1170         unsigned Reg = ARM::S0 + D * 2;
   1171         BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
   1172             .addReg(ARM::LR)
   1173             .add(predOps(ARMCC::AL));
   1174       }
   1175       // Clear second part as single
   1176       if (ClearRegs[D * 2 + 1]) {
   1177         unsigned Reg = ARM::S0 + D * 2 + 1;
   1178         BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
   1179             .addReg(ARM::LR)
   1180             .add(predOps(ARMCC::AL));
   1181       }
   1182     }
   1183   }
   1184 
   1185   // Clear FPSCR bits 0-4, 7, 28-31
   1186   // The other bits are program global according to the AAPCS
   1187   BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
   1188       .add(predOps(ARMCC::AL));
   1189   BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
   1190       .addReg(ARM::R12)
   1191       .addImm(0x0000009F)
   1192       .add(predOps(ARMCC::AL))
   1193       .add(condCodeOp());
   1194   BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
   1195       .addReg(ARM::R12)
   1196       .addImm(0xF0000000)
   1197       .add(predOps(ARMCC::AL))
   1198       .add(condCodeOp());
   1199   BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
   1200       .addReg(ARM::R12)
   1201       .add(predOps(ARMCC::AL));
   1202 
   1203   return *DoneBB;
   1204 }
   1205 
   1206 MachineBasicBlock &
   1207 ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
   1208                                     MachineBasicBlock::iterator MBBI,
   1209                                     const BitVector &ClearRegs) {
   1210   auto &RetI = *MBBI;
   1211 
   1212   // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
   1213   // each contiguous sequence of S-registers.
   1214   int Start = -1, End = -1;
   1215   for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
   1216     if (ClearRegs[S] && S == End + 1) {
   1217       End = S; // extend range
   1218       continue;
   1219     }
   1220     // Emit current range.
   1221     if (Start < End) {
   1222       MachineInstrBuilder VSCCLRM =
   1223           BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
   1224               .add(predOps(ARMCC::AL));
   1225       while (++Start <= End)
   1226         VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
   1227       VSCCLRM.addReg(ARM::VPR, RegState::Define);
   1228     }
   1229     Start = End = S;
   1230   }
   1231   // Emit last range.
   1232   if (Start < End) {
   1233     MachineInstrBuilder VSCCLRM =
   1234         BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
   1235             .add(predOps(ARMCC::AL));
   1236     while (++Start <= End)
   1237       VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
   1238     VSCCLRM.addReg(ARM::VPR, RegState::Define);
   1239   }
   1240 
   1241   return MBB;
   1242 }
   1243 
   1244 void ARMExpandPseudo::CMSESaveClearFPRegs(
   1245     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
   1246     const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
   1247   if (STI->hasV8_1MMainlineOps())
   1248     CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
   1249   else
   1250     CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
   1251 }
   1252 
   1253 // Save and clear FP registers if present
   1254 void ARMExpandPseudo::CMSESaveClearFPRegsV8(
   1255     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
   1256     const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
   1257   if (!STI->hasFPRegs())
   1258     return;
   1259 
   1260   // Store an available register for FPSCR clearing
   1261   assert(!ScratchRegs.empty());
   1262   unsigned SpareReg = ScratchRegs.front();
   1263 
   1264   // save space on stack for VLSTM
   1265   BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
   1266       .addReg(ARM::SP)
   1267       .addImm(CMSE_FP_SAVE_SIZE >> 2)
   1268       .add(predOps(ARMCC::AL));
   1269 
   1270   // Use ScratchRegs to store the fp regs
   1271   std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
   1272   std::vector<unsigned> NonclearedFPRegs;
   1273   for (const MachineOperand &Op : MBBI->operands()) {
   1274     if (Op.isReg() && Op.isUse()) {
   1275       unsigned Reg = Op.getReg();
   1276       assert(!ARM::DPRRegClass.contains(Reg) ||
   1277              ARM::DPR_VFP2RegClass.contains(Reg));
   1278       assert(!ARM::QPRRegClass.contains(Reg));
   1279       if (ARM::DPR_VFP2RegClass.contains(Reg)) {
   1280         if (ScratchRegs.size() >= 2) {
   1281           unsigned SaveReg2 = ScratchRegs.pop_back_val();
   1282           unsigned SaveReg1 = ScratchRegs.pop_back_val();
   1283           ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
   1284 
   1285           // Save the fp register to the normal registers
   1286           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
   1287               .addReg(SaveReg1, RegState::Define)
   1288               .addReg(SaveReg2, RegState::Define)
   1289               .addReg(Reg)
   1290               .add(predOps(ARMCC::AL));
   1291         } else {
   1292           NonclearedFPRegs.push_back(Reg);
   1293         }
   1294       } else if (ARM::SPRRegClass.contains(Reg)) {
   1295         if (ScratchRegs.size() >= 1) {
   1296           unsigned SaveReg = ScratchRegs.pop_back_val();
   1297           ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
   1298 
   1299           // Save the fp register to the normal registers
   1300           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
   1301               .addReg(Reg)
   1302               .add(predOps(ARMCC::AL));
   1303         } else {
   1304           NonclearedFPRegs.push_back(Reg);
   1305         }
   1306       }
   1307     }
   1308   }
   1309 
   1310   bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
   1311 
   1312   // Lazy store all fp registers to the stack
   1313   MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
   1314                                   .addReg(ARM::SP)
   1315                                   .add(predOps(ARMCC::AL));
   1316   for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
   1317                  ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
   1318     VLSTM.addReg(R, RegState::Implicit |
   1319                         (LiveRegs.contains(R) ? 0 : RegState::Undef));
   1320 
   1321   // Restore all arguments
   1322   for (const auto &Regs : ClearedFPRegs) {
   1323     unsigned Reg, SaveReg1, SaveReg2;
   1324     std::tie(Reg, SaveReg1, SaveReg2) = Regs;
   1325     if (ARM::DPR_VFP2RegClass.contains(Reg))
   1326       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
   1327           .addReg(SaveReg1)
   1328           .addReg(SaveReg2)
   1329           .add(predOps(ARMCC::AL));
   1330     else if (ARM::SPRRegClass.contains(Reg))
   1331       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
   1332           .addReg(SaveReg1)
   1333           .add(predOps(ARMCC::AL));
   1334   }
   1335 
   1336   for (unsigned Reg : NonclearedFPRegs) {
   1337     if (ARM::DPR_VFP2RegClass.contains(Reg)) {
   1338       if (STI->isLittle()) {
   1339         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg)
   1340             .addReg(ARM::SP)
   1341             .addImm((Reg - ARM::D0) * 2)
   1342             .add(predOps(ARMCC::AL));
   1343       } else {
   1344         // For big-endian targets we need to load the two subregisters of Reg
   1345         // manually because VLDRD would load them in wrong order
   1346         unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0);
   1347         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0)
   1348             .addReg(ARM::SP)
   1349             .addImm((Reg - ARM::D0) * 2)
   1350             .add(predOps(ARMCC::AL));
   1351         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1)
   1352             .addReg(ARM::SP)
   1353             .addImm((Reg - ARM::D0) * 2 + 1)
   1354             .add(predOps(ARMCC::AL));
   1355       }
   1356     } else if (ARM::SPRRegClass.contains(Reg)) {
   1357       BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg)
   1358           .addReg(ARM::SP)
   1359           .addImm(Reg - ARM::S0)
   1360           .add(predOps(ARMCC::AL));
   1361     }
   1362   }
   1363   // restore FPSCR from stack and clear bits 0-4, 7, 28-31
   1364   // The other bits are program global according to the AAPCS
   1365   if (passesFPReg) {
   1366     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg)
   1367         .addReg(ARM::SP)
   1368         .addImm(0x40)
   1369         .add(predOps(ARMCC::AL));
   1370     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
   1371         .addReg(SpareReg)
   1372         .addImm(0x0000009F)
   1373         .add(predOps(ARMCC::AL))
   1374         .add(condCodeOp());
   1375     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
   1376         .addReg(SpareReg)
   1377         .addImm(0xF0000000)
   1378         .add(predOps(ARMCC::AL))
   1379         .add(condCodeOp());
   1380     BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
   1381         .addReg(SpareReg)
   1382         .add(predOps(ARMCC::AL));
   1383     // The ldr must happen after a floating point instruction. To prevent the
   1384     // post-ra scheduler to mess with the order, we create a bundle.
   1385     finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
   1386   }
   1387 }
   1388 
   1389 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
   1390                                              MachineBasicBlock::iterator MBBI,
   1391                                              DebugLoc &DL,
   1392                                              const LivePhysRegs &LiveRegs) {
   1393   BitVector ClearRegs(32, true);
   1394   bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
   1395 
   1396   // If the instruction does not write to a FP register and no elements were
   1397   // removed from the set, then no FP registers were used to pass
   1398   // arguments/returns.
   1399   if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
   1400     // save space on stack for VLSTM
   1401     BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
   1402         .addReg(ARM::SP)
   1403         .addImm(CMSE_FP_SAVE_SIZE >> 2)
   1404         .add(predOps(ARMCC::AL));
   1405 
   1406     // Lazy store all FP registers to the stack
   1407     MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
   1408                                     .addReg(ARM::SP)
   1409                                     .add(predOps(ARMCC::AL));
   1410     for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
   1411                    ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
   1412       VLSTM.addReg(R, RegState::Implicit |
   1413                           (LiveRegs.contains(R) ? 0 : RegState::Undef));
   1414   } else {
   1415     // Push all the callee-saved registers (s16-s31).
   1416     MachineInstrBuilder VPUSH =
   1417         BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
   1418             .addReg(ARM::SP)
   1419             .add(predOps(ARMCC::AL));
   1420     for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
   1421       VPUSH.addReg(Reg);
   1422 
   1423     // Clear FP registers with a VSCCLRM.
   1424     (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
   1425 
   1426     // Save floating-point context.
   1427     BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
   1428         .addReg(ARM::SP)
   1429         .addImm(-8)
   1430         .add(predOps(ARMCC::AL));
   1431   }
   1432 }
   1433 
   1434 // Restore FP registers if present
   1435 void ARMExpandPseudo::CMSERestoreFPRegs(
   1436     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
   1437     SmallVectorImpl<unsigned> &AvailableRegs) {
   1438   if (STI->hasV8_1MMainlineOps())
   1439     CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
   1440   else
   1441     CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
   1442 }
   1443 
   1444 void ARMExpandPseudo::CMSERestoreFPRegsV8(
   1445     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
   1446     SmallVectorImpl<unsigned> &AvailableRegs) {
   1447   if (!STI->hasFPRegs())
   1448     return;
   1449 
   1450   // Use AvailableRegs to store the fp regs
   1451   std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
   1452   std::vector<unsigned> NonclearedFPRegs;
   1453   for (const MachineOperand &Op : MBBI->operands()) {
   1454     if (Op.isReg() && Op.isDef()) {
   1455       unsigned Reg = Op.getReg();
   1456       assert(!ARM::DPRRegClass.contains(Reg) ||
   1457              ARM::DPR_VFP2RegClass.contains(Reg));
   1458       assert(!ARM::QPRRegClass.contains(Reg));
   1459       if (ARM::DPR_VFP2RegClass.contains(Reg)) {
   1460         if (AvailableRegs.size() >= 2) {
   1461           unsigned SaveReg2 = AvailableRegs.pop_back_val();
   1462           unsigned SaveReg1 = AvailableRegs.pop_back_val();
   1463           ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
   1464 
   1465           // Save the fp register to the normal registers
   1466           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
   1467               .addReg(SaveReg1, RegState::Define)
   1468               .addReg(SaveReg2, RegState::Define)
   1469               .addReg(Reg)
   1470               .add(predOps(ARMCC::AL));
   1471         } else {
   1472           NonclearedFPRegs.push_back(Reg);
   1473         }
   1474       } else if (ARM::SPRRegClass.contains(Reg)) {
   1475         if (AvailableRegs.size() >= 1) {
   1476           unsigned SaveReg = AvailableRegs.pop_back_val();
   1477           ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
   1478 
   1479           // Save the fp register to the normal registers
   1480           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
   1481               .addReg(Reg)
   1482               .add(predOps(ARMCC::AL));
   1483         } else {
   1484           NonclearedFPRegs.push_back(Reg);
   1485         }
   1486       }
   1487     }
   1488   }
   1489 
   1490   // Push FP regs that cannot be restored via normal registers on the stack
   1491   for (unsigned Reg : NonclearedFPRegs) {
   1492     if (ARM::DPR_VFP2RegClass.contains(Reg))
   1493       BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
   1494           .addReg(ARM::SP)
   1495           .addImm((Reg - ARM::D0) * 2)
   1496           .add(predOps(ARMCC::AL));
   1497     else if (ARM::SPRRegClass.contains(Reg))
   1498       BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
   1499           .addReg(ARM::SP)
   1500           .addImm(Reg - ARM::S0)
   1501           .add(predOps(ARMCC::AL));
   1502   }
   1503 
   1504   // Lazy load fp regs from stack
   1505   BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
   1506       .addReg(ARM::SP)
   1507       .add(predOps(ARMCC::AL));
   1508 
   1509   // Restore all FP registers via normal registers
   1510   for (const auto &Regs : ClearedFPRegs) {
   1511     unsigned Reg, SaveReg1, SaveReg2;
   1512     std::tie(Reg, SaveReg1, SaveReg2) = Regs;
   1513     if (ARM::DPR_VFP2RegClass.contains(Reg))
   1514       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
   1515           .addReg(SaveReg1)
   1516           .addReg(SaveReg2)
   1517           .add(predOps(ARMCC::AL));
   1518     else if (ARM::SPRRegClass.contains(Reg))
   1519       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
   1520           .addReg(SaveReg1)
   1521           .add(predOps(ARMCC::AL));
   1522   }
   1523 
   1524   // Pop the stack space
   1525   BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
   1526       .addReg(ARM::SP)
   1527       .addImm(CMSE_FP_SAVE_SIZE >> 2)
   1528       .add(predOps(ARMCC::AL));
   1529 }
   1530 
   1531 static bool definesOrUsesFPReg(const MachineInstr &MI) {
   1532   for (const MachineOperand &Op : MI.operands()) {
   1533     if (!Op.isReg())
   1534       continue;
   1535     unsigned Reg = Op.getReg();
   1536     if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
   1537         (Reg >= ARM::D0 && Reg <= ARM::D15) ||
   1538         (Reg >= ARM::S0 && Reg <= ARM::S31))
   1539       return true;
   1540   }
   1541   return false;
   1542 }
   1543 
   1544 void ARMExpandPseudo::CMSERestoreFPRegsV81(
   1545     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
   1546     SmallVectorImpl<unsigned> &AvailableRegs) {
   1547   if (!definesOrUsesFPReg(*MBBI)) {
   1548     // Load FP registers from stack.
   1549     BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
   1550         .addReg(ARM::SP)
   1551         .add(predOps(ARMCC::AL));
   1552 
   1553     // Pop the stack space
   1554     BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
   1555         .addReg(ARM::SP)
   1556         .addImm(CMSE_FP_SAVE_SIZE >> 2)
   1557         .add(predOps(ARMCC::AL));
   1558   } else {
   1559     // Restore the floating point context.
   1560     BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
   1561             ARM::SP)
   1562         .addReg(ARM::SP)
   1563         .addImm(8)
   1564         .add(predOps(ARMCC::AL));
   1565 
   1566     // Pop all the callee-saved registers (s16-s31).
   1567     MachineInstrBuilder VPOP =
   1568         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
   1569             .addReg(ARM::SP)
   1570             .add(predOps(ARMCC::AL));
   1571     for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
   1572       VPOP.addReg(Reg, RegState::Define);
   1573   }
   1574 }
   1575 
   1576 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
   1577 /// possible. This only gets used at -O0 so we don't care about efficiency of
   1578 /// the generated code.
   1579 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
   1580                                      MachineBasicBlock::iterator MBBI,
   1581                                      unsigned LdrexOp, unsigned StrexOp,
   1582                                      unsigned UxtOp,
   1583                                      MachineBasicBlock::iterator &NextMBBI) {
   1584   bool IsThumb = STI->isThumb();
   1585   MachineInstr &MI = *MBBI;
   1586   DebugLoc DL = MI.getDebugLoc();
   1587   const MachineOperand &Dest = MI.getOperand(0);
   1588   Register TempReg = MI.getOperand(1).getReg();
   1589   // Duplicating undef operands into 2 instructions does not guarantee the same
   1590   // value on both; However undef should be replaced by xzr anyway.
   1591   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
   1592   Register AddrReg = MI.getOperand(2).getReg();
   1593   Register DesiredReg = MI.getOperand(3).getReg();
   1594   Register NewReg = MI.getOperand(4).getReg();
   1595 
   1596   if (IsThumb) {
   1597     assert(STI->hasV8MBaselineOps() &&
   1598            "CMP_SWAP not expected to be custom expanded for Thumb1");
   1599     assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
   1600            "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
   1601     assert(ARM::tGPRRegClass.contains(DesiredReg) &&
   1602            "DesiredReg used for UXT op must be tGPR");
   1603   }
   1604 
   1605   MachineFunction *MF = MBB.getParent();
   1606   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1607   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1608   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1609 
   1610   MF->insert(++MBB.getIterator(), LoadCmpBB);
   1611   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
   1612   MF->insert(++StoreBB->getIterator(), DoneBB);
   1613 
   1614   if (UxtOp) {
   1615     MachineInstrBuilder MIB =
   1616         BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
   1617             .addReg(DesiredReg, RegState::Kill);
   1618     if (!IsThumb)
   1619       MIB.addImm(0);
   1620     MIB.add(predOps(ARMCC::AL));
   1621   }
   1622 
   1623   // .Lloadcmp:
   1624   //     ldrex rDest, [rAddr]
   1625   //     cmp rDest, rDesired
   1626   //     bne .Ldone
   1627 
   1628   MachineInstrBuilder MIB;
   1629   MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
   1630   MIB.addReg(AddrReg);
   1631   if (LdrexOp == ARM::t2LDREX)
   1632     MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
   1633   MIB.add(predOps(ARMCC::AL));
   1634 
   1635   unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
   1636   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
   1637       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
   1638       .addReg(DesiredReg)
   1639       .add(predOps(ARMCC::AL));
   1640   unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
   1641   BuildMI(LoadCmpBB, DL, TII->get(Bcc))
   1642       .addMBB(DoneBB)
   1643       .addImm(ARMCC::NE)
   1644       .addReg(ARM::CPSR, RegState::Kill);
   1645   LoadCmpBB->addSuccessor(DoneBB);
   1646   LoadCmpBB->addSuccessor(StoreBB);
   1647 
   1648   // .Lstore:
   1649   //     strex rTempReg, rNew, [rAddr]
   1650   //     cmp rTempReg, #0
   1651   //     bne .Lloadcmp
   1652   MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
   1653     .addReg(NewReg)
   1654     .addReg(AddrReg);
   1655   if (StrexOp == ARM::t2STREX)
   1656     MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
   1657   MIB.add(predOps(ARMCC::AL));
   1658 
   1659   unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
   1660   BuildMI(StoreBB, DL, TII->get(CMPri))
   1661       .addReg(TempReg, RegState::Kill)
   1662       .addImm(0)
   1663       .add(predOps(ARMCC::AL));
   1664   BuildMI(StoreBB, DL, TII->get(Bcc))
   1665       .addMBB(LoadCmpBB)
   1666       .addImm(ARMCC::NE)
   1667       .addReg(ARM::CPSR, RegState::Kill);
   1668   StoreBB->addSuccessor(LoadCmpBB);
   1669   StoreBB->addSuccessor(DoneBB);
   1670 
   1671   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
   1672   DoneBB->transferSuccessors(&MBB);
   1673 
   1674   MBB.addSuccessor(LoadCmpBB);
   1675 
   1676   NextMBBI = MBB.end();
   1677   MI.eraseFromParent();
   1678 
   1679   // Recompute livein lists.
   1680   LivePhysRegs LiveRegs;
   1681   computeAndAddLiveIns(LiveRegs, *DoneBB);
   1682   computeAndAddLiveIns(LiveRegs, *StoreBB);
   1683   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
   1684   // Do an extra pass around the loop to get loop carried registers right.
   1685   StoreBB->clearLiveIns();
   1686   computeAndAddLiveIns(LiveRegs, *StoreBB);
   1687   LoadCmpBB->clearLiveIns();
   1688   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
   1689 
   1690   return true;
   1691 }
   1692 
   1693 /// ARM's ldrexd/strexd take a consecutive register pair (represented as a
   1694 /// single GPRPair register), Thumb's take two separate registers so we need to
   1695 /// extract the subregs from the pair.
   1696 static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
   1697                                 unsigned Flags, bool IsThumb,
   1698                                 const TargetRegisterInfo *TRI) {
   1699   if (IsThumb) {
   1700     Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
   1701     Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
   1702     MIB.addReg(RegLo, Flags);
   1703     MIB.addReg(RegHi, Flags);
   1704   } else
   1705     MIB.addReg(Reg.getReg(), Flags);
   1706 }
   1707 
   1708 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
   1709 bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
   1710                                         MachineBasicBlock::iterator MBBI,
   1711                                         MachineBasicBlock::iterator &NextMBBI) {
   1712   bool IsThumb = STI->isThumb();
   1713   MachineInstr &MI = *MBBI;
   1714   DebugLoc DL = MI.getDebugLoc();
   1715   MachineOperand &Dest = MI.getOperand(0);
   1716   Register TempReg = MI.getOperand(1).getReg();
   1717   // Duplicating undef operands into 2 instructions does not guarantee the same
   1718   // value on both; However undef should be replaced by xzr anyway.
   1719   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
   1720   Register AddrReg = MI.getOperand(2).getReg();
   1721   Register DesiredReg = MI.getOperand(3).getReg();
   1722   MachineOperand New = MI.getOperand(4);
   1723   New.setIsKill(false);
   1724 
   1725   Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
   1726   Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
   1727   Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
   1728   Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
   1729 
   1730   MachineFunction *MF = MBB.getParent();
   1731   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1732   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1733   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
   1734 
   1735   MF->insert(++MBB.getIterator(), LoadCmpBB);
   1736   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
   1737   MF->insert(++StoreBB->getIterator(), DoneBB);
   1738 
   1739   // .Lloadcmp:
   1740   //     ldrexd rDestLo, rDestHi, [rAddr]
   1741   //     cmp rDestLo, rDesiredLo
   1742   //     sbcs dead rTempReg, rDestHi, rDesiredHi
   1743   //     bne .Ldone
   1744   unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
   1745   MachineInstrBuilder MIB;
   1746   MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
   1747   addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
   1748   MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
   1749 
   1750   unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
   1751   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
   1752       .addReg(DestLo, getKillRegState(Dest.isDead()))
   1753       .addReg(DesiredLo)
   1754       .add(predOps(ARMCC::AL));
   1755 
   1756   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
   1757       .addReg(DestHi, getKillRegState(Dest.isDead()))
   1758       .addReg(DesiredHi)
   1759       .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
   1760 
   1761   unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
   1762   BuildMI(LoadCmpBB, DL, TII->get(Bcc))
   1763       .addMBB(DoneBB)
   1764       .addImm(ARMCC::NE)
   1765       .addReg(ARM::CPSR, RegState::Kill);
   1766   LoadCmpBB->addSuccessor(DoneBB);
   1767   LoadCmpBB->addSuccessor(StoreBB);
   1768 
   1769   // .Lstore:
   1770   //     strexd rTempReg, rNewLo, rNewHi, [rAddr]
   1771   //     cmp rTempReg, #0
   1772   //     bne .Lloadcmp
   1773   unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
   1774   MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
   1775   unsigned Flags = getKillRegState(New.isDead());
   1776   addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
   1777   MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
   1778 
   1779   unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
   1780   BuildMI(StoreBB, DL, TII->get(CMPri))
   1781       .addReg(TempReg, RegState::Kill)
   1782       .addImm(0)
   1783       .add(predOps(ARMCC::AL));
   1784   BuildMI(StoreBB, DL, TII->get(Bcc))
   1785       .addMBB(LoadCmpBB)
   1786       .addImm(ARMCC::NE)
   1787       .addReg(ARM::CPSR, RegState::Kill);
   1788   StoreBB->addSuccessor(LoadCmpBB);
   1789   StoreBB->addSuccessor(DoneBB);
   1790 
   1791   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
   1792   DoneBB->transferSuccessors(&MBB);
   1793 
   1794   MBB.addSuccessor(LoadCmpBB);
   1795 
   1796   NextMBBI = MBB.end();
   1797   MI.eraseFromParent();
   1798 
   1799   // Recompute livein lists.
   1800   LivePhysRegs LiveRegs;
   1801   computeAndAddLiveIns(LiveRegs, *DoneBB);
   1802   computeAndAddLiveIns(LiveRegs, *StoreBB);
   1803   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
   1804   // Do an extra pass around the loop to get loop carried registers right.
   1805   StoreBB->clearLiveIns();
   1806   computeAndAddLiveIns(LiveRegs, *StoreBB);
   1807   LoadCmpBB->clearLiveIns();
   1808   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
   1809 
   1810   return true;
   1811 }
   1812 
   1813 static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
   1814                                 MachineBasicBlock &MBB,
   1815                                 MachineBasicBlock::iterator MBBI, int JumpReg,
   1816                                 const LivePhysRegs &LiveRegs, bool Thumb1Only) {
   1817   const DebugLoc &DL = MBBI->getDebugLoc();
   1818   if (Thumb1Only) { // push Lo and Hi regs separately
   1819     MachineInstrBuilder PushMIB =
   1820         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
   1821     for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
   1822       PushMIB.addReg(
   1823           Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
   1824     }
   1825 
   1826     // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
   1827     // regs that we just saved and push the low regs again, taking care to
   1828     // not clobber JumpReg. If JumpReg is one of the low registers, push first
   1829     // the values of r9-r11, and then r8. That would leave them ordered in
   1830     // memory, and allow us to later pop them with a single instructions.
   1831     // FIXME: Could also use any of r0-r3 that are free (including in the
   1832     // first PUSH above).
   1833     for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
   1834       if (JumpReg == LoReg)
   1835         continue;
   1836       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
   1837           .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
   1838           .add(predOps(ARMCC::AL));
   1839       --HiReg;
   1840     }
   1841     MachineInstrBuilder PushMIB2 =
   1842         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
   1843     for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
   1844       if (Reg == JumpReg)
   1845         continue;
   1846       PushMIB2.addReg(Reg, RegState::Kill);
   1847     }
   1848 
   1849     // If we couldn't use a low register for temporary storage (because it was
   1850     // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
   1851     // saved.
   1852     if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
   1853       int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
   1854       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
   1855           .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
   1856           .add(predOps(ARMCC::AL));
   1857       BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
   1858           .add(predOps(ARMCC::AL))
   1859           .addReg(LoReg, RegState::Kill);
   1860     }
   1861   } else { // push Lo and Hi registers with a single instruction
   1862     MachineInstrBuilder PushMIB =
   1863         BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
   1864             .addReg(ARM::SP)
   1865             .add(predOps(ARMCC::AL));
   1866     for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
   1867       PushMIB.addReg(
   1868           Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
   1869     }
   1870   }
   1871 }
   1872 
   1873 static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
   1874                                MachineBasicBlock &MBB,
   1875                                MachineBasicBlock::iterator MBBI, int JumpReg,
   1876                                bool Thumb1Only) {
   1877   const DebugLoc &DL = MBBI->getDebugLoc();
   1878   if (Thumb1Only) {
   1879     MachineInstrBuilder PopMIB =
   1880         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
   1881     for (int R = 0; R < 4; ++R) {
   1882       PopMIB.addReg(ARM::R4 + R, RegState::Define);
   1883       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
   1884           .addReg(ARM::R4 + R, RegState::Kill)
   1885           .add(predOps(ARMCC::AL));
   1886     }
   1887     MachineInstrBuilder PopMIB2 =
   1888         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
   1889     for (int R = 0; R < 4; ++R)
   1890       PopMIB2.addReg(ARM::R4 + R, RegState::Define);
   1891   } else { // pop Lo and Hi registers with a single instruction
   1892     MachineInstrBuilder PopMIB =
   1893         BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
   1894             .addReg(ARM::SP)
   1895             .add(predOps(ARMCC::AL));
   1896     for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
   1897       PopMIB.addReg(Reg, RegState::Define);
   1898   }
   1899 }
   1900 
   1901 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
   1902                                MachineBasicBlock::iterator MBBI,
   1903                                MachineBasicBlock::iterator &NextMBBI) {
   1904   MachineInstr &MI = *MBBI;
   1905   unsigned Opcode = MI.getOpcode();
   1906   switch (Opcode) {
   1907     default:
   1908       return false;
   1909 
   1910     case ARM::VBSPd:
   1911     case ARM::VBSPq: {
   1912       Register DstReg = MI.getOperand(0).getReg();
   1913       if (DstReg == MI.getOperand(3).getReg()) {
   1914         // Expand to VBIT
   1915         unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
   1916         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
   1917             .add(MI.getOperand(0))
   1918             .add(MI.getOperand(3))
   1919             .add(MI.getOperand(2))
   1920             .add(MI.getOperand(1))
   1921             .addImm(MI.getOperand(4).getImm())
   1922             .add(MI.getOperand(5));
   1923       } else if (DstReg == MI.getOperand(2).getReg()) {
   1924         // Expand to VBIF
   1925         unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
   1926         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
   1927             .add(MI.getOperand(0))
   1928             .add(MI.getOperand(2))
   1929             .add(MI.getOperand(3))
   1930             .add(MI.getOperand(1))
   1931             .addImm(MI.getOperand(4).getImm())
   1932             .add(MI.getOperand(5));
   1933       } else {
   1934         // Expand to VBSL
   1935         unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
   1936         if (DstReg == MI.getOperand(1).getReg()) {
   1937           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
   1938               .add(MI.getOperand(0))
   1939               .add(MI.getOperand(1))
   1940               .add(MI.getOperand(2))
   1941               .add(MI.getOperand(3))
   1942               .addImm(MI.getOperand(4).getImm())
   1943               .add(MI.getOperand(5));
   1944         } else {
   1945           // Use move to satisfy constraints
   1946           unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
   1947           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
   1948               .addReg(DstReg,
   1949                       RegState::Define |
   1950                           getRenamableRegState(MI.getOperand(0).isRenamable()))
   1951               .add(MI.getOperand(1))
   1952               .add(MI.getOperand(1))
   1953               .addImm(MI.getOperand(4).getImm())
   1954               .add(MI.getOperand(5));
   1955           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
   1956               .add(MI.getOperand(0))
   1957               .addReg(DstReg,
   1958                       RegState::Kill |
   1959                           getRenamableRegState(MI.getOperand(0).isRenamable()))
   1960               .add(MI.getOperand(2))
   1961               .add(MI.getOperand(3))
   1962               .addImm(MI.getOperand(4).getImm())
   1963               .add(MI.getOperand(5));
   1964         }
   1965       }
   1966       MI.eraseFromParent();
   1967       return true;
   1968     }
   1969 
   1970     case ARM::TCRETURNdi:
   1971     case ARM::TCRETURNri: {
   1972       MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
   1973       assert(MBBI->isReturn() &&
   1974              "Can only insert epilog into returning blocks");
   1975       unsigned RetOpcode = MBBI->getOpcode();
   1976       DebugLoc dl = MBBI->getDebugLoc();
   1977       const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
   1978           MBB.getParent()->getSubtarget().getInstrInfo());
   1979 
   1980       // Tail call return: adjust the stack pointer and jump to callee.
   1981       MBBI = MBB.getLastNonDebugInstr();
   1982       MachineOperand &JumpTarget = MBBI->getOperand(0);
   1983 
   1984       // Jump to label or value in register.
   1985       if (RetOpcode == ARM::TCRETURNdi) {
   1986         unsigned TCOpcode =
   1987             STI->isThumb()
   1988                 ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
   1989                 : ARM::TAILJMPd;
   1990         MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
   1991         if (JumpTarget.isGlobal())
   1992           MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
   1993                                JumpTarget.getTargetFlags());
   1994         else {
   1995           assert(JumpTarget.isSymbol());
   1996           MIB.addExternalSymbol(JumpTarget.getSymbolName(),
   1997                                 JumpTarget.getTargetFlags());
   1998         }
   1999 
   2000         // Add the default predicate in Thumb mode.
   2001         if (STI->isThumb())
   2002           MIB.add(predOps(ARMCC::AL));
   2003       } else if (RetOpcode == ARM::TCRETURNri) {
   2004         unsigned Opcode =
   2005           STI->isThumb() ? ARM::tTAILJMPr
   2006                          : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4);
   2007         BuildMI(MBB, MBBI, dl,
   2008                 TII.get(Opcode))
   2009             .addReg(JumpTarget.getReg(), RegState::Kill);
   2010       }
   2011 
   2012       auto NewMI = std::prev(MBBI);
   2013       for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
   2014         NewMI->addOperand(MBBI->getOperand(i));
   2015 
   2016 
   2017       // Update call site info and delete the pseudo instruction TCRETURN.
   2018       if (MI.isCandidateForCallSiteEntry())
   2019         MI.getMF()->moveCallSiteInfo(&MI, &*NewMI);
   2020       MBB.erase(MBBI);
   2021 
   2022       MBBI = NewMI;
   2023       return true;
   2024     }
   2025     case ARM::tBXNS_RET: {
   2026       MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
   2027 
   2028       if (STI->hasV8_1MMainlineOps()) {
   2029         // Restore the non-secure floating point context.
   2030         BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
   2031                 TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
   2032             .addReg(ARM::SP)
   2033             .addImm(4)
   2034             .add(predOps(ARMCC::AL));
   2035       }
   2036 
   2037       // Clear all GPR that are not a use of the return instruction.
   2038       assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
   2039         return !Op.isReg() || Op.getReg() != ARM::R12;
   2040       }));
   2041       SmallVector<unsigned, 5> ClearRegs;
   2042       determineGPRegsToClear(
   2043           *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
   2044       CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
   2045                       ARM::LR);
   2046 
   2047       MachineInstrBuilder NewMI =
   2048           BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
   2049                   TII->get(ARM::tBXNS))
   2050               .addReg(ARM::LR)
   2051               .add(predOps(ARMCC::AL));
   2052       for (const MachineOperand &Op : MI.operands())
   2053         NewMI->addOperand(Op);
   2054       MI.eraseFromParent();
   2055       return true;
   2056     }
   2057     case ARM::tBLXNS_CALL: {
   2058       DebugLoc DL = MBBI->getDebugLoc();
   2059       unsigned JumpReg = MBBI->getOperand(0).getReg();
   2060 
   2061       // Figure out which registers are live at the point immediately before the
   2062       // call. When we indiscriminately push a set of registers, the live
   2063       // registers are added as ordinary use operands, whereas dead registers
   2064       // are "undef".
   2065       LivePhysRegs LiveRegs(*TRI);
   2066       LiveRegs.addLiveOuts(MBB);
   2067       for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
   2068         LiveRegs.stepBackward(MI);
   2069       LiveRegs.stepBackward(*MBBI);
   2070 
   2071       CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
   2072                           AFI->isThumb1OnlyFunction());
   2073 
   2074       SmallVector<unsigned, 16> ClearRegs;
   2075       determineGPRegsToClear(*MBBI,
   2076                              {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
   2077                               ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
   2078                               ARM::R10, ARM::R11, ARM::R12},
   2079                              ClearRegs);
   2080       auto OriginalClearRegs = ClearRegs;
   2081 
   2082       // Get the first cleared register as a scratch (to use later with tBIC).
   2083       // We need to use the first so we can ensure it is a low register.
   2084       unsigned ScratchReg = ClearRegs.front();
   2085 
   2086       // Clear LSB of JumpReg
   2087       if (AFI->isThumb2Function()) {
   2088         BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
   2089             .addReg(JumpReg)
   2090             .addImm(1)
   2091             .add(predOps(ARMCC::AL))
   2092             .add(condCodeOp());
   2093       } else {
   2094         // We need to use an extra register to cope with 8M Baseline,
   2095         // since we have saved all of the registers we are ok to trash a non
   2096         // argument register here.
   2097         BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
   2098             .add(condCodeOp())
   2099             .addImm(1)
   2100             .add(predOps(ARMCC::AL));
   2101         BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
   2102             .addReg(ARM::CPSR, RegState::Define)
   2103             .addReg(JumpReg)
   2104             .addReg(ScratchReg)
   2105             .add(predOps(ARMCC::AL));
   2106       }
   2107 
   2108       CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
   2109                           ClearRegs); // save+clear FP regs with ClearRegs
   2110       CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
   2111 
   2112       const MachineInstrBuilder NewCall =
   2113           BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
   2114               .add(predOps(ARMCC::AL))
   2115               .addReg(JumpReg, RegState::Kill);
   2116 
   2117       for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
   2118         NewCall->addOperand(MI.getOperand(I));
   2119       if (MI.isCandidateForCallSiteEntry())
   2120         MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
   2121 
   2122       CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers
   2123 
   2124       CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction());
   2125 
   2126       MI.eraseFromParent();
   2127       return true;
   2128     }
   2129     case ARM::VMOVHcc:
   2130     case ARM::VMOVScc:
   2131     case ARM::VMOVDcc: {
   2132       unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD;
   2133       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
   2134               MI.getOperand(1).getReg())
   2135           .add(MI.getOperand(2))
   2136           .addImm(MI.getOperand(3).getImm()) // 'pred'
   2137           .add(MI.getOperand(4))
   2138           .add(makeImplicit(MI.getOperand(1)));
   2139 
   2140       MI.eraseFromParent();
   2141       return true;
   2142     }
   2143     case ARM::t2MOVCCr:
   2144     case ARM::MOVCCr: {
   2145       unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
   2146       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
   2147               MI.getOperand(1).getReg())
   2148           .add(MI.getOperand(2))
   2149           .addImm(MI.getOperand(3).getImm()) // 'pred'
   2150           .add(MI.getOperand(4))
   2151           .add(condCodeOp()) // 's' bit
   2152           .add(makeImplicit(MI.getOperand(1)));
   2153 
   2154       MI.eraseFromParent();
   2155       return true;
   2156     }
   2157     case ARM::MOVCCsi: {
   2158       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
   2159               (MI.getOperand(1).getReg()))
   2160           .add(MI.getOperand(2))
   2161           .addImm(MI.getOperand(3).getImm())
   2162           .addImm(MI.getOperand(4).getImm()) // 'pred'
   2163           .add(MI.getOperand(5))
   2164           .add(condCodeOp()) // 's' bit
   2165           .add(makeImplicit(MI.getOperand(1)));
   2166 
   2167       MI.eraseFromParent();
   2168       return true;
   2169     }
   2170     case ARM::MOVCCsr: {
   2171       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
   2172               (MI.getOperand(1).getReg()))
   2173           .add(MI.getOperand(2))
   2174           .add(MI.getOperand(3))
   2175           .addImm(MI.getOperand(4).getImm())
   2176           .addImm(MI.getOperand(5).getImm()) // 'pred'
   2177           .add(MI.getOperand(6))
   2178           .add(condCodeOp()) // 's' bit
   2179           .add(makeImplicit(MI.getOperand(1)));
   2180 
   2181       MI.eraseFromParent();
   2182       return true;
   2183     }
   2184     case ARM::t2MOVCCi16:
   2185     case ARM::MOVCCi16: {
   2186       unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
   2187       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
   2188               MI.getOperand(1).getReg())
   2189           .addImm(MI.getOperand(2).getImm())
   2190           .addImm(MI.getOperand(3).getImm()) // 'pred'
   2191           .add(MI.getOperand(4))
   2192           .add(makeImplicit(MI.getOperand(1)));
   2193       MI.eraseFromParent();
   2194       return true;
   2195     }
   2196     case ARM::t2MOVCCi:
   2197     case ARM::MOVCCi: {
   2198       unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
   2199       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
   2200               MI.getOperand(1).getReg())
   2201           .addImm(MI.getOperand(2).getImm())
   2202           .addImm(MI.getOperand(3).getImm()) // 'pred'
   2203           .add(MI.getOperand(4))
   2204           .add(condCodeOp()) // 's' bit
   2205           .add(makeImplicit(MI.getOperand(1)));
   2206 
   2207       MI.eraseFromParent();
   2208       return true;
   2209     }
   2210     case ARM::t2MVNCCi:
   2211     case ARM::MVNCCi: {
   2212       unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
   2213       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
   2214               MI.getOperand(1).getReg())
   2215           .addImm(MI.getOperand(2).getImm())
   2216           .addImm(MI.getOperand(3).getImm()) // 'pred'
   2217           .add(MI.getOperand(4))
   2218           .add(condCodeOp()) // 's' bit
   2219           .add(makeImplicit(MI.getOperand(1)));
   2220 
   2221       MI.eraseFromParent();
   2222       return true;
   2223     }
   2224     case ARM::t2MOVCClsl:
   2225     case ARM::t2MOVCClsr:
   2226     case ARM::t2MOVCCasr:
   2227     case ARM::t2MOVCCror: {
   2228       unsigned NewOpc;
   2229       switch (Opcode) {
   2230       case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
   2231       case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
   2232       case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
   2233       case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
   2234       default: llvm_unreachable("unexpeced conditional move");
   2235       }
   2236       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
   2237               MI.getOperand(1).getReg())
   2238           .add(MI.getOperand(2))
   2239           .addImm(MI.getOperand(3).getImm())
   2240           .addImm(MI.getOperand(4).getImm()) // 'pred'
   2241           .add(MI.getOperand(5))
   2242           .add(condCodeOp()) // 's' bit
   2243           .add(makeImplicit(MI.getOperand(1)));
   2244       MI.eraseFromParent();
   2245       return true;
   2246     }
   2247     case ARM::Int_eh_sjlj_dispatchsetup: {
   2248       MachineFunction &MF = *MI.getParent()->getParent();
   2249       const ARMBaseInstrInfo *AII =
   2250         static_cast<const ARMBaseInstrInfo*>(TII);
   2251       const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
   2252       // For functions using a base pointer, we rematerialize it (via the frame
   2253       // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
   2254       // for us. Otherwise, expand to nothing.
   2255       if (RI.hasBasePointer(MF)) {
   2256         int32_t NumBytes = AFI->getFramePtrSpillOffset();
   2257         Register FramePtr = RI.getFrameRegister(MF);
   2258         assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
   2259                "base pointer without frame pointer?");
   2260 
   2261         if (AFI->isThumb2Function()) {
   2262           emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
   2263                                  FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
   2264         } else if (AFI->isThumbFunction()) {
   2265           emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
   2266                                     FramePtr, -NumBytes, *TII, RI);
   2267         } else {
   2268           emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
   2269                                   FramePtr, -NumBytes, ARMCC::AL, 0,
   2270                                   *TII);
   2271         }
   2272         // If there's dynamic realignment, adjust for it.
   2273         if (RI.hasStackRealignment(MF)) {
   2274           MachineFrameInfo &MFI = MF.getFrameInfo();
   2275           Align MaxAlign = MFI.getMaxAlign();
   2276           assert (!AFI->isThumb1OnlyFunction());
   2277           // Emit bic r6, r6, MaxAlign
   2278           assert(MaxAlign <= Align(256) &&
   2279                  "The BIC instruction cannot encode "
   2280                  "immediates larger than 256 with all lower "
   2281                  "bits set.");
   2282           unsigned bicOpc = AFI->isThumbFunction() ?
   2283             ARM::t2BICri : ARM::BICri;
   2284           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
   2285               .addReg(ARM::R6, RegState::Kill)
   2286               .addImm(MaxAlign.value() - 1)
   2287               .add(predOps(ARMCC::AL))
   2288               .add(condCodeOp());
   2289         }
   2290       }
   2291       MI.eraseFromParent();
   2292       return true;
   2293     }
   2294 
   2295     case ARM::MOVsrl_flag:
   2296     case ARM::MOVsra_flag: {
   2297       // These are just fancy MOVs instructions.
   2298       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
   2299               MI.getOperand(0).getReg())
   2300           .add(MI.getOperand(1))
   2301           .addImm(ARM_AM::getSORegOpc(
   2302               (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
   2303           .add(predOps(ARMCC::AL))
   2304           .addReg(ARM::CPSR, RegState::Define);
   2305       MI.eraseFromParent();
   2306       return true;
   2307     }
   2308     case ARM::RRX: {
   2309       // This encodes as "MOVs Rd, Rm, rrx
   2310       MachineInstrBuilder MIB =
   2311           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
   2312                   MI.getOperand(0).getReg())
   2313               .add(MI.getOperand(1))
   2314               .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
   2315               .add(predOps(ARMCC::AL))
   2316               .add(condCodeOp());
   2317       TransferImpOps(MI, MIB, MIB);
   2318       MI.eraseFromParent();
   2319       return true;
   2320     }
   2321     case ARM::tTPsoft:
   2322     case ARM::TPsoft: {
   2323       const bool Thumb = Opcode == ARM::tTPsoft;
   2324 
   2325       MachineInstrBuilder MIB;
   2326       MachineFunction *MF = MBB.getParent();
   2327       if (STI->genLongCalls()) {
   2328         MachineConstantPool *MCP = MF->getConstantPool();
   2329         unsigned PCLabelID = AFI->createPICLabelUId();
   2330         MachineConstantPoolValue *CPV =
   2331             ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
   2332                                           "__aeabi_read_tp", PCLabelID, 0);
   2333         Register Reg = MI.getOperand(0).getReg();
   2334         MIB =
   2335             BuildMI(MBB, MBBI, MI.getDebugLoc(),
   2336                     TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
   2337                 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
   2338         if (!Thumb)
   2339           MIB.addImm(0);
   2340         MIB.add(predOps(ARMCC::AL));
   2341 
   2342         MIB =
   2343             BuildMI(MBB, MBBI, MI.getDebugLoc(),
   2344                     TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
   2345         if (Thumb)
   2346           MIB.add(predOps(ARMCC::AL));
   2347         MIB.addReg(Reg, RegState::Kill);
   2348       } else {
   2349         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
   2350                       TII->get(Thumb ? ARM::tBL : ARM::BL));
   2351         if (Thumb)
   2352           MIB.add(predOps(ARMCC::AL));
   2353         MIB.addExternalSymbol("__aeabi_read_tp", 0);
   2354       }
   2355 
   2356       MIB.cloneMemRefs(MI);
   2357       TransferImpOps(MI, MIB, MIB);
   2358       // Update the call site info.
   2359       if (MI.isCandidateForCallSiteEntry())
   2360         MF->moveCallSiteInfo(&MI, &*MIB);
   2361       MI.eraseFromParent();
   2362       return true;
   2363     }
   2364     case ARM::tLDRpci_pic:
   2365     case ARM::t2LDRpci_pic: {
   2366       unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
   2367         ? ARM::tLDRpci : ARM::t2LDRpci;
   2368       Register DstReg = MI.getOperand(0).getReg();
   2369       bool DstIsDead = MI.getOperand(0).isDead();
   2370       MachineInstrBuilder MIB1 =
   2371           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
   2372               .add(MI.getOperand(1))
   2373               .add(predOps(ARMCC::AL));
   2374       MIB1.cloneMemRefs(MI);
   2375       MachineInstrBuilder MIB2 =
   2376           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
   2377               .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
   2378               .addReg(DstReg)
   2379               .add(MI.getOperand(2));
   2380       TransferImpOps(MI, MIB1, MIB2);
   2381       MI.eraseFromParent();
   2382       return true;
   2383     }
   2384 
   2385     case ARM::LDRLIT_ga_abs:
   2386     case ARM::LDRLIT_ga_pcrel:
   2387     case ARM::LDRLIT_ga_pcrel_ldr:
   2388     case ARM::tLDRLIT_ga_abs:
   2389     case ARM::tLDRLIT_ga_pcrel: {
   2390       Register DstReg = MI.getOperand(0).getReg();
   2391       bool DstIsDead = MI.getOperand(0).isDead();
   2392       const MachineOperand &MO1 = MI.getOperand(1);
   2393       auto Flags = MO1.getTargetFlags();
   2394       const GlobalValue *GV = MO1.getGlobal();
   2395       bool IsARM =
   2396           Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
   2397       bool IsPIC =
   2398           Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
   2399       unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
   2400       unsigned PICAddOpc =
   2401           IsARM
   2402               ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
   2403               : ARM::tPICADD;
   2404 
   2405       // We need a new const-pool entry to load from.
   2406       MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
   2407       unsigned ARMPCLabelIndex = 0;
   2408       MachineConstantPoolValue *CPV;
   2409 
   2410       if (IsPIC) {
   2411         unsigned PCAdj = IsARM ? 8 : 4;
   2412         auto Modifier = (Flags & ARMII::MO_GOT)
   2413                             ? ARMCP::GOT_PREL
   2414                             : ARMCP::no_modifier;
   2415         ARMPCLabelIndex = AFI->createPICLabelUId();
   2416         CPV = ARMConstantPoolConstant::Create(
   2417             GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier,
   2418             /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL);
   2419       } else
   2420         CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);
   2421 
   2422       MachineInstrBuilder MIB =
   2423           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
   2424               .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
   2425       if (IsARM)
   2426         MIB.addImm(0);
   2427       MIB.add(predOps(ARMCC::AL));
   2428 
   2429       if (IsPIC) {
   2430         MachineInstrBuilder MIB =
   2431           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
   2432             .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
   2433             .addReg(DstReg)
   2434             .addImm(ARMPCLabelIndex);
   2435 
   2436         if (IsARM)
   2437           MIB.add(predOps(ARMCC::AL));
   2438       }
   2439 
   2440       MI.eraseFromParent();
   2441       return true;
   2442     }
   2443     case ARM::MOV_ga_pcrel:
   2444     case ARM::MOV_ga_pcrel_ldr:
   2445     case ARM::t2MOV_ga_pcrel: {
   2446       // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode.
   2447       unsigned LabelId = AFI->createPICLabelUId();
   2448       Register DstReg = MI.getOperand(0).getReg();
   2449       bool DstIsDead = MI.getOperand(0).isDead();
   2450       const MachineOperand &MO1 = MI.getOperand(1);
   2451       const GlobalValue *GV = MO1.getGlobal();
   2452       unsigned TF = MO1.getTargetFlags();
   2453       bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
   2454       unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
   2455       unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
   2456       unsigned LO16TF = TF | ARMII::MO_LO16;
   2457       unsigned HI16TF = TF | ARMII::MO_HI16;
   2458       unsigned PICAddOpc = isARM
   2459         ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
   2460         : ARM::tPICADD;
   2461       MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
   2462                                          TII->get(LO16Opc), DstReg)
   2463         .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
   2464         .addImm(LabelId);
   2465 
   2466       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
   2467         .addReg(DstReg)
   2468         .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
   2469         .addImm(LabelId);
   2470 
   2471       MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
   2472                                          TII->get(PICAddOpc))
   2473         .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
   2474         .addReg(DstReg).addImm(LabelId);
   2475       if (isARM) {
   2476         MIB3.add(predOps(ARMCC::AL));
   2477         if (Opcode == ARM::MOV_ga_pcrel_ldr)
   2478           MIB3.cloneMemRefs(MI);
   2479       }
   2480       TransferImpOps(MI, MIB1, MIB3);
   2481       MI.eraseFromParent();
   2482       return true;
   2483     }
   2484 
   2485     case ARM::MOVi32imm:
   2486     case ARM::MOVCCi32imm:
   2487     case ARM::t2MOVi32imm:
   2488     case ARM::t2MOVCCi32imm:
   2489       ExpandMOV32BitImm(MBB, MBBI);
   2490       return true;
   2491 
   2492     case ARM::SUBS_PC_LR: {
   2493       MachineInstrBuilder MIB =
   2494           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
   2495               .addReg(ARM::LR)
   2496               .add(MI.getOperand(0))
   2497               .add(MI.getOperand(1))
   2498               .add(MI.getOperand(2))
   2499               .addReg(ARM::CPSR, RegState::Undef);
   2500       TransferImpOps(MI, MIB, MIB);
   2501       MI.eraseFromParent();
   2502       return true;
   2503     }
   2504     case ARM::VLDMQIA: {
   2505       unsigned NewOpc = ARM::VLDMDIA;
   2506       MachineInstrBuilder MIB =
   2507         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
   2508       unsigned OpIdx = 0;
   2509 
   2510       // Grab the Q register destination.
   2511       bool DstIsDead = MI.getOperand(OpIdx).isDead();
   2512       Register DstReg = MI.getOperand(OpIdx++).getReg();
   2513 
   2514       // Copy the source register.
   2515       MIB.add(MI.getOperand(OpIdx++));
   2516 
   2517       // Copy the predicate operands.
   2518       MIB.add(MI.getOperand(OpIdx++));
   2519       MIB.add(MI.getOperand(OpIdx++));
   2520 
   2521       // Add the destination operands (D subregs).
   2522       Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
   2523       Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
   2524       MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
   2525         .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
   2526 
   2527       // Add an implicit def for the super-register.
   2528       MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
   2529       TransferImpOps(MI, MIB, MIB);
   2530       MIB.cloneMemRefs(MI);
   2531       MI.eraseFromParent();
   2532       return true;
   2533     }
   2534 
   2535     case ARM::VSTMQIA: {
   2536       unsigned NewOpc = ARM::VSTMDIA;
   2537       MachineInstrBuilder MIB =
   2538         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
   2539       unsigned OpIdx = 0;
   2540 
   2541       // Grab the Q register source.
   2542       bool SrcIsKill = MI.getOperand(OpIdx).isKill();
   2543       Register SrcReg = MI.getOperand(OpIdx++).getReg();
   2544 
   2545       // Copy the destination register.
   2546       MachineOperand Dst(MI.getOperand(OpIdx++));
   2547       MIB.add(Dst);
   2548 
   2549       // Copy the predicate operands.
   2550       MIB.add(MI.getOperand(OpIdx++));
   2551       MIB.add(MI.getOperand(OpIdx++));
   2552 
   2553       // Add the source operands (D subregs).
   2554       Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
   2555       Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
   2556       MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
   2557          .addReg(D1, SrcIsKill ? RegState::Kill : 0);
   2558 
   2559       if (SrcIsKill)      // Add an implicit kill for the Q register.
   2560         MIB->addRegisterKilled(SrcReg, TRI, true);
   2561 
   2562       TransferImpOps(MI, MIB, MIB);
   2563       MIB.cloneMemRefs(MI);
   2564       MI.eraseFromParent();
   2565       return true;
   2566     }
   2567 
   2568     case ARM::VLD2q8Pseudo:
   2569     case ARM::VLD2q16Pseudo:
   2570     case ARM::VLD2q32Pseudo:
   2571     case ARM::VLD2q8PseudoWB_fixed:
   2572     case ARM::VLD2q16PseudoWB_fixed:
   2573     case ARM::VLD2q32PseudoWB_fixed:
   2574     case ARM::VLD2q8PseudoWB_register:
   2575     case ARM::VLD2q16PseudoWB_register:
   2576     case ARM::VLD2q32PseudoWB_register:
   2577     case ARM::VLD3d8Pseudo:
   2578     case ARM::VLD3d16Pseudo:
   2579     case ARM::VLD3d32Pseudo:
   2580     case ARM::VLD1d8TPseudo:
   2581     case ARM::VLD1d16TPseudo:
   2582     case ARM::VLD1d32TPseudo:
   2583     case ARM::VLD1d64TPseudo:
   2584     case ARM::VLD1d64TPseudoWB_fixed:
   2585     case ARM::VLD1d64TPseudoWB_register:
   2586     case ARM::VLD3d8Pseudo_UPD:
   2587     case ARM::VLD3d16Pseudo_UPD:
   2588     case ARM::VLD3d32Pseudo_UPD:
   2589     case ARM::VLD3q8Pseudo_UPD:
   2590     case ARM::VLD3q16Pseudo_UPD:
   2591     case ARM::VLD3q32Pseudo_UPD:
   2592     case ARM::VLD3q8oddPseudo:
   2593     case ARM::VLD3q16oddPseudo:
   2594     case ARM::VLD3q32oddPseudo:
   2595     case ARM::VLD3q8oddPseudo_UPD:
   2596     case ARM::VLD3q16oddPseudo_UPD:
   2597     case ARM::VLD3q32oddPseudo_UPD:
   2598     case ARM::VLD4d8Pseudo:
   2599     case ARM::VLD4d16Pseudo:
   2600     case ARM::VLD4d32Pseudo:
   2601     case ARM::VLD1d8QPseudo:
   2602     case ARM::VLD1d16QPseudo:
   2603     case ARM::VLD1d32QPseudo:
   2604     case ARM::VLD1d64QPseudo:
   2605     case ARM::VLD1d64QPseudoWB_fixed:
   2606     case ARM::VLD1d64QPseudoWB_register:
   2607     case ARM::VLD1q8HighQPseudo:
   2608     case ARM::VLD1q8LowQPseudo_UPD:
   2609     case ARM::VLD1q8HighTPseudo:
   2610     case ARM::VLD1q8LowTPseudo_UPD:
   2611     case ARM::VLD1q16HighQPseudo:
   2612     case ARM::VLD1q16LowQPseudo_UPD:
   2613     case ARM::VLD1q16HighTPseudo:
   2614     case ARM::VLD1q16LowTPseudo_UPD:
   2615     case ARM::VLD1q32HighQPseudo:
   2616     case ARM::VLD1q32LowQPseudo_UPD:
   2617     case ARM::VLD1q32HighTPseudo:
   2618     case ARM::VLD1q32LowTPseudo_UPD:
   2619     case ARM::VLD1q64HighQPseudo:
   2620     case ARM::VLD1q64LowQPseudo_UPD:
   2621     case ARM::VLD1q64HighTPseudo:
   2622     case ARM::VLD1q64LowTPseudo_UPD:
   2623     case ARM::VLD4d8Pseudo_UPD:
   2624     case ARM::VLD4d16Pseudo_UPD:
   2625     case ARM::VLD4d32Pseudo_UPD:
   2626     case ARM::VLD4q8Pseudo_UPD:
   2627     case ARM::VLD4q16Pseudo_UPD:
   2628     case ARM::VLD4q32Pseudo_UPD:
   2629     case ARM::VLD4q8oddPseudo:
   2630     case ARM::VLD4q16oddPseudo:
   2631     case ARM::VLD4q32oddPseudo:
   2632     case ARM::VLD4q8oddPseudo_UPD:
   2633     case ARM::VLD4q16oddPseudo_UPD:
   2634     case ARM::VLD4q32oddPseudo_UPD:
   2635     case ARM::VLD3DUPd8Pseudo:
   2636     case ARM::VLD3DUPd16Pseudo:
   2637     case ARM::VLD3DUPd32Pseudo:
   2638     case ARM::VLD3DUPd8Pseudo_UPD:
   2639     case ARM::VLD3DUPd16Pseudo_UPD:
   2640     case ARM::VLD3DUPd32Pseudo_UPD:
   2641     case ARM::VLD4DUPd8Pseudo:
   2642     case ARM::VLD4DUPd16Pseudo:
   2643     case ARM::VLD4DUPd32Pseudo:
   2644     case ARM::VLD4DUPd8Pseudo_UPD:
   2645     case ARM::VLD4DUPd16Pseudo_UPD:
   2646     case ARM::VLD4DUPd32Pseudo_UPD:
   2647     case ARM::VLD2DUPq8EvenPseudo:
   2648     case ARM::VLD2DUPq8OddPseudo:
   2649     case ARM::VLD2DUPq16EvenPseudo:
   2650     case ARM::VLD2DUPq16OddPseudo:
   2651     case ARM::VLD2DUPq32EvenPseudo:
   2652     case ARM::VLD2DUPq32OddPseudo:
   2653     case ARM::VLD3DUPq8EvenPseudo:
   2654     case ARM::VLD3DUPq8OddPseudo:
   2655     case ARM::VLD3DUPq16EvenPseudo:
   2656     case ARM::VLD3DUPq16OddPseudo:
   2657     case ARM::VLD3DUPq32EvenPseudo:
   2658     case ARM::VLD3DUPq32OddPseudo:
   2659     case ARM::VLD4DUPq8EvenPseudo:
   2660     case ARM::VLD4DUPq8OddPseudo:
   2661     case ARM::VLD4DUPq16EvenPseudo:
   2662     case ARM::VLD4DUPq16OddPseudo:
   2663     case ARM::VLD4DUPq32EvenPseudo:
   2664     case ARM::VLD4DUPq32OddPseudo:
   2665       ExpandVLD(MBBI);
   2666       return true;
   2667 
   2668     case ARM::VST2q8Pseudo:
   2669     case ARM::VST2q16Pseudo:
   2670     case ARM::VST2q32Pseudo:
   2671     case ARM::VST2q8PseudoWB_fixed:
   2672     case ARM::VST2q16PseudoWB_fixed:
   2673     case ARM::VST2q32PseudoWB_fixed:
   2674     case ARM::VST2q8PseudoWB_register:
   2675     case ARM::VST2q16PseudoWB_register:
   2676     case ARM::VST2q32PseudoWB_register:
   2677     case ARM::VST3d8Pseudo:
   2678     case ARM::VST3d16Pseudo:
   2679     case ARM::VST3d32Pseudo:
   2680     case ARM::VST1d8TPseudo:
   2681     case ARM::VST1d8TPseudoWB_fixed:
   2682     case ARM::VST1d8TPseudoWB_register:
   2683     case ARM::VST1d16TPseudo:
   2684     case ARM::VST1d16TPseudoWB_fixed:
   2685     case ARM::VST1d16TPseudoWB_register:
   2686     case ARM::VST1d32TPseudo:
   2687     case ARM::VST1d32TPseudoWB_fixed:
   2688     case ARM::VST1d32TPseudoWB_register:
   2689     case ARM::VST1d64TPseudo:
   2690     case ARM::VST1d64TPseudoWB_fixed:
   2691     case ARM::VST1d64TPseudoWB_register:
   2692     case ARM::VST3d8Pseudo_UPD:
   2693     case ARM::VST3d16Pseudo_UPD:
   2694     case ARM::VST3d32Pseudo_UPD:
   2695     case ARM::VST3q8Pseudo_UPD:
   2696     case ARM::VST3q16Pseudo_UPD:
   2697     case ARM::VST3q32Pseudo_UPD:
   2698     case ARM::VST3q8oddPseudo:
   2699     case ARM::VST3q16oddPseudo:
   2700     case ARM::VST3q32oddPseudo:
   2701     case ARM::VST3q8oddPseudo_UPD:
   2702     case ARM::VST3q16oddPseudo_UPD:
   2703     case ARM::VST3q32oddPseudo_UPD:
   2704     case ARM::VST4d8Pseudo:
   2705     case ARM::VST4d16Pseudo:
   2706     case ARM::VST4d32Pseudo:
   2707     case ARM::VST1d8QPseudo:
   2708     case ARM::VST1d8QPseudoWB_fixed:
   2709     case ARM::VST1d8QPseudoWB_register:
   2710     case ARM::VST1d16QPseudo:
   2711     case ARM::VST1d16QPseudoWB_fixed:
   2712     case ARM::VST1d16QPseudoWB_register:
   2713     case ARM::VST1d32QPseudo:
   2714     case ARM::VST1d32QPseudoWB_fixed:
   2715     case ARM::VST1d32QPseudoWB_register:
   2716     case ARM::VST1d64QPseudo:
   2717     case ARM::VST1d64QPseudoWB_fixed:
   2718     case ARM::VST1d64QPseudoWB_register:
   2719     case ARM::VST4d8Pseudo_UPD:
   2720     case ARM::VST4d16Pseudo_UPD:
   2721     case ARM::VST4d32Pseudo_UPD:
   2722     case ARM::VST1q8HighQPseudo:
   2723     case ARM::VST1q8LowQPseudo_UPD:
   2724     case ARM::VST1q8HighTPseudo:
   2725     case ARM::VST1q8LowTPseudo_UPD:
   2726     case ARM::VST1q16HighQPseudo:
   2727     case ARM::VST1q16LowQPseudo_UPD:
   2728     case ARM::VST1q16HighTPseudo:
   2729     case ARM::VST1q16LowTPseudo_UPD:
   2730     case ARM::VST1q32HighQPseudo:
   2731     case ARM::VST1q32LowQPseudo_UPD:
   2732     case ARM::VST1q32HighTPseudo:
   2733     case ARM::VST1q32LowTPseudo_UPD:
   2734     case ARM::VST1q64HighQPseudo:
   2735     case ARM::VST1q64LowQPseudo_UPD:
   2736     case ARM::VST1q64HighTPseudo:
   2737     case ARM::VST1q64LowTPseudo_UPD:
   2738     case ARM::VST1q8HighTPseudo_UPD:
   2739     case ARM::VST1q16HighTPseudo_UPD:
   2740     case ARM::VST1q32HighTPseudo_UPD:
   2741     case ARM::VST1q64HighTPseudo_UPD:
   2742     case ARM::VST1q8HighQPseudo_UPD:
   2743     case ARM::VST1q16HighQPseudo_UPD:
   2744     case ARM::VST1q32HighQPseudo_UPD:
   2745     case ARM::VST1q64HighQPseudo_UPD:
   2746     case ARM::VST4q8Pseudo_UPD:
   2747     case ARM::VST4q16Pseudo_UPD:
   2748     case ARM::VST4q32Pseudo_UPD:
   2749     case ARM::VST4q8oddPseudo:
   2750     case ARM::VST4q16oddPseudo:
   2751     case ARM::VST4q32oddPseudo:
   2752     case ARM::VST4q8oddPseudo_UPD:
   2753     case ARM::VST4q16oddPseudo_UPD:
   2754     case ARM::VST4q32oddPseudo_UPD:
   2755       ExpandVST(MBBI);
   2756       return true;
   2757 
   2758     case ARM::VLD1LNq8Pseudo:
   2759     case ARM::VLD1LNq16Pseudo:
   2760     case ARM::VLD1LNq32Pseudo:
   2761     case ARM::VLD1LNq8Pseudo_UPD:
   2762     case ARM::VLD1LNq16Pseudo_UPD:
   2763     case ARM::VLD1LNq32Pseudo_UPD:
   2764     case ARM::VLD2LNd8Pseudo:
   2765     case ARM::VLD2LNd16Pseudo:
   2766     case ARM::VLD2LNd32Pseudo:
   2767     case ARM::VLD2LNq16Pseudo:
   2768     case ARM::VLD2LNq32Pseudo:
   2769     case ARM::VLD2LNd8Pseudo_UPD:
   2770     case ARM::VLD2LNd16Pseudo_UPD:
   2771     case ARM::VLD2LNd32Pseudo_UPD:
   2772     case ARM::VLD2LNq16Pseudo_UPD:
   2773     case ARM::VLD2LNq32Pseudo_UPD:
   2774     case ARM::VLD3LNd8Pseudo:
   2775     case ARM::VLD3LNd16Pseudo:
   2776     case ARM::VLD3LNd32Pseudo:
   2777     case ARM::VLD3LNq16Pseudo:
   2778     case ARM::VLD3LNq32Pseudo:
   2779     case ARM::VLD3LNd8Pseudo_UPD:
   2780     case ARM::VLD3LNd16Pseudo_UPD:
   2781     case ARM::VLD3LNd32Pseudo_UPD:
   2782     case ARM::VLD3LNq16Pseudo_UPD:
   2783     case ARM::VLD3LNq32Pseudo_UPD:
   2784     case ARM::VLD4LNd8Pseudo:
   2785     case ARM::VLD4LNd16Pseudo:
   2786     case ARM::VLD4LNd32Pseudo:
   2787     case ARM::VLD4LNq16Pseudo:
   2788     case ARM::VLD4LNq32Pseudo:
   2789     case ARM::VLD4LNd8Pseudo_UPD:
   2790     case ARM::VLD4LNd16Pseudo_UPD:
   2791     case ARM::VLD4LNd32Pseudo_UPD:
   2792     case ARM::VLD4LNq16Pseudo_UPD:
   2793     case ARM::VLD4LNq32Pseudo_UPD:
   2794     case ARM::VST1LNq8Pseudo:
   2795     case ARM::VST1LNq16Pseudo:
   2796     case ARM::VST1LNq32Pseudo:
   2797     case ARM::VST1LNq8Pseudo_UPD:
   2798     case ARM::VST1LNq16Pseudo_UPD:
   2799     case ARM::VST1LNq32Pseudo_UPD:
   2800     case ARM::VST2LNd8Pseudo:
   2801     case ARM::VST2LNd16Pseudo:
   2802     case ARM::VST2LNd32Pseudo:
   2803     case ARM::VST2LNq16Pseudo:
   2804     case ARM::VST2LNq32Pseudo:
   2805     case ARM::VST2LNd8Pseudo_UPD:
   2806     case ARM::VST2LNd16Pseudo_UPD:
   2807     case ARM::VST2LNd32Pseudo_UPD:
   2808     case ARM::VST2LNq16Pseudo_UPD:
   2809     case ARM::VST2LNq32Pseudo_UPD:
   2810     case ARM::VST3LNd8Pseudo:
   2811     case ARM::VST3LNd16Pseudo:
   2812     case ARM::VST3LNd32Pseudo:
   2813     case ARM::VST3LNq16Pseudo:
   2814     case ARM::VST3LNq32Pseudo:
   2815     case ARM::VST3LNd8Pseudo_UPD:
   2816     case ARM::VST3LNd16Pseudo_UPD:
   2817     case ARM::VST3LNd32Pseudo_UPD:
   2818     case ARM::VST3LNq16Pseudo_UPD:
   2819     case ARM::VST3LNq32Pseudo_UPD:
   2820     case ARM::VST4LNd8Pseudo:
   2821     case ARM::VST4LNd16Pseudo:
   2822     case ARM::VST4LNd32Pseudo:
   2823     case ARM::VST4LNq16Pseudo:
   2824     case ARM::VST4LNq32Pseudo:
   2825     case ARM::VST4LNd8Pseudo_UPD:
   2826     case ARM::VST4LNd16Pseudo_UPD:
   2827     case ARM::VST4LNd32Pseudo_UPD:
   2828     case ARM::VST4LNq16Pseudo_UPD:
   2829     case ARM::VST4LNq32Pseudo_UPD:
   2830       ExpandLaneOp(MBBI);
   2831       return true;
   2832 
   2833     case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
   2834     case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
   2835     case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
   2836     case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
   2837 
   2838     case ARM::tCMP_SWAP_8:
   2839       assert(STI->isThumb());
   2840       return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
   2841                             NextMBBI);
   2842     case ARM::tCMP_SWAP_16:
   2843       assert(STI->isThumb());
   2844       return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
   2845                             NextMBBI);
   2846 
   2847     case ARM::CMP_SWAP_8:
   2848       assert(!STI->isThumb());
   2849       return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB,
   2850                             NextMBBI);
   2851     case ARM::CMP_SWAP_16:
   2852       assert(!STI->isThumb());
   2853       return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
   2854                             NextMBBI);
   2855     case ARM::CMP_SWAP_32:
   2856       if (STI->isThumb())
   2857         return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
   2858                               NextMBBI);
   2859       else
   2860         return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
   2861 
   2862     case ARM::CMP_SWAP_64:
   2863       return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
   2864 
   2865     case ARM::tBL_PUSHLR:
   2866     case ARM::BL_PUSHLR: {
   2867       const bool Thumb = Opcode == ARM::tBL_PUSHLR;
   2868       Register Reg = MI.getOperand(0).getReg();
   2869       assert(Reg == ARM::LR && "expect LR register!");
   2870       MachineInstrBuilder MIB;
   2871       if (Thumb) {
   2872         // push {lr}
   2873         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
   2874             .add(predOps(ARMCC::AL))
   2875             .addReg(Reg);
   2876 
   2877         // bl __gnu_mcount_nc
   2878         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
   2879       } else {
   2880         // stmdb   sp!, {lr}
   2881         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
   2882             .addReg(ARM::SP, RegState::Define)
   2883             .addReg(ARM::SP)
   2884             .add(predOps(ARMCC::AL))
   2885             .addReg(Reg);
   2886 
   2887         // bl __gnu_mcount_nc
   2888         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
   2889       }
   2890       MIB.cloneMemRefs(MI);
   2891       for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
   2892       MI.eraseFromParent();
   2893       return true;
   2894     }
   2895     case ARM::LOADDUAL:
   2896     case ARM::STOREDUAL: {
   2897       Register PairReg = MI.getOperand(0).getReg();
   2898 
   2899       MachineInstrBuilder MIB =
   2900           BuildMI(MBB, MBBI, MI.getDebugLoc(),
   2901                   TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
   2902               .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
   2903                       Opcode == ARM::LOADDUAL ? RegState::Define : 0)
   2904               .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
   2905                       Opcode == ARM::LOADDUAL ? RegState::Define : 0);
   2906       for (unsigned i = 1; i < MI.getNumOperands(); i++)
   2907         MIB.add(MI.getOperand(i));
   2908       MIB.add(predOps(ARMCC::AL));
   2909       MIB.cloneMemRefs(MI);
   2910       MI.eraseFromParent();
   2911       return true;
   2912     }
   2913   }
   2914 }
   2915 
   2916 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   2917   bool Modified = false;
   2918 
   2919   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   2920   while (MBBI != E) {
   2921     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
   2922     Modified |= ExpandMI(MBB, MBBI, NMBBI);
   2923     MBBI = NMBBI;
   2924   }
   2925 
   2926   return Modified;
   2927 }
   2928 
   2929 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
   2930   STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
   2931   TII = STI->getInstrInfo();
   2932   TRI = STI->getRegisterInfo();
   2933   AFI = MF.getInfo<ARMFunctionInfo>();
   2934 
   2935   LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
   2936                     << "********** Function: " << MF.getName() << '\n');
   2937 
   2938   bool Modified = false;
   2939   for (MachineBasicBlock &MBB : MF)
   2940     Modified |= ExpandMBB(MBB);
   2941   if (VerifyARMPseudo)
   2942     MF.verify(this, "After expanding ARM pseudo instructions.");
   2943 
   2944   LLVM_DEBUG(dbgs() << "***************************************************\n");
   2945   return Modified;
   2946 }
   2947 
   2948 /// createARMExpandPseudoPass - returns an instance of the pseudo instruction
   2949 /// expansion pass.
   2950 FunctionPass *llvm::createARMExpandPseudoPass() {
   2951   return new ARMExpandPseudo();
   2952 }
   2953