1 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that expands pseudo instructions into target 10 // instructions to allow proper scheduling, if-conversion, and other late 11 // optimizations. This pass should be run after register allocation but before 12 // the post-regalloc scheduling pass. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "ARM.h" 17 #include "ARMBaseInstrInfo.h" 18 #include "ARMBaseRegisterInfo.h" 19 #include "ARMConstantPoolValue.h" 20 #include "ARMMachineFunctionInfo.h" 21 #include "ARMSubtarget.h" 22 #include "MCTargetDesc/ARMAddressingModes.h" 23 #include "llvm/CodeGen/LivePhysRegs.h" 24 #include "llvm/CodeGen/MachineFrameInfo.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/Support/Debug.h" 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "arm-pseudo" 31 32 static cl::opt<bool> 33 VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden, 34 cl::desc("Verify machine code after expanding ARM pseudos")); 35 36 #define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass" 37 38 namespace { 39 class ARMExpandPseudo : public MachineFunctionPass { 40 public: 41 static char ID; 42 ARMExpandPseudo() : MachineFunctionPass(ID) {} 43 44 const ARMBaseInstrInfo *TII; 45 const TargetRegisterInfo *TRI; 46 const ARMSubtarget *STI; 47 ARMFunctionInfo *AFI; 48 49 bool runOnMachineFunction(MachineFunction &Fn) override; 50 51 MachineFunctionProperties getRequiredProperties() const override { 52 return MachineFunctionProperties().set( 53 MachineFunctionProperties::Property::NoVRegs); 54 } 55 56 StringRef 
getPassName() const override { 57 return ARM_EXPAND_PSEUDO_NAME; 58 } 59 60 private: 61 void TransferImpOps(MachineInstr &OldMI, 62 MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI); 63 bool ExpandMI(MachineBasicBlock &MBB, 64 MachineBasicBlock::iterator MBBI, 65 MachineBasicBlock::iterator &NextMBBI); 66 bool ExpandMBB(MachineBasicBlock &MBB); 67 void ExpandVLD(MachineBasicBlock::iterator &MBBI); 68 void ExpandVST(MachineBasicBlock::iterator &MBBI); 69 void ExpandLaneOp(MachineBasicBlock::iterator &MBBI); 70 void ExpandVTBL(MachineBasicBlock::iterator &MBBI, 71 unsigned Opc, bool IsExt); 72 void ExpandMOV32BitImm(MachineBasicBlock &MBB, 73 MachineBasicBlock::iterator &MBBI); 74 void CMSEClearGPRegs(MachineBasicBlock &MBB, 75 MachineBasicBlock::iterator MBBI, const DebugLoc &DL, 76 const SmallVectorImpl<unsigned> &ClearRegs, 77 unsigned ClobberReg); 78 MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB, 79 MachineBasicBlock::iterator MBBI); 80 MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB, 81 MachineBasicBlock::iterator MBBI, 82 const BitVector &ClearRegs); 83 MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB, 84 MachineBasicBlock::iterator MBBI, 85 const BitVector &ClearRegs); 86 void CMSESaveClearFPRegs(MachineBasicBlock &MBB, 87 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 88 const LivePhysRegs &LiveRegs, 89 SmallVectorImpl<unsigned> &AvailableRegs); 90 void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB, 91 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 92 const LivePhysRegs &LiveRegs, 93 SmallVectorImpl<unsigned> &ScratchRegs); 94 void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, 95 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 96 const LivePhysRegs &LiveRegs); 97 void CMSERestoreFPRegs(MachineBasicBlock &MBB, 98 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 99 SmallVectorImpl<unsigned> &AvailableRegs); 100 void CMSERestoreFPRegsV8(MachineBasicBlock &MBB, 101 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 102 
SmallVectorImpl<unsigned> &AvailableRegs); 103 void CMSERestoreFPRegsV81(MachineBasicBlock &MBB, 104 MachineBasicBlock::iterator MBBI, DebugLoc &DL, 105 SmallVectorImpl<unsigned> &AvailableRegs); 106 bool ExpandCMP_SWAP(MachineBasicBlock &MBB, 107 MachineBasicBlock::iterator MBBI, unsigned LdrexOp, 108 unsigned StrexOp, unsigned UxtOp, 109 MachineBasicBlock::iterator &NextMBBI); 110 111 bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB, 112 MachineBasicBlock::iterator MBBI, 113 MachineBasicBlock::iterator &NextMBBI); 114 }; 115 char ARMExpandPseudo::ID = 0; 116 } 117 118 INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false, 119 false) 120 121 /// TransferImpOps - Transfer implicit operands on the pseudo instruction to 122 /// the instructions created from the expansion. 123 void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI, 124 MachineInstrBuilder &UseMI, 125 MachineInstrBuilder &DefMI) { 126 const MCInstrDesc &Desc = OldMI.getDesc(); 127 for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands(); 128 i != e; ++i) { 129 const MachineOperand &MO = OldMI.getOperand(i); 130 assert(MO.isReg() && MO.getReg()); 131 if (MO.isUse()) 132 UseMI.add(MO); 133 else 134 DefMI.add(MO); 135 } 136 } 137 138 namespace { 139 // Constants for register spacing in NEON load/store instructions. 140 // For quad-register load-lane and store-lane pseudo instructors, the 141 // spacing is initially assumed to be EvenDblSpc, and that is changed to 142 // OddDblSpc depending on the lane number operand. 143 enum NEONRegSpacing { 144 SingleSpc, 145 SingleLowSpc , // Single spacing, low registers, three and four vectors. 146 SingleHighQSpc, // Single spacing, high registers, four vectors. 147 SingleHighTSpc, // Single spacing, high registers, three vectors. 148 EvenDblSpc, 149 OddDblSpc 150 }; 151 152 // Entries for NEON load/store information table. The table is sorted by 153 // PseudoOpc for fast binary-search lookups. 
154 struct NEONLdStTableEntry { 155 uint16_t PseudoOpc; 156 uint16_t RealOpc; 157 bool IsLoad; 158 bool isUpdating; 159 bool hasWritebackOperand; 160 uint8_t RegSpacing; // One of type NEONRegSpacing 161 uint8_t NumRegs; // D registers loaded or stored 162 uint8_t RegElts; // elements per D register; used for lane ops 163 // FIXME: Temporary flag to denote whether the real instruction takes 164 // a single register (like the encoding) or all of the registers in 165 // the list (like the asm syntax and the isel DAG). When all definitions 166 // are converted to take only the single encoded register, this will 167 // go away. 168 bool copyAllListRegs; 169 170 // Comparison methods for binary search of the table. 171 bool operator<(const NEONLdStTableEntry &TE) const { 172 return PseudoOpc < TE.PseudoOpc; 173 } 174 friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { 175 return TE.PseudoOpc < PseudoOpc; 176 } 177 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, 178 const NEONLdStTableEntry &TE) { 179 return PseudoOpc < TE.PseudoOpc; 180 } 181 }; 182 } 183 184 static const NEONLdStTableEntry NEONLdStTable[] = { 185 { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, 186 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, 187 { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, 188 { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true}, 189 { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true}, 190 { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, 191 192 { ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false}, 193 { ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false}, 194 { ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false}, 195 { 
ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false}, 196 { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, 197 { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, 198 { ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false}, 199 { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, 200 { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, 201 { ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false}, 202 { ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false}, 203 { ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false}, 204 { ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false}, 205 { ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false}, 206 { ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false}, 207 { ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false}, 208 { ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false}, 209 { ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false}, 210 { ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false}, 211 { ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false}, 212 { ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false}, 213 { ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false}, 214 { ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false}, 215 { ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, 
true, true, SingleLowSpc, 3, 1 ,false}, 216 { ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, false, false, SingleHighQSpc, 4, 8 ,false}, 217 { ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false}, 218 { ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false}, 219 { ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false}, 220 221 { ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false}, 222 { ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false}, 223 { ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false}, 224 { ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false}, 225 { ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false}, 226 { ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false}, 227 228 { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, 229 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, 230 { ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, 231 { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true}, 232 { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true}, 233 { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true}, 234 { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true}, 235 { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true}, 236 { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, 237 { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, 238 239 { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, 
SingleSpc, 4, 4 ,false}, 240 { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, 241 { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, 242 { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, 243 { ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, 244 { ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, 245 { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, 246 { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, 247 { ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, 248 249 { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, 250 { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, 251 { ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true}, 252 { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true}, 253 { ARM::VLD3DUPd8Pseudo, ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true}, 254 { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true}, 255 { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 256 { ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true}, 257 { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 258 { ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true}, 259 { ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true}, 260 { ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true}, 261 262 { ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 
,true}, 263 { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 264 { ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true}, 265 { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 266 { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true}, 267 { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 268 { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 269 { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 270 { ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 271 { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 272 273 { ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 4 ,true}, 274 { ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 275 { ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true}, 276 { ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 277 { ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true}, 278 { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 279 280 { ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 281 { ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true}, 282 { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, 283 { ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 284 { ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true}, 285 { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, 286 { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true}, 287 { 
ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true}, 288 { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, 289 290 { ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true}, 291 { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true}, 292 { ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true}, 293 { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true}, 294 { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true}, 295 { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true}, 296 { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 297 { ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true}, 298 { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 299 { ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true}, 300 { ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true}, 301 { ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true}, 302 303 { ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true}, 304 { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 305 { ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true}, 306 { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 307 { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true}, 308 { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 309 { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 310 { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 
4, 4 ,true}, 311 { ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 312 { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 313 314 { ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true}, 315 { ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 316 { ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true}, 317 { ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 318 { ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 4, 8 ,true}, 319 { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 320 321 { ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, 322 { ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true}, 323 { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, 324 { ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 325 { ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true}, 326 { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, 327 { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true}, 328 { ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true}, 329 { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, 330 331 { ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true}, 332 { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true}, 333 { ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true}, 334 { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true}, 335 { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, 336 { 
ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, 337 338 { ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false}, 339 { ARM::VST1d16QPseudoWB_fixed, ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 340 { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc, 4, 4 ,false}, 341 { ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false}, 342 { ARM::VST1d16TPseudoWB_fixed, ARM::VST1d16Twb_fixed, false, true, false, SingleSpc, 3, 4 ,false}, 343 { ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc, 3, 4 ,false}, 344 345 { ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false}, 346 { ARM::VST1d32QPseudoWB_fixed, ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 347 { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc, 4, 2 ,false}, 348 { ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false}, 349 { ARM::VST1d32TPseudoWB_fixed, ARM::VST1d32Twb_fixed, false, true, false, SingleSpc, 3, 2 ,false}, 350 { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc, 3, 2 ,false}, 351 352 { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, 353 { ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, 354 { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, 355 { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, 356 { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, 357 { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, 358 359 { ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false}, 360 { 
ARM::VST1d8QPseudoWB_fixed, ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 361 { ARM::VST1d8QPseudoWB_register, ARM::VST1d8Qwb_register, false, true, true, SingleSpc, 4, 8 ,false}, 362 { ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false}, 363 { ARM::VST1d8TPseudoWB_fixed, ARM::VST1d8Twb_fixed, false, true, false, SingleSpc, 3, 8 ,false}, 364 { ARM::VST1d8TPseudoWB_register, ARM::VST1d8Twb_register, false, true, true, SingleSpc, 3, 8 ,false}, 365 366 { ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false}, 367 { ARM::VST1q16HighQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 368 { ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false}, 369 { ARM::VST1q16HighTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleHighTSpc, 3, 4 ,false}, 370 { ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false}, 371 { ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false}, 372 373 { ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false}, 374 { ARM::VST1q32HighQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 375 { ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false}, 376 { ARM::VST1q32HighTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleHighTSpc, 3, 2 ,false}, 377 { ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false}, 378 { ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false}, 379 380 { ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false}, 381 { ARM::VST1q64HighQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 382 { ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, 
false, false, SingleHighTSpc, 3, 1 ,false}, 383 { ARM::VST1q64HighTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleHighTSpc, 3, 1 ,false}, 384 { ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false}, 385 { ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false}, 386 387 { ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false}, 388 { ARM::VST1q8HighQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 389 { ARM::VST1q8HighTPseudo, ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false}, 390 { ARM::VST1q8HighTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleHighTSpc, 3, 8 ,false}, 391 { ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false}, 392 { ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false}, 393 394 { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, 395 { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, 396 { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, 397 { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, 398 { ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true}, 399 { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, 400 { ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true}, 401 { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true}, 402 { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, 403 { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, 404 405 { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, 406 { 
ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 407 { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, 408 { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, 409 { ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 410 { ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, 411 { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, 412 { ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 413 { ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, 414 415 { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, 416 { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 417 { ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true}, 418 { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 419 { ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true}, 420 { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 421 { ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true}, 422 { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true}, 423 { ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true}, 424 { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true}, 425 426 { ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true}, 427 { ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 428 { ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true}, 429 { ARM::VST3d32Pseudo_UPD, 
ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 430 { ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true}, 431 { ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 432 433 { ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true}, 434 { ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true}, 435 { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true}, 436 { ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, EvenDblSpc, 3, 2 ,true}, 437 { ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true}, 438 { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true}, 439 { ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true}, 440 { ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true}, 441 { ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true}, 442 443 { ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true}, 444 { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 445 { ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true}, 446 { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 447 { ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true}, 448 { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 449 { ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true}, 450 { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true}, 451 { ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true}, 452 { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true}, 453 454 { 
ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true}, 455 { ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 456 { ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true}, 457 { ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 458 { ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true}, 459 { ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 460 461 { ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, EvenDblSpc, 4, 4 ,true}, 462 { ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true}, 463 { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true}, 464 { ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true}, 465 { ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true}, 466 { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true}, 467 { ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true}, 468 { ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true}, 469 { ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true} 470 }; 471 472 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON 473 /// load or store pseudo instruction. 474 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) { 475 #ifndef NDEBUG 476 // Make sure the table is sorted. 
477 static std::atomic<bool> TableChecked(false); 478 if (!TableChecked.load(std::memory_order_relaxed)) { 479 assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!"); 480 TableChecked.store(true, std::memory_order_relaxed); 481 } 482 #endif 483 484 auto I = llvm::lower_bound(NEONLdStTable, Opcode); 485 if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode) 486 return I; 487 return nullptr; 488 } 489 490 /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register, 491 /// corresponding to the specified register spacing. Not all of the results 492 /// are necessarily valid, e.g., a Q register only has 2 D subregisters. 493 static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, 494 const TargetRegisterInfo *TRI, unsigned &D0, 495 unsigned &D1, unsigned &D2, unsigned &D3) { 496 if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) { 497 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 498 D1 = TRI->getSubReg(Reg, ARM::dsub_1); 499 D2 = TRI->getSubReg(Reg, ARM::dsub_2); 500 D3 = TRI->getSubReg(Reg, ARM::dsub_3); 501 } else if (RegSpc == SingleHighQSpc) { 502 D0 = TRI->getSubReg(Reg, ARM::dsub_4); 503 D1 = TRI->getSubReg(Reg, ARM::dsub_5); 504 D2 = TRI->getSubReg(Reg, ARM::dsub_6); 505 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 506 } else if (RegSpc == SingleHighTSpc) { 507 D0 = TRI->getSubReg(Reg, ARM::dsub_3); 508 D1 = TRI->getSubReg(Reg, ARM::dsub_4); 509 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 510 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 511 } else if (RegSpc == EvenDblSpc) { 512 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 513 D1 = TRI->getSubReg(Reg, ARM::dsub_2); 514 D2 = TRI->getSubReg(Reg, ARM::dsub_4); 515 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 516 } else { 517 assert(RegSpc == OddDblSpc && "unknown register spacing"); 518 D0 = TRI->getSubReg(Reg, ARM::dsub_1); 519 D1 = TRI->getSubReg(Reg, ARM::dsub_3); 520 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 521 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 522 } 523 } 524 525 /// ExpandVLD - Translate VLD pseudo 
instructions with Q, QQ or QQQQ register 526 /// operands to real VLD instructions with D register operands. 527 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) { 528 MachineInstr &MI = *MBBI; 529 MachineBasicBlock &MBB = *MI.getParent(); 530 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 531 532 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 533 assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed"); 534 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 535 unsigned NumRegs = TableEntry->NumRegs; 536 537 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 538 TII->get(TableEntry->RealOpc)); 539 unsigned OpIdx = 0; 540 541 bool DstIsDead = MI.getOperand(OpIdx).isDead(); 542 Register DstReg = MI.getOperand(OpIdx++).getReg(); 543 if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 || 544 TableEntry->RealOpc == ARM::VLD2DUPd16x2 || 545 TableEntry->RealOpc == ARM::VLD2DUPd32x2) { 546 unsigned SubRegIndex; 547 if (RegSpc == EvenDblSpc) { 548 SubRegIndex = ARM::dsub_0; 549 } else { 550 assert(RegSpc == OddDblSpc && "Unexpected spacing!"); 551 SubRegIndex = ARM::dsub_1; 552 } 553 Register SubReg = TRI->getSubReg(DstReg, SubRegIndex); 554 unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0, 555 &ARM::DPairSpcRegClass); 556 MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead)); 557 } else { 558 unsigned D0, D1, D2, D3; 559 GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); 560 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); 561 if (NumRegs > 1 && TableEntry->copyAllListRegs) 562 MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 563 if (NumRegs > 2 && TableEntry->copyAllListRegs) 564 MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); 565 if (NumRegs > 3 && TableEntry->copyAllListRegs) 566 MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); 567 } 568 569 if (TableEntry->isUpdating) 570 MIB.add(MI.getOperand(OpIdx++)); 
571 572 // Copy the addrmode6 operands. 573 MIB.add(MI.getOperand(OpIdx++)); 574 MIB.add(MI.getOperand(OpIdx++)); 575 576 // Copy the am6offset operand. 577 if (TableEntry->hasWritebackOperand) { 578 // TODO: The writing-back pseudo instructions we translate here are all 579 // defined to take am6offset nodes that are capable to represent both fixed 580 // and register forms. Some real instructions, however, do not rely on 581 // am6offset and have separate definitions for such forms. When this is the 582 // case, fixed forms do not take any offset nodes, so here we skip them for 583 // such instructions. Once all real and pseudo writing-back instructions are 584 // rewritten without use of am6offset nodes, this code will go away. 585 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++); 586 if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed || 587 TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed || 588 TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed || 589 TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed || 590 TableEntry->RealOpc == ARM::VLD1d8Twb_fixed || 591 TableEntry->RealOpc == ARM::VLD1d16Twb_fixed || 592 TableEntry->RealOpc == ARM::VLD1d32Twb_fixed || 593 TableEntry->RealOpc == ARM::VLD1d64Twb_fixed) { 594 assert(AM6Offset.getReg() == 0 && 595 "A fixed writing-back pseudo instruction provides an offset " 596 "register!"); 597 } else { 598 MIB.add(AM6Offset); 599 } 600 } 601 602 // For an instruction writing double-spaced subregs, the pseudo instruction 603 // has an extra operand that is a use of the super-register. Record the 604 // operand index and skip over it. 605 unsigned SrcOpIdx = 0; 606 if(TableEntry->RealOpc != ARM::VLD2DUPd8x2 && 607 TableEntry->RealOpc != ARM::VLD2DUPd16x2 && 608 TableEntry->RealOpc != ARM::VLD2DUPd32x2) { 609 if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || 610 RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc || 611 RegSpc == SingleHighTSpc) 612 SrcOpIdx = OpIdx++; 613 } 614 615 // Copy the predicate operands. 
616 MIB.add(MI.getOperand(OpIdx++)); 617 MIB.add(MI.getOperand(OpIdx++)); 618 619 // Copy the super-register source operand used for double-spaced subregs over 620 // to the new instruction as an implicit operand. 621 if (SrcOpIdx != 0) { 622 MachineOperand MO = MI.getOperand(SrcOpIdx); 623 MO.setImplicit(true); 624 MIB.add(MO); 625 } 626 // Add an implicit def for the super-register. 627 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 628 TransferImpOps(MI, MIB, MIB); 629 630 // Transfer memoperands. 631 MIB.cloneMemRefs(MI); 632 MI.eraseFromParent(); 633 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 634 } 635 636 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register 637 /// operands to real VST instructions with D register operands. 638 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) { 639 MachineInstr &MI = *MBBI; 640 MachineBasicBlock &MBB = *MI.getParent(); 641 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 642 643 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 644 assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed"); 645 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 646 unsigned NumRegs = TableEntry->NumRegs; 647 648 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 649 TII->get(TableEntry->RealOpc)); 650 unsigned OpIdx = 0; 651 if (TableEntry->isUpdating) 652 MIB.add(MI.getOperand(OpIdx++)); 653 654 // Copy the addrmode6 operands. 655 MIB.add(MI.getOperand(OpIdx++)); 656 MIB.add(MI.getOperand(OpIdx++)); 657 658 if (TableEntry->hasWritebackOperand) { 659 // TODO: The writing-back pseudo instructions we translate here are all 660 // defined to take am6offset nodes that are capable to represent both fixed 661 // and register forms. Some real instructions, however, do not rely on 662 // am6offset and have separate definitions for such forms. 
When this is the 663 // case, fixed forms do not take any offset nodes, so here we skip them for 664 // such instructions. Once all real and pseudo writing-back instructions are 665 // rewritten without use of am6offset nodes, this code will go away. 666 const MachineOperand &AM6Offset = MI.getOperand(OpIdx++); 667 if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed || 668 TableEntry->RealOpc == ARM::VST1d16Qwb_fixed || 669 TableEntry->RealOpc == ARM::VST1d32Qwb_fixed || 670 TableEntry->RealOpc == ARM::VST1d64Qwb_fixed || 671 TableEntry->RealOpc == ARM::VST1d8Twb_fixed || 672 TableEntry->RealOpc == ARM::VST1d16Twb_fixed || 673 TableEntry->RealOpc == ARM::VST1d32Twb_fixed || 674 TableEntry->RealOpc == ARM::VST1d64Twb_fixed) { 675 assert(AM6Offset.getReg() == 0 && 676 "A fixed writing-back pseudo instruction provides an offset " 677 "register!"); 678 } else { 679 MIB.add(AM6Offset); 680 } 681 } 682 683 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 684 bool SrcIsUndef = MI.getOperand(OpIdx).isUndef(); 685 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 686 unsigned D0, D1, D2, D3; 687 GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3); 688 MIB.addReg(D0, getUndefRegState(SrcIsUndef)); 689 if (NumRegs > 1 && TableEntry->copyAllListRegs) 690 MIB.addReg(D1, getUndefRegState(SrcIsUndef)); 691 if (NumRegs > 2 && TableEntry->copyAllListRegs) 692 MIB.addReg(D2, getUndefRegState(SrcIsUndef)); 693 if (NumRegs > 3 && TableEntry->copyAllListRegs) 694 MIB.addReg(D3, getUndefRegState(SrcIsUndef)); 695 696 // Copy the predicate operands. 697 MIB.add(MI.getOperand(OpIdx++)); 698 MIB.add(MI.getOperand(OpIdx++)); 699 700 if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg. 701 MIB->addRegisterKilled(SrcReg, TRI, true); 702 else if (!SrcIsUndef) 703 MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg. 704 TransferImpOps(MI, MIB, MIB); 705 706 // Transfer memoperands. 
707 MIB.cloneMemRefs(MI); 708 MI.eraseFromParent(); 709 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 710 } 711 712 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ 713 /// register operands to real instructions with D register operands. 714 void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) { 715 MachineInstr &MI = *MBBI; 716 MachineBasicBlock &MBB = *MI.getParent(); 717 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 718 719 const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode()); 720 assert(TableEntry && "NEONLdStTable lookup failed"); 721 NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing; 722 unsigned NumRegs = TableEntry->NumRegs; 723 unsigned RegElts = TableEntry->RegElts; 724 725 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 726 TII->get(TableEntry->RealOpc)); 727 unsigned OpIdx = 0; 728 // The lane operand is always the 3rd from last operand, before the 2 729 // predicate operands. 730 unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm(); 731 732 // Adjust the lane and spacing as needed for Q registers. 
733 assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane"); 734 if (RegSpc == EvenDblSpc && Lane >= RegElts) { 735 RegSpc = OddDblSpc; 736 Lane -= RegElts; 737 } 738 assert(Lane < RegElts && "out of range lane for VLD/VST-lane"); 739 740 unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0; 741 unsigned DstReg = 0; 742 bool DstIsDead = false; 743 if (TableEntry->IsLoad) { 744 DstIsDead = MI.getOperand(OpIdx).isDead(); 745 DstReg = MI.getOperand(OpIdx++).getReg(); 746 GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3); 747 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)); 748 if (NumRegs > 1) 749 MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 750 if (NumRegs > 2) 751 MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead)); 752 if (NumRegs > 3) 753 MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead)); 754 } 755 756 if (TableEntry->isUpdating) 757 MIB.add(MI.getOperand(OpIdx++)); 758 759 // Copy the addrmode6 operands. 760 MIB.add(MI.getOperand(OpIdx++)); 761 MIB.add(MI.getOperand(OpIdx++)); 762 // Copy the am6offset operand. 763 if (TableEntry->hasWritebackOperand) 764 MIB.add(MI.getOperand(OpIdx++)); 765 766 // Grab the super-register source. 767 MachineOperand MO = MI.getOperand(OpIdx++); 768 if (!TableEntry->IsLoad) 769 GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3); 770 771 // Add the subregs as sources of the new instruction. 772 unsigned SrcFlags = (getUndefRegState(MO.isUndef()) | 773 getKillRegState(MO.isKill())); 774 MIB.addReg(D0, SrcFlags); 775 if (NumRegs > 1) 776 MIB.addReg(D1, SrcFlags); 777 if (NumRegs > 2) 778 MIB.addReg(D2, SrcFlags); 779 if (NumRegs > 3) 780 MIB.addReg(D3, SrcFlags); 781 782 // Add the lane number operand. 783 MIB.addImm(Lane); 784 OpIdx += 1; 785 786 // Copy the predicate operands. 787 MIB.add(MI.getOperand(OpIdx++)); 788 MIB.add(MI.getOperand(OpIdx++)); 789 790 // Copy the super-register source to be an implicit source. 
791 MO.setImplicit(true); 792 MIB.add(MO); 793 if (TableEntry->IsLoad) 794 // Add an implicit def for the super-register. 795 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 796 TransferImpOps(MI, MIB, MIB); 797 // Transfer memoperands. 798 MIB.cloneMemRefs(MI); 799 MI.eraseFromParent(); 800 } 801 802 /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ 803 /// register operands to real instructions with D register operands. 804 void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI, 805 unsigned Opc, bool IsExt) { 806 MachineInstr &MI = *MBBI; 807 MachineBasicBlock &MBB = *MI.getParent(); 808 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 809 810 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)); 811 unsigned OpIdx = 0; 812 813 // Transfer the destination register operand. 814 MIB.add(MI.getOperand(OpIdx++)); 815 if (IsExt) { 816 MachineOperand VdSrc(MI.getOperand(OpIdx++)); 817 MIB.add(VdSrc); 818 } 819 820 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 821 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 822 unsigned D0, D1, D2, D3; 823 GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3); 824 MIB.addReg(D0); 825 826 // Copy the other source register operand. 827 MachineOperand VmSrc(MI.getOperand(OpIdx++)); 828 MIB.add(VmSrc); 829 830 // Copy the predicate operands. 831 MIB.add(MI.getOperand(OpIdx++)); 832 MIB.add(MI.getOperand(OpIdx++)); 833 834 // Add an implicit kill and use for the super-reg. 835 MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill)); 836 TransferImpOps(MI, MIB, MIB); 837 MI.eraseFromParent(); 838 LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump();); 839 } 840 841 static bool IsAnAddressOperand(const MachineOperand &MO) { 842 // This check is overly conservative. Unless we are certain that the machine 843 // operand is not a symbol reference, we return that it is a symbol reference. 
844 // This is important as the load pair may not be split up Windows. 845 switch (MO.getType()) { 846 case MachineOperand::MO_Register: 847 case MachineOperand::MO_Immediate: 848 case MachineOperand::MO_CImmediate: 849 case MachineOperand::MO_FPImmediate: 850 case MachineOperand::MO_ShuffleMask: 851 return false; 852 case MachineOperand::MO_MachineBasicBlock: 853 return true; 854 case MachineOperand::MO_FrameIndex: 855 return false; 856 case MachineOperand::MO_ConstantPoolIndex: 857 case MachineOperand::MO_TargetIndex: 858 case MachineOperand::MO_JumpTableIndex: 859 case MachineOperand::MO_ExternalSymbol: 860 case MachineOperand::MO_GlobalAddress: 861 case MachineOperand::MO_BlockAddress: 862 return true; 863 case MachineOperand::MO_RegisterMask: 864 case MachineOperand::MO_RegisterLiveOut: 865 return false; 866 case MachineOperand::MO_Metadata: 867 case MachineOperand::MO_MCSymbol: 868 return true; 869 case MachineOperand::MO_CFIIndex: 870 return false; 871 case MachineOperand::MO_IntrinsicID: 872 case MachineOperand::MO_Predicate: 873 llvm_unreachable("should not exist post-isel"); 874 } 875 llvm_unreachable("unhandled machine operand type"); 876 } 877 878 static MachineOperand makeImplicit(const MachineOperand &MO) { 879 MachineOperand NewMO = MO; 880 NewMO.setImplicit(); 881 return NewMO; 882 } 883 884 void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB, 885 MachineBasicBlock::iterator &MBBI) { 886 MachineInstr &MI = *MBBI; 887 unsigned Opcode = MI.getOpcode(); 888 Register PredReg; 889 ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); 890 Register DstReg = MI.getOperand(0).getReg(); 891 bool DstIsDead = MI.getOperand(0).isDead(); 892 bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm; 893 const MachineOperand &MO = MI.getOperand(isCC ? 
2 : 1); 894 bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO); 895 MachineInstrBuilder LO16, HI16; 896 LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump()); 897 898 if (!STI->hasV6T2Ops() && 899 (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) { 900 // FIXME Windows CE supports older ARM CPUs 901 assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+"); 902 903 assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!"); 904 unsigned ImmVal = (unsigned)MO.getImm(); 905 unsigned SOImmValV1 = 0, SOImmValV2 = 0; 906 907 if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr. 908 LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg); 909 HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri)) 910 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 911 .addReg(DstReg); 912 SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal); 913 SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal); 914 } else { // Expand into a mvn + sub. 
915 LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg); 916 HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri)) 917 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 918 .addReg(DstReg); 919 SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal); 920 SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal); 921 SOImmValV1 = ~(-SOImmValV1); 922 } 923 924 unsigned MIFlags = MI.getFlags(); 925 LO16 = LO16.addImm(SOImmValV1); 926 HI16 = HI16.addImm(SOImmValV2); 927 LO16.cloneMemRefs(MI); 928 HI16.cloneMemRefs(MI); 929 LO16.setMIFlags(MIFlags); 930 HI16.setMIFlags(MIFlags); 931 LO16.addImm(Pred).addReg(PredReg).add(condCodeOp()); 932 HI16.addImm(Pred).addReg(PredReg).add(condCodeOp()); 933 if (isCC) 934 LO16.add(makeImplicit(MI.getOperand(1))); 935 TransferImpOps(MI, LO16, HI16); 936 MI.eraseFromParent(); 937 return; 938 } 939 940 unsigned LO16Opc = 0; 941 unsigned HI16Opc = 0; 942 unsigned MIFlags = MI.getFlags(); 943 if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) { 944 LO16Opc = ARM::t2MOVi16; 945 HI16Opc = ARM::t2MOVTi16; 946 } else { 947 LO16Opc = ARM::MOVi16; 948 HI16Opc = ARM::MOVTi16; 949 } 950 951 LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg); 952 HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc)) 953 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 954 .addReg(DstReg); 955 956 LO16.setMIFlags(MIFlags); 957 HI16.setMIFlags(MIFlags); 958 959 switch (MO.getType()) { 960 case MachineOperand::MO_Immediate: { 961 unsigned Imm = MO.getImm(); 962 unsigned Lo16 = Imm & 0xffff; 963 unsigned Hi16 = (Imm >> 16) & 0xffff; 964 LO16 = LO16.addImm(Lo16); 965 HI16 = HI16.addImm(Hi16); 966 break; 967 } 968 case MachineOperand::MO_ExternalSymbol: { 969 const char *ES = MO.getSymbolName(); 970 unsigned TF = MO.getTargetFlags(); 971 LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16); 972 HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16); 973 break; 974 } 975 default: 
{ 976 const GlobalValue *GV = MO.getGlobal(); 977 unsigned TF = MO.getTargetFlags(); 978 LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16); 979 HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16); 980 break; 981 } 982 } 983 984 LO16.cloneMemRefs(MI); 985 HI16.cloneMemRefs(MI); 986 LO16.addImm(Pred).addReg(PredReg); 987 HI16.addImm(Pred).addReg(PredReg); 988 989 if (RequiresBundling) 990 finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator()); 991 992 if (isCC) 993 LO16.add(makeImplicit(MI.getOperand(1))); 994 TransferImpOps(MI, LO16, HI16); 995 MI.eraseFromParent(); 996 LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump();); 997 LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump();); 998 } 999 1000 // The size of the area, accessed by that VLSTM/VLLDM 1001 // S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad) 1002 static const int CMSE_FP_SAVE_SIZE = 136; 1003 1004 static void determineGPRegsToClear(const MachineInstr &MI, 1005 const std::initializer_list<unsigned> &Regs, 1006 SmallVectorImpl<unsigned> &ClearRegs) { 1007 SmallVector<unsigned, 4> OpRegs; 1008 for (const MachineOperand &Op : MI.operands()) { 1009 if (!Op.isReg() || !Op.isUse()) 1010 continue; 1011 OpRegs.push_back(Op.getReg()); 1012 } 1013 llvm::sort(OpRegs); 1014 1015 std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(), 1016 std::back_inserter(ClearRegs)); 1017 } 1018 1019 void ARMExpandPseudo::CMSEClearGPRegs( 1020 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 1021 const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs, 1022 unsigned ClobberReg) { 1023 1024 if (STI->hasV8_1MMainlineOps()) { 1025 // Clear the registers using the CLRM instruction. 
1026 MachineInstrBuilder CLRM = 1027 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL)); 1028 for (unsigned R : ClearRegs) 1029 CLRM.addReg(R, RegState::Define); 1030 CLRM.addReg(ARM::APSR, RegState::Define); 1031 CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit); 1032 } else { 1033 // Clear the registers and flags by copying ClobberReg into them. 1034 // (Baseline can't do a high register clear in one instruction). 1035 for (unsigned Reg : ClearRegs) { 1036 if (Reg == ClobberReg) 1037 continue; 1038 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg) 1039 .addReg(ClobberReg) 1040 .add(predOps(ARMCC::AL)); 1041 } 1042 1043 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M)) 1044 .addImm(STI->hasDSP() ? 0xc00 : 0x800) 1045 .addReg(ClobberReg) 1046 .add(predOps(ARMCC::AL)); 1047 } 1048 } 1049 1050 // Find which FP registers need to be cleared. The parameter `ClearRegs` is 1051 // initialised with all elements set to true, and this function resets all the 1052 // bits, which correspond to register uses. Returns true if any floating point 1053 // register is defined, false otherwise. 
1054 static bool determineFPRegsToClear(const MachineInstr &MI, 1055 BitVector &ClearRegs) { 1056 bool DefFP = false; 1057 for (const MachineOperand &Op : MI.operands()) { 1058 if (!Op.isReg()) 1059 continue; 1060 1061 unsigned Reg = Op.getReg(); 1062 if (Op.isDef()) { 1063 if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) || 1064 (Reg >= ARM::D0 && Reg <= ARM::D15) || 1065 (Reg >= ARM::S0 && Reg <= ARM::S31)) 1066 DefFP = true; 1067 continue; 1068 } 1069 1070 if (Reg >= ARM::Q0 && Reg <= ARM::Q7) { 1071 int R = Reg - ARM::Q0; 1072 ClearRegs.reset(R * 4, (R + 1) * 4); 1073 } else if (Reg >= ARM::D0 && Reg <= ARM::D15) { 1074 int R = Reg - ARM::D0; 1075 ClearRegs.reset(R * 2, (R + 1) * 2); 1076 } else if (Reg >= ARM::S0 && Reg <= ARM::S31) { 1077 ClearRegs[Reg - ARM::S0] = false; 1078 } 1079 } 1080 return DefFP; 1081 } 1082 1083 MachineBasicBlock & 1084 ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB, 1085 MachineBasicBlock::iterator MBBI) { 1086 BitVector ClearRegs(16, true); 1087 (void)determineFPRegsToClear(*MBBI, ClearRegs); 1088 1089 if (STI->hasV8_1MMainlineOps()) 1090 return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs); 1091 else 1092 return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs); 1093 } 1094 1095 // Clear the FP registers for v8.0-M, by copying over the content 1096 // of LR. Uses R12 as a scratch register. 1097 MachineBasicBlock & 1098 ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB, 1099 MachineBasicBlock::iterator MBBI, 1100 const BitVector &ClearRegs) { 1101 if (!STI->hasFPRegs()) 1102 return MBB; 1103 1104 auto &RetI = *MBBI; 1105 const DebugLoc &DL = RetI.getDebugLoc(); 1106 1107 // If optimising for minimum size, clear FP registers unconditionally. 1108 // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and 1109 // don't clear them if they belong to the non-secure state. 
1110 MachineBasicBlock *ClearBB, *DoneBB; 1111 if (STI->hasMinSize()) { 1112 ClearBB = DoneBB = &MBB; 1113 } else { 1114 MachineFunction *MF = MBB.getParent(); 1115 ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1116 DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock()); 1117 1118 MF->insert(++MBB.getIterator(), ClearBB); 1119 MF->insert(++ClearBB->getIterator(), DoneBB); 1120 1121 DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end()); 1122 DoneBB->transferSuccessors(&MBB); 1123 MBB.addSuccessor(ClearBB); 1124 MBB.addSuccessor(DoneBB); 1125 ClearBB->addSuccessor(DoneBB); 1126 1127 // At the new basic blocks we need to have live-in the registers, used 1128 // for the return value as well as LR, used to clear registers. 1129 for (const MachineOperand &Op : RetI.operands()) { 1130 if (!Op.isReg()) 1131 continue; 1132 Register Reg = Op.getReg(); 1133 if (Reg == ARM::NoRegister || Reg == ARM::LR) 1134 continue; 1135 assert(Register::isPhysicalRegister(Reg) && "Unallocated register"); 1136 ClearBB->addLiveIn(Reg); 1137 DoneBB->addLiveIn(Reg); 1138 } 1139 ClearBB->addLiveIn(ARM::LR); 1140 DoneBB->addLiveIn(ARM::LR); 1141 1142 // Read the CONTROL register. 1143 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12) 1144 .addImm(20) 1145 .add(predOps(ARMCC::AL)); 1146 // Check bit 3 (SFPA). 1147 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri)) 1148 .addReg(ARM::R12) 1149 .addImm(8) 1150 .add(predOps(ARMCC::AL)); 1151 // If SFPA is clear, jump over ClearBB to DoneBB. 
1152 BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc)) 1153 .addMBB(DoneBB) 1154 .addImm(ARMCC::EQ) 1155 .addReg(ARM::CPSR, RegState::Kill); 1156 } 1157 1158 // Emit the clearing sequence 1159 for (unsigned D = 0; D < 8; D++) { 1160 // Attempt to clear as double 1161 if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) { 1162 unsigned Reg = ARM::D0 + D; 1163 BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg) 1164 .addReg(ARM::LR) 1165 .addReg(ARM::LR) 1166 .add(predOps(ARMCC::AL)); 1167 } else { 1168 // Clear first part as single 1169 if (ClearRegs[D * 2 + 0]) { 1170 unsigned Reg = ARM::S0 + D * 2; 1171 BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg) 1172 .addReg(ARM::LR) 1173 .add(predOps(ARMCC::AL)); 1174 } 1175 // Clear second part as single 1176 if (ClearRegs[D * 2 + 1]) { 1177 unsigned Reg = ARM::S0 + D * 2 + 1; 1178 BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg) 1179 .addReg(ARM::LR) 1180 .add(predOps(ARMCC::AL)); 1181 } 1182 } 1183 } 1184 1185 // Clear FPSCR bits 0-4, 7, 28-31 1186 // The other bits are program global according to the AAPCS 1187 BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12) 1188 .add(predOps(ARMCC::AL)); 1189 BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12) 1190 .addReg(ARM::R12) 1191 .addImm(0x0000009F) 1192 .add(predOps(ARMCC::AL)) 1193 .add(condCodeOp()); 1194 BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12) 1195 .addReg(ARM::R12) 1196 .addImm(0xF0000000) 1197 .add(predOps(ARMCC::AL)) 1198 .add(condCodeOp()); 1199 BuildMI(ClearBB, DL, TII->get(ARM::VMSR)) 1200 .addReg(ARM::R12) 1201 .add(predOps(ARMCC::AL)); 1202 1203 return *DoneBB; 1204 } 1205 1206 MachineBasicBlock & 1207 ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB, 1208 MachineBasicBlock::iterator MBBI, 1209 const BitVector &ClearRegs) { 1210 auto &RetI = *MBBI; 1211 1212 // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for 1213 // each contiguous sequence of S-registers. 
1214 int Start = -1, End = -1; 1215 for (int S = 0, E = ClearRegs.size(); S != E; ++S) { 1216 if (ClearRegs[S] && S == End + 1) { 1217 End = S; // extend range 1218 continue; 1219 } 1220 // Emit current range. 1221 if (Start < End) { 1222 MachineInstrBuilder VSCCLRM = 1223 BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS)) 1224 .add(predOps(ARMCC::AL)); 1225 while (++Start <= End) 1226 VSCCLRM.addReg(ARM::S0 + Start, RegState::Define); 1227 VSCCLRM.addReg(ARM::VPR, RegState::Define); 1228 } 1229 Start = End = S; 1230 } 1231 // Emit last range. 1232 if (Start < End) { 1233 MachineInstrBuilder VSCCLRM = 1234 BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS)) 1235 .add(predOps(ARMCC::AL)); 1236 while (++Start <= End) 1237 VSCCLRM.addReg(ARM::S0 + Start, RegState::Define); 1238 VSCCLRM.addReg(ARM::VPR, RegState::Define); 1239 } 1240 1241 return MBB; 1242 } 1243 1244 void ARMExpandPseudo::CMSESaveClearFPRegs( 1245 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1246 const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) { 1247 if (STI->hasV8_1MMainlineOps()) 1248 CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs); 1249 else 1250 CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs); 1251 } 1252 1253 // Save and clear FP registers if present 1254 void ARMExpandPseudo::CMSESaveClearFPRegsV8( 1255 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL, 1256 const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) { 1257 if (!STI->hasFPRegs()) 1258 return; 1259 1260 // Store an available register for FPSCR clearing 1261 assert(!ScratchRegs.empty()); 1262 unsigned SpareReg = ScratchRegs.front(); 1263 1264 // save space on stack for VLSTM 1265 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP) 1266 .addReg(ARM::SP) 1267 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1268 .add(predOps(ARMCC::AL)); 1269 1270 // Use ScratchRegs to store the fp regs 1271 std::vector<std::tuple<unsigned, 
unsigned, unsigned>> ClearedFPRegs; 1272 std::vector<unsigned> NonclearedFPRegs; 1273 for (const MachineOperand &Op : MBBI->operands()) { 1274 if (Op.isReg() && Op.isUse()) { 1275 unsigned Reg = Op.getReg(); 1276 assert(!ARM::DPRRegClass.contains(Reg) || 1277 ARM::DPR_VFP2RegClass.contains(Reg)); 1278 assert(!ARM::QPRRegClass.contains(Reg)); 1279 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1280 if (ScratchRegs.size() >= 2) { 1281 unsigned SaveReg2 = ScratchRegs.pop_back_val(); 1282 unsigned SaveReg1 = ScratchRegs.pop_back_val(); 1283 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); 1284 1285 // Save the fp register to the normal registers 1286 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) 1287 .addReg(SaveReg1, RegState::Define) 1288 .addReg(SaveReg2, RegState::Define) 1289 .addReg(Reg) 1290 .add(predOps(ARMCC::AL)); 1291 } else { 1292 NonclearedFPRegs.push_back(Reg); 1293 } 1294 } else if (ARM::SPRRegClass.contains(Reg)) { 1295 if (ScratchRegs.size() >= 1) { 1296 unsigned SaveReg = ScratchRegs.pop_back_val(); 1297 ClearedFPRegs.emplace_back(Reg, SaveReg, 0); 1298 1299 // Save the fp register to the normal registers 1300 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) 1301 .addReg(Reg) 1302 .add(predOps(ARMCC::AL)); 1303 } else { 1304 NonclearedFPRegs.push_back(Reg); 1305 } 1306 } 1307 } 1308 } 1309 1310 bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); 1311 1312 // Lazy store all fp registers to the stack 1313 MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) 1314 .addReg(ARM::SP) 1315 .add(predOps(ARMCC::AL)); 1316 for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1, 1317 ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7}) 1318 VLSTM.addReg(R, RegState::Implicit | 1319 (LiveRegs.contains(R) ? 
0 : RegState::Undef)); 1320 1321 // Restore all arguments 1322 for (const auto &Regs : ClearedFPRegs) { 1323 unsigned Reg, SaveReg1, SaveReg2; 1324 std::tie(Reg, SaveReg1, SaveReg2) = Regs; 1325 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1326 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) 1327 .addReg(SaveReg1) 1328 .addReg(SaveReg2) 1329 .add(predOps(ARMCC::AL)); 1330 else if (ARM::SPRRegClass.contains(Reg)) 1331 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) 1332 .addReg(SaveReg1) 1333 .add(predOps(ARMCC::AL)); 1334 } 1335 1336 for (unsigned Reg : NonclearedFPRegs) { 1337 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1338 if (STI->isLittle()) { 1339 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg) 1340 .addReg(ARM::SP) 1341 .addImm((Reg - ARM::D0) * 2) 1342 .add(predOps(ARMCC::AL)); 1343 } else { 1344 // For big-endian targets we need to load the two subregisters of Reg 1345 // manually because VLDRD would load them in wrong order 1346 unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0); 1347 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0) 1348 .addReg(ARM::SP) 1349 .addImm((Reg - ARM::D0) * 2) 1350 .add(predOps(ARMCC::AL)); 1351 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1) 1352 .addReg(ARM::SP) 1353 .addImm((Reg - ARM::D0) * 2 + 1) 1354 .add(predOps(ARMCC::AL)); 1355 } 1356 } else if (ARM::SPRRegClass.contains(Reg)) { 1357 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg) 1358 .addReg(ARM::SP) 1359 .addImm(Reg - ARM::S0) 1360 .add(predOps(ARMCC::AL)); 1361 } 1362 } 1363 // restore FPSCR from stack and clear bits 0-4, 7, 28-31 1364 // The other bits are program global according to the AAPCS 1365 if (passesFPReg) { 1366 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg) 1367 .addReg(ARM::SP) 1368 .addImm(0x40) 1369 .add(predOps(ARMCC::AL)); 1370 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) 1371 .addReg(SpareReg) 1372 .addImm(0x0000009F) 1373 .add(predOps(ARMCC::AL)) 1374 .add(condCodeOp()); 1375 BuildMI(MBB, MBBI, DL, 
TII->get(ARM::t2BICri), SpareReg)
        .addReg(SpareReg)
        .addImm(0xF0000000)
        .add(predOps(ARMCC::AL))
        .add(condCodeOp());
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
        .addReg(SpareReg)
        .add(predOps(ARMCC::AL));
    // The ldr must happen after a floating point instruction. To prevent the
    // post-ra scheduler from messing with the order, we create a bundle.
    finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
  }
}

/// Insert, before \p MBBI, the "V81" flavour of the FP save-and-clear sequence
/// for the instruction at \p MBBI.
///
/// determineFPRegsToClear() fills \c ClearRegs (initially all 32 bits set) and
/// reports whether *MBBI defines an FP register. One of two sequences is
/// emitted:
///  - If no FP register is defined and no bit was removed from \c ClearRegs,
///    stack space is reserved with tSUBspi and a VLSTM from SP is emitted.
///    VPR, FPSCR, FPSCR_NZCV and Q0-Q7 are attached to the VLSTM as implicit
///    operands, marked undef when they are not in \p LiveRegs.
///  - Otherwise S16-S31 are pushed with VSTMSDB_UPD, the registers selected in
///    \c ClearRegs are cleared through CMSEClearFPRegsV81(), and
///    VSTR_FPCXTS_pre is emitted with SP as base and an immediate of -8.
///
/// NOTE(review): CMSERestoreFPRegsV81 chooses its branch with
/// definesOrUsesFPReg() rather than with the condition used here; confirm the
/// two predicates always agree so that save and restore stay paired.
void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             DebugLoc &DL,
                                             const LivePhysRegs &LiveRegs) {
  BitVector ClearRegs(32, true);
  bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);

  // If the instruction does not write to a FP register and no elements were
  // removed from the set, then no FP registers were used to pass
  // arguments/returns.
  if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
    // Reserve space on the stack for VLSTM.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
        .addReg(ARM::SP)
        .addImm(CMSE_FP_SAVE_SIZE >> 2)
        .add(predOps(ARMCC::AL));

    // Lazy store all FP registers to the stack
    MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
                                    .addReg(ARM::SP)
                                    .add(predOps(ARMCC::AL));
    // Record the registers involved as implicit operands; registers that are
    // not live at this point are flagged undef.
    for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
                   ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
      VLSTM.addReg(R, RegState::Implicit |
                          (LiveRegs.contains(R) ? 0 : RegState::Undef));
  } else {
    // Push all the callee-saved registers (s16-s31).
    MachineInstrBuilder VPUSH =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
      VPUSH.addReg(Reg);

    // Clear FP registers with a VSCCLRM.
    // The returned block reference is intentionally discarded.
    (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);

    // Save floating-point context.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
        .addReg(ARM::SP)
        .addImm(-8)
        .add(predOps(ARMCC::AL));
  }
}

/// Restore FP registers if present.
///
/// Dispatches to CMSERestoreFPRegsV81() when the subtarget reports
/// hasV8_1MMainlineOps(), and to CMSERestoreFPRegsV8() otherwise. All
/// arguments are forwarded unchanged.
void ARMExpandPseudo::CMSERestoreFPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {
  if (STI->hasV8_1MMainlineOps())
    CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
  else
    CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
}

/// Insert, before \p MBBI, the "V8" flavour of the FP restore sequence: a
/// VLLDM from SP followed by a tADDspi that releases the save area. Does
/// nothing when the subtarget has no FP registers.
///
/// FP registers that *MBBI defines are carried across the VLLDM:
///  - a D register takes two entries from \p AvailableRegs, an S register
///    takes one; the value is moved into them before the VLLDM and moved back
///    afterwards;
///  - when \p AvailableRegs does not hold enough entries, the register is
///    instead stored to an SP-relative slot before the VLLDM (presumably so
///    that the VLLDM reloads it -- TODO confirm).
///
/// \param AvailableRegs registers usable as temporaries; entries are consumed
///        from the back with pop_back_val().
void ARMExpandPseudo::CMSERestoreFPRegsV8(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {
  if (!STI->hasFPRegs())
    return;

  // Use AvailableRegs to store the fp regs
  // Each tuple is (FP register, first save register, second save register);
  // the third element is 0 for S registers, which need only one save register.
  std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
  std::vector<unsigned> NonclearedFPRegs;
  for (const MachineOperand &Op : MBBI->operands()) {
    if (Op.isReg() && Op.isDef()) {
      unsigned Reg = Op.getReg();
      // Only D registers in DPR_VFP2 and no Q registers are expected as defs.
      assert(!ARM::DPRRegClass.contains(Reg) ||
             ARM::DPR_VFP2RegClass.contains(Reg));
      assert(!ARM::QPRRegClass.contains(Reg));
      if (ARM::DPR_VFP2RegClass.contains(Reg)) {
        if (AvailableRegs.size() >= 2) {
          unsigned SaveReg2 = AvailableRegs.pop_back_val();
          unsigned SaveReg1 = AvailableRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
              .addReg(SaveReg1, RegState::Define)
              .addReg(SaveReg2, RegState::Define)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      } else if (ARM::SPRRegClass.contains(Reg)) {
        if (AvailableRegs.size() >= 1) {
          unsigned SaveReg = AvailableRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg, 0);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      }
    }
  }

  // Push FP regs that cannot be restored via normal registers on the stack
  // The slot is addressed from SP with an immediate derived from the register
  // number: (Reg - D0) * 2 for D registers, (Reg - S0) for S registers.
  for (unsigned Reg : NonclearedFPRegs) {
    if (ARM::DPR_VFP2RegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
          .addReg(ARM::SP)
          .addImm((Reg - ARM::D0) * 2)
          .add(predOps(ARMCC::AL));
    else if (ARM::SPRRegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
          .addReg(ARM::SP)
          .addImm(Reg - ARM::S0)
          .add(predOps(ARMCC::AL));
  }

  // Lazy load fp regs from stack
  BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
      .addReg(ARM::SP)
      .add(predOps(ARMCC::AL));

  // Restore all FP registers via normal registers
  for (const auto &Regs : ClearedFPRegs) {
    unsigned Reg, SaveReg1, SaveReg2;
    std::tie(Reg, SaveReg1, SaveReg2) = Regs;
    if (ARM::DPR_VFP2RegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
          .addReg(SaveReg1)
          .addReg(SaveReg2)
          .add(predOps(ARMCC::AL));
    else if (ARM::SPRRegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
          .addReg(SaveReg1)
          .add(predOps(ARMCC::AL));
  }

  // Pop the stack space
  BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
      .addReg(ARM::SP)
      .addImm(CMSE_FP_SAVE_SIZE >> 2)
      .add(predOps(ARMCC::AL));
}

/// Return true if any register operand of \p MI (def or use) falls in one of
/// the ranges Q0-Q7, D0-D15 or S0-S31. The check compares register numbers
/// against the range endpoints rather than querying register classes.
static bool definesOrUsesFPReg(const MachineInstr &MI) {
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg())
      continue;
    unsigned Reg = Op.getReg();
    if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
        (Reg >= ARM::D0 && Reg <= ARM::D15) ||
        (Reg >= ARM::S0 && Reg <= ARM::S31))
      return true;
  }
  return false;
}

/// Insert, before \p MBBI, the "V81" flavour of the FP restore sequence.
///
///  - If *MBBI neither defines nor uses an FP register, emit a VLLDM from SP
///    and release the save area with tADDspi.
///  - Otherwise emit VLDR_FPCXTS_post (SP base, immediate 8) and pop S16-S31
///    with VLDMSIA_UPD.
///
/// \p AvailableRegs is not used by this variant; it is accepted so that the
/// signature matches CMSERestoreFPRegsV8().
void ARMExpandPseudo::CMSERestoreFPRegsV81(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {
  if (!definesOrUsesFPReg(*MBBI)) {
    // Load FP registers from stack.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL));

    // Pop the stack space
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
        .addReg(ARM::SP)
        .addImm(CMSE_FP_SAVE_SIZE >> 2)
        .add(predOps(ARMCC::AL));
  } else {
    // Restore the floating point context.
    // NOTE(review): this instruction takes its debug location from MBBI
    // while the others in this function use DL; confirm whether that
    // difference is intentional.
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(8)
        .add(predOps(ARMCC::AL));

    // Pop all the callee-saved registers (s16-s31).
    MachineInstrBuilder VPOP =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
      VPOP.addReg(Reg, RegState::Define);
  }
}

/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
/// possible. This only gets used at -O0 so we don't care about efficiency of
/// the generated code.
///
/// The pseudo's operands are read as: 0 = destination, 1 = temporary that
/// receives the strex status, 2 = address, 3 = desired (expected) value,
/// 4 = new value.
///
/// The block is split into MBB -> LoadCmpBB -> StoreBB -> DoneBB. Everything
/// from the pseudo to the end of \p MBB is moved into DoneBB, which also
/// inherits MBB's successors, and the pseudo is erased.
///
/// \param LdrexOp  opcode of the exclusive load to emit.
/// \param StrexOp  opcode of the exclusive store to emit.
/// \param UxtOp    opcode of a zero-extension applied in place to the desired
///                 value before the loop, or 0 for none.
/// \param NextMBBI set to MBB.end(), since the remainder of the block has
///                 moved to DoneBB.
/// \returns true (the instruction is always expanded).
bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned LdrexOp, unsigned StrexOp,
                                     unsigned UxtOp,
                                     MachineBasicBlock::iterator &NextMBBI) {
  bool IsThumb = STI->isThumb();
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register TempReg = MI.getOperand(1).getReg();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both, so an undef address operand is rejected here.
  // NOTE(review): this comment used to say undef "should be replaced by xzr
  // anyway"; xzr is an AArch64 register, so that remark does not apply to ARM.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  if (IsThumb) {
    assert(STI->hasV8MBaselineOps() &&
           "CMP_SWAP not expected to be custom expanded for Thumb1");
    assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
           "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
    assert(ARM::tGPRRegClass.contains(DesiredReg) &&
           "DesiredReg used for UXT op must be tGPR");
  }

  // Create the three new blocks and lay them out directly after MBB.
  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // Zero-extend the desired value in place, in MBB, before entering the loop.
  if (UxtOp) {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
            .addReg(DesiredReg, RegState::Kill);
    if (!IsThumb)
      MIB.addImm(0); // the non-Thumb form takes an extra immediate operand.
    MIB.add(predOps(ARMCC::AL));
  }

  // .Lloadcmp:
  //     ldrex rDest, [rAddr]
  //     cmp rDest, rDesired
  //     bne .Ldone

  MachineInstrBuilder MIB;
  MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
  MIB.addReg(AddrReg);
  if (LdrexOp == ARM::t2LDREX)
    MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
  MIB.add(predOps(ARMCC::AL));

  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .add(predOps(ARMCC::AL));
  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
      .addMBB(DoneBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     strex rTempReg, rNew, [rAddr]
  //     cmp rTempReg, #0
  //     bne .Lloadcmp
  MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
    .addReg(NewReg)
    .addReg(AddrReg);
  if (StrexOp == ARM::t2STREX)
    MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
  MIB.add(predOps(ARMCC::AL));

  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
  BuildMI(StoreBB, DL, TII->get(CMPri))
      .addReg(TempReg, RegState::Kill)
      .addImm(0)
      .add(predOps(ARMCC::AL));
  BuildMI(StoreBB, DL, TII->get(Bcc))
      .addMBB(LoadCmpBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move the pseudo and everything after it into DoneBB, and hand MBB's
  // successors over to DoneBB.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// ARM's ldrexd/strexd take a consecutive register pair (represented as a
/// single GPRPair register), Thumb's take two separate registers so we need to
/// extract the subregs from the pair.
static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
                                unsigned Flags, bool IsThumb,
                                const TargetRegisterInfo *TRI) {
  // Flags is applied to every register operand appended here.
  if (IsThumb) {
    // Thumb: append the gsub_0 and gsub_1 halves as two separate operands.
    Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
    Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
    MIB.addReg(RegLo, Flags);
    MIB.addReg(RegHi, Flags);
  } else
    // ARM: append the pair register itself as a single operand.
    MIB.addReg(Reg.getReg(), Flags);
}

/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
///
/// The pseudo's operands are read as: 0 = destination pair, 1 = temporary
/// that receives the strexd status, 2 = address, 3 = desired (expected) pair,
/// 4 = new pair. The halves of a pair are obtained through the gsub_0 and
/// gsub_1 sub-register indices.
///
/// The block is split into MBB -> LoadCmpBB -> StoreBB -> DoneBB. Everything
/// from the pseudo to the end of \p MBB is moved into DoneBB, which also
/// inherits MBB's successors, and the pseudo is erased.
///
/// \param NextMBBI set to MBB.end(), since the remainder of the block has
///                 moved to DoneBB.
/// \returns true (the instruction is always expanded).
bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        MachineBasicBlock::iterator &NextMBBI) {
  bool IsThumb = STI->isThumb();
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Dest = MI.getOperand(0);
  Register TempReg = MI.getOperand(1).getReg();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both, so an undef address operand is rejected here.
  // NOTE(review): this comment used to say undef "should be replaced by xzr
  // anyway"; xzr is an AArch64 register, so that remark does not apply to ARM.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  // New is a by-value copy of operand 4 with its kill flag cleared; the
  // operand on MI itself is not modified.
  MachineOperand New = MI.getOperand(4);
  New.setIsKill(false);

  Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
  Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
  Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
  Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);

  // Create the three new blocks and lay them out directly after MBB.
  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldrexd rDestLo, rDestHi, [rAddr]
  //     cmp rDestLo, rDesiredLo
  //     cmpeq rDestHi, rDesiredHi
  //     bne .Ldone
  unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
  MachineInstrBuilder MIB;
  MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
  addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
  MIB.addReg(AddrReg).add(predOps(ARMCC::AL));

  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(DestLo, getKillRegState(Dest.isDead()))
      .addReg(DesiredLo)
      .add(predOps(ARMCC::AL));

  // The high halves are compared by a second cmp predicated on EQ, so it
  // only executes when the low halves matched.
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(DestHi, getKillRegState(Dest.isDead()))
      .addReg(DesiredHi)
      .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);

  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
      .addMBB(DoneBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     strexd rTempReg, rNewLo, rNewHi, [rAddr]
  //     cmp rTempReg, #0
  //     bne .Lloadcmp
  unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
  MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
  // NOTE(review): New is consumed as a source operand below; confirm that
  // isDead() (rather than a kill query) is the intended flag source.
  unsigned Flags = getKillRegState(New.isDead());
  addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
  MIB.addReg(AddrReg).add(predOps(ARMCC::AL));

  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
  BuildMI(StoreBB, DL, TII->get(CMPri))
      .addReg(TempReg, RegState::Kill)
      .addImm(0)
      .add(predOps(ARMCC::AL));
  BuildMI(StoreBB, DL, TII->get(Bcc))
      .addMBB(LoadCmpBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move the pseudo and everything after it into DoneBB, and hand MBB's
  // successors over to DoneBB.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// Insert, before \p MBBI, instructions that push the registers from ARM::R4
/// up to (but not including) ARM::R12.
///
/// A pushed register is added as a plain use when it is \p JumpReg or is in
/// \p LiveRegs, and as an undef use otherwise.
///
/// \param JumpReg    register that must not be overwritten by the temporaries
///                   used on the Thumb1 path.
/// \param Thumb1Only when true, use tPUSH/tMOVr sequences restricted to the
///                   low registers; when false, emit a single t2STMDB_UPD on
///                   SP.
static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
                                MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI, int JumpReg,
                                const LivePhysRegs &LiveRegs, bool Thumb1Only) {
  const DebugLoc &DL = MBBI->getDebugLoc();
  if (Thumb1Only) { // push Lo and Hi regs separately
    MachineInstrBuilder PushMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
      PushMIB.addReg(
          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
    }

    // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
    // regs that we just saved and push the low regs again, taking care to
    // not clobber JumpReg. If JumpReg is one of the low registers, push first
    // the values of r9-r11, and then r8. That would leave them ordered in
    // memory, and allow us to later pop them with a single instruction.
    // FIXME: Could also use any of r0-r3 that are free (including in the
    // first PUSH above).
    // HiReg only advances when a copy is emitted, so skipping JumpReg leaves
    // one high register (r8) for the extra push further down.
    for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
      if (JumpReg == LoReg)
        continue;
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
          .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
          .add(predOps(ARMCC::AL));
      --HiReg;
    }
    MachineInstrBuilder PushMIB2 =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
    for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
      if (Reg == JumpReg)
        continue;
      PushMIB2.addReg(Reg, RegState::Kill);
    }

    // If we couldn't use a low register for temporary storage (because it was
    // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
    // saved.
    if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
      int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
          .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
          .add(predOps(ARMCC::AL))
          .addReg(LoReg, RegState::Kill);
    }
  } else { // push Lo and Hi registers with a single instruction
    MachineInstrBuilder PushMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
      PushMIB.addReg(
          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
    }
  }
}

/// Insert, before \p MBBI, instructions that pop the registers from ARM::R4 up
/// to (but not including) ARM::R12, undoing CMSEPushCalleeSaves().
///
/// \param JumpReg    not used by this function.
/// \param Thumb1Only when true, pop four words into r4-r7, copy them to the
///                   four registers starting at r8, then pop r4-r7 again; when
///                   false, emit a single t2LDMIA_UPD on SP.
static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI, int JumpReg,
                               bool Thumb1Only) {
  const DebugLoc &DL = MBBI->getDebugLoc();
  if (Thumb1Only) {
    MachineInstrBuilder PopMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
    // The tPOP above is already in the block; each iteration adds one more
    // register to it and inserts the matching tMOVr before MBBI, i.e. after
    // the tPOP.
    for (int R = 0; R < 4; ++R) {
      PopMIB.addReg(ARM::R4 + R, RegState::Define);
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
          .addReg(ARM::R4 + R, RegState::Kill)
          .add(predOps(ARMCC::AL));
    }
    MachineInstrBuilder PopMIB2 =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
    for (int R = 0; R < 4; ++R)
      PopMIB2.addReg(ARM::R4 + R, RegState::Define);
  } else { // pop Lo and Hi registers with a single instruction
    MachineInstrBuilder PopMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
      PopMIB.addReg(Reg, RegState::Define);
  }
}

bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI,
                               MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  switch (Opcode) {
    default:
      return false;

    case ARM::VBSPd:
    case ARM::VBSPq: {
      Register DstReg = MI.getOperand(0).getReg();
      if (DstReg == MI.getOperand(3).getReg()) {
        // Expand to VBIT
        unsigned NewOpc = Opcode == ARM::VBSPd ?
ARM::VBITd : ARM::VBITq; 1916 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 1917 .add(MI.getOperand(0)) 1918 .add(MI.getOperand(3)) 1919 .add(MI.getOperand(2)) 1920 .add(MI.getOperand(1)) 1921 .addImm(MI.getOperand(4).getImm()) 1922 .add(MI.getOperand(5)); 1923 } else if (DstReg == MI.getOperand(2).getReg()) { 1924 // Expand to VBIF 1925 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq; 1926 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 1927 .add(MI.getOperand(0)) 1928 .add(MI.getOperand(2)) 1929 .add(MI.getOperand(3)) 1930 .add(MI.getOperand(1)) 1931 .addImm(MI.getOperand(4).getImm()) 1932 .add(MI.getOperand(5)); 1933 } else { 1934 // Expand to VBSL 1935 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq; 1936 if (DstReg == MI.getOperand(1).getReg()) { 1937 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 1938 .add(MI.getOperand(0)) 1939 .add(MI.getOperand(1)) 1940 .add(MI.getOperand(2)) 1941 .add(MI.getOperand(3)) 1942 .addImm(MI.getOperand(4).getImm()) 1943 .add(MI.getOperand(5)); 1944 } else { 1945 // Use move to satisfy constraints 1946 unsigned MoveOpc = Opcode == ARM::VBSPd ? 
ARM::VORRd : ARM::VORRq; 1947 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc)) 1948 .addReg(DstReg, 1949 RegState::Define | 1950 getRenamableRegState(MI.getOperand(0).isRenamable())) 1951 .add(MI.getOperand(1)) 1952 .add(MI.getOperand(1)) 1953 .addImm(MI.getOperand(4).getImm()) 1954 .add(MI.getOperand(5)); 1955 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 1956 .add(MI.getOperand(0)) 1957 .addReg(DstReg, 1958 RegState::Kill | 1959 getRenamableRegState(MI.getOperand(0).isRenamable())) 1960 .add(MI.getOperand(2)) 1961 .add(MI.getOperand(3)) 1962 .addImm(MI.getOperand(4).getImm()) 1963 .add(MI.getOperand(5)); 1964 } 1965 } 1966 MI.eraseFromParent(); 1967 return true; 1968 } 1969 1970 case ARM::TCRETURNdi: 1971 case ARM::TCRETURNri: { 1972 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 1973 assert(MBBI->isReturn() && 1974 "Can only insert epilog into returning blocks"); 1975 unsigned RetOpcode = MBBI->getOpcode(); 1976 DebugLoc dl = MBBI->getDebugLoc(); 1977 const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( 1978 MBB.getParent()->getSubtarget().getInstrInfo()); 1979 1980 // Tail call return: adjust the stack pointer and jump to callee. 1981 MBBI = MBB.getLastNonDebugInstr(); 1982 MachineOperand &JumpTarget = MBBI->getOperand(0); 1983 1984 // Jump to label or value in register. 1985 if (RetOpcode == ARM::TCRETURNdi) { 1986 unsigned TCOpcode = 1987 STI->isThumb() 1988 ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND) 1989 : ARM::TAILJMPd; 1990 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); 1991 if (JumpTarget.isGlobal()) 1992 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 1993 JumpTarget.getTargetFlags()); 1994 else { 1995 assert(JumpTarget.isSymbol()); 1996 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 1997 JumpTarget.getTargetFlags()); 1998 } 1999 2000 // Add the default predicate in Thumb mode. 
2001 if (STI->isThumb()) 2002 MIB.add(predOps(ARMCC::AL)); 2003 } else if (RetOpcode == ARM::TCRETURNri) { 2004 unsigned Opcode = 2005 STI->isThumb() ? ARM::tTAILJMPr 2006 : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4); 2007 BuildMI(MBB, MBBI, dl, 2008 TII.get(Opcode)) 2009 .addReg(JumpTarget.getReg(), RegState::Kill); 2010 } 2011 2012 auto NewMI = std::prev(MBBI); 2013 for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) 2014 NewMI->addOperand(MBBI->getOperand(i)); 2015 2016 2017 // Update call site info and delete the pseudo instruction TCRETURN. 2018 if (MI.isCandidateForCallSiteEntry()) 2019 MI.getMF()->moveCallSiteInfo(&MI, &*NewMI); 2020 MBB.erase(MBBI); 2021 2022 MBBI = NewMI; 2023 return true; 2024 } 2025 case ARM::tBXNS_RET: { 2026 MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI); 2027 2028 if (STI->hasV8_1MMainlineOps()) { 2029 // Restore the non-secure floating point context. 2030 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 2031 TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP) 2032 .addReg(ARM::SP) 2033 .addImm(4) 2034 .add(predOps(ARMCC::AL)); 2035 } 2036 2037 // Clear all GPR that are not a use of the return instruction. 
2038 assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) { 2039 return !Op.isReg() || Op.getReg() != ARM::R12; 2040 })); 2041 SmallVector<unsigned, 5> ClearRegs; 2042 determineGPRegsToClear( 2043 *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs); 2044 CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs, 2045 ARM::LR); 2046 2047 MachineInstrBuilder NewMI = 2048 BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), 2049 TII->get(ARM::tBXNS)) 2050 .addReg(ARM::LR) 2051 .add(predOps(ARMCC::AL)); 2052 for (const MachineOperand &Op : MI.operands()) 2053 NewMI->addOperand(Op); 2054 MI.eraseFromParent(); 2055 return true; 2056 } 2057 case ARM::tBLXNS_CALL: { 2058 DebugLoc DL = MBBI->getDebugLoc(); 2059 unsigned JumpReg = MBBI->getOperand(0).getReg(); 2060 2061 // Figure out which registers are live at the point immediately before the 2062 // call. When we indiscriminately push a set of registers, the live 2063 // registers are added as ordinary use operands, whereas dead registers 2064 // are "undef". 2065 LivePhysRegs LiveRegs(*TRI); 2066 LiveRegs.addLiveOuts(MBB); 2067 for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse())) 2068 LiveRegs.stepBackward(MI); 2069 LiveRegs.stepBackward(*MBBI); 2070 2071 CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs, 2072 AFI->isThumb1OnlyFunction()); 2073 2074 SmallVector<unsigned, 16> ClearRegs; 2075 determineGPRegsToClear(*MBBI, 2076 {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 2077 ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9, 2078 ARM::R10, ARM::R11, ARM::R12}, 2079 ClearRegs); 2080 auto OriginalClearRegs = ClearRegs; 2081 2082 // Get the first cleared register as a scratch (to use later with tBIC). 2083 // We need to use the first so we can ensure it is a low register. 
2084 unsigned ScratchReg = ClearRegs.front(); 2085 2086 // Clear LSB of JumpReg 2087 if (AFI->isThumb2Function()) { 2088 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg) 2089 .addReg(JumpReg) 2090 .addImm(1) 2091 .add(predOps(ARMCC::AL)) 2092 .add(condCodeOp()); 2093 } else { 2094 // We need to use an extra register to cope with 8M Baseline, 2095 // since we have saved all of the registers we are ok to trash a non 2096 // argument register here. 2097 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg) 2098 .add(condCodeOp()) 2099 .addImm(1) 2100 .add(predOps(ARMCC::AL)); 2101 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg) 2102 .addReg(ARM::CPSR, RegState::Define) 2103 .addReg(JumpReg) 2104 .addReg(ScratchReg) 2105 .add(predOps(ARMCC::AL)); 2106 } 2107 2108 CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs, 2109 ClearRegs); // save+clear FP regs with ClearRegs 2110 CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg); 2111 2112 const MachineInstrBuilder NewCall = 2113 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr)) 2114 .add(predOps(ARMCC::AL)) 2115 .addReg(JumpReg, RegState::Kill); 2116 2117 for (int I = 1, E = MI.getNumOperands(); I != E; ++I) 2118 NewCall->addOperand(MI.getOperand(I)); 2119 if (MI.isCandidateForCallSiteEntry()) 2120 MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr()); 2121 2122 CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers 2123 2124 CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction()); 2125 2126 MI.eraseFromParent(); 2127 return true; 2128 } 2129 case ARM::VMOVHcc: 2130 case ARM::VMOVScc: 2131 case ARM::VMOVDcc: { 2132 unsigned newOpc = Opcode != ARM::VMOVDcc ? 
ARM::VMOVS : ARM::VMOVD; 2133 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), 2134 MI.getOperand(1).getReg()) 2135 .add(MI.getOperand(2)) 2136 .addImm(MI.getOperand(3).getImm()) // 'pred' 2137 .add(MI.getOperand(4)) 2138 .add(makeImplicit(MI.getOperand(1))); 2139 2140 MI.eraseFromParent(); 2141 return true; 2142 } 2143 case ARM::t2MOVCCr: 2144 case ARM::MOVCCr: { 2145 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr; 2146 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2147 MI.getOperand(1).getReg()) 2148 .add(MI.getOperand(2)) 2149 .addImm(MI.getOperand(3).getImm()) // 'pred' 2150 .add(MI.getOperand(4)) 2151 .add(condCodeOp()) // 's' bit 2152 .add(makeImplicit(MI.getOperand(1))); 2153 2154 MI.eraseFromParent(); 2155 return true; 2156 } 2157 case ARM::MOVCCsi: { 2158 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2159 (MI.getOperand(1).getReg())) 2160 .add(MI.getOperand(2)) 2161 .addImm(MI.getOperand(3).getImm()) 2162 .addImm(MI.getOperand(4).getImm()) // 'pred' 2163 .add(MI.getOperand(5)) 2164 .add(condCodeOp()) // 's' bit 2165 .add(makeImplicit(MI.getOperand(1))); 2166 2167 MI.eraseFromParent(); 2168 return true; 2169 } 2170 case ARM::MOVCCsr: { 2171 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), 2172 (MI.getOperand(1).getReg())) 2173 .add(MI.getOperand(2)) 2174 .add(MI.getOperand(3)) 2175 .addImm(MI.getOperand(4).getImm()) 2176 .addImm(MI.getOperand(5).getImm()) // 'pred' 2177 .add(MI.getOperand(6)) 2178 .add(condCodeOp()) // 's' bit 2179 .add(makeImplicit(MI.getOperand(1))); 2180 2181 MI.eraseFromParent(); 2182 return true; 2183 } 2184 case ARM::t2MOVCCi16: 2185 case ARM::MOVCCi16: { 2186 unsigned NewOpc = AFI->isThumbFunction() ? 
ARM::t2MOVi16 : ARM::MOVi16; 2187 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2188 MI.getOperand(1).getReg()) 2189 .addImm(MI.getOperand(2).getImm()) 2190 .addImm(MI.getOperand(3).getImm()) // 'pred' 2191 .add(MI.getOperand(4)) 2192 .add(makeImplicit(MI.getOperand(1))); 2193 MI.eraseFromParent(); 2194 return true; 2195 } 2196 case ARM::t2MOVCCi: 2197 case ARM::MOVCCi: { 2198 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi; 2199 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2200 MI.getOperand(1).getReg()) 2201 .addImm(MI.getOperand(2).getImm()) 2202 .addImm(MI.getOperand(3).getImm()) // 'pred' 2203 .add(MI.getOperand(4)) 2204 .add(condCodeOp()) // 's' bit 2205 .add(makeImplicit(MI.getOperand(1))); 2206 2207 MI.eraseFromParent(); 2208 return true; 2209 } 2210 case ARM::t2MVNCCi: 2211 case ARM::MVNCCi: { 2212 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi; 2213 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2214 MI.getOperand(1).getReg()) 2215 .addImm(MI.getOperand(2).getImm()) 2216 .addImm(MI.getOperand(3).getImm()) // 'pred' 2217 .add(MI.getOperand(4)) 2218 .add(condCodeOp()) // 's' bit 2219 .add(makeImplicit(MI.getOperand(1))); 2220 2221 MI.eraseFromParent(); 2222 return true; 2223 } 2224 case ARM::t2MOVCClsl: 2225 case ARM::t2MOVCClsr: 2226 case ARM::t2MOVCCasr: 2227 case ARM::t2MOVCCror: { 2228 unsigned NewOpc; 2229 switch (Opcode) { 2230 case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; 2231 case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; 2232 case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; 2233 case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; 2234 default: llvm_unreachable("unexpeced conditional move"); 2235 } 2236 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2237 MI.getOperand(1).getReg()) 2238 .add(MI.getOperand(2)) 2239 .addImm(MI.getOperand(3).getImm()) 2240 .addImm(MI.getOperand(4).getImm()) // 'pred' 2241 .add(MI.getOperand(5)) 2242 .add(condCodeOp()) // 's' bit 2243 
.add(makeImplicit(MI.getOperand(1))); 2244 MI.eraseFromParent(); 2245 return true; 2246 } 2247 case ARM::Int_eh_sjlj_dispatchsetup: { 2248 MachineFunction &MF = *MI.getParent()->getParent(); 2249 const ARMBaseInstrInfo *AII = 2250 static_cast<const ARMBaseInstrInfo*>(TII); 2251 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 2252 // For functions using a base pointer, we rematerialize it (via the frame 2253 // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it 2254 // for us. Otherwise, expand to nothing. 2255 if (RI.hasBasePointer(MF)) { 2256 int32_t NumBytes = AFI->getFramePtrSpillOffset(); 2257 Register FramePtr = RI.getFrameRegister(MF); 2258 assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && 2259 "base pointer without frame pointer?"); 2260 2261 if (AFI->isThumb2Function()) { 2262 emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2263 FramePtr, -NumBytes, ARMCC::AL, 0, *TII); 2264 } else if (AFI->isThumbFunction()) { 2265 emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2266 FramePtr, -NumBytes, *TII, RI); 2267 } else { 2268 emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2269 FramePtr, -NumBytes, ARMCC::AL, 0, 2270 *TII); 2271 } 2272 // If there's dynamic realignment, adjust for it. 2273 if (RI.hasStackRealignment(MF)) { 2274 MachineFrameInfo &MFI = MF.getFrameInfo(); 2275 Align MaxAlign = MFI.getMaxAlign(); 2276 assert (!AFI->isThumb1OnlyFunction()); 2277 // Emit bic r6, r6, MaxAlign 2278 assert(MaxAlign <= Align(256) && 2279 "The BIC instruction cannot encode " 2280 "immediates larger than 256 with all lower " 2281 "bits set."); 2282 unsigned bicOpc = AFI->isThumbFunction() ? 
2283 ARM::t2BICri : ARM::BICri; 2284 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6) 2285 .addReg(ARM::R6, RegState::Kill) 2286 .addImm(MaxAlign.value() - 1) 2287 .add(predOps(ARMCC::AL)) 2288 .add(condCodeOp()); 2289 } 2290 } 2291 MI.eraseFromParent(); 2292 return true; 2293 } 2294 2295 case ARM::MOVsrl_flag: 2296 case ARM::MOVsra_flag: { 2297 // These are just fancy MOVs instructions. 2298 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2299 MI.getOperand(0).getReg()) 2300 .add(MI.getOperand(1)) 2301 .addImm(ARM_AM::getSORegOpc( 2302 (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1)) 2303 .add(predOps(ARMCC::AL)) 2304 .addReg(ARM::CPSR, RegState::Define); 2305 MI.eraseFromParent(); 2306 return true; 2307 } 2308 case ARM::RRX: { 2309 // This encodes as "MOVs Rd, Rm, rrx 2310 MachineInstrBuilder MIB = 2311 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2312 MI.getOperand(0).getReg()) 2313 .add(MI.getOperand(1)) 2314 .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)) 2315 .add(predOps(ARMCC::AL)) 2316 .add(condCodeOp()); 2317 TransferImpOps(MI, MIB, MIB); 2318 MI.eraseFromParent(); 2319 return true; 2320 } 2321 case ARM::tTPsoft: 2322 case ARM::TPsoft: { 2323 const bool Thumb = Opcode == ARM::tTPsoft; 2324 2325 MachineInstrBuilder MIB; 2326 MachineFunction *MF = MBB.getParent(); 2327 if (STI->genLongCalls()) { 2328 MachineConstantPool *MCP = MF->getConstantPool(); 2329 unsigned PCLabelID = AFI->createPICLabelUId(); 2330 MachineConstantPoolValue *CPV = 2331 ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), 2332 "__aeabi_read_tp", PCLabelID, 0); 2333 Register Reg = MI.getOperand(0).getReg(); 2334 MIB = 2335 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2336 TII->get(Thumb ? 
ARM::tLDRpci : ARM::LDRi12), Reg) 2337 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2338 if (!Thumb) 2339 MIB.addImm(0); 2340 MIB.add(predOps(ARMCC::AL)); 2341 2342 MIB = 2343 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2344 TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF))); 2345 if (Thumb) 2346 MIB.add(predOps(ARMCC::AL)); 2347 MIB.addReg(Reg, RegState::Kill); 2348 } else { 2349 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2350 TII->get(Thumb ? ARM::tBL : ARM::BL)); 2351 if (Thumb) 2352 MIB.add(predOps(ARMCC::AL)); 2353 MIB.addExternalSymbol("__aeabi_read_tp", 0); 2354 } 2355 2356 MIB.cloneMemRefs(MI); 2357 TransferImpOps(MI, MIB, MIB); 2358 // Update the call site info. 2359 if (MI.isCandidateForCallSiteEntry()) 2360 MF->moveCallSiteInfo(&MI, &*MIB); 2361 MI.eraseFromParent(); 2362 return true; 2363 } 2364 case ARM::tLDRpci_pic: 2365 case ARM::t2LDRpci_pic: { 2366 unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) 2367 ? ARM::tLDRpci : ARM::t2LDRpci; 2368 Register DstReg = MI.getOperand(0).getReg(); 2369 bool DstIsDead = MI.getOperand(0).isDead(); 2370 MachineInstrBuilder MIB1 = 2371 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) 2372 .add(MI.getOperand(1)) 2373 .add(predOps(ARMCC::AL)); 2374 MIB1.cloneMemRefs(MI); 2375 MachineInstrBuilder MIB2 = 2376 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) 2377 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2378 .addReg(DstReg) 2379 .add(MI.getOperand(2)); 2380 TransferImpOps(MI, MIB1, MIB2); 2381 MI.eraseFromParent(); 2382 return true; 2383 } 2384 2385 case ARM::LDRLIT_ga_abs: 2386 case ARM::LDRLIT_ga_pcrel: 2387 case ARM::LDRLIT_ga_pcrel_ldr: 2388 case ARM::tLDRLIT_ga_abs: 2389 case ARM::tLDRLIT_ga_pcrel: { 2390 Register DstReg = MI.getOperand(0).getReg(); 2391 bool DstIsDead = MI.getOperand(0).isDead(); 2392 const MachineOperand &MO1 = MI.getOperand(1); 2393 auto Flags = MO1.getTargetFlags(); 2394 const GlobalValue *GV = MO1.getGlobal(); 2395 bool IsARM 
= 2396 Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs; 2397 bool IsPIC = 2398 Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; 2399 unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; 2400 unsigned PICAddOpc = 2401 IsARM 2402 ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2403 : ARM::tPICADD; 2404 2405 // We need a new const-pool entry to load from. 2406 MachineConstantPool *MCP = MBB.getParent()->getConstantPool(); 2407 unsigned ARMPCLabelIndex = 0; 2408 MachineConstantPoolValue *CPV; 2409 2410 if (IsPIC) { 2411 unsigned PCAdj = IsARM ? 8 : 4; 2412 auto Modifier = (Flags & ARMII::MO_GOT) 2413 ? ARMCP::GOT_PREL 2414 : ARMCP::no_modifier; 2415 ARMPCLabelIndex = AFI->createPICLabelUId(); 2416 CPV = ARMConstantPoolConstant::Create( 2417 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier, 2418 /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL); 2419 } else 2420 CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); 2421 2422 MachineInstrBuilder MIB = 2423 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg) 2424 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2425 if (IsARM) 2426 MIB.addImm(0); 2427 MIB.add(predOps(ARMCC::AL)); 2428 2429 if (IsPIC) { 2430 MachineInstrBuilder MIB = 2431 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) 2432 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2433 .addReg(DstReg) 2434 .addImm(ARMPCLabelIndex); 2435 2436 if (IsARM) 2437 MIB.add(predOps(ARMCC::AL)); 2438 } 2439 2440 MI.eraseFromParent(); 2441 return true; 2442 } 2443 case ARM::MOV_ga_pcrel: 2444 case ARM::MOV_ga_pcrel_ldr: 2445 case ARM::t2MOV_ga_pcrel: { 2446 // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. 
2447 unsigned LabelId = AFI->createPICLabelUId(); 2448 Register DstReg = MI.getOperand(0).getReg(); 2449 bool DstIsDead = MI.getOperand(0).isDead(); 2450 const MachineOperand &MO1 = MI.getOperand(1); 2451 const GlobalValue *GV = MO1.getGlobal(); 2452 unsigned TF = MO1.getTargetFlags(); 2453 bool isARM = Opcode != ARM::t2MOV_ga_pcrel; 2454 unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; 2455 unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; 2456 unsigned LO16TF = TF | ARMII::MO_LO16; 2457 unsigned HI16TF = TF | ARMII::MO_HI16; 2458 unsigned PICAddOpc = isARM 2459 ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2460 : ARM::tPICADD; 2461 MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2462 TII->get(LO16Opc), DstReg) 2463 .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) 2464 .addImm(LabelId); 2465 2466 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) 2467 .addReg(DstReg) 2468 .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) 2469 .addImm(LabelId); 2470 2471 MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2472 TII->get(PICAddOpc)) 2473 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2474 .addReg(DstReg).addImm(LabelId); 2475 if (isARM) { 2476 MIB3.add(predOps(ARMCC::AL)); 2477 if (Opcode == ARM::MOV_ga_pcrel_ldr) 2478 MIB3.cloneMemRefs(MI); 2479 } 2480 TransferImpOps(MI, MIB1, MIB3); 2481 MI.eraseFromParent(); 2482 return true; 2483 } 2484 2485 case ARM::MOVi32imm: 2486 case ARM::MOVCCi32imm: 2487 case ARM::t2MOVi32imm: 2488 case ARM::t2MOVCCi32imm: 2489 ExpandMOV32BitImm(MBB, MBBI); 2490 return true; 2491 2492 case ARM::SUBS_PC_LR: { 2493 MachineInstrBuilder MIB = 2494 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) 2495 .addReg(ARM::LR) 2496 .add(MI.getOperand(0)) 2497 .add(MI.getOperand(1)) 2498 .add(MI.getOperand(2)) 2499 .addReg(ARM::CPSR, RegState::Undef); 2500 TransferImpOps(MI, MIB, MIB); 2501 
MI.eraseFromParent(); 2502 return true; 2503 } 2504 case ARM::VLDMQIA: { 2505 unsigned NewOpc = ARM::VLDMDIA; 2506 MachineInstrBuilder MIB = 2507 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2508 unsigned OpIdx = 0; 2509 2510 // Grab the Q register destination. 2511 bool DstIsDead = MI.getOperand(OpIdx).isDead(); 2512 Register DstReg = MI.getOperand(OpIdx++).getReg(); 2513 2514 // Copy the source register. 2515 MIB.add(MI.getOperand(OpIdx++)); 2516 2517 // Copy the predicate operands. 2518 MIB.add(MI.getOperand(OpIdx++)); 2519 MIB.add(MI.getOperand(OpIdx++)); 2520 2521 // Add the destination operands (D subregs). 2522 Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0); 2523 Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1); 2524 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) 2525 .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 2526 2527 // Add an implicit def for the super-register. 2528 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 2529 TransferImpOps(MI, MIB, MIB); 2530 MIB.cloneMemRefs(MI); 2531 MI.eraseFromParent(); 2532 return true; 2533 } 2534 2535 case ARM::VSTMQIA: { 2536 unsigned NewOpc = ARM::VSTMDIA; 2537 MachineInstrBuilder MIB = 2538 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2539 unsigned OpIdx = 0; 2540 2541 // Grab the Q register source. 2542 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 2543 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 2544 2545 // Copy the destination register. 2546 MachineOperand Dst(MI.getOperand(OpIdx++)); 2547 MIB.add(Dst); 2548 2549 // Copy the predicate operands. 2550 MIB.add(MI.getOperand(OpIdx++)); 2551 MIB.add(MI.getOperand(OpIdx++)); 2552 2553 // Add the source operands (D subregs). 2554 Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); 2555 Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); 2556 MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) 2557 .addReg(D1, SrcIsKill ? 
RegState::Kill : 0); 2558 2559 if (SrcIsKill) // Add an implicit kill for the Q register. 2560 MIB->addRegisterKilled(SrcReg, TRI, true); 2561 2562 TransferImpOps(MI, MIB, MIB); 2563 MIB.cloneMemRefs(MI); 2564 MI.eraseFromParent(); 2565 return true; 2566 } 2567 2568 case ARM::VLD2q8Pseudo: 2569 case ARM::VLD2q16Pseudo: 2570 case ARM::VLD2q32Pseudo: 2571 case ARM::VLD2q8PseudoWB_fixed: 2572 case ARM::VLD2q16PseudoWB_fixed: 2573 case ARM::VLD2q32PseudoWB_fixed: 2574 case ARM::VLD2q8PseudoWB_register: 2575 case ARM::VLD2q16PseudoWB_register: 2576 case ARM::VLD2q32PseudoWB_register: 2577 case ARM::VLD3d8Pseudo: 2578 case ARM::VLD3d16Pseudo: 2579 case ARM::VLD3d32Pseudo: 2580 case ARM::VLD1d8TPseudo: 2581 case ARM::VLD1d16TPseudo: 2582 case ARM::VLD1d32TPseudo: 2583 case ARM::VLD1d64TPseudo: 2584 case ARM::VLD1d64TPseudoWB_fixed: 2585 case ARM::VLD1d64TPseudoWB_register: 2586 case ARM::VLD3d8Pseudo_UPD: 2587 case ARM::VLD3d16Pseudo_UPD: 2588 case ARM::VLD3d32Pseudo_UPD: 2589 case ARM::VLD3q8Pseudo_UPD: 2590 case ARM::VLD3q16Pseudo_UPD: 2591 case ARM::VLD3q32Pseudo_UPD: 2592 case ARM::VLD3q8oddPseudo: 2593 case ARM::VLD3q16oddPseudo: 2594 case ARM::VLD3q32oddPseudo: 2595 case ARM::VLD3q8oddPseudo_UPD: 2596 case ARM::VLD3q16oddPseudo_UPD: 2597 case ARM::VLD3q32oddPseudo_UPD: 2598 case ARM::VLD4d8Pseudo: 2599 case ARM::VLD4d16Pseudo: 2600 case ARM::VLD4d32Pseudo: 2601 case ARM::VLD1d8QPseudo: 2602 case ARM::VLD1d16QPseudo: 2603 case ARM::VLD1d32QPseudo: 2604 case ARM::VLD1d64QPseudo: 2605 case ARM::VLD1d64QPseudoWB_fixed: 2606 case ARM::VLD1d64QPseudoWB_register: 2607 case ARM::VLD1q8HighQPseudo: 2608 case ARM::VLD1q8LowQPseudo_UPD: 2609 case ARM::VLD1q8HighTPseudo: 2610 case ARM::VLD1q8LowTPseudo_UPD: 2611 case ARM::VLD1q16HighQPseudo: 2612 case ARM::VLD1q16LowQPseudo_UPD: 2613 case ARM::VLD1q16HighTPseudo: 2614 case ARM::VLD1q16LowTPseudo_UPD: 2615 case ARM::VLD1q32HighQPseudo: 2616 case ARM::VLD1q32LowQPseudo_UPD: 2617 case ARM::VLD1q32HighTPseudo: 2618 case 
ARM::VLD1q32LowTPseudo_UPD: 2619 case ARM::VLD1q64HighQPseudo: 2620 case ARM::VLD1q64LowQPseudo_UPD: 2621 case ARM::VLD1q64HighTPseudo: 2622 case ARM::VLD1q64LowTPseudo_UPD: 2623 case ARM::VLD4d8Pseudo_UPD: 2624 case ARM::VLD4d16Pseudo_UPD: 2625 case ARM::VLD4d32Pseudo_UPD: 2626 case ARM::VLD4q8Pseudo_UPD: 2627 case ARM::VLD4q16Pseudo_UPD: 2628 case ARM::VLD4q32Pseudo_UPD: 2629 case ARM::VLD4q8oddPseudo: 2630 case ARM::VLD4q16oddPseudo: 2631 case ARM::VLD4q32oddPseudo: 2632 case ARM::VLD4q8oddPseudo_UPD: 2633 case ARM::VLD4q16oddPseudo_UPD: 2634 case ARM::VLD4q32oddPseudo_UPD: 2635 case ARM::VLD3DUPd8Pseudo: 2636 case ARM::VLD3DUPd16Pseudo: 2637 case ARM::VLD3DUPd32Pseudo: 2638 case ARM::VLD3DUPd8Pseudo_UPD: 2639 case ARM::VLD3DUPd16Pseudo_UPD: 2640 case ARM::VLD3DUPd32Pseudo_UPD: 2641 case ARM::VLD4DUPd8Pseudo: 2642 case ARM::VLD4DUPd16Pseudo: 2643 case ARM::VLD4DUPd32Pseudo: 2644 case ARM::VLD4DUPd8Pseudo_UPD: 2645 case ARM::VLD4DUPd16Pseudo_UPD: 2646 case ARM::VLD4DUPd32Pseudo_UPD: 2647 case ARM::VLD2DUPq8EvenPseudo: 2648 case ARM::VLD2DUPq8OddPseudo: 2649 case ARM::VLD2DUPq16EvenPseudo: 2650 case ARM::VLD2DUPq16OddPseudo: 2651 case ARM::VLD2DUPq32EvenPseudo: 2652 case ARM::VLD2DUPq32OddPseudo: 2653 case ARM::VLD3DUPq8EvenPseudo: 2654 case ARM::VLD3DUPq8OddPseudo: 2655 case ARM::VLD3DUPq16EvenPseudo: 2656 case ARM::VLD3DUPq16OddPseudo: 2657 case ARM::VLD3DUPq32EvenPseudo: 2658 case ARM::VLD3DUPq32OddPseudo: 2659 case ARM::VLD4DUPq8EvenPseudo: 2660 case ARM::VLD4DUPq8OddPseudo: 2661 case ARM::VLD4DUPq16EvenPseudo: 2662 case ARM::VLD4DUPq16OddPseudo: 2663 case ARM::VLD4DUPq32EvenPseudo: 2664 case ARM::VLD4DUPq32OddPseudo: 2665 ExpandVLD(MBBI); 2666 return true; 2667 2668 case ARM::VST2q8Pseudo: 2669 case ARM::VST2q16Pseudo: 2670 case ARM::VST2q32Pseudo: 2671 case ARM::VST2q8PseudoWB_fixed: 2672 case ARM::VST2q16PseudoWB_fixed: 2673 case ARM::VST2q32PseudoWB_fixed: 2674 case ARM::VST2q8PseudoWB_register: 2675 case ARM::VST2q16PseudoWB_register: 2676 case 
ARM::VST2q32PseudoWB_register: 2677 case ARM::VST3d8Pseudo: 2678 case ARM::VST3d16Pseudo: 2679 case ARM::VST3d32Pseudo: 2680 case ARM::VST1d8TPseudo: 2681 case ARM::VST1d8TPseudoWB_fixed: 2682 case ARM::VST1d8TPseudoWB_register: 2683 case ARM::VST1d16TPseudo: 2684 case ARM::VST1d16TPseudoWB_fixed: 2685 case ARM::VST1d16TPseudoWB_register: 2686 case ARM::VST1d32TPseudo: 2687 case ARM::VST1d32TPseudoWB_fixed: 2688 case ARM::VST1d32TPseudoWB_register: 2689 case ARM::VST1d64TPseudo: 2690 case ARM::VST1d64TPseudoWB_fixed: 2691 case ARM::VST1d64TPseudoWB_register: 2692 case ARM::VST3d8Pseudo_UPD: 2693 case ARM::VST3d16Pseudo_UPD: 2694 case ARM::VST3d32Pseudo_UPD: 2695 case ARM::VST3q8Pseudo_UPD: 2696 case ARM::VST3q16Pseudo_UPD: 2697 case ARM::VST3q32Pseudo_UPD: 2698 case ARM::VST3q8oddPseudo: 2699 case ARM::VST3q16oddPseudo: 2700 case ARM::VST3q32oddPseudo: 2701 case ARM::VST3q8oddPseudo_UPD: 2702 case ARM::VST3q16oddPseudo_UPD: 2703 case ARM::VST3q32oddPseudo_UPD: 2704 case ARM::VST4d8Pseudo: 2705 case ARM::VST4d16Pseudo: 2706 case ARM::VST4d32Pseudo: 2707 case ARM::VST1d8QPseudo: 2708 case ARM::VST1d8QPseudoWB_fixed: 2709 case ARM::VST1d8QPseudoWB_register: 2710 case ARM::VST1d16QPseudo: 2711 case ARM::VST1d16QPseudoWB_fixed: 2712 case ARM::VST1d16QPseudoWB_register: 2713 case ARM::VST1d32QPseudo: 2714 case ARM::VST1d32QPseudoWB_fixed: 2715 case ARM::VST1d32QPseudoWB_register: 2716 case ARM::VST1d64QPseudo: 2717 case ARM::VST1d64QPseudoWB_fixed: 2718 case ARM::VST1d64QPseudoWB_register: 2719 case ARM::VST4d8Pseudo_UPD: 2720 case ARM::VST4d16Pseudo_UPD: 2721 case ARM::VST4d32Pseudo_UPD: 2722 case ARM::VST1q8HighQPseudo: 2723 case ARM::VST1q8LowQPseudo_UPD: 2724 case ARM::VST1q8HighTPseudo: 2725 case ARM::VST1q8LowTPseudo_UPD: 2726 case ARM::VST1q16HighQPseudo: 2727 case ARM::VST1q16LowQPseudo_UPD: 2728 case ARM::VST1q16HighTPseudo: 2729 case ARM::VST1q16LowTPseudo_UPD: 2730 case ARM::VST1q32HighQPseudo: 2731 case ARM::VST1q32LowQPseudo_UPD: 2732 case 
ARM::VST1q32HighTPseudo: 2733 case ARM::VST1q32LowTPseudo_UPD: 2734 case ARM::VST1q64HighQPseudo: 2735 case ARM::VST1q64LowQPseudo_UPD: 2736 case ARM::VST1q64HighTPseudo: 2737 case ARM::VST1q64LowTPseudo_UPD: 2738 case ARM::VST1q8HighTPseudo_UPD: 2739 case ARM::VST1q16HighTPseudo_UPD: 2740 case ARM::VST1q32HighTPseudo_UPD: 2741 case ARM::VST1q64HighTPseudo_UPD: 2742 case ARM::VST1q8HighQPseudo_UPD: 2743 case ARM::VST1q16HighQPseudo_UPD: 2744 case ARM::VST1q32HighQPseudo_UPD: 2745 case ARM::VST1q64HighQPseudo_UPD: 2746 case ARM::VST4q8Pseudo_UPD: 2747 case ARM::VST4q16Pseudo_UPD: 2748 case ARM::VST4q32Pseudo_UPD: 2749 case ARM::VST4q8oddPseudo: 2750 case ARM::VST4q16oddPseudo: 2751 case ARM::VST4q32oddPseudo: 2752 case ARM::VST4q8oddPseudo_UPD: 2753 case ARM::VST4q16oddPseudo_UPD: 2754 case ARM::VST4q32oddPseudo_UPD: 2755 ExpandVST(MBBI); 2756 return true; 2757 2758 case ARM::VLD1LNq8Pseudo: 2759 case ARM::VLD1LNq16Pseudo: 2760 case ARM::VLD1LNq32Pseudo: 2761 case ARM::VLD1LNq8Pseudo_UPD: 2762 case ARM::VLD1LNq16Pseudo_UPD: 2763 case ARM::VLD1LNq32Pseudo_UPD: 2764 case ARM::VLD2LNd8Pseudo: 2765 case ARM::VLD2LNd16Pseudo: 2766 case ARM::VLD2LNd32Pseudo: 2767 case ARM::VLD2LNq16Pseudo: 2768 case ARM::VLD2LNq32Pseudo: 2769 case ARM::VLD2LNd8Pseudo_UPD: 2770 case ARM::VLD2LNd16Pseudo_UPD: 2771 case ARM::VLD2LNd32Pseudo_UPD: 2772 case ARM::VLD2LNq16Pseudo_UPD: 2773 case ARM::VLD2LNq32Pseudo_UPD: 2774 case ARM::VLD3LNd8Pseudo: 2775 case ARM::VLD3LNd16Pseudo: 2776 case ARM::VLD3LNd32Pseudo: 2777 case ARM::VLD3LNq16Pseudo: 2778 case ARM::VLD3LNq32Pseudo: 2779 case ARM::VLD3LNd8Pseudo_UPD: 2780 case ARM::VLD3LNd16Pseudo_UPD: 2781 case ARM::VLD3LNd32Pseudo_UPD: 2782 case ARM::VLD3LNq16Pseudo_UPD: 2783 case ARM::VLD3LNq32Pseudo_UPD: 2784 case ARM::VLD4LNd8Pseudo: 2785 case ARM::VLD4LNd16Pseudo: 2786 case ARM::VLD4LNd32Pseudo: 2787 case ARM::VLD4LNq16Pseudo: 2788 case ARM::VLD4LNq32Pseudo: 2789 case ARM::VLD4LNd8Pseudo_UPD: 2790 case ARM::VLD4LNd16Pseudo_UPD: 2791 case 
ARM::VLD4LNd32Pseudo_UPD: 2792 case ARM::VLD4LNq16Pseudo_UPD: 2793 case ARM::VLD4LNq32Pseudo_UPD: 2794 case ARM::VST1LNq8Pseudo: 2795 case ARM::VST1LNq16Pseudo: 2796 case ARM::VST1LNq32Pseudo: 2797 case ARM::VST1LNq8Pseudo_UPD: 2798 case ARM::VST1LNq16Pseudo_UPD: 2799 case ARM::VST1LNq32Pseudo_UPD: 2800 case ARM::VST2LNd8Pseudo: 2801 case ARM::VST2LNd16Pseudo: 2802 case ARM::VST2LNd32Pseudo: 2803 case ARM::VST2LNq16Pseudo: 2804 case ARM::VST2LNq32Pseudo: 2805 case ARM::VST2LNd8Pseudo_UPD: 2806 case ARM::VST2LNd16Pseudo_UPD: 2807 case ARM::VST2LNd32Pseudo_UPD: 2808 case ARM::VST2LNq16Pseudo_UPD: 2809 case ARM::VST2LNq32Pseudo_UPD: 2810 case ARM::VST3LNd8Pseudo: 2811 case ARM::VST3LNd16Pseudo: 2812 case ARM::VST3LNd32Pseudo: 2813 case ARM::VST3LNq16Pseudo: 2814 case ARM::VST3LNq32Pseudo: 2815 case ARM::VST3LNd8Pseudo_UPD: 2816 case ARM::VST3LNd16Pseudo_UPD: 2817 case ARM::VST3LNd32Pseudo_UPD: 2818 case ARM::VST3LNq16Pseudo_UPD: 2819 case ARM::VST3LNq32Pseudo_UPD: 2820 case ARM::VST4LNd8Pseudo: 2821 case ARM::VST4LNd16Pseudo: 2822 case ARM::VST4LNd32Pseudo: 2823 case ARM::VST4LNq16Pseudo: 2824 case ARM::VST4LNq32Pseudo: 2825 case ARM::VST4LNd8Pseudo_UPD: 2826 case ARM::VST4LNd16Pseudo_UPD: 2827 case ARM::VST4LNd32Pseudo_UPD: 2828 case ARM::VST4LNq16Pseudo_UPD: 2829 case ARM::VST4LNq32Pseudo_UPD: 2830 ExpandLaneOp(MBBI); 2831 return true; 2832 2833 case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; 2834 case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; 2835 case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; 2836 case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; 2837 2838 case ARM::tCMP_SWAP_8: 2839 assert(STI->isThumb()); 2840 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB, 2841 NextMBBI); 2842 case ARM::tCMP_SWAP_16: 2843 assert(STI->isThumb()); 2844 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, 2845 NextMBBI); 2846 2847 case 
ARM::CMP_SWAP_8: 2848 assert(!STI->isThumb()); 2849 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB, 2850 NextMBBI); 2851 case ARM::CMP_SWAP_16: 2852 assert(!STI->isThumb()); 2853 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, 2854 NextMBBI); 2855 case ARM::CMP_SWAP_32: 2856 if (STI->isThumb()) 2857 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0, 2858 NextMBBI); 2859 else 2860 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); 2861 2862 case ARM::CMP_SWAP_64: 2863 return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); 2864 2865 case ARM::tBL_PUSHLR: 2866 case ARM::BL_PUSHLR: { 2867 const bool Thumb = Opcode == ARM::tBL_PUSHLR; 2868 Register Reg = MI.getOperand(0).getReg(); 2869 assert(Reg == ARM::LR && "expect LR register!"); 2870 MachineInstrBuilder MIB; 2871 if (Thumb) { 2872 // push {lr} 2873 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH)) 2874 .add(predOps(ARMCC::AL)) 2875 .addReg(Reg); 2876 2877 // bl __gnu_mcount_nc 2878 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL)); 2879 } else { 2880 // stmdb sp!, {lr} 2881 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD)) 2882 .addReg(ARM::SP, RegState::Define) 2883 .addReg(ARM::SP) 2884 .add(predOps(ARMCC::AL)) 2885 .addReg(Reg); 2886 2887 // bl __gnu_mcount_nc 2888 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL)); 2889 } 2890 MIB.cloneMemRefs(MI); 2891 for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i)); 2892 MI.eraseFromParent(); 2893 return true; 2894 } 2895 case ARM::LOADDUAL: 2896 case ARM::STOREDUAL: { 2897 Register PairReg = MI.getOperand(0).getReg(); 2898 2899 MachineInstrBuilder MIB = 2900 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2901 TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD)) 2902 .addReg(TRI->getSubReg(PairReg, ARM::gsub_0), 2903 Opcode == ARM::LOADDUAL ? 
RegState::Define : 0) 2904 .addReg(TRI->getSubReg(PairReg, ARM::gsub_1), 2905 Opcode == ARM::LOADDUAL ? RegState::Define : 0); 2906 for (unsigned i = 1; i < MI.getNumOperands(); i++) 2907 MIB.add(MI.getOperand(i)); 2908 MIB.add(predOps(ARMCC::AL)); 2909 MIB.cloneMemRefs(MI); 2910 MI.eraseFromParent(); 2911 return true; 2912 } 2913 } 2914 } 2915 2916 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) { 2917 bool Modified = false; 2918 2919 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 2920 while (MBBI != E) { 2921 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 2922 Modified |= ExpandMI(MBB, MBBI, NMBBI); 2923 MBBI = NMBBI; 2924 } 2925 2926 return Modified; 2927 } 2928 2929 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) { 2930 STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget()); 2931 TII = STI->getInstrInfo(); 2932 TRI = STI->getRegisterInfo(); 2933 AFI = MF.getInfo<ARMFunctionInfo>(); 2934 2935 LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n" 2936 << "********** Function: " << MF.getName() << '\n'); 2937 2938 bool Modified = false; 2939 for (MachineBasicBlock &MBB : MF) 2940 Modified |= ExpandMBB(MBB); 2941 if (VerifyARMPseudo) 2942 MF.verify(this, "After expanding ARM pseudo instructions."); 2943 2944 LLVM_DEBUG(dbgs() << "***************************************************\n"); 2945 return Modified; 2946 } 2947 2948 /// createARMExpandPseudoPass - returns an instance of the pseudo instruction 2949 /// expansion pass. 2950 FunctionPass *llvm::createARMExpandPseudoPass() { 2951 return new ARMExpandPseudo(); 2952 } 2953