1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an instruction selector for the ARM target. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "ARM.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMTargetMachine.h" 16 #include "MCTargetDesc/ARMAddressingModes.h" 17 #include "Utils/ARMBaseInfo.h" 18 #include "llvm/ADT/StringSwitch.h" 19 #include "llvm/CodeGen/MachineFrameInfo.h" 20 #include "llvm/CodeGen/MachineFunction.h" 21 #include "llvm/CodeGen/MachineInstrBuilder.h" 22 #include "llvm/CodeGen/MachineRegisterInfo.h" 23 #include "llvm/CodeGen/SelectionDAG.h" 24 #include "llvm/CodeGen/SelectionDAGISel.h" 25 #include "llvm/CodeGen/TargetLowering.h" 26 #include "llvm/IR/CallingConv.h" 27 #include "llvm/IR/Constants.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/IntrinsicsARM.h" 32 #include "llvm/IR/LLVMContext.h" 33 #include "llvm/Support/CommandLine.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/ErrorHandling.h" 36 #include "llvm/Target/TargetOptions.h" 37 38 using namespace llvm; 39 40 #define DEBUG_TYPE "arm-isel" 41 42 static cl::opt<bool> 43 DisableShifterOp("disable-shifter-op", cl::Hidden, 44 cl::desc("Disable isel of shifter-op"), 45 cl::init(false)); 46 47 //===--------------------------------------------------------------------===// 48 /// ARMDAGToDAGISel - ARM specific code to select ARM machine 49 /// instructions for SelectionDAG operations. 
50 /// 51 namespace { 52 53 class ARMDAGToDAGISel : public SelectionDAGISel { 54 /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can 55 /// make the right decision when generating code for different targets. 56 const ARMSubtarget *Subtarget; 57 58 public: 59 explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) 60 : SelectionDAGISel(tm, OptLevel) {} 61 62 bool runOnMachineFunction(MachineFunction &MF) override { 63 // Reset the subtarget each time through. 64 Subtarget = &MF.getSubtarget<ARMSubtarget>(); 65 SelectionDAGISel::runOnMachineFunction(MF); 66 return true; 67 } 68 69 StringRef getPassName() const override { return "ARM Instruction Selection"; } 70 71 void PreprocessISelDAG() override; 72 73 /// getI32Imm - Return a target constant of type i32 with the specified 74 /// value. 75 inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) { 76 return CurDAG->getTargetConstant(Imm, dl, MVT::i32); 77 } 78 79 void Select(SDNode *N) override; 80 81 bool hasNoVMLxHazardUse(SDNode *N) const; 82 bool isShifterOpProfitable(const SDValue &Shift, 83 ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt); 84 bool SelectRegShifterOperand(SDValue N, SDValue &A, 85 SDValue &B, SDValue &C, 86 bool CheckProfitability = true); 87 bool SelectImmShifterOperand(SDValue N, SDValue &A, 88 SDValue &B, bool CheckProfitability = true); 89 bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B, 90 SDValue &C) { 91 // Don't apply the profitability check 92 return SelectRegShifterOperand(N, A, B, C, false); 93 } 94 bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) { 95 // Don't apply the profitability check 96 return SelectImmShifterOperand(N, A, B, false); 97 } 98 bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) { 99 if (!N.hasOneUse()) 100 return false; 101 return SelectImmShifterOperand(N, A, B, false); 102 } 103 104 bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out); 105 106 bool 
SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 107 bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc); 108 109 bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { 110 const ConstantSDNode *CN = cast<ConstantSDNode>(N); 111 Pred = CurDAG->getTargetConstant(CN->getZExtValue(), SDLoc(N), MVT::i32); 112 Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); 113 return true; 114 } 115 116 bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 117 SDValue &Offset, SDValue &Opc); 118 bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 119 SDValue &Offset, SDValue &Opc); 120 bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 121 SDValue &Offset, SDValue &Opc); 122 bool SelectAddrOffsetNone(SDValue N, SDValue &Base); 123 bool SelectAddrMode3(SDValue N, SDValue &Base, 124 SDValue &Offset, SDValue &Opc); 125 bool SelectAddrMode3Offset(SDNode *Op, SDValue N, 126 SDValue &Offset, SDValue &Opc); 127 bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16); 128 bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset); 129 bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset); 130 bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align); 131 bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset); 132 133 bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label); 134 135 // Thumb Addressing Modes: 136 bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset); 137 bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset); 138 bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base, 139 SDValue &OffImm); 140 bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 141 SDValue &OffImm); 142 bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 143 SDValue &OffImm); 144 bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 145 SDValue &OffImm); 146 bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, 
SDValue &OffImm); 147 template <unsigned Shift> 148 bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 149 150 // Thumb 2 Addressing Modes: 151 bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm); 152 template <unsigned Shift> 153 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm); 154 bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, 155 SDValue &OffImm); 156 bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 157 SDValue &OffImm); 158 template <unsigned Shift> 159 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm); 160 bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm, 161 unsigned Shift); 162 template <unsigned Shift> 163 bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm); 164 bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, 165 SDValue &OffReg, SDValue &ShImm); 166 bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); 167 168 template<int Min, int Max> 169 bool SelectImmediateInRange(SDValue N, SDValue &OffImm); 170 171 inline bool is_so_imm(unsigned Imm) const { 172 return ARM_AM::getSOImmVal(Imm) != -1; 173 } 174 175 inline bool is_so_imm_not(unsigned Imm) const { 176 return ARM_AM::getSOImmVal(~Imm) != -1; 177 } 178 179 inline bool is_t2_so_imm(unsigned Imm) const { 180 return ARM_AM::getT2SOImmVal(Imm) != -1; 181 } 182 183 inline bool is_t2_so_imm_not(unsigned Imm) const { 184 return ARM_AM::getT2SOImmVal(~Imm) != -1; 185 } 186 187 // Include the pieces autogenerated from the target description. 188 #include "ARMGenDAGISel.inc" 189 190 private: 191 void transferMemOperands(SDNode *Src, SDNode *Dst); 192 193 /// Indexed (pre/post inc/dec) load matching code for ARM. 194 bool tryARMIndexedLoad(SDNode *N); 195 bool tryT1IndexedLoad(SDNode *N); 196 bool tryT2IndexedLoad(SDNode *N); 197 bool tryMVEIndexedLoad(SDNode *N); 198 199 /// SelectVLD - Select NEON load intrinsics. NumVecs should be 200 /// 1, 2, 3 or 4. 
The opcode arrays specify the instructions used for 201 /// loads of D registers and even subregs and odd subregs of Q registers. 202 /// For NumVecs <= 2, QOpcodes1 is not used. 203 void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 204 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 205 const uint16_t *QOpcodes1); 206 207 /// SelectVST - Select NEON store intrinsics. NumVecs should 208 /// be 1, 2, 3 or 4. The opcode arrays specify the instructions used for 209 /// stores of D registers and even subregs and odd subregs of Q registers. 210 /// For NumVecs <= 2, QOpcodes1 is not used. 211 void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 212 const uint16_t *DOpcodes, const uint16_t *QOpcodes0, 213 const uint16_t *QOpcodes1); 214 215 /// SelectVLDSTLane - Select NEON load/store lane intrinsics. NumVecs should 216 /// be 2, 3 or 4. The opcode arrays specify the instructions used for 217 /// load/store of D registers and Q registers. 218 void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 219 unsigned NumVecs, const uint16_t *DOpcodes, 220 const uint16_t *QOpcodes); 221 222 /// Helper functions for setting up clusters of MVE predication operands. 223 template <typename SDValueVector> 224 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 225 SDValue PredicateMask); 226 template <typename SDValueVector> 227 void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 228 SDValue PredicateMask, SDValue Inactive); 229 230 template <typename SDValueVector> 231 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc); 232 template <typename SDValueVector> 233 void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy); 234 235 /// SelectMVE_WB - Select MVE writeback load/store intrinsics. 236 void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated); 237 238 /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics. 
239 void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate, 240 bool HasSaturationOperand); 241 242 /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics. 243 void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 244 uint16_t OpcodeWithNoCarry, bool Add, bool Predicated); 245 246 /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between 247 /// vector lanes. 248 void SelectMVE_VSHLC(SDNode *N, bool Predicated); 249 250 /// Select long MVE vector reductions with two vector operands 251 /// Stride is the number of vector element widths the instruction can operate 252 /// on: 253 /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32] 254 /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32] 255 /// Stride is used when addressing the OpcodesS array which contains multiple 256 /// opcodes for each element width. 257 /// TySize is the index into the list of element types listed above 258 void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 259 const uint16_t *OpcodesS, const uint16_t *OpcodesU, 260 size_t Stride, size_t TySize); 261 262 /// Select a 64-bit MVE vector reduction with two vector operands 263 /// arm_mve_vmlldava_[predicated] 264 void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 265 const uint16_t *OpcodesU); 266 /// Select a 72-bit MVE vector rounding reduction with two vector operands 267 /// int_arm_mve_vrmlldavha[_predicated] 268 void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS, 269 const uint16_t *OpcodesU); 270 271 /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs 272 /// should be 2 or 4. The opcode array specifies the instructions 273 /// used for 8, 16 and 32-bit lane sizes respectively, and each 274 /// pointer points to a set of NumVecs sub-opcodes used for the 275 /// different stages (e.g. VLD20 versus VLD21) of each load family. 
276 void SelectMVE_VLD(SDNode *N, unsigned NumVecs, 277 const uint16_t *const *Opcodes, bool HasWriteback); 278 279 /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an 280 /// array of 3 elements for the 8, 16 and 32-bit lane sizes. 281 void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 282 bool Wrapping, bool Predicated); 283 284 /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D, 285 /// CX1DA, CX2D, CX2DA, CX3, CX3DA). 286 /// \arg \c NumExtraOps number of extra operands besides the coprocossor, 287 /// the accumulator and the immediate operand, i.e. 0 288 /// for CX1*, 1 for CX2*, 2 for CX3* 289 /// \arg \c HasAccum whether the instruction has an accumulator operand 290 void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps, 291 bool HasAccum); 292 293 /// SelectVLDDup - Select NEON load-duplicate intrinsics. NumVecs 294 /// should be 1, 2, 3 or 4. The opcode array specifies the instructions used 295 /// for loading D registers. 296 void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating, 297 unsigned NumVecs, const uint16_t *DOpcodes, 298 const uint16_t *QOpcodes0 = nullptr, 299 const uint16_t *QOpcodes1 = nullptr); 300 301 /// Try to select SBFX/UBFX instructions for ARM. 302 bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned); 303 304 bool tryInsertVectorElt(SDNode *N); 305 306 // Select special operations if node forms integer ABS pattern 307 bool tryABSOp(SDNode *N); 308 309 bool tryReadRegister(SDNode *N); 310 bool tryWriteRegister(SDNode *N); 311 312 bool tryInlineAsm(SDNode *N); 313 314 void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI); 315 316 void SelectCMP_SWAP(SDNode *N); 317 318 /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for 319 /// inline asm expressions. 320 bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 321 std::vector<SDValue> &OutOps) override; 322 323 // Form pairs of consecutive R, S, D, or Q registers. 
324 SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1); 325 SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1); 326 SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1); 327 SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1); 328 329 // Form sequences of 4 consecutive S, D, or Q registers. 330 SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 331 SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 332 SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3); 333 334 // Get the alignment operand for a NEON VLD or VST instruction. 335 SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs, 336 bool is64BitVector); 337 338 /// Checks if N is a multiplication by a constant where we can extract out a 339 /// power of two from the constant so that it can be used in a shift, but only 340 /// if it simplifies the materialization of the constant. Returns true if it 341 /// is, and assigns to PowerOfTwo the power of two that should be extracted 342 /// out and to NewMulConst the new constant to be multiplied by. 343 bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift, 344 unsigned &PowerOfTwo, SDValue &NewMulConst) const; 345 346 /// Replace N with M in CurDAG, in a way that also ensures that M gets 347 /// selected when N would have been selected. 348 void replaceDAGValue(const SDValue &N, SDValue M); 349 }; 350 } 351 352 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant 353 /// operand. If so Imm will receive the 32-bit value. 354 static bool isInt32Immediate(SDNode *N, unsigned &Imm) { 355 if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) { 356 Imm = cast<ConstantSDNode>(N)->getZExtValue(); 357 return true; 358 } 359 return false; 360 } 361 362 // isInt32Immediate - This method tests to see if a constant operand. 363 // If so Imm will receive the 32 bit value. 
364 static bool isInt32Immediate(SDValue N, unsigned &Imm) { 365 return isInt32Immediate(N.getNode(), Imm); 366 } 367 368 // isOpcWithIntImmediate - This method tests to see if the node is a specific 369 // opcode and that it has a immediate integer right operand. 370 // If so Imm will receive the 32 bit value. 371 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) { 372 return N->getOpcode() == Opc && 373 isInt32Immediate(N->getOperand(1).getNode(), Imm); 374 } 375 376 /// Check whether a particular node is a constant value representable as 377 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax). 378 /// 379 /// \param ScaledConstant [out] - On success, the pre-scaled constant value. 380 static bool isScaledConstantInRange(SDValue Node, int Scale, 381 int RangeMin, int RangeMax, 382 int &ScaledConstant) { 383 assert(Scale > 0 && "Invalid scale!"); 384 385 // Check that this is a constant. 386 const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node); 387 if (!C) 388 return false; 389 390 ScaledConstant = (int) C->getZExtValue(); 391 if ((ScaledConstant % Scale) != 0) 392 return false; 393 394 ScaledConstant /= Scale; 395 return ScaledConstant >= RangeMin && ScaledConstant < RangeMax; 396 } 397 398 void ARMDAGToDAGISel::PreprocessISelDAG() { 399 if (!Subtarget->hasV6T2Ops()) 400 return; 401 402 bool isThumb2 = Subtarget->isThumb(); 403 for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(), 404 E = CurDAG->allnodes_end(); I != E; ) { 405 SDNode *N = &*I++; // Preincrement iterator to avoid invalidation issues. 406 407 if (N->getOpcode() != ISD::ADD) 408 continue; 409 410 // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with 411 // leading zeros, followed by consecutive set bits, followed by 1 or 2 412 // trailing zeros, e.g. 1020. 413 // Transform the expression to 414 // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number 415 // of trailing zeros of c2. 
The left shift would be folded as an shifter 416 // operand of 'add' and the 'and' and 'srl' would become a bits extraction 417 // node (UBFX). 418 419 SDValue N0 = N->getOperand(0); 420 SDValue N1 = N->getOperand(1); 421 unsigned And_imm = 0; 422 if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) { 423 if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm)) 424 std::swap(N0, N1); 425 } 426 if (!And_imm) 427 continue; 428 429 // Check if the AND mask is an immediate of the form: 000.....1111111100 430 unsigned TZ = countTrailingZeros(And_imm); 431 if (TZ != 1 && TZ != 2) 432 // Be conservative here. Shifter operands aren't always free. e.g. On 433 // Swift, left shifter operand of 1 / 2 for free but others are not. 434 // e.g. 435 // ubfx r3, r1, #16, #8 436 // ldr.w r3, [r0, r3, lsl #2] 437 // vs. 438 // mov.w r9, #1020 439 // and.w r2, r9, r1, lsr #14 440 // ldr r2, [r0, r2] 441 continue; 442 And_imm >>= TZ; 443 if (And_imm & (And_imm + 1)) 444 continue; 445 446 // Look for (and (srl X, c1), c2). 447 SDValue Srl = N1.getOperand(0); 448 unsigned Srl_imm = 0; 449 if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) || 450 (Srl_imm <= 2)) 451 continue; 452 453 // Make sure first operand is not a shifter operand which would prevent 454 // folding of the left shift. 455 SDValue CPTmp0; 456 SDValue CPTmp1; 457 SDValue CPTmp2; 458 if (isThumb2) { 459 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1)) 460 continue; 461 } else { 462 if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) || 463 SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2)) 464 continue; 465 } 466 467 // Now make the transformation. 
468 Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, 469 Srl.getOperand(0), 470 CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl), 471 MVT::i32)); 472 N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, 473 Srl, 474 CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32)); 475 N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, 476 N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32)); 477 CurDAG->UpdateNodeOperands(N, N0, N1); 478 } 479 } 480 481 /// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS 482 /// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at 483 /// least on current ARM implementations) which should be avoidded. 484 bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { 485 if (OptLevel == CodeGenOpt::None) 486 return true; 487 488 if (!Subtarget->hasVMLxHazards()) 489 return true; 490 491 if (!N->hasOneUse()) 492 return false; 493 494 SDNode *Use = *N->use_begin(); 495 if (Use->getOpcode() == ISD::CopyToReg) 496 return true; 497 if (Use->isMachineOpcode()) { 498 const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>( 499 CurDAG->getSubtarget().getInstrInfo()); 500 501 const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); 502 if (MCID.mayStore()) 503 return true; 504 unsigned Opcode = MCID.getOpcode(); 505 if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD) 506 return true; 507 // vmlx feeding into another vmlx. We actually want to unfold 508 // the use later in the MLxExpansion pass. e.g. 509 // vmla 510 // vmla (stall 8 cycles) 511 // 512 // vmul (5 cycles) 513 // vadd (5 cycles) 514 // vmla 515 // This adds up to about 18 - 19 cycles. 516 // 517 // vmla 518 // vmul (stall 4 cycles) 519 // vadd adds up to about 14 cycles. 
520 return TII->isFpMLxInstruction(Opcode); 521 } 522 523 return false; 524 } 525 526 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift, 527 ARM_AM::ShiftOpc ShOpcVal, 528 unsigned ShAmt) { 529 if (!Subtarget->isLikeA9() && !Subtarget->isSwift()) 530 return true; 531 if (Shift.hasOneUse()) 532 return true; 533 // R << 2 is free. 534 return ShOpcVal == ARM_AM::lsl && 535 (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1)); 536 } 537 538 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N, 539 unsigned MaxShift, 540 unsigned &PowerOfTwo, 541 SDValue &NewMulConst) const { 542 assert(N.getOpcode() == ISD::MUL); 543 assert(MaxShift > 0); 544 545 // If the multiply is used in more than one place then changing the constant 546 // will make other uses incorrect, so don't. 547 if (!N.hasOneUse()) return false; 548 // Check if the multiply is by a constant 549 ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1)); 550 if (!MulConst) return false; 551 // If the constant is used in more than one place then modifying it will mean 552 // we need to materialize two constants instead of one, which is a bad idea. 
553 if (!MulConst->hasOneUse()) return false; 554 unsigned MulConstVal = MulConst->getZExtValue(); 555 if (MulConstVal == 0) return false; 556 557 // Find the largest power of 2 that MulConstVal is a multiple of 558 PowerOfTwo = MaxShift; 559 while ((MulConstVal % (1 << PowerOfTwo)) != 0) { 560 --PowerOfTwo; 561 if (PowerOfTwo == 0) return false; 562 } 563 564 // Only optimise if the new cost is better 565 unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo); 566 NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32); 567 unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget); 568 unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget); 569 return NewCost < OldCost; 570 } 571 572 void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) { 573 CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode()); 574 ReplaceUses(N, M); 575 } 576 577 bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, 578 SDValue &BaseReg, 579 SDValue &Opc, 580 bool CheckProfitability) { 581 if (DisableShifterOp) 582 return false; 583 584 // If N is a multiply-by-constant and it's profitable to extract a shift and 585 // use it in a shifted operand do so. 586 if (N.getOpcode() == ISD::MUL) { 587 unsigned PowerOfTwo = 0; 588 SDValue NewMulConst; 589 if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { 590 HandleSDNode Handle(N); 591 SDLoc Loc(N); 592 replaceDAGValue(N.getOperand(1), NewMulConst); 593 BaseReg = Handle.getValue(); 594 Opc = CurDAG->getTargetConstant( 595 ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32); 596 return true; 597 } 598 } 599 600 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 601 602 // Don't match base register only case. That is matched to a separate 603 // lower complexity pattern with explicit register operand. 
604 if (ShOpcVal == ARM_AM::no_shift) return false; 605 606 BaseReg = N.getOperand(0); 607 unsigned ShImmVal = 0; 608 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 609 if (!RHS) return false; 610 ShImmVal = RHS->getZExtValue() & 31; 611 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 612 SDLoc(N), MVT::i32); 613 return true; 614 } 615 616 bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N, 617 SDValue &BaseReg, 618 SDValue &ShReg, 619 SDValue &Opc, 620 bool CheckProfitability) { 621 if (DisableShifterOp) 622 return false; 623 624 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 625 626 // Don't match base register only case. That is matched to a separate 627 // lower complexity pattern with explicit register operand. 628 if (ShOpcVal == ARM_AM::no_shift) return false; 629 630 BaseReg = N.getOperand(0); 631 unsigned ShImmVal = 0; 632 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 633 if (RHS) return false; 634 635 ShReg = N.getOperand(1); 636 if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal)) 637 return false; 638 Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal), 639 SDLoc(N), MVT::i32); 640 return true; 641 } 642 643 // Determine whether an ISD::OR's operands are suitable to turn the operation 644 // into an addition, which often has more compact encodings. 645 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) { 646 assert(Parent->getOpcode() == ISD::OR && "unexpected parent"); 647 Out = N; 648 return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1)); 649 } 650 651 652 bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, 653 SDValue &Base, 654 SDValue &OffImm) { 655 // Match simple R + imm12 operands. 656 657 // Base only. 658 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 659 !CurDAG->isBaseWithConstantOffset(N)) { 660 if (N.getOpcode() == ISD::FrameIndex) { 661 // Match frame index. 
662 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 663 Base = CurDAG->getTargetFrameIndex( 664 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 665 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 666 return true; 667 } 668 669 if (N.getOpcode() == ARMISD::Wrapper && 670 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 671 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 672 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 673 Base = N.getOperand(0); 674 } else 675 Base = N; 676 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 677 return true; 678 } 679 680 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 681 int RHSC = (int)RHS->getSExtValue(); 682 if (N.getOpcode() == ISD::SUB) 683 RHSC = -RHSC; 684 685 if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits 686 Base = N.getOperand(0); 687 if (Base.getOpcode() == ISD::FrameIndex) { 688 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 689 Base = CurDAG->getTargetFrameIndex( 690 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 691 } 692 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 693 return true; 694 } 695 } 696 697 // Base only. 698 Base = N; 699 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 700 return true; 701 } 702 703 704 705 bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, 706 SDValue &Opc) { 707 if (N.getOpcode() == ISD::MUL && 708 ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) { 709 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 710 // X * [3,5,9] -> X + X * [2,4,8] etc. 
711 int RHSC = (int)RHS->getZExtValue(); 712 if (RHSC & 1) { 713 RHSC = RHSC & ~1; 714 ARM_AM::AddrOpc AddSub = ARM_AM::add; 715 if (RHSC < 0) { 716 AddSub = ARM_AM::sub; 717 RHSC = - RHSC; 718 } 719 if (isPowerOf2_32(RHSC)) { 720 unsigned ShAmt = Log2_32(RHSC); 721 Base = Offset = N.getOperand(0); 722 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, 723 ARM_AM::lsl), 724 SDLoc(N), MVT::i32); 725 return true; 726 } 727 } 728 } 729 } 730 731 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 732 // ISD::OR that is equivalent to an ISD::ADD. 733 !CurDAG->isBaseWithConstantOffset(N)) 734 return false; 735 736 // Leave simple R +/- imm12 operands for LDRi12 737 if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) { 738 int RHSC; 739 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 740 -0x1000+1, 0x1000, RHSC)) // 12 bits. 741 return false; 742 } 743 744 // Otherwise this is R +/- [possibly shifted] R. 745 ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add; 746 ARM_AM::ShiftOpc ShOpcVal = 747 ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode()); 748 unsigned ShAmt = 0; 749 750 Base = N.getOperand(0); 751 Offset = N.getOperand(1); 752 753 if (ShOpcVal != ARM_AM::no_shift) { 754 // Check to see if the RHS of the shift is a constant, if not, we can't fold 755 // it. 756 if (ConstantSDNode *Sh = 757 dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) { 758 ShAmt = Sh->getZExtValue(); 759 if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt)) 760 Offset = N.getOperand(1).getOperand(0); 761 else { 762 ShAmt = 0; 763 ShOpcVal = ARM_AM::no_shift; 764 } 765 } else { 766 ShOpcVal = ARM_AM::no_shift; 767 } 768 } 769 770 // Try matching (R shl C) + (R). 
771 if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift && 772 !(Subtarget->isLikeA9() || Subtarget->isSwift() || 773 N.getOperand(0).hasOneUse())) { 774 ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode()); 775 if (ShOpcVal != ARM_AM::no_shift) { 776 // Check to see if the RHS of the shift is a constant, if not, we can't 777 // fold it. 778 if (ConstantSDNode *Sh = 779 dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) { 780 ShAmt = Sh->getZExtValue(); 781 if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) { 782 Offset = N.getOperand(0).getOperand(0); 783 Base = N.getOperand(1); 784 } else { 785 ShAmt = 0; 786 ShOpcVal = ARM_AM::no_shift; 787 } 788 } else { 789 ShOpcVal = ARM_AM::no_shift; 790 } 791 } 792 } 793 794 // If Offset is a multiply-by-constant and it's profitable to extract a shift 795 // and use it in a shifted operand do so. 796 if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) { 797 unsigned PowerOfTwo = 0; 798 SDValue NewMulConst; 799 if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) { 800 HandleSDNode Handle(Offset); 801 replaceDAGValue(Offset.getOperand(1), NewMulConst); 802 Offset = Handle.getValue(); 803 ShAmt = PowerOfTwo; 804 ShOpcVal = ARM_AM::lsl; 805 } 806 } 807 808 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 809 SDLoc(N), MVT::i32); 810 return true; 811 } 812 813 bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, 814 SDValue &Offset, SDValue &Opc) { 815 unsigned Opcode = Op->getOpcode(); 816 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 817 ? cast<LoadSDNode>(Op)->getAddressingMode() 818 : cast<StoreSDNode>(Op)->getAddressingMode(); 819 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 820 ? 
ARM_AM::add : ARM_AM::sub; 821 int Val; 822 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) 823 return false; 824 825 Offset = N; 826 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode()); 827 unsigned ShAmt = 0; 828 if (ShOpcVal != ARM_AM::no_shift) { 829 // Check to see if the RHS of the shift is a constant, if not, we can't fold 830 // it. 831 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 832 ShAmt = Sh->getZExtValue(); 833 if (isShifterOpProfitable(N, ShOpcVal, ShAmt)) 834 Offset = N.getOperand(0); 835 else { 836 ShAmt = 0; 837 ShOpcVal = ARM_AM::no_shift; 838 } 839 } else { 840 ShOpcVal = ARM_AM::no_shift; 841 } 842 } 843 844 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal), 845 SDLoc(N), MVT::i32); 846 return true; 847 } 848 849 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N, 850 SDValue &Offset, SDValue &Opc) { 851 unsigned Opcode = Op->getOpcode(); 852 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 853 ? cast<LoadSDNode>(Op)->getAddressingMode() 854 : cast<StoreSDNode>(Op)->getAddressingMode(); 855 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 856 ? ARM_AM::add : ARM_AM::sub; 857 int Val; 858 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 859 if (AddSub == ARM_AM::sub) Val *= -1; 860 Offset = CurDAG->getRegister(0, MVT::i32); 861 Opc = CurDAG->getTargetConstant(Val, SDLoc(Op), MVT::i32); 862 return true; 863 } 864 865 return false; 866 } 867 868 869 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, 870 SDValue &Offset, SDValue &Opc) { 871 unsigned Opcode = Op->getOpcode(); 872 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 873 ? cast<LoadSDNode>(Op)->getAddressingMode() 874 : cast<StoreSDNode>(Op)->getAddressingMode(); 875 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 876 ? 
ARM_AM::add : ARM_AM::sub; 877 int Val; 878 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits. 879 Offset = CurDAG->getRegister(0, MVT::i32); 880 Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val, 881 ARM_AM::no_shift), 882 SDLoc(Op), MVT::i32); 883 return true; 884 } 885 886 return false; 887 } 888 889 bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) { 890 Base = N; 891 return true; 892 } 893 894 bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, 895 SDValue &Base, SDValue &Offset, 896 SDValue &Opc) { 897 if (N.getOpcode() == ISD::SUB) { 898 // X - C is canonicalize to X + -C, no need to handle it here. 899 Base = N.getOperand(0); 900 Offset = N.getOperand(1); 901 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N), 902 MVT::i32); 903 return true; 904 } 905 906 if (!CurDAG->isBaseWithConstantOffset(N)) { 907 Base = N; 908 if (N.getOpcode() == ISD::FrameIndex) { 909 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 910 Base = CurDAG->getTargetFrameIndex( 911 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 912 } 913 Offset = CurDAG->getRegister(0, MVT::i32); 914 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 915 MVT::i32); 916 return true; 917 } 918 919 // If the RHS is +/- imm8, fold into addr mode. 920 int RHSC; 921 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1, 922 -256 + 1, 256, RHSC)) { // 8 bits. 
923 Base = N.getOperand(0); 924 if (Base.getOpcode() == ISD::FrameIndex) { 925 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 926 Base = CurDAG->getTargetFrameIndex( 927 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 928 } 929 Offset = CurDAG->getRegister(0, MVT::i32); 930 931 ARM_AM::AddrOpc AddSub = ARM_AM::add; 932 if (RHSC < 0) { 933 AddSub = ARM_AM::sub; 934 RHSC = -RHSC; 935 } 936 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N), 937 MVT::i32); 938 return true; 939 } 940 941 Base = N.getOperand(0); 942 Offset = N.getOperand(1); 943 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N), 944 MVT::i32); 945 return true; 946 } 947 948 bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N, 949 SDValue &Offset, SDValue &Opc) { 950 unsigned Opcode = Op->getOpcode(); 951 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 952 ? cast<LoadSDNode>(Op)->getAddressingMode() 953 : cast<StoreSDNode>(Op)->getAddressingMode(); 954 ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC) 955 ? ARM_AM::add : ARM_AM::sub; 956 int Val; 957 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 12 bits. 
958 Offset = CurDAG->getRegister(0, MVT::i32); 959 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op), 960 MVT::i32); 961 return true; 962 } 963 964 Offset = N; 965 Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op), 966 MVT::i32); 967 return true; 968 } 969 970 bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, 971 bool FP16) { 972 if (!CurDAG->isBaseWithConstantOffset(N)) { 973 Base = N; 974 if (N.getOpcode() == ISD::FrameIndex) { 975 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 976 Base = CurDAG->getTargetFrameIndex( 977 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 978 } else if (N.getOpcode() == ARMISD::Wrapper && 979 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 980 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 981 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 982 Base = N.getOperand(0); 983 } 984 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 985 SDLoc(N), MVT::i32); 986 return true; 987 } 988 989 // If the RHS is +/- imm8, fold into addr mode. 990 int RHSC; 991 const int Scale = FP16 ? 
2 : 4; 992 993 if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) { 994 Base = N.getOperand(0); 995 if (Base.getOpcode() == ISD::FrameIndex) { 996 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 997 Base = CurDAG->getTargetFrameIndex( 998 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 999 } 1000 1001 ARM_AM::AddrOpc AddSub = ARM_AM::add; 1002 if (RHSC < 0) { 1003 AddSub = ARM_AM::sub; 1004 RHSC = -RHSC; 1005 } 1006 1007 if (FP16) 1008 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC), 1009 SDLoc(N), MVT::i32); 1010 else 1011 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC), 1012 SDLoc(N), MVT::i32); 1013 1014 return true; 1015 } 1016 1017 Base = N; 1018 1019 if (FP16) 1020 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0), 1021 SDLoc(N), MVT::i32); 1022 else 1023 Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0), 1024 SDLoc(N), MVT::i32); 1025 1026 return true; 1027 } 1028 1029 bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, 1030 SDValue &Base, SDValue &Offset) { 1031 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false); 1032 } 1033 1034 bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N, 1035 SDValue &Base, SDValue &Offset) { 1036 return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true); 1037 } 1038 1039 bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr, 1040 SDValue &Align) { 1041 Addr = N; 1042 1043 unsigned Alignment = 0; 1044 1045 MemSDNode *MemN = cast<MemSDNode>(Parent); 1046 1047 if (isa<LSBaseSDNode>(MemN) || 1048 ((MemN->getOpcode() == ARMISD::VST1_UPD || 1049 MemN->getOpcode() == ARMISD::VLD1_UPD) && 1050 MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) { 1051 // This case occurs only for VLD1-lane/dup and VST1-lane instructions. 1052 // The maximum alignment is equal to the memory size being referenced. 
1053 unsigned MMOAlign = MemN->getAlignment(); 1054 unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8; 1055 if (MMOAlign >= MemSize && MemSize > 1) 1056 Alignment = MemSize; 1057 } else { 1058 // All other uses of addrmode6 are for intrinsics. For now just record 1059 // the raw alignment value; it will be refined later based on the legal 1060 // alignment operands for the intrinsic. 1061 Alignment = MemN->getAlignment(); 1062 } 1063 1064 Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32); 1065 return true; 1066 } 1067 1068 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N, 1069 SDValue &Offset) { 1070 LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op); 1071 ISD::MemIndexedMode AM = LdSt->getAddressingMode(); 1072 if (AM != ISD::POST_INC) 1073 return false; 1074 Offset = N; 1075 if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) { 1076 if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits()) 1077 Offset = CurDAG->getRegister(0, MVT::i32); 1078 } 1079 return true; 1080 } 1081 1082 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N, 1083 SDValue &Offset, SDValue &Label) { 1084 if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) { 1085 Offset = N.getOperand(0); 1086 SDValue N1 = N.getOperand(1); 1087 Label = CurDAG->getTargetConstant(cast<ConstantSDNode>(N1)->getZExtValue(), 1088 SDLoc(N), MVT::i32); 1089 return true; 1090 } 1091 1092 return false; 1093 } 1094 1095 1096 //===----------------------------------------------------------------------===// 1097 // Thumb Addressing Modes 1098 //===----------------------------------------------------------------------===// 1099 1100 static bool shouldUseZeroOffsetLdSt(SDValue N) { 1101 // Negative numbers are difficult to materialise in thumb1. If we are 1102 // selecting the add of a negative, instead try to select ri with a zero 1103 // offset, so create the add node directly which will become a sub. 
1104 if (N.getOpcode() != ISD::ADD) 1105 return false; 1106 1107 // Look for an imm which is not legal for ld/st, but is legal for sub. 1108 if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1))) 1109 return C->getSExtValue() < 0 && C->getSExtValue() >= -255; 1110 1111 return false; 1112 } 1113 1114 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, 1115 SDValue &Offset) { 1116 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) { 1117 ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N); 1118 if (!NC || !NC->isNullValue()) 1119 return false; 1120 1121 Base = Offset = N; 1122 return true; 1123 } 1124 1125 Base = N.getOperand(0); 1126 Offset = N.getOperand(1); 1127 return true; 1128 } 1129 1130 bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base, 1131 SDValue &Offset) { 1132 if (shouldUseZeroOffsetLdSt(N)) 1133 return false; // Select ri instead 1134 return SelectThumbAddrModeRRSext(N, Base, Offset); 1135 } 1136 1137 bool 1138 ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, 1139 SDValue &Base, SDValue &OffImm) { 1140 if (shouldUseZeroOffsetLdSt(N)) { 1141 Base = N; 1142 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1143 return true; 1144 } 1145 1146 if (!CurDAG->isBaseWithConstantOffset(N)) { 1147 if (N.getOpcode() == ISD::ADD) { 1148 return false; // We want to select register offset instead 1149 } else if (N.getOpcode() == ARMISD::Wrapper && 1150 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1151 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1152 N.getOperand(0).getOpcode() != ISD::TargetConstantPool && 1153 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1154 Base = N.getOperand(0); 1155 } else { 1156 Base = N; 1157 } 1158 1159 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1160 return true; 1161 } 1162 1163 // If the RHS is + imm5 * scale, fold into addr mode. 
1164 int RHSC; 1165 if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) { 1166 Base = N.getOperand(0); 1167 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1168 return true; 1169 } 1170 1171 // Offset is too large, so use register offset instead. 1172 return false; 1173 } 1174 1175 bool 1176 ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base, 1177 SDValue &OffImm) { 1178 return SelectThumbAddrModeImm5S(N, 4, Base, OffImm); 1179 } 1180 1181 bool 1182 ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base, 1183 SDValue &OffImm) { 1184 return SelectThumbAddrModeImm5S(N, 2, Base, OffImm); 1185 } 1186 1187 bool 1188 ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base, 1189 SDValue &OffImm) { 1190 return SelectThumbAddrModeImm5S(N, 1, Base, OffImm); 1191 } 1192 1193 bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, 1194 SDValue &Base, SDValue &OffImm) { 1195 if (N.getOpcode() == ISD::FrameIndex) { 1196 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1197 // Only multiples of 4 are allowed for the offset, so the frame object 1198 // alignment must be at least 4. 1199 MachineFrameInfo &MFI = MF->getFrameInfo(); 1200 if (MFI.getObjectAlign(FI) < Align(4)) 1201 MFI.setObjectAlignment(FI, Align(4)); 1202 Base = CurDAG->getTargetFrameIndex( 1203 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1204 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1205 return true; 1206 } 1207 1208 if (!CurDAG->isBaseWithConstantOffset(N)) 1209 return false; 1210 1211 if (N.getOperand(0).getOpcode() == ISD::FrameIndex) { 1212 // If the RHS is + imm8 * scale, fold into addr mode. 1213 int RHSC; 1214 if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) { 1215 Base = N.getOperand(0); 1216 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1217 // Make sure the offset is inside the object, or we might fail to 1218 // allocate an emergency spill slot. 
(An out-of-range access is UB, but 1219 // it could show up anyway.) 1220 MachineFrameInfo &MFI = MF->getFrameInfo(); 1221 if (RHSC * 4 < MFI.getObjectSize(FI)) { 1222 // For LHS+RHS to result in an offset that's a multiple of 4 the object 1223 // indexed by the LHS must be 4-byte aligned. 1224 if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4)) 1225 MFI.setObjectAlignment(FI, Align(4)); 1226 if (MFI.getObjectAlign(FI) >= Align(4)) { 1227 Base = CurDAG->getTargetFrameIndex( 1228 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1229 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1230 return true; 1231 } 1232 } 1233 } 1234 } 1235 1236 return false; 1237 } 1238 1239 template <unsigned Shift> 1240 bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base, 1241 SDValue &OffImm) { 1242 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1243 int RHSC; 1244 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1245 RHSC)) { 1246 Base = N.getOperand(0); 1247 if (N.getOpcode() == ISD::SUB) 1248 RHSC = -RHSC; 1249 OffImm = 1250 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1251 return true; 1252 } 1253 } 1254 1255 // Base only. 1256 Base = N; 1257 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1258 return true; 1259 } 1260 1261 1262 //===----------------------------------------------------------------------===// 1263 // Thumb 2 Addressing Modes 1264 //===----------------------------------------------------------------------===// 1265 1266 1267 bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, 1268 SDValue &Base, SDValue &OffImm) { 1269 // Match simple R + imm12 operands. 1270 1271 // Base only. 1272 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1273 !CurDAG->isBaseWithConstantOffset(N)) { 1274 if (N.getOpcode() == ISD::FrameIndex) { 1275 // Match frame index. 
1276 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 1277 Base = CurDAG->getTargetFrameIndex( 1278 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1279 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1280 return true; 1281 } 1282 1283 if (N.getOpcode() == ARMISD::Wrapper && 1284 N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress && 1285 N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol && 1286 N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) { 1287 Base = N.getOperand(0); 1288 if (Base.getOpcode() == ISD::TargetConstantPool) 1289 return false; // We want to select t2LDRpci instead. 1290 } else 1291 Base = N; 1292 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1293 return true; 1294 } 1295 1296 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1297 if (SelectT2AddrModeImm8(N, Base, OffImm)) 1298 // Let t2LDRi8 handle (R - imm8). 1299 return false; 1300 1301 int RHSC = (int)RHS->getZExtValue(); 1302 if (N.getOpcode() == ISD::SUB) 1303 RHSC = -RHSC; 1304 1305 if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned) 1306 Base = N.getOperand(0); 1307 if (Base.getOpcode() == ISD::FrameIndex) { 1308 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1309 Base = CurDAG->getTargetFrameIndex( 1310 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1311 } 1312 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1313 return true; 1314 } 1315 } 1316 1317 // Base only. 
1318 Base = N; 1319 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1320 return true; 1321 } 1322 1323 template <unsigned Shift> 1324 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base, 1325 SDValue &OffImm) { 1326 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1327 int RHSC; 1328 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) { 1329 Base = N.getOperand(0); 1330 if (Base.getOpcode() == ISD::FrameIndex) { 1331 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1332 Base = CurDAG->getTargetFrameIndex( 1333 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1334 } 1335 1336 if (N.getOpcode() == ISD::SUB) 1337 RHSC = -RHSC; 1338 OffImm = 1339 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1340 return true; 1341 } 1342 } 1343 1344 // Base only. 1345 Base = N; 1346 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1347 return true; 1348 } 1349 1350 bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, 1351 SDValue &Base, SDValue &OffImm) { 1352 // Match simple R - imm8 operands. 
1353 if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB && 1354 !CurDAG->isBaseWithConstantOffset(N)) 1355 return false; 1356 1357 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1358 int RHSC = (int)RHS->getSExtValue(); 1359 if (N.getOpcode() == ISD::SUB) 1360 RHSC = -RHSC; 1361 1362 if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative) 1363 Base = N.getOperand(0); 1364 if (Base.getOpcode() == ISD::FrameIndex) { 1365 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1366 Base = CurDAG->getTargetFrameIndex( 1367 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1368 } 1369 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32); 1370 return true; 1371 } 1372 } 1373 1374 return false; 1375 } 1376 1377 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N, 1378 SDValue &OffImm){ 1379 unsigned Opcode = Op->getOpcode(); 1380 ISD::MemIndexedMode AM = (Opcode == ISD::LOAD) 1381 ? cast<LoadSDNode>(Op)->getAddressingMode() 1382 : cast<StoreSDNode>(Op)->getAddressingMode(); 1383 int RHSC; 1384 if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits. 1385 OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1386 ? 
CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i32) 1387 : CurDAG->getTargetConstant(-RHSC, SDLoc(N), MVT::i32); 1388 return true; 1389 } 1390 1391 return false; 1392 } 1393 1394 template <unsigned Shift> 1395 bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base, 1396 SDValue &OffImm) { 1397 if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) { 1398 int RHSC; 1399 if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80, 1400 RHSC)) { 1401 Base = N.getOperand(0); 1402 if (Base.getOpcode() == ISD::FrameIndex) { 1403 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1404 Base = CurDAG->getTargetFrameIndex( 1405 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1406 } 1407 1408 if (N.getOpcode() == ISD::SUB) 1409 RHSC = -RHSC; 1410 OffImm = 1411 CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32); 1412 return true; 1413 } 1414 } 1415 1416 // Base only. 1417 Base = N; 1418 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1419 return true; 1420 } 1421 1422 template <unsigned Shift> 1423 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1424 SDValue &OffImm) { 1425 return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift); 1426 } 1427 1428 bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, 1429 SDValue &OffImm, 1430 unsigned Shift) { 1431 unsigned Opcode = Op->getOpcode(); 1432 ISD::MemIndexedMode AM; 1433 switch (Opcode) { 1434 case ISD::LOAD: 1435 AM = cast<LoadSDNode>(Op)->getAddressingMode(); 1436 break; 1437 case ISD::STORE: 1438 AM = cast<StoreSDNode>(Op)->getAddressingMode(); 1439 break; 1440 case ISD::MLOAD: 1441 AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode(); 1442 break; 1443 case ISD::MSTORE: 1444 AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode(); 1445 break; 1446 default: 1447 llvm_unreachable("Unexpected Opcode for Imm7Offset"); 1448 } 1449 1450 int RHSC; 1451 // 7 bit constant, shifted by Shift. 
1452 if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) { 1453 OffImm = 1454 ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC)) 1455 ? CurDAG->getTargetConstant(RHSC * (1 << Shift), SDLoc(N), MVT::i32) 1456 : CurDAG->getTargetConstant(-RHSC * (1 << Shift), SDLoc(N), 1457 MVT::i32); 1458 return true; 1459 } 1460 return false; 1461 } 1462 1463 template <int Min, int Max> 1464 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) { 1465 int Val; 1466 if (isScaledConstantInRange(N, 1, Min, Max, Val)) { 1467 OffImm = CurDAG->getTargetConstant(Val, SDLoc(N), MVT::i32); 1468 return true; 1469 } 1470 return false; 1471 } 1472 1473 bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, 1474 SDValue &Base, 1475 SDValue &OffReg, SDValue &ShImm) { 1476 // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12. 1477 if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) 1478 return false; 1479 1480 // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8. 1481 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 1482 int RHSC = (int)RHS->getZExtValue(); 1483 if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned) 1484 return false; 1485 else if (RHSC < 0 && RHSC >= -255) // 8 bits 1486 return false; 1487 } 1488 1489 // Look for (R + R) or (R + (R << [1,2,3])). 1490 unsigned ShAmt = 0; 1491 Base = N.getOperand(0); 1492 OffReg = N.getOperand(1); 1493 1494 // Swap if it is ((R << c) + R). 1495 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode()); 1496 if (ShOpcVal != ARM_AM::lsl) { 1497 ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode()); 1498 if (ShOpcVal == ARM_AM::lsl) 1499 std::swap(Base, OffReg); 1500 } 1501 1502 if (ShOpcVal == ARM_AM::lsl) { 1503 // Check to see if the RHS of the shift is a constant, if not, we can't fold 1504 // it. 
1505 if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) { 1506 ShAmt = Sh->getZExtValue(); 1507 if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt)) 1508 OffReg = OffReg.getOperand(0); 1509 else { 1510 ShAmt = 0; 1511 } 1512 } 1513 } 1514 1515 // If OffReg is a multiply-by-constant and it's profitable to extract a shift 1516 // and use it in a shifted operand do so. 1517 if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) { 1518 unsigned PowerOfTwo = 0; 1519 SDValue NewMulConst; 1520 if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) { 1521 HandleSDNode Handle(OffReg); 1522 replaceDAGValue(OffReg.getOperand(1), NewMulConst); 1523 OffReg = Handle.getValue(); 1524 ShAmt = PowerOfTwo; 1525 } 1526 } 1527 1528 ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32); 1529 1530 return true; 1531 } 1532 1533 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, 1534 SDValue &OffImm) { 1535 // This *must* succeed since it's used for the irreplaceable ldrex and strex 1536 // instructions. 1537 Base = N; 1538 OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32); 1539 1540 if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) 1541 return true; 1542 1543 ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1)); 1544 if (!RHS) 1545 return true; 1546 1547 uint32_t RHSC = (int)RHS->getZExtValue(); 1548 if (RHSC > 1020 || RHSC % 4 != 0) 1549 return true; 1550 1551 Base = N.getOperand(0); 1552 if (Base.getOpcode() == ISD::FrameIndex) { 1553 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 1554 Base = CurDAG->getTargetFrameIndex( 1555 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 1556 } 1557 1558 OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32); 1559 return true; 1560 } 1561 1562 //===--------------------------------------------------------------------===// 1563 1564 /// getAL - Returns a ARMCC::AL immediate node. 
1565 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) { 1566 return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32); 1567 } 1568 1569 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) { 1570 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1571 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp}); 1572 } 1573 1574 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) { 1575 LoadSDNode *LD = cast<LoadSDNode>(N); 1576 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1577 if (AM == ISD::UNINDEXED) 1578 return false; 1579 1580 EVT LoadedVT = LD->getMemoryVT(); 1581 SDValue Offset, AMOpc; 1582 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1583 unsigned Opcode = 0; 1584 bool Match = false; 1585 if (LoadedVT == MVT::i32 && isPre && 1586 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1587 Opcode = ARM::LDR_PRE_IMM; 1588 Match = true; 1589 } else if (LoadedVT == MVT::i32 && !isPre && 1590 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1591 Opcode = ARM::LDR_POST_IMM; 1592 Match = true; 1593 } else if (LoadedVT == MVT::i32 && 1594 SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1595 Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG; 1596 Match = true; 1597 1598 } else if (LoadedVT == MVT::i16 && 1599 SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1600 Match = true; 1601 Opcode = (LD->getExtensionType() == ISD::SEXTLOAD) 1602 ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST) 1603 : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST); 1604 } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) { 1605 if (LD->getExtensionType() == ISD::SEXTLOAD) { 1606 if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) { 1607 Match = true; 1608 Opcode = isPre ? 
ARM::LDRSB_PRE : ARM::LDRSB_POST; 1609 } 1610 } else { 1611 if (isPre && 1612 SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) { 1613 Match = true; 1614 Opcode = ARM::LDRB_PRE_IMM; 1615 } else if (!isPre && 1616 SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) { 1617 Match = true; 1618 Opcode = ARM::LDRB_POST_IMM; 1619 } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) { 1620 Match = true; 1621 Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG; 1622 } 1623 } 1624 } 1625 1626 if (Match) { 1627 if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) { 1628 SDValue Chain = LD->getChain(); 1629 SDValue Base = LD->getBasePtr(); 1630 SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)), 1631 CurDAG->getRegister(0, MVT::i32), Chain }; 1632 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1633 MVT::Other, Ops); 1634 transferMemOperands(N, New); 1635 ReplaceNode(N, New); 1636 return true; 1637 } else { 1638 SDValue Chain = LD->getChain(); 1639 SDValue Base = LD->getBasePtr(); 1640 SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)), 1641 CurDAG->getRegister(0, MVT::i32), Chain }; 1642 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, 1643 MVT::Other, Ops); 1644 transferMemOperands(N, New); 1645 ReplaceNode(N, New); 1646 return true; 1647 } 1648 } 1649 1650 return false; 1651 } 1652 1653 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) { 1654 LoadSDNode *LD = cast<LoadSDNode>(N); 1655 EVT LoadedVT = LD->getMemoryVT(); 1656 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1657 if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD || 1658 LoadedVT.getSimpleVT().SimpleTy != MVT::i32) 1659 return false; 1660 1661 auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset()); 1662 if (!COffs || COffs->getZExtValue() != 4) 1663 return false; 1664 1665 // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}. 
1666 // The encoding of LDM is not how the rest of ISel expects a post-inc load to 1667 // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after 1668 // ISel. 1669 SDValue Chain = LD->getChain(); 1670 SDValue Base = LD->getBasePtr(); 1671 SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)), 1672 CurDAG->getRegister(0, MVT::i32), Chain }; 1673 SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32, 1674 MVT::i32, MVT::Other, Ops); 1675 transferMemOperands(N, New); 1676 ReplaceNode(N, New); 1677 return true; 1678 } 1679 1680 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) { 1681 LoadSDNode *LD = cast<LoadSDNode>(N); 1682 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1683 if (AM == ISD::UNINDEXED) 1684 return false; 1685 1686 EVT LoadedVT = LD->getMemoryVT(); 1687 bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD; 1688 SDValue Offset; 1689 bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC); 1690 unsigned Opcode = 0; 1691 bool Match = false; 1692 if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) { 1693 switch (LoadedVT.getSimpleVT().SimpleTy) { 1694 case MVT::i32: 1695 Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST; 1696 break; 1697 case MVT::i16: 1698 if (isSExtLd) 1699 Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST; 1700 else 1701 Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST; 1702 break; 1703 case MVT::i8: 1704 case MVT::i1: 1705 if (isSExtLd) 1706 Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST; 1707 else 1708 Opcode = isPre ? 
ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
      break;
    default:
      return false;
    }
    Match = true;
  }

  if (Match) {
    SDValue Chain = LD->getChain();
    SDValue Base = LD->getBasePtr();
    SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
                     CurDAG->getRegister(0, MVT::i32), Chain };
    SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
                                         MVT::Other, Ops);
    transferMemOperands(N, New);
    ReplaceNode(N, New);
    return true;
  }

  return false;
}

/// Try to select an MVE pre- or post-indexed vector load for \p N.
///
/// \p N must be a LoadSDNode or a MaskedLoadSDNode; any other node reaches
/// llvm_unreachable. A plain load is emitted unpredicated (ARMVCC::None with
/// register 0), a masked load is emitted with ARMVCC::Then and the load's mask
/// as the predicate register.
///
/// \returns false, leaving \p N untouched, when the load is unindexed, its
/// memory type is not a vector, or no opcode matches the combination of
/// alignment, memory type and offset. Otherwise \p N is replaced by the new
/// machine node, removed from the DAG, and true is returned.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  // Both node kinds expose the same accessors; only the predicate differs.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // The first three cases are the narrow memory types (v4i16, v8i8, v4i8) and
  // are the only ones that distinguish signed from unsigned opcodes. The
  // remaining cases try word, halfword and byte opcodes in that order.
  // NOTE(review): the last argument of SelectT2AddrModeImm7Offset is
  // presumably the shift applied to the 7-bit immediate - confirm.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base, NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32), PredReg,
                   Chain};
  // The machine node produces (i32, loaded value, chain) whereas N produces
  // (loaded value, i32, chain), so results 0 and 1 are swapped when the uses
  // are rewired below.
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}

/// Form a GPRPair pseudo register from a pair of GPR regs.
///
/// Builds a REG_SEQUENCE of type \p VT in the GPRPair register class with
/// \p V0 placed at sub-register index gsub_0 and \p V1 at gsub_1. The debug
/// location is taken from the node that defines \p V0.
SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
  // REG_SEQUENCE operands: register class id, then (value, subreg index) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a D register from a pair of S registers.
///
/// Builds a REG_SEQUENCE of type \p VT in the DPR_VFP2 register class with
/// \p V0 placed at sub-register index ssub_0 and \p V1 at ssub_1. The debug
/// location is taken from the node that defines \p V0.
SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
  SDLoc dl(V0.getNode());
  SDValue RegClass =
    CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
  SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
  SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
  // REG_SEQUENCE operands: register class id, then (value, subreg index) pairs.
  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
  return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
}

/// Form a quad register from a pair of D registers.
1851 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1852 SDLoc dl(V0.getNode()); 1853 SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl, 1854 MVT::i32); 1855 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1856 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1857 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1858 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1859 } 1860 1861 /// Form 4 consecutive D registers from a pair of Q registers. 1862 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { 1863 SDLoc dl(V0.getNode()); 1864 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1865 MVT::i32); 1866 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1867 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1868 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 }; 1869 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1870 } 1871 1872 /// Form 4 consecutive S registers. 1873 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, 1874 SDValue V2, SDValue V3) { 1875 SDLoc dl(V0.getNode()); 1876 SDValue RegClass = 1877 CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32); 1878 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32); 1879 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32); 1880 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32); 1881 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32); 1882 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1883 V2, SubReg2, V3, SubReg3 }; 1884 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1885 } 1886 1887 /// Form 4 consecutive D registers. 
1888 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, 1889 SDValue V2, SDValue V3) { 1890 SDLoc dl(V0.getNode()); 1891 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl, 1892 MVT::i32); 1893 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32); 1894 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32); 1895 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32); 1896 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32); 1897 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1898 V2, SubReg2, V3, SubReg3 }; 1899 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1900 } 1901 1902 /// Form 4 consecutive Q registers. 1903 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, 1904 SDValue V2, SDValue V3) { 1905 SDLoc dl(V0.getNode()); 1906 SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl, 1907 MVT::i32); 1908 SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32); 1909 SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32); 1910 SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32); 1911 SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32); 1912 const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1, 1913 V2, SubReg2, V3, SubReg3 }; 1914 return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops); 1915 } 1916 1917 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand 1918 /// of a NEON VLD or VST instruction. The supported values depend on the 1919 /// number of registers being loaded. 
1920 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl, 1921 unsigned NumVecs, bool is64BitVector) { 1922 unsigned NumRegs = NumVecs; 1923 if (!is64BitVector && NumVecs < 3) 1924 NumRegs *= 2; 1925 1926 unsigned Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 1927 if (Alignment >= 32 && NumRegs == 4) 1928 Alignment = 32; 1929 else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4)) 1930 Alignment = 16; 1931 else if (Alignment >= 8) 1932 Alignment = 8; 1933 else 1934 Alignment = 0; 1935 1936 return CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 1937 } 1938 1939 static bool isVLDfixed(unsigned Opc) 1940 { 1941 switch (Opc) { 1942 default: return false; 1943 case ARM::VLD1d8wb_fixed : return true; 1944 case ARM::VLD1d16wb_fixed : return true; 1945 case ARM::VLD1d64Qwb_fixed : return true; 1946 case ARM::VLD1d32wb_fixed : return true; 1947 case ARM::VLD1d64wb_fixed : return true; 1948 case ARM::VLD1d64TPseudoWB_fixed : return true; 1949 case ARM::VLD1d64QPseudoWB_fixed : return true; 1950 case ARM::VLD1q8wb_fixed : return true; 1951 case ARM::VLD1q16wb_fixed : return true; 1952 case ARM::VLD1q32wb_fixed : return true; 1953 case ARM::VLD1q64wb_fixed : return true; 1954 case ARM::VLD1DUPd8wb_fixed : return true; 1955 case ARM::VLD1DUPd16wb_fixed : return true; 1956 case ARM::VLD1DUPd32wb_fixed : return true; 1957 case ARM::VLD1DUPq8wb_fixed : return true; 1958 case ARM::VLD1DUPq16wb_fixed : return true; 1959 case ARM::VLD1DUPq32wb_fixed : return true; 1960 case ARM::VLD2d8wb_fixed : return true; 1961 case ARM::VLD2d16wb_fixed : return true; 1962 case ARM::VLD2d32wb_fixed : return true; 1963 case ARM::VLD2q8PseudoWB_fixed : return true; 1964 case ARM::VLD2q16PseudoWB_fixed : return true; 1965 case ARM::VLD2q32PseudoWB_fixed : return true; 1966 case ARM::VLD2DUPd8wb_fixed : return true; 1967 case ARM::VLD2DUPd16wb_fixed : return true; 1968 case ARM::VLD2DUPd32wb_fixed : return true; 1969 } 1970 } 1971 1972 static bool 
isVSTfixed(unsigned Opc) 1973 { 1974 switch (Opc) { 1975 default: return false; 1976 case ARM::VST1d8wb_fixed : return true; 1977 case ARM::VST1d16wb_fixed : return true; 1978 case ARM::VST1d32wb_fixed : return true; 1979 case ARM::VST1d64wb_fixed : return true; 1980 case ARM::VST1q8wb_fixed : return true; 1981 case ARM::VST1q16wb_fixed : return true; 1982 case ARM::VST1q32wb_fixed : return true; 1983 case ARM::VST1q64wb_fixed : return true; 1984 case ARM::VST1d8TPseudoWB_fixed : return true; 1985 case ARM::VST1d16TPseudoWB_fixed : return true; 1986 case ARM::VST1d32TPseudoWB_fixed : return true; 1987 case ARM::VST1d64TPseudoWB_fixed : return true; 1988 case ARM::VST1d8QPseudoWB_fixed : return true; 1989 case ARM::VST1d16QPseudoWB_fixed : return true; 1990 case ARM::VST1d32QPseudoWB_fixed : return true; 1991 case ARM::VST1d64QPseudoWB_fixed : return true; 1992 case ARM::VST2d8wb_fixed : return true; 1993 case ARM::VST2d16wb_fixed : return true; 1994 case ARM::VST2d32wb_fixed : return true; 1995 case ARM::VST2q8PseudoWB_fixed : return true; 1996 case ARM::VST2q16PseudoWB_fixed : return true; 1997 case ARM::VST2q32PseudoWB_fixed : return true; 1998 } 1999 } 2000 2001 // Get the register stride update opcode of a VLD/VST instruction that 2002 // is otherwise equivalent to the given fixed stride updating instruction. 
2003 static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { 2004 assert((isVLDfixed(Opc) || isVSTfixed(Opc)) 2005 && "Incorrect fixed stride updating instruction."); 2006 switch (Opc) { 2007 default: break; 2008 case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register; 2009 case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register; 2010 case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register; 2011 case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register; 2012 case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register; 2013 case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register; 2014 case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register; 2015 case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register; 2016 case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register; 2017 case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register; 2018 case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register; 2019 case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register; 2020 case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register; 2021 case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register; 2022 case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register; 2023 case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register; 2024 case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register; 2025 case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register; 2026 2027 case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register; 2028 case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register; 2029 case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register; 2030 case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register; 2031 case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register; 2032 case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register; 2033 case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register; 2034 case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register; 2035 case ARM::VST1d8TPseudoWB_fixed: return 
ARM::VST1d8TPseudoWB_register; 2036 case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register; 2037 case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register; 2038 case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register; 2039 case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register; 2040 case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register; 2041 case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register; 2042 case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register; 2043 2044 case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register; 2045 case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register; 2046 case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register; 2047 case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register; 2048 case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register; 2049 case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register; 2050 2051 case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register; 2052 case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register; 2053 case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register; 2054 case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register; 2055 case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register; 2056 case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register; 2057 2058 case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register; 2059 case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register; 2060 case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register; 2061 } 2062 return Opc; // If not one we handle, return it unchanged. 2063 } 2064 2065 /// Returns true if the given increment is a Constant known to be equal to the 2066 /// access size performed by a NEON load/store. This means the "[rN]!" form can 2067 /// be used. 
2068 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) { 2069 auto C = dyn_cast<ConstantSDNode>(Inc); 2070 return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs; 2071 } 2072 2073 void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, 2074 const uint16_t *DOpcodes, 2075 const uint16_t *QOpcodes0, 2076 const uint16_t *QOpcodes1) { 2077 assert(Subtarget->hasNEON()); 2078 assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); 2079 SDLoc dl(N); 2080 2081 SDValue MemAddr, Align; 2082 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2083 // nodes are not intrinsics. 2084 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2085 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2086 return; 2087 2088 SDValue Chain = N->getOperand(0); 2089 EVT VT = N->getValueType(0); 2090 bool is64BitVector = VT.is64BitVector(); 2091 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2092 2093 unsigned OpcodeIndex; 2094 switch (VT.getSimpleVT().SimpleTy) { 2095 default: llvm_unreachable("unhandled vld type"); 2096 // Double-register operations: 2097 case MVT::v8i8: OpcodeIndex = 0; break; 2098 case MVT::v4f16: 2099 case MVT::v4bf16: 2100 case MVT::v4i16: OpcodeIndex = 1; break; 2101 case MVT::v2f32: 2102 case MVT::v2i32: OpcodeIndex = 2; break; 2103 case MVT::v1i64: OpcodeIndex = 3; break; 2104 // Quad-register operations: 2105 case MVT::v16i8: OpcodeIndex = 0; break; 2106 case MVT::v8f16: 2107 case MVT::v8bf16: 2108 case MVT::v8i16: OpcodeIndex = 1; break; 2109 case MVT::v4f32: 2110 case MVT::v4i32: OpcodeIndex = 2; break; 2111 case MVT::v2f64: 2112 case MVT::v2i64: OpcodeIndex = 3; break; 2113 } 2114 2115 EVT ResTy; 2116 if (NumVecs == 1) 2117 ResTy = VT; 2118 else { 2119 unsigned ResTyElts = (NumVecs == 3) ? 
4 : NumVecs; 2120 if (!is64BitVector) 2121 ResTyElts *= 2; 2122 ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2123 } 2124 std::vector<EVT> ResTys; 2125 ResTys.push_back(ResTy); 2126 if (isUpdating) 2127 ResTys.push_back(MVT::i32); 2128 ResTys.push_back(MVT::Other); 2129 2130 SDValue Pred = getAL(CurDAG, dl); 2131 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2132 SDNode *VLd; 2133 SmallVector<SDValue, 7> Ops; 2134 2135 // Double registers and VLD1/VLD2 quad registers are directly supported. 2136 if (is64BitVector || NumVecs <= 2) { 2137 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2138 QOpcodes0[OpcodeIndex]); 2139 Ops.push_back(MemAddr); 2140 Ops.push_back(Align); 2141 if (isUpdating) { 2142 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2143 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2144 if (!IsImmUpdate) { 2145 // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so 2146 // check for the opcode rather than the number of vector elements. 2147 if (isVLDfixed(Opc)) 2148 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2149 Ops.push_back(Inc); 2150 // VLD1/VLD2 fixed increment does not need Reg0 so only include it in 2151 // the operands if not such an opcode. 2152 } else if (!isVLDfixed(Opc)) 2153 Ops.push_back(Reg0); 2154 } 2155 Ops.push_back(Pred); 2156 Ops.push_back(Reg0); 2157 Ops.push_back(Chain); 2158 VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2159 2160 } else { 2161 // Otherwise, quad registers are loaded with two separate instructions, 2162 // where one loads the even registers and the other loads the odd registers. 2163 EVT AddrTy = MemAddr.getValueType(); 2164 2165 // Load the even subregs. This is always an updating load, so that it 2166 // provides the address to the second load for the odd subregs. 
2167 SDValue ImplDef = 2168 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 2169 const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain }; 2170 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2171 ResTy, AddrTy, MVT::Other, OpsA); 2172 Chain = SDValue(VLdA, 2); 2173 2174 // Load the odd subregs. 2175 Ops.push_back(SDValue(VLdA, 1)); 2176 Ops.push_back(Align); 2177 if (isUpdating) { 2178 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2179 assert(isa<ConstantSDNode>(Inc.getNode()) && 2180 "only constant post-increment update allowed for VLD3/4"); 2181 (void)Inc; 2182 Ops.push_back(Reg0); 2183 } 2184 Ops.push_back(SDValue(VLdA, 0)); 2185 Ops.push_back(Pred); 2186 Ops.push_back(Reg0); 2187 Ops.push_back(Chain); 2188 VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops); 2189 } 2190 2191 // Transfer memoperands. 2192 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2193 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp}); 2194 2195 if (NumVecs == 1) { 2196 ReplaceNode(N, VLd); 2197 return; 2198 } 2199 2200 // Extract out the subregisters. 2201 SDValue SuperReg = SDValue(VLd, 0); 2202 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2203 ARM::qsub_3 == ARM::qsub_0 + 3, 2204 "Unexpected subreg numbering"); 2205 unsigned Sub0 = (is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0); 2206 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2207 ReplaceUses(SDValue(N, Vec), 2208 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2209 ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); 2210 if (isUpdating) 2211 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); 2212 CurDAG->RemoveDeadNode(N); 2213 } 2214 2215 void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, 2216 const uint16_t *DOpcodes, 2217 const uint16_t *QOpcodes0, 2218 const uint16_t *QOpcodes1) { 2219 assert(Subtarget->hasNEON()); 2220 assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); 2221 SDLoc dl(N); 2222 2223 SDValue MemAddr, Align; 2224 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2225 // nodes are not intrinsics. 2226 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2227 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2228 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2229 return; 2230 2231 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2232 2233 SDValue Chain = N->getOperand(0); 2234 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2235 bool is64BitVector = VT.is64BitVector(); 2236 Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector); 2237 2238 unsigned OpcodeIndex; 2239 switch (VT.getSimpleVT().SimpleTy) { 2240 default: llvm_unreachable("unhandled vst type"); 2241 // Double-register operations: 2242 case MVT::v8i8: OpcodeIndex = 0; break; 2243 case MVT::v4f16: 2244 case MVT::v4bf16: 2245 case MVT::v4i16: OpcodeIndex = 1; break; 2246 case MVT::v2f32: 2247 case MVT::v2i32: OpcodeIndex = 2; break; 2248 case MVT::v1i64: OpcodeIndex = 3; break; 2249 // Quad-register operations: 2250 case MVT::v16i8: OpcodeIndex = 0; break; 2251 case MVT::v8f16: 2252 case MVT::v8bf16: 2253 case MVT::v8i16: OpcodeIndex = 1; break; 2254 case MVT::v4f32: 2255 case MVT::v4i32: OpcodeIndex = 2; break; 2256 case MVT::v2f64: 2257 case MVT::v2i64: 
OpcodeIndex = 3; break; 2258 } 2259 2260 std::vector<EVT> ResTys; 2261 if (isUpdating) 2262 ResTys.push_back(MVT::i32); 2263 ResTys.push_back(MVT::Other); 2264 2265 SDValue Pred = getAL(CurDAG, dl); 2266 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2267 SmallVector<SDValue, 7> Ops; 2268 2269 // Double registers and VST1/VST2 quad registers are directly supported. 2270 if (is64BitVector || NumVecs <= 2) { 2271 SDValue SrcReg; 2272 if (NumVecs == 1) { 2273 SrcReg = N->getOperand(Vec0Idx); 2274 } else if (is64BitVector) { 2275 // Form a REG_SEQUENCE to force register allocation. 2276 SDValue V0 = N->getOperand(Vec0Idx + 0); 2277 SDValue V1 = N->getOperand(Vec0Idx + 1); 2278 if (NumVecs == 2) 2279 SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2280 else { 2281 SDValue V2 = N->getOperand(Vec0Idx + 2); 2282 // If it's a vst3, form a quad D-register and leave the last part as 2283 // an undef. 2284 SDValue V3 = (NumVecs == 3) 2285 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0) 2286 : N->getOperand(Vec0Idx + 3); 2287 SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2288 } 2289 } else { 2290 // Form a QQ register. 2291 SDValue Q0 = N->getOperand(Vec0Idx); 2292 SDValue Q1 = N->getOperand(Vec0Idx + 1); 2293 SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0); 2294 } 2295 2296 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2297 QOpcodes0[OpcodeIndex]); 2298 Ops.push_back(MemAddr); 2299 Ops.push_back(Align); 2300 if (isUpdating) { 2301 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2302 bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs); 2303 if (!IsImmUpdate) { 2304 // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so 2305 // check for the opcode rather than the number of vector elements. 
2306 if (isVSTfixed(Opc)) 2307 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2308 Ops.push_back(Inc); 2309 } 2310 // VST1/VST2 fixed increment does not need Reg0 so only include it in 2311 // the operands if not such an opcode. 2312 else if (!isVSTfixed(Opc)) 2313 Ops.push_back(Reg0); 2314 } 2315 Ops.push_back(SrcReg); 2316 Ops.push_back(Pred); 2317 Ops.push_back(Reg0); 2318 Ops.push_back(Chain); 2319 SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2320 2321 // Transfer memoperands. 2322 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp}); 2323 2324 ReplaceNode(N, VSt); 2325 return; 2326 } 2327 2328 // Otherwise, quad registers are stored with two separate instructions, 2329 // where one stores the even registers and the other stores the odd registers. 2330 2331 // Form the QQQQ REG_SEQUENCE. 2332 SDValue V0 = N->getOperand(Vec0Idx + 0); 2333 SDValue V1 = N->getOperand(Vec0Idx + 1); 2334 SDValue V2 = N->getOperand(Vec0Idx + 2); 2335 SDValue V3 = (NumVecs == 3) 2336 ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2337 : N->getOperand(Vec0Idx + 3); 2338 SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2339 2340 // Store the even D registers. This is always an updating store, so that it 2341 // provides the address to the second store for the odd subregs. 2342 const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain }; 2343 SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, 2344 MemAddr.getValueType(), 2345 MVT::Other, OpsA); 2346 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp}); 2347 Chain = SDValue(VStA, 1); 2348 2349 // Store the odd D registers. 
2350 Ops.push_back(SDValue(VStA, 0)); 2351 Ops.push_back(Align); 2352 if (isUpdating) { 2353 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2354 assert(isa<ConstantSDNode>(Inc.getNode()) && 2355 "only constant post-increment update allowed for VST3/4"); 2356 (void)Inc; 2357 Ops.push_back(Reg0); 2358 } 2359 Ops.push_back(RegSeq); 2360 Ops.push_back(Pred); 2361 Ops.push_back(Reg0); 2362 Ops.push_back(Chain); 2363 SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, 2364 Ops); 2365 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp}); 2366 ReplaceNode(N, VStB); 2367 } 2368 2369 void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, 2370 unsigned NumVecs, 2371 const uint16_t *DOpcodes, 2372 const uint16_t *QOpcodes) { 2373 assert(Subtarget->hasNEON()); 2374 assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); 2375 SDLoc dl(N); 2376 2377 SDValue MemAddr, Align; 2378 bool IsIntrinsic = !isUpdating; // By coincidence, all supported updating 2379 // nodes are not intrinsics. 2380 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2381 unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1) 2382 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2383 return; 2384 2385 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 2386 2387 SDValue Chain = N->getOperand(0); 2388 unsigned Lane = 2389 cast<ConstantSDNode>(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); 2390 EVT VT = N->getOperand(Vec0Idx).getValueType(); 2391 bool is64BitVector = VT.is64BitVector(); 2392 2393 unsigned Alignment = 0; 2394 if (NumVecs != 3) { 2395 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2396 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2397 if (Alignment > NumBytes) 2398 Alignment = NumBytes; 2399 if (Alignment < 8 && Alignment < NumBytes) 2400 Alignment = 0; 2401 // Alignment must be a power of two; make sure of that. 
2402 Alignment = (Alignment & -Alignment); 2403 if (Alignment == 1) 2404 Alignment = 0; 2405 } 2406 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2407 2408 unsigned OpcodeIndex; 2409 switch (VT.getSimpleVT().SimpleTy) { 2410 default: llvm_unreachable("unhandled vld/vst lane type"); 2411 // Double-register operations: 2412 case MVT::v8i8: OpcodeIndex = 0; break; 2413 case MVT::v4f16: 2414 case MVT::v4bf16: 2415 case MVT::v4i16: OpcodeIndex = 1; break; 2416 case MVT::v2f32: 2417 case MVT::v2i32: OpcodeIndex = 2; break; 2418 // Quad-register operations: 2419 case MVT::v8f16: 2420 case MVT::v8bf16: 2421 case MVT::v8i16: OpcodeIndex = 0; break; 2422 case MVT::v4f32: 2423 case MVT::v4i32: OpcodeIndex = 1; break; 2424 } 2425 2426 std::vector<EVT> ResTys; 2427 if (IsLoad) { 2428 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2429 if (!is64BitVector) 2430 ResTyElts *= 2; 2431 ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(), 2432 MVT::i64, ResTyElts)); 2433 } 2434 if (isUpdating) 2435 ResTys.push_back(MVT::i32); 2436 ResTys.push_back(MVT::Other); 2437 2438 SDValue Pred = getAL(CurDAG, dl); 2439 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2440 2441 SmallVector<SDValue, 8> Ops; 2442 Ops.push_back(MemAddr); 2443 Ops.push_back(Align); 2444 if (isUpdating) { 2445 SDValue Inc = N->getOperand(AddrOpIdx + 1); 2446 bool IsImmUpdate = 2447 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2448 Ops.push_back(IsImmUpdate ? Reg0 : Inc); 2449 } 2450 2451 SDValue SuperReg; 2452 SDValue V0 = N->getOperand(Vec0Idx + 0); 2453 SDValue V1 = N->getOperand(Vec0Idx + 1); 2454 if (NumVecs == 2) { 2455 if (is64BitVector) 2456 SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0); 2457 else 2458 SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0); 2459 } else { 2460 SDValue V2 = N->getOperand(Vec0Idx + 2); 2461 SDValue V3 = (NumVecs == 3) 2462 ? 
SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0) 2463 : N->getOperand(Vec0Idx + 3); 2464 if (is64BitVector) 2465 SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0); 2466 else 2467 SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0); 2468 } 2469 Ops.push_back(SuperReg); 2470 Ops.push_back(getI32Imm(Lane, dl)); 2471 Ops.push_back(Pred); 2472 Ops.push_back(Reg0); 2473 Ops.push_back(Chain); 2474 2475 unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] : 2476 QOpcodes[OpcodeIndex]); 2477 SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 2478 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp}); 2479 if (!IsLoad) { 2480 ReplaceNode(N, VLdLn); 2481 return; 2482 } 2483 2484 // Extract the subregisters. 2485 SuperReg = SDValue(VLdLn, 0); 2486 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 && 2487 ARM::qsub_3 == ARM::qsub_0 + 3, 2488 "Unexpected subreg numbering"); 2489 unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0; 2490 for (unsigned Vec = 0; Vec < NumVecs; ++Vec) 2491 ReplaceUses(SDValue(N, Vec), 2492 CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); 2493 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); 2494 if (isUpdating) 2495 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); 2496 CurDAG->RemoveDeadNode(N); 2497 } 2498 2499 template <typename SDValueVector> 2500 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2501 SDValue PredicateMask) { 2502 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2503 Ops.push_back(PredicateMask); 2504 } 2505 2506 template <typename SDValueVector> 2507 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2508 SDValue PredicateMask, 2509 SDValue Inactive) { 2510 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32)); 2511 Ops.push_back(PredicateMask); 2512 Ops.push_back(Inactive); 2513 } 2514 2515 template <typename SDValueVector> 2516 void 
ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) { 2517 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2518 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2519 } 2520 2521 template <typename SDValueVector> 2522 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, 2523 EVT InactiveTy) { 2524 Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32)); 2525 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2526 Ops.push_back(SDValue( 2527 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0)); 2528 } 2529 2530 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, 2531 bool Predicated) { 2532 SDLoc Loc(N); 2533 SmallVector<SDValue, 8> Ops; 2534 2535 uint16_t Opcode; 2536 switch (N->getValueType(1).getVectorElementType().getSizeInBits()) { 2537 case 32: 2538 Opcode = Opcodes[0]; 2539 break; 2540 case 64: 2541 Opcode = Opcodes[1]; 2542 break; 2543 default: 2544 llvm_unreachable("bad vector element size in SelectMVE_WB"); 2545 } 2546 2547 Ops.push_back(N->getOperand(2)); // vector of base addresses 2548 2549 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2550 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset 2551 2552 if (Predicated) 2553 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2554 else 2555 AddEmptyMVEPredicateToOps(Ops, Loc); 2556 2557 Ops.push_back(N->getOperand(0)); // chain 2558 2559 SmallVector<EVT, 8> VTs; 2560 VTs.push_back(N->getValueType(1)); 2561 VTs.push_back(N->getValueType(0)); 2562 VTs.push_back(N->getValueType(2)); 2563 2564 SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops); 2565 ReplaceUses(SDValue(N, 0), SDValue(New, 1)); 2566 ReplaceUses(SDValue(N, 1), SDValue(New, 0)); 2567 ReplaceUses(SDValue(N, 2), SDValue(New, 2)); 2568 transferMemOperands(N, New); 2569 CurDAG->RemoveDeadNode(N); 2570 } 2571 2572 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode, 
2573 bool Immediate, 2574 bool HasSaturationOperand) { 2575 SDLoc Loc(N); 2576 SmallVector<SDValue, 8> Ops; 2577 2578 // Two 32-bit halves of the value to be shifted 2579 Ops.push_back(N->getOperand(1)); 2580 Ops.push_back(N->getOperand(2)); 2581 2582 // The shift count 2583 if (Immediate) { 2584 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2585 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2586 } else { 2587 Ops.push_back(N->getOperand(3)); 2588 } 2589 2590 // The immediate saturation operand, if any 2591 if (HasSaturationOperand) { 2592 int32_t SatOp = cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(); 2593 int SatBit = (SatOp == 64 ? 0 : 1); 2594 Ops.push_back(getI32Imm(SatBit, Loc)); 2595 } 2596 2597 // MVE scalar shifts are IT-predicable, so include the standard 2598 // predicate arguments. 2599 Ops.push_back(getAL(CurDAG, Loc)); 2600 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 2601 2602 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2603 } 2604 2605 void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry, 2606 uint16_t OpcodeWithNoCarry, 2607 bool Add, bool Predicated) { 2608 SDLoc Loc(N); 2609 SmallVector<SDValue, 8> Ops; 2610 uint16_t Opcode; 2611 2612 unsigned FirstInputOp = Predicated ? 2 : 1; 2613 2614 // Two input vectors and the input carry flag 2615 Ops.push_back(N->getOperand(FirstInputOp)); 2616 Ops.push_back(N->getOperand(FirstInputOp + 1)); 2617 SDValue CarryIn = N->getOperand(FirstInputOp + 2); 2618 ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn); 2619 uint32_t CarryMask = 1 << 29; 2620 uint32_t CarryExpected = Add ? 
0 : CarryMask; 2621 if (CarryInConstant && 2622 (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) { 2623 Opcode = OpcodeWithNoCarry; 2624 } else { 2625 Ops.push_back(CarryIn); 2626 Opcode = OpcodeWithCarry; 2627 } 2628 2629 if (Predicated) 2630 AddMVEPredicateToOps(Ops, Loc, 2631 N->getOperand(FirstInputOp + 3), // predicate 2632 N->getOperand(FirstInputOp - 1)); // inactive 2633 else 2634 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2635 2636 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2637 } 2638 2639 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) { 2640 SDLoc Loc(N); 2641 SmallVector<SDValue, 8> Ops; 2642 2643 // One vector input, followed by a 32-bit word of bits to shift in 2644 // and then an immediate shift count 2645 Ops.push_back(N->getOperand(1)); 2646 Ops.push_back(N->getOperand(2)); 2647 int32_t ImmValue = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 2648 Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count 2649 2650 if (Predicated) 2651 AddMVEPredicateToOps(Ops, Loc, N->getOperand(4)); 2652 else 2653 AddEmptyMVEPredicateToOps(Ops, Loc); 2654 2655 CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), makeArrayRef(Ops)); 2656 } 2657 2658 static bool SDValueToConstBool(SDValue SDVal) { 2659 assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant"); 2660 ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal); 2661 uint64_t Value = SDValConstant->getZExtValue(); 2662 assert((Value == 0 || Value == 1) && "expected value 0 or 1"); 2663 return Value; 2664 } 2665 2666 void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated, 2667 const uint16_t *OpcodesS, 2668 const uint16_t *OpcodesU, 2669 size_t Stride, size_t TySize) { 2670 assert(TySize < Stride && "Invalid TySize"); 2671 bool IsUnsigned = SDValueToConstBool(N->getOperand(1)); 2672 bool IsSub = SDValueToConstBool(N->getOperand(2)); 2673 bool IsExchange = 
SDValueToConstBool(N->getOperand(3)); 2674 if (IsUnsigned) { 2675 assert(!IsSub && 2676 "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist"); 2677 assert(!IsExchange && 2678 "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist"); 2679 } 2680 2681 auto OpIsZero = [N](size_t OpNo) { 2682 if (ConstantSDNode *OpConst = dyn_cast<ConstantSDNode>(N->getOperand(OpNo))) 2683 if (OpConst->getZExtValue() == 0) 2684 return true; 2685 return false; 2686 }; 2687 2688 // If the input accumulator value is not zero, select an instruction with 2689 // accumulator, otherwise select an instruction without accumulator 2690 bool IsAccum = !(OpIsZero(4) && OpIsZero(5)); 2691 2692 const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS; 2693 if (IsSub) 2694 Opcodes += 4 * Stride; 2695 if (IsExchange) 2696 Opcodes += 2 * Stride; 2697 if (IsAccum) 2698 Opcodes += Stride; 2699 uint16_t Opcode = Opcodes[TySize]; 2700 2701 SDLoc Loc(N); 2702 SmallVector<SDValue, 8> Ops; 2703 // Push the accumulator operands, if they are used 2704 if (IsAccum) { 2705 Ops.push_back(N->getOperand(4)); 2706 Ops.push_back(N->getOperand(5)); 2707 } 2708 // Push the two vector operands 2709 Ops.push_back(N->getOperand(6)); 2710 Ops.push_back(N->getOperand(7)); 2711 2712 if (Predicated) 2713 AddMVEPredicateToOps(Ops, Loc, N->getOperand(8)); 2714 else 2715 AddEmptyMVEPredicateToOps(Ops, Loc); 2716 2717 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2718 } 2719 2720 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated, 2721 const uint16_t *OpcodesS, 2722 const uint16_t *OpcodesU) { 2723 EVT VecTy = N->getOperand(6).getValueType(); 2724 size_t SizeIndex; 2725 switch (VecTy.getVectorElementType().getSizeInBits()) { 2726 case 16: 2727 SizeIndex = 0; 2728 break; 2729 case 32: 2730 SizeIndex = 1; 2731 break; 2732 default: 2733 llvm_unreachable("bad vector element size"); 2734 } 2735 2736 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex); 2737 } 
2738 2739 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, 2740 const uint16_t *OpcodesS, 2741 const uint16_t *OpcodesU) { 2742 assert( 2743 N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() == 2744 32 && 2745 "bad vector element size"); 2746 SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0); 2747 } 2748 2749 void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs, 2750 const uint16_t *const *Opcodes, 2751 bool HasWriteback) { 2752 EVT VT = N->getValueType(0); 2753 SDLoc Loc(N); 2754 2755 const uint16_t *OurOpcodes; 2756 switch (VT.getVectorElementType().getSizeInBits()) { 2757 case 8: 2758 OurOpcodes = Opcodes[0]; 2759 break; 2760 case 16: 2761 OurOpcodes = Opcodes[1]; 2762 break; 2763 case 32: 2764 OurOpcodes = Opcodes[2]; 2765 break; 2766 default: 2767 llvm_unreachable("bad vector element size in SelectMVE_VLD"); 2768 } 2769 2770 EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2); 2771 SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other}; 2772 unsigned PtrOperand = HasWriteback ? 
1 : 2; 2773 2774 auto Data = SDValue( 2775 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0); 2776 SDValue Chain = N->getOperand(0); 2777 // Add a MVE_VLDn instruction for each Vec, except the last 2778 for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) { 2779 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2780 auto LoadInst = 2781 CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops); 2782 Data = SDValue(LoadInst, 0); 2783 Chain = SDValue(LoadInst, 1); 2784 transferMemOperands(N, LoadInst); 2785 } 2786 // The last may need a writeback on it 2787 if (HasWriteback) 2788 ResultTys = {DataTy, MVT::i32, MVT::Other}; 2789 SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain}; 2790 auto LoadInst = 2791 CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops); 2792 transferMemOperands(N, LoadInst); 2793 2794 unsigned i; 2795 for (i = 0; i < NumVecs; i++) 2796 ReplaceUses(SDValue(N, i), 2797 CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT, 2798 SDValue(LoadInst, 0))); 2799 if (HasWriteback) 2800 ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1)); 2801 ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 
2 : 1)); 2802 CurDAG->RemoveDeadNode(N); 2803 } 2804 2805 void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes, 2806 bool Wrapping, bool Predicated) { 2807 EVT VT = N->getValueType(0); 2808 SDLoc Loc(N); 2809 2810 uint16_t Opcode; 2811 switch (VT.getScalarSizeInBits()) { 2812 case 8: 2813 Opcode = Opcodes[0]; 2814 break; 2815 case 16: 2816 Opcode = Opcodes[1]; 2817 break; 2818 case 32: 2819 Opcode = Opcodes[2]; 2820 break; 2821 default: 2822 llvm_unreachable("bad vector element size in SelectMVE_VxDUP"); 2823 } 2824 2825 SmallVector<SDValue, 8> Ops; 2826 unsigned OpIdx = 1; 2827 2828 SDValue Inactive; 2829 if (Predicated) 2830 Inactive = N->getOperand(OpIdx++); 2831 2832 Ops.push_back(N->getOperand(OpIdx++)); // base 2833 if (Wrapping) 2834 Ops.push_back(N->getOperand(OpIdx++)); // limit 2835 2836 SDValue ImmOp = N->getOperand(OpIdx++); // step 2837 int ImmValue = cast<ConstantSDNode>(ImmOp)->getZExtValue(); 2838 Ops.push_back(getI32Imm(ImmValue, Loc)); 2839 2840 if (Predicated) 2841 AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive); 2842 else 2843 AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0)); 2844 2845 CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), makeArrayRef(Ops)); 2846 } 2847 2848 void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode, 2849 size_t NumExtraOps, bool HasAccum) { 2850 bool IsBigEndian = CurDAG->getDataLayout().isBigEndian(); 2851 SDLoc Loc(N); 2852 SmallVector<SDValue, 8> Ops; 2853 2854 unsigned OpIdx = 1; 2855 2856 // Convert and append the immediate operand designating the coprocessor. 2857 SDValue ImmCorpoc = N->getOperand(OpIdx++); 2858 uint32_t ImmCoprocVal = cast<ConstantSDNode>(ImmCorpoc)->getZExtValue(); 2859 Ops.push_back(getI32Imm(ImmCoprocVal, Loc)); 2860 2861 // For accumulating variants copy the low and high order parts of the 2862 // accumulator into a register pair and add it to the operand vector. 
2863 if (HasAccum) { 2864 SDValue AccLo = N->getOperand(OpIdx++); 2865 SDValue AccHi = N->getOperand(OpIdx++); 2866 if (IsBigEndian) 2867 std::swap(AccLo, AccHi); 2868 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0)); 2869 } 2870 2871 // Copy extra operands as-is. 2872 for (size_t I = 0; I < NumExtraOps; I++) 2873 Ops.push_back(N->getOperand(OpIdx++)); 2874 2875 // Convert and append the immediate operand 2876 SDValue Imm = N->getOperand(OpIdx); 2877 uint32_t ImmVal = cast<ConstantSDNode>(Imm)->getZExtValue(); 2878 Ops.push_back(getI32Imm(ImmVal, Loc)); 2879 2880 // Accumulating variants are IT-predicable, add predicate operands. 2881 if (HasAccum) { 2882 SDValue Pred = getAL(CurDAG, Loc); 2883 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 2884 Ops.push_back(Pred); 2885 Ops.push_back(PredReg); 2886 } 2887 2888 // Create the CDE intruction 2889 SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops); 2890 SDValue ResultPair = SDValue(InstrNode, 0); 2891 2892 // The original intrinsic had two outputs, and the output of the dual-register 2893 // CDE instruction is a register pair. We need to extract the two subregisters 2894 // and replace all uses of the original outputs with the extracted 2895 // subregisters. 
2896 uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1}; 2897 if (IsBigEndian) 2898 std::swap(SubRegs[0], SubRegs[1]); 2899 2900 for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) { 2901 if (SDValue(N, ResIdx).use_empty()) 2902 continue; 2903 SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc, 2904 MVT::i32, ResultPair); 2905 ReplaceUses(SDValue(N, ResIdx), SubReg); 2906 } 2907 2908 CurDAG->RemoveDeadNode(N); 2909 } 2910 2911 void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic, 2912 bool isUpdating, unsigned NumVecs, 2913 const uint16_t *DOpcodes, 2914 const uint16_t *QOpcodes0, 2915 const uint16_t *QOpcodes1) { 2916 assert(Subtarget->hasNEON()); 2917 assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); 2918 SDLoc dl(N); 2919 2920 SDValue MemAddr, Align; 2921 unsigned AddrOpIdx = IsIntrinsic ? 2 : 1; 2922 if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align)) 2923 return; 2924 2925 SDValue Chain = N->getOperand(0); 2926 EVT VT = N->getValueType(0); 2927 bool is64BitVector = VT.is64BitVector(); 2928 2929 unsigned Alignment = 0; 2930 if (NumVecs != 3) { 2931 Alignment = cast<ConstantSDNode>(Align)->getZExtValue(); 2932 unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8; 2933 if (Alignment > NumBytes) 2934 Alignment = NumBytes; 2935 if (Alignment < 8 && Alignment < NumBytes) 2936 Alignment = 0; 2937 // Alignment must be a power of two; make sure of that. 
2938 Alignment = (Alignment & -Alignment); 2939 if (Alignment == 1) 2940 Alignment = 0; 2941 } 2942 Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32); 2943 2944 unsigned OpcodeIndex; 2945 switch (VT.getSimpleVT().SimpleTy) { 2946 default: llvm_unreachable("unhandled vld-dup type"); 2947 case MVT::v8i8: 2948 case MVT::v16i8: OpcodeIndex = 0; break; 2949 case MVT::v4i16: 2950 case MVT::v8i16: 2951 case MVT::v4f16: 2952 case MVT::v8f16: 2953 case MVT::v4bf16: 2954 case MVT::v8bf16: 2955 OpcodeIndex = 1; break; 2956 case MVT::v2f32: 2957 case MVT::v2i32: 2958 case MVT::v4f32: 2959 case MVT::v4i32: OpcodeIndex = 2; break; 2960 case MVT::v1f64: 2961 case MVT::v1i64: OpcodeIndex = 3; break; 2962 } 2963 2964 unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs; 2965 if (!is64BitVector) 2966 ResTyElts *= 2; 2967 EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts); 2968 2969 std::vector<EVT> ResTys; 2970 ResTys.push_back(ResTy); 2971 if (isUpdating) 2972 ResTys.push_back(MVT::i32); 2973 ResTys.push_back(MVT::Other); 2974 2975 SDValue Pred = getAL(CurDAG, dl); 2976 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 2977 2978 SDNode *VLdDup; 2979 if (is64BitVector || NumVecs == 1) { 2980 SmallVector<SDValue, 6> Ops; 2981 Ops.push_back(MemAddr); 2982 Ops.push_back(Align); 2983 unsigned Opc = is64BitVector ? DOpcodes[OpcodeIndex] : 2984 QOpcodes0[OpcodeIndex]; 2985 if (isUpdating) { 2986 // fixed-stride update instructions don't have an explicit writeback 2987 // operand. It's implicit in the opcode itself. 2988 SDValue Inc = N->getOperand(2); 2989 bool IsImmUpdate = 2990 isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs); 2991 if (NumVecs <= 2 && !IsImmUpdate) 2992 Opc = getVLDSTRegisterUpdateOpcode(Opc); 2993 if (!IsImmUpdate) 2994 Ops.push_back(Inc); 2995 // FIXME: VLD3 and VLD4 haven't been updated to that form yet. 
2996 else if (NumVecs > 2) 2997 Ops.push_back(Reg0); 2998 } 2999 Ops.push_back(Pred); 3000 Ops.push_back(Reg0); 3001 Ops.push_back(Chain); 3002 VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 3003 } else if (NumVecs == 2) { 3004 const SDValue OpsA[] = { MemAddr, Align, Pred, Reg0, Chain }; 3005 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 3006 dl, ResTys, OpsA); 3007 3008 Chain = SDValue(VLdA, 1); 3009 const SDValue OpsB[] = { MemAddr, Align, Pred, Reg0, Chain }; 3010 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 3011 } else { 3012 SDValue ImplDef = 3013 SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0); 3014 const SDValue OpsA[] = { MemAddr, Align, ImplDef, Pred, Reg0, Chain }; 3015 SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], 3016 dl, ResTys, OpsA); 3017 3018 SDValue SuperReg = SDValue(VLdA, 0); 3019 Chain = SDValue(VLdA, 1); 3020 const SDValue OpsB[] = { MemAddr, Align, SuperReg, Pred, Reg0, Chain }; 3021 VLdDup = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, OpsB); 3022 } 3023 3024 // Transfer memoperands. 3025 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 3026 CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp}); 3027 3028 // Extract the subregisters. 3029 if (NumVecs == 1) { 3030 ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0)); 3031 } else { 3032 SDValue SuperReg = SDValue(VLdDup, 0); 3033 static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering"); 3034 unsigned SubIdx = is64BitVector ? 
ARM::dsub_0 : ARM::qsub_0; 3035 for (unsigned Vec = 0; Vec != NumVecs; ++Vec) { 3036 ReplaceUses(SDValue(N, Vec), 3037 CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg)); 3038 } 3039 } 3040 ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); 3041 if (isUpdating) 3042 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); 3043 CurDAG->RemoveDeadNode(N); 3044 } 3045 3046 bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) { 3047 if (!Subtarget->hasMVEIntegerOps()) 3048 return false; 3049 3050 SDLoc dl(N); 3051 3052 // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and 3053 // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent 3054 // inserts of the correct type: 3055 SDValue Ins1 = SDValue(N, 0); 3056 SDValue Ins2 = N->getOperand(0); 3057 EVT VT = Ins1.getValueType(); 3058 if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() || 3059 !isa<ConstantSDNode>(Ins1.getOperand(2)) || 3060 !isa<ConstantSDNode>(Ins2.getOperand(2)) || 3061 (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT)) 3062 return false; 3063 3064 unsigned Lane1 = Ins1.getConstantOperandVal(2); 3065 unsigned Lane2 = Ins2.getConstantOperandVal(2); 3066 if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1) 3067 return false; 3068 3069 // If the inserted values will be able to use T/B already, leave it to the 3070 // existing tablegen patterns. For example VCVTT/VCVTB. 3071 SDValue Val1 = Ins1.getOperand(1); 3072 SDValue Val2 = Ins2.getOperand(1); 3073 if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND) 3074 return false; 3075 3076 // Check if the inserted values are both extracts. 
3077 if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3078 Val1.getOpcode() == ARMISD::VGETLANEu) && 3079 (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT || 3080 Val2.getOpcode() == ARMISD::VGETLANEu) && 3081 isa<ConstantSDNode>(Val1.getOperand(1)) && 3082 isa<ConstantSDNode>(Val2.getOperand(1)) && 3083 (Val1.getOperand(0).getValueType() == MVT::v8f16 || 3084 Val1.getOperand(0).getValueType() == MVT::v8i16) && 3085 (Val2.getOperand(0).getValueType() == MVT::v8f16 || 3086 Val2.getOperand(0).getValueType() == MVT::v8i16)) { 3087 unsigned ExtractLane1 = Val1.getConstantOperandVal(1); 3088 unsigned ExtractLane2 = Val2.getConstantOperandVal(1); 3089 3090 // If the two extracted lanes are from the same place and adjacent, this 3091 // simplifies into a f32 lane move. 3092 if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 && 3093 ExtractLane1 == ExtractLane2 + 1) { 3094 SDValue NewExt = CurDAG->getTargetExtractSubreg( 3095 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0)); 3096 SDValue NewIns = CurDAG->getTargetInsertSubreg( 3097 ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0), 3098 NewExt); 3099 ReplaceUses(Ins1, NewIns); 3100 return true; 3101 } 3102 3103 // Else v8i16 pattern of an extract and an insert, with a optional vmovx for 3104 // extracting odd lanes. 
3105 if (VT == MVT::v8i16) { 3106 SDValue Inp1 = CurDAG->getTargetExtractSubreg( 3107 ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0)); 3108 SDValue Inp2 = CurDAG->getTargetExtractSubreg( 3109 ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0)); 3110 if (ExtractLane1 % 2 != 0) 3111 Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0); 3112 if (ExtractLane2 % 2 != 0) 3113 Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0); 3114 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1); 3115 SDValue NewIns = 3116 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3117 Ins2.getOperand(0), SDValue(VINS, 0)); 3118 ReplaceUses(Ins1, NewIns); 3119 return true; 3120 } 3121 } 3122 3123 // The inserted values are not extracted - if they are f16 then insert them 3124 // directly using a VINS. 3125 if (VT == MVT::v8f16) { 3126 SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1); 3127 SDValue NewIns = 3128 CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32, 3129 Ins2.getOperand(0), SDValue(VINS, 0)); 3130 ReplaceUses(Ins1, NewIns); 3131 return true; 3132 } 3133 3134 return false; 3135 } 3136 3137 bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) { 3138 if (!Subtarget->hasV6T2Ops()) 3139 return false; 3140 3141 unsigned Opc = isSigned 3142 ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX) 3143 : (Subtarget->isThumb() ? 
ARM::t2UBFX : ARM::UBFX); 3144 SDLoc dl(N); 3145 3146 // For unsigned extracts, check for a shift right and mask 3147 unsigned And_imm = 0; 3148 if (N->getOpcode() == ISD::AND) { 3149 if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) { 3150 3151 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 3152 if (And_imm & (And_imm + 1)) 3153 return false; 3154 3155 unsigned Srl_imm = 0; 3156 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, 3157 Srl_imm)) { 3158 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3159 3160 // Mask off the unnecessary bits of the AND immediate; normally 3161 // DAGCombine will do this, but that might not happen if 3162 // targetShrinkDemandedConstant chooses a different immediate. 3163 And_imm &= -1U >> Srl_imm; 3164 3165 // Note: The width operand is encoded as width-1. 3166 unsigned Width = countTrailingOnes(And_imm) - 1; 3167 unsigned LSB = Srl_imm; 3168 3169 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3170 3171 if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) { 3172 // It's cheaper to use a right shift to extract the top bits. 3173 if (Subtarget->isThumb()) { 3174 Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri; 3175 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3176 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3177 getAL(CurDAG, dl), Reg0, Reg0 }; 3178 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3179 return true; 3180 } 3181 3182 // ARM models shift instructions as MOVsi with shifter operand. 
3183 ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL); 3184 SDValue ShOpc = 3185 CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl, 3186 MVT::i32); 3187 SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc, 3188 getAL(CurDAG, dl), Reg0, Reg0 }; 3189 CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops); 3190 return true; 3191 } 3192 3193 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3194 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3195 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3196 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3197 getAL(CurDAG, dl), Reg0 }; 3198 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3199 return true; 3200 } 3201 } 3202 return false; 3203 } 3204 3205 // Otherwise, we're looking for a shift of a shift 3206 unsigned Shl_imm = 0; 3207 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) { 3208 assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!"); 3209 unsigned Srl_imm = 0; 3210 if (isInt32Immediate(N->getOperand(1), Srl_imm)) { 3211 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3212 // Note: The width operand is encoded as width-1. 
3213 unsigned Width = 32 - Srl_imm - 1; 3214 int LSB = Srl_imm - Shl_imm; 3215 if (LSB < 0) 3216 return false; 3217 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3218 assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3219 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3220 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3221 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3222 getAL(CurDAG, dl), Reg0 }; 3223 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3224 return true; 3225 } 3226 } 3227 3228 // Or we are looking for a shift of an and, with a mask operand 3229 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) && 3230 isShiftedMask_32(And_imm)) { 3231 unsigned Srl_imm = 0; 3232 unsigned LSB = countTrailingZeros(And_imm); 3233 // Shift must be the same as the ands lsb 3234 if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) { 3235 assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!"); 3236 unsigned MSB = 31 - countLeadingZeros(And_imm); 3237 // Note: The width operand is encoded as width-1. 
3238 unsigned Width = MSB - LSB; 3239 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3240 assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx"); 3241 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3242 CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32), 3243 CurDAG->getTargetConstant(Width, dl, MVT::i32), 3244 getAL(CurDAG, dl), Reg0 }; 3245 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3246 return true; 3247 } 3248 } 3249 3250 if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) { 3251 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 3252 unsigned LSB = 0; 3253 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) && 3254 !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB)) 3255 return false; 3256 3257 if (LSB + Width > 32) 3258 return false; 3259 3260 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3261 assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx"); 3262 SDValue Ops[] = { N->getOperand(0).getOperand(0), 3263 CurDAG->getTargetConstant(LSB, dl, MVT::i32), 3264 CurDAG->getTargetConstant(Width - 1, dl, MVT::i32), 3265 getAL(CurDAG, dl), Reg0 }; 3266 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3267 return true; 3268 } 3269 3270 return false; 3271 } 3272 3273 /// Target-specific DAG combining for ISD::XOR. 3274 /// Target-independent combining lowers SELECT_CC nodes of the form 3275 /// select_cc setg[ge] X, 0, X, -X 3276 /// select_cc setgt X, -1, X, -X 3277 /// select_cc setl[te] X, 0, -X, X 3278 /// select_cc setlt X, 1, -X, X 3279 /// which represent Integer ABS into: 3280 /// Y = sra (X, size(X)-1); xor (add (X, Y), Y) 3281 /// ARM instruction selection detects the latter and matches it to 3282 /// ARM::ABS or ARM::t2ABS machine node. 
3283 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){ 3284 SDValue XORSrc0 = N->getOperand(0); 3285 SDValue XORSrc1 = N->getOperand(1); 3286 EVT VT = N->getValueType(0); 3287 3288 if (Subtarget->isThumb1Only()) 3289 return false; 3290 3291 if (XORSrc0.getOpcode() != ISD::ADD || XORSrc1.getOpcode() != ISD::SRA) 3292 return false; 3293 3294 SDValue ADDSrc0 = XORSrc0.getOperand(0); 3295 SDValue ADDSrc1 = XORSrc0.getOperand(1); 3296 SDValue SRASrc0 = XORSrc1.getOperand(0); 3297 SDValue SRASrc1 = XORSrc1.getOperand(1); 3298 ConstantSDNode *SRAConstant = dyn_cast<ConstantSDNode>(SRASrc1); 3299 EVT XType = SRASrc0.getValueType(); 3300 unsigned Size = XType.getSizeInBits() - 1; 3301 3302 if (ADDSrc1 == XORSrc1 && ADDSrc0 == SRASrc0 && 3303 XType.isInteger() && SRAConstant != nullptr && 3304 Size == SRAConstant->getZExtValue()) { 3305 unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS; 3306 CurDAG->SelectNodeTo(N, Opcode, VT, ADDSrc0); 3307 return true; 3308 } 3309 3310 return false; 3311 } 3312 3313 /// We've got special pseudo-instructions for these 3314 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3315 unsigned Opcode; 3316 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3317 if (MemTy == MVT::i8) 3318 Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8; 3319 else if (MemTy == MVT::i16) 3320 Opcode = Subtarget->isThumb() ? 
ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16; 3321 else if (MemTy == MVT::i32) 3322 Opcode = ARM::CMP_SWAP_32; 3323 else 3324 llvm_unreachable("Unknown AtomicCmpSwap type"); 3325 3326 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3327 N->getOperand(0)}; 3328 SDNode *CmpSwap = CurDAG->getMachineNode( 3329 Opcode, SDLoc(N), 3330 CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops); 3331 3332 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3333 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3334 3335 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3336 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3337 CurDAG->RemoveDeadNode(N); 3338 } 3339 3340 static Optional<std::pair<unsigned, unsigned>> 3341 getContiguousRangeOfSetBits(const APInt &A) { 3342 unsigned FirstOne = A.getBitWidth() - A.countLeadingZeros() - 1; 3343 unsigned LastOne = A.countTrailingZeros(); 3344 if (A.countPopulation() != (FirstOne - LastOne + 1)) 3345 return Optional<std::pair<unsigned,unsigned>>(); 3346 return std::make_pair(FirstOne, LastOne); 3347 } 3348 3349 void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) { 3350 assert(N->getOpcode() == ARMISD::CMPZ); 3351 SwitchEQNEToPLMI = false; 3352 3353 if (!Subtarget->isThumb()) 3354 // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and 3355 // LSR don't exist as standalone instructions - they need the barrel shifter. 
3356 return; 3357 3358 // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X)) 3359 SDValue And = N->getOperand(0); 3360 if (!And->hasOneUse()) 3361 return; 3362 3363 SDValue Zero = N->getOperand(1); 3364 if (!isa<ConstantSDNode>(Zero) || !cast<ConstantSDNode>(Zero)->isNullValue() || 3365 And->getOpcode() != ISD::AND) 3366 return; 3367 SDValue X = And.getOperand(0); 3368 auto C = dyn_cast<ConstantSDNode>(And.getOperand(1)); 3369 3370 if (!C) 3371 return; 3372 auto Range = getContiguousRangeOfSetBits(C->getAPIntValue()); 3373 if (!Range) 3374 return; 3375 3376 // There are several ways to lower this: 3377 SDNode *NewN; 3378 SDLoc dl(N); 3379 3380 auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* { 3381 if (Subtarget->isThumb2()) { 3382 Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri; 3383 SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3384 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3385 CurDAG->getRegister(0, MVT::i32) }; 3386 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3387 } else { 3388 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src, 3389 CurDAG->getTargetConstant(Imm, dl, MVT::i32), 3390 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 3391 return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 3392 } 3393 }; 3394 3395 if (Range->second == 0) { 3396 // 1. Mask includes the LSB -> Simply shift the top N bits off 3397 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3398 ReplaceNode(And.getNode(), NewN); 3399 } else if (Range->first == 31) { 3400 // 2. Mask includes the MSB -> Simply shift the bottom N bits off 3401 NewN = EmitShift(ARM::tLSRri, X, Range->second); 3402 ReplaceNode(And.getNode(), NewN); 3403 } else if (Range->first == Range->second) { 3404 // 3. Only one bit is set. We can shift this into the sign bit and use a 3405 // PL/MI comparison. 
3406 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3407 ReplaceNode(And.getNode(), NewN); 3408 3409 SwitchEQNEToPLMI = true; 3410 } else if (!Subtarget->hasV6T2Ops()) { 3411 // 4. Do a double shift to clear bottom and top bits, but only in 3412 // thumb-1 mode as in thumb-2 we can use UBFX. 3413 NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first); 3414 NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0), 3415 Range->second + (31 - Range->first)); 3416 ReplaceNode(And.getNode(), NewN); 3417 } 3418 3419 } 3420 3421 void ARMDAGToDAGISel::Select(SDNode *N) { 3422 SDLoc dl(N); 3423 3424 if (N->isMachineOpcode()) { 3425 N->setNodeId(-1); 3426 return; // Already selected. 3427 } 3428 3429 switch (N->getOpcode()) { 3430 default: break; 3431 case ISD::STORE: { 3432 // For Thumb1, match an sp-relative store in C++. This is a little 3433 // unfortunate, but I don't think I can make the chain check work 3434 // otherwise. (The chain of the store has to be the same as the chain 3435 // of the CopyFromReg, or else we can't replace the CopyFromReg with 3436 // a direct reference to "SP".) 3437 // 3438 // This is only necessary on Thumb1 because Thumb1 sp-relative stores use 3439 // a different addressing mode from other four-byte stores. 3440 // 3441 // This pattern usually comes up with call arguments. 
3442 StoreSDNode *ST = cast<StoreSDNode>(N); 3443 SDValue Ptr = ST->getBasePtr(); 3444 if (Subtarget->isThumb1Only() && ST->isUnindexed()) { 3445 int RHSC = 0; 3446 if (Ptr.getOpcode() == ISD::ADD && 3447 isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) 3448 Ptr = Ptr.getOperand(0); 3449 3450 if (Ptr.getOpcode() == ISD::CopyFromReg && 3451 cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP && 3452 Ptr.getOperand(0) == ST->getChain()) { 3453 SDValue Ops[] = {ST->getValue(), 3454 CurDAG->getRegister(ARM::SP, MVT::i32), 3455 CurDAG->getTargetConstant(RHSC, dl, MVT::i32), 3456 getAL(CurDAG, dl), 3457 CurDAG->getRegister(0, MVT::i32), 3458 ST->getChain()}; 3459 MachineSDNode *ResNode = 3460 CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops); 3461 MachineMemOperand *MemOp = ST->getMemOperand(); 3462 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3463 ReplaceNode(N, ResNode); 3464 return; 3465 } 3466 } 3467 break; 3468 } 3469 case ISD::WRITE_REGISTER: 3470 if (tryWriteRegister(N)) 3471 return; 3472 break; 3473 case ISD::READ_REGISTER: 3474 if (tryReadRegister(N)) 3475 return; 3476 break; 3477 case ISD::INLINEASM: 3478 case ISD::INLINEASM_BR: 3479 if (tryInlineAsm(N)) 3480 return; 3481 break; 3482 case ISD::XOR: 3483 // Select special operations if XOR node forms integer ABS pattern 3484 if (tryABSOp(N)) 3485 return; 3486 // Other cases are autogenerated. 
3487 break; 3488 case ISD::Constant: { 3489 unsigned Val = cast<ConstantSDNode>(N)->getZExtValue(); 3490 // If we can't materialize the constant we need to use a literal pool 3491 if (ConstantMaterializationCost(Val, Subtarget) > 2) { 3492 SDValue CPIdx = CurDAG->getTargetConstantPool( 3493 ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val), 3494 TLI->getPointerTy(CurDAG->getDataLayout())); 3495 3496 SDNode *ResNode; 3497 if (Subtarget->isThumb()) { 3498 SDValue Ops[] = { 3499 CPIdx, 3500 getAL(CurDAG, dl), 3501 CurDAG->getRegister(0, MVT::i32), 3502 CurDAG->getEntryNode() 3503 }; 3504 ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other, 3505 Ops); 3506 } else { 3507 SDValue Ops[] = { 3508 CPIdx, 3509 CurDAG->getTargetConstant(0, dl, MVT::i32), 3510 getAL(CurDAG, dl), 3511 CurDAG->getRegister(0, MVT::i32), 3512 CurDAG->getEntryNode() 3513 }; 3514 ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other, 3515 Ops); 3516 } 3517 // Annotate the Node with memory operand information so that MachineInstr 3518 // queries work properly. This e.g. gives the register allocation the 3519 // required information for rematerialization. 3520 MachineFunction& MF = CurDAG->getMachineFunction(); 3521 MachineMemOperand *MemOp = 3522 MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF), 3523 MachineMemOperand::MOLoad, 4, Align(4)); 3524 3525 CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp}); 3526 3527 ReplaceNode(N, ResNode); 3528 return; 3529 } 3530 3531 // Other cases are autogenerated. 3532 break; 3533 } 3534 case ISD::FrameIndex: { 3535 // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
3536 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 3537 SDValue TFI = CurDAG->getTargetFrameIndex( 3538 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3539 if (Subtarget->isThumb1Only()) { 3540 // Set the alignment of the frame object to 4, to avoid having to generate 3541 // more than one ADD 3542 MachineFrameInfo &MFI = MF->getFrameInfo(); 3543 if (MFI.getObjectAlign(FI) < Align(4)) 3544 MFI.setObjectAlignment(FI, Align(4)); 3545 CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI, 3546 CurDAG->getTargetConstant(0, dl, MVT::i32)); 3547 return; 3548 } else { 3549 unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ? 3550 ARM::t2ADDri : ARM::ADDri); 3551 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32), 3552 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3553 CurDAG->getRegister(0, MVT::i32) }; 3554 CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops); 3555 return; 3556 } 3557 } 3558 case ISD::INSERT_VECTOR_ELT: { 3559 if (tryInsertVectorElt(N)) 3560 return; 3561 break; 3562 } 3563 case ISD::SRL: 3564 if (tryV6T2BitfieldExtractOp(N, false)) 3565 return; 3566 break; 3567 case ISD::SIGN_EXTEND_INREG: 3568 case ISD::SRA: 3569 if (tryV6T2BitfieldExtractOp(N, true)) 3570 return; 3571 break; 3572 case ISD::MUL: 3573 if (Subtarget->isThumb1Only()) 3574 break; 3575 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) { 3576 unsigned RHSV = C->getZExtValue(); 3577 if (!RHSV) break; 3578 if (isPowerOf2_32(RHSV-1)) { // 2^n+1? 
3579 unsigned ShImm = Log2_32(RHSV-1); 3580 if (ShImm >= 32) 3581 break; 3582 SDValue V = N->getOperand(0); 3583 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3584 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3585 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3586 if (Subtarget->isThumb()) { 3587 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3588 CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops); 3589 return; 3590 } else { 3591 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3592 Reg0 }; 3593 CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops); 3594 return; 3595 } 3596 } 3597 if (isPowerOf2_32(RHSV+1)) { // 2^n-1? 3598 unsigned ShImm = Log2_32(RHSV+1); 3599 if (ShImm >= 32) 3600 break; 3601 SDValue V = N->getOperand(0); 3602 ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm); 3603 SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32); 3604 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 3605 if (Subtarget->isThumb()) { 3606 SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 }; 3607 CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops); 3608 return; 3609 } else { 3610 SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0, 3611 Reg0 }; 3612 CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops); 3613 return; 3614 } 3615 } 3616 } 3617 break; 3618 case ISD::AND: { 3619 // Check for unsigned bitfield extract 3620 if (tryV6T2BitfieldExtractOp(N, false)) 3621 return; 3622 3623 // If an immediate is used in an AND node, it is possible that the immediate 3624 // can be more optimally materialized when negated. If this is the case we 3625 // can negate the immediate and use a BIC instead. 3626 auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1)); 3627 if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) { 3628 uint32_t Imm = (uint32_t) N1C->getZExtValue(); 3629 3630 // In Thumb2 mode, an AND can take a 12-bit immediate. 
If this 3631 // immediate can be negated and fit in the immediate operand of 3632 // a t2BIC, don't do any manual transform here as this can be 3633 // handled by the generic ISel machinery. 3634 bool PreferImmediateEncoding = 3635 Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm)); 3636 if (!PreferImmediateEncoding && 3637 ConstantMaterializationCost(Imm, Subtarget) > 3638 ConstantMaterializationCost(~Imm, Subtarget)) { 3639 // The current immediate costs more to materialize than a negated 3640 // immediate, so negate the immediate and use a BIC. 3641 SDValue NewImm = 3642 CurDAG->getConstant(~N1C->getZExtValue(), dl, MVT::i32); 3643 // If the new constant didn't exist before, reposition it in the topological 3644 // ordering so it is just before N. Otherwise, don't touch its location. 3645 if (NewImm->getNodeId() == -1) 3646 CurDAG->RepositionNode(N->getIterator(), NewImm.getNode()); 3647 3648 if (!Subtarget->hasThumb2()) { 3649 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), 3650 N->getOperand(0), NewImm, getAL(CurDAG, dl), 3651 CurDAG->getRegister(0, MVT::i32)}; 3652 ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops)); 3653 return; 3654 } else { 3655 SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl), 3656 CurDAG->getRegister(0, MVT::i32), 3657 CurDAG->getRegister(0, MVT::i32)}; 3658 ReplaceNode(N, 3659 CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops)); 3660 return; 3661 } 3662 } 3663 } 3664 3665 // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits 3666 // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits 3667 // are entirely contributed by c2 and lower 16-bits are entirely contributed 3668 // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)). 3669 // Select it to: "movt x, ((c1 & 0xffff) >> 16) 3670 EVT VT = N->getValueType(0); 3671 if (VT != MVT::i32) 3672 break; 3673 unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2()) 3674 ? 
ARM::t2MOVTi16 3675 : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0); 3676 if (!Opc) 3677 break; 3678 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 3679 N1C = dyn_cast<ConstantSDNode>(N1); 3680 if (!N1C) 3681 break; 3682 if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) { 3683 SDValue N2 = N0.getOperand(1); 3684 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 3685 if (!N2C) 3686 break; 3687 unsigned N1CVal = N1C->getZExtValue(); 3688 unsigned N2CVal = N2C->getZExtValue(); 3689 if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) && 3690 (N1CVal & 0xffffU) == 0xffffU && 3691 (N2CVal & 0xffffU) == 0x0U) { 3692 SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16, 3693 dl, MVT::i32); 3694 SDValue Ops[] = { N0.getOperand(0), Imm16, 3695 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) }; 3696 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 3697 return; 3698 } 3699 } 3700 3701 break; 3702 } 3703 case ARMISD::UMAAL: { 3704 unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL; 3705 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), 3706 N->getOperand(2), N->getOperand(3), 3707 getAL(CurDAG, dl), 3708 CurDAG->getRegister(0, MVT::i32) }; 3709 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops)); 3710 return; 3711 } 3712 case ARMISD::UMLAL:{ 3713 if (Subtarget->isThumb()) { 3714 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3715 N->getOperand(3), getAL(CurDAG, dl), 3716 CurDAG->getRegister(0, MVT::i32)}; 3717 ReplaceNode( 3718 N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops)); 3719 return; 3720 }else{ 3721 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3722 N->getOperand(3), getAL(CurDAG, dl), 3723 CurDAG->getRegister(0, MVT::i32), 3724 CurDAG->getRegister(0, MVT::i32) }; 3725 ReplaceNode(N, CurDAG->getMachineNode( 3726 Subtarget->hasV6Ops() ? 
ARM::UMLAL : ARM::UMLALv5, dl, 3727 MVT::i32, MVT::i32, Ops)); 3728 return; 3729 } 3730 } 3731 case ARMISD::SMLAL:{ 3732 if (Subtarget->isThumb()) { 3733 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3734 N->getOperand(3), getAL(CurDAG, dl), 3735 CurDAG->getRegister(0, MVT::i32)}; 3736 ReplaceNode( 3737 N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops)); 3738 return; 3739 }else{ 3740 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2), 3741 N->getOperand(3), getAL(CurDAG, dl), 3742 CurDAG->getRegister(0, MVT::i32), 3743 CurDAG->getRegister(0, MVT::i32) }; 3744 ReplaceNode(N, CurDAG->getMachineNode( 3745 Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl, 3746 MVT::i32, MVT::i32, Ops)); 3747 return; 3748 } 3749 } 3750 case ARMISD::SUBE: { 3751 if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP()) 3752 break; 3753 // Look for a pattern to match SMMLS 3754 // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b)))) 3755 if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI || 3756 N->getOperand(2).getOpcode() != ARMISD::SUBC || 3757 !SDValue(N, 1).use_empty()) 3758 break; 3759 3760 if (Subtarget->isThumb()) 3761 assert(Subtarget->hasThumb2() && 3762 "This pattern should not be generated for Thumb"); 3763 3764 SDValue SmulLoHi = N->getOperand(1); 3765 SDValue Subc = N->getOperand(2); 3766 auto *Zero = dyn_cast<ConstantSDNode>(Subc.getOperand(0)); 3767 3768 if (!Zero || Zero->getZExtValue() != 0 || 3769 Subc.getOperand(1) != SmulLoHi.getValue(0) || 3770 N->getOperand(1) != SmulLoHi.getValue(1) || 3771 N->getOperand(2) != Subc.getValue(1)) 3772 break; 3773 3774 unsigned Opc = Subtarget->isThumb2() ? 
ARM::t2SMMLS : ARM::SMMLS; 3775 SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1), 3776 N->getOperand(0), getAL(CurDAG, dl), 3777 CurDAG->getRegister(0, MVT::i32) }; 3778 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops)); 3779 return; 3780 } 3781 case ISD::LOAD: { 3782 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3783 return; 3784 if (Subtarget->isThumb() && Subtarget->hasThumb2()) { 3785 if (tryT2IndexedLoad(N)) 3786 return; 3787 } else if (Subtarget->isThumb()) { 3788 if (tryT1IndexedLoad(N)) 3789 return; 3790 } else if (tryARMIndexedLoad(N)) 3791 return; 3792 // Other cases are autogenerated. 3793 break; 3794 } 3795 case ISD::MLOAD: 3796 if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N)) 3797 return; 3798 // Other cases are autogenerated. 3799 break; 3800 case ARMISD::WLSSETUP: { 3801 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32, 3802 N->getOperand(0)); 3803 ReplaceUses(N, New); 3804 CurDAG->RemoveDeadNode(N); 3805 return; 3806 } 3807 case ARMISD::WLS: { 3808 SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other, 3809 N->getOperand(1), N->getOperand(2), 3810 N->getOperand(0)); 3811 ReplaceUses(N, New); 3812 CurDAG->RemoveDeadNode(N); 3813 return; 3814 } 3815 case ARMISD::LE: { 3816 SDValue Ops[] = { N->getOperand(1), 3817 N->getOperand(2), 3818 N->getOperand(0) }; 3819 unsigned Opc = ARM::t2LoopEnd; 3820 SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 3821 ReplaceUses(N, New); 3822 CurDAG->RemoveDeadNode(N); 3823 return; 3824 } 3825 case ARMISD::LDRD: { 3826 if (Subtarget->isThumb2()) 3827 break; // TableGen handles isel in this case. 
3828 SDValue Base, RegOffset, ImmOffset; 3829 const SDValue &Chain = N->getOperand(0); 3830 const SDValue &Addr = N->getOperand(1); 3831 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3832 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3833 // The register-offset variant of LDRD mandates that the register 3834 // allocated to RegOffset is not reused in any of the remaining operands. 3835 // This restriction is currently not enforced. Therefore emitting this 3836 // variant is explicitly avoided. 3837 Base = Addr; 3838 RegOffset = CurDAG->getRegister(0, MVT::i32); 3839 } 3840 SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain}; 3841 SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl, 3842 {MVT::Untyped, MVT::Other}, Ops); 3843 SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 3844 SDValue(New, 0)); 3845 SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 3846 SDValue(New, 0)); 3847 transferMemOperands(N, New); 3848 ReplaceUses(SDValue(N, 0), Lo); 3849 ReplaceUses(SDValue(N, 1), Hi); 3850 ReplaceUses(SDValue(N, 2), SDValue(New, 1)); 3851 CurDAG->RemoveDeadNode(N); 3852 return; 3853 } 3854 case ARMISD::STRD: { 3855 if (Subtarget->isThumb2()) 3856 break; // TableGen handles isel in this case. 3857 SDValue Base, RegOffset, ImmOffset; 3858 const SDValue &Chain = N->getOperand(0); 3859 const SDValue &Addr = N->getOperand(3); 3860 SelectAddrMode3(Addr, Base, RegOffset, ImmOffset); 3861 if (RegOffset != CurDAG->getRegister(0, MVT::i32)) { 3862 // The register-offset variant of STRD mandates that the register 3863 // allocated to RegOffset is not reused in any of the remaining operands. 3864 // This restriction is currently not enforced. Therefore emitting this 3865 // variant is explicitly avoided. 
3866 Base = Addr; 3867 RegOffset = CurDAG->getRegister(0, MVT::i32); 3868 } 3869 SDNode *RegPair = 3870 createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2)); 3871 SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain}; 3872 SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops); 3873 transferMemOperands(N, New); 3874 ReplaceUses(SDValue(N, 0), SDValue(New, 0)); 3875 CurDAG->RemoveDeadNode(N); 3876 return; 3877 } 3878 case ARMISD::LOOP_DEC: { 3879 SDValue Ops[] = { N->getOperand(1), 3880 N->getOperand(2), 3881 N->getOperand(0) }; 3882 SDNode *Dec = 3883 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3884 CurDAG->getVTList(MVT::i32, MVT::Other), Ops); 3885 ReplaceUses(N, Dec); 3886 CurDAG->RemoveDeadNode(N); 3887 return; 3888 } 3889 case ARMISD::BRCOND: { 3890 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3891 // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3892 // Pattern complexity = 6 cost = 1 size = 0 3893 3894 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3895 // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc) 3896 // Pattern complexity = 6 cost = 1 size = 0 3897 3898 // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc) 3899 // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc) 3900 // Pattern complexity = 6 cost = 1 size = 0 3901 3902 unsigned Opc = Subtarget->isThumb() ? 3903 ((Subtarget->hasThumb2()) ? 
ARM::t2Bcc : ARM::tBcc) : ARM::Bcc; 3904 SDValue Chain = N->getOperand(0); 3905 SDValue N1 = N->getOperand(1); 3906 SDValue N2 = N->getOperand(2); 3907 SDValue N3 = N->getOperand(3); 3908 SDValue InFlag = N->getOperand(4); 3909 assert(N1.getOpcode() == ISD::BasicBlock); 3910 assert(N2.getOpcode() == ISD::Constant); 3911 assert(N3.getOpcode() == ISD::Register); 3912 3913 unsigned CC = (unsigned) cast<ConstantSDNode>(N2)->getZExtValue(); 3914 3915 if (InFlag.getOpcode() == ARMISD::CMPZ) { 3916 if (InFlag.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) { 3917 SDValue Int = InFlag.getOperand(0); 3918 uint64_t ID = cast<ConstantSDNode>(Int->getOperand(1))->getZExtValue(); 3919 3920 // Handle low-overhead loops. 3921 if (ID == Intrinsic::loop_decrement_reg) { 3922 SDValue Elements = Int.getOperand(2); 3923 SDValue Size = CurDAG->getTargetConstant( 3924 cast<ConstantSDNode>(Int.getOperand(3))->getZExtValue(), dl, 3925 MVT::i32); 3926 3927 SDValue Args[] = { Elements, Size, Int.getOperand(0) }; 3928 SDNode *LoopDec = 3929 CurDAG->getMachineNode(ARM::t2LoopDec, dl, 3930 CurDAG->getVTList(MVT::i32, MVT::Other), 3931 Args); 3932 ReplaceUses(Int.getNode(), LoopDec); 3933 3934 SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain }; 3935 SDNode *LoopEnd = 3936 CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs); 3937 3938 ReplaceUses(N, LoopEnd); 3939 CurDAG->RemoveDeadNode(N); 3940 CurDAG->RemoveDeadNode(InFlag.getNode()); 3941 CurDAG->RemoveDeadNode(Int.getNode()); 3942 return; 3943 } 3944 } 3945 3946 bool SwitchEQNEToPLMI; 3947 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 3948 InFlag = N->getOperand(4); 3949 3950 if (SwitchEQNEToPLMI) { 3951 switch ((ARMCC::CondCodes)CC) { 3952 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 3953 case ARMCC::NE: 3954 CC = (unsigned)ARMCC::MI; 3955 break; 3956 case ARMCC::EQ: 3957 CC = (unsigned)ARMCC::PL; 3958 break; 3959 } 3960 } 3961 } 3962 3963 SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32); 3964 
SDValue Ops[] = { N1, Tmp2, N3, Chain, InFlag }; 3965 SDNode *ResNode = CurDAG->getMachineNode(Opc, dl, MVT::Other, 3966 MVT::Glue, Ops); 3967 Chain = SDValue(ResNode, 0); 3968 if (N->getNumValues() == 2) { 3969 InFlag = SDValue(ResNode, 1); 3970 ReplaceUses(SDValue(N, 1), InFlag); 3971 } 3972 ReplaceUses(SDValue(N, 0), 3973 SDValue(Chain.getNode(), Chain.getResNo())); 3974 CurDAG->RemoveDeadNode(N); 3975 return; 3976 } 3977 3978 case ARMISD::CMPZ: { 3979 // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0) 3980 // This allows us to avoid materializing the expensive negative constant. 3981 // The CMPZ #0 is useless and will be peepholed away but we need to keep it 3982 // for its glue output. 3983 SDValue X = N->getOperand(0); 3984 auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode()); 3985 if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) { 3986 int64_t Addend = -C->getSExtValue(); 3987 3988 SDNode *Add = nullptr; 3989 // ADDS can be better than CMN if the immediate fits in a 3990 // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3. 3991 // Outside that range we can just use a CMN which is 32-bit but has a 3992 // 12-bit immediate range. 3993 if (Addend < 1<<8) { 3994 if (Subtarget->isThumb2()) { 3995 SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32), 3996 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32), 3997 CurDAG->getRegister(0, MVT::i32) }; 3998 Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops); 3999 } else { 4000 unsigned Opc = (Addend < 1<<3) ? 
ARM::tADDi3 : ARM::tADDi8; 4001 SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X, 4002 CurDAG->getTargetConstant(Addend, dl, MVT::i32), 4003 getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)}; 4004 Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops); 4005 } 4006 } 4007 if (Add) { 4008 SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)}; 4009 CurDAG->MorphNodeTo(N, ARMISD::CMPZ, CurDAG->getVTList(MVT::Glue), Ops2); 4010 } 4011 } 4012 // Other cases are autogenerated. 4013 break; 4014 } 4015 4016 case ARMISD::CMOV: { 4017 SDValue InFlag = N->getOperand(4); 4018 4019 if (InFlag.getOpcode() == ARMISD::CMPZ) { 4020 bool SwitchEQNEToPLMI; 4021 SelectCMPZ(InFlag.getNode(), SwitchEQNEToPLMI); 4022 4023 if (SwitchEQNEToPLMI) { 4024 SDValue ARMcc = N->getOperand(2); 4025 ARMCC::CondCodes CC = 4026 (ARMCC::CondCodes)cast<ConstantSDNode>(ARMcc)->getZExtValue(); 4027 4028 switch (CC) { 4029 default: llvm_unreachable("CMPZ must be either NE or EQ!"); 4030 case ARMCC::NE: 4031 CC = ARMCC::MI; 4032 break; 4033 case ARMCC::EQ: 4034 CC = ARMCC::PL; 4035 break; 4036 } 4037 SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32); 4038 SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc, 4039 N->getOperand(3), N->getOperand(4)}; 4040 CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops); 4041 } 4042 4043 } 4044 // Other cases are autogenerated. 4045 break; 4046 } 4047 4048 case ARMISD::VZIP: { 4049 unsigned Opc = 0; 4050 EVT VT = N->getValueType(0); 4051 switch (VT.getSimpleVT().SimpleTy) { 4052 default: return; 4053 case MVT::v8i8: Opc = ARM::VZIPd8; break; 4054 case MVT::v4f16: 4055 case MVT::v4i16: Opc = ARM::VZIPd16; break; 4056 case MVT::v2f32: 4057 // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 
4058 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4059 case MVT::v16i8: Opc = ARM::VZIPq8; break; 4060 case MVT::v8f16: 4061 case MVT::v8i16: Opc = ARM::VZIPq16; break; 4062 case MVT::v4f32: 4063 case MVT::v4i32: Opc = ARM::VZIPq32; break; 4064 } 4065 SDValue Pred = getAL(CurDAG, dl); 4066 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4067 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4068 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4069 return; 4070 } 4071 case ARMISD::VUZP: { 4072 unsigned Opc = 0; 4073 EVT VT = N->getValueType(0); 4074 switch (VT.getSimpleVT().SimpleTy) { 4075 default: return; 4076 case MVT::v8i8: Opc = ARM::VUZPd8; break; 4077 case MVT::v4f16: 4078 case MVT::v4i16: Opc = ARM::VUZPd16; break; 4079 case MVT::v2f32: 4080 // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm. 4081 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4082 case MVT::v16i8: Opc = ARM::VUZPq8; break; 4083 case MVT::v8f16: 4084 case MVT::v8i16: Opc = ARM::VUZPq16; break; 4085 case MVT::v4f32: 4086 case MVT::v4i32: Opc = ARM::VUZPq32; break; 4087 } 4088 SDValue Pred = getAL(CurDAG, dl); 4089 SDValue PredReg = CurDAG->getRegister(0, MVT::i32); 4090 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4091 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4092 return; 4093 } 4094 case ARMISD::VTRN: { 4095 unsigned Opc = 0; 4096 EVT VT = N->getValueType(0); 4097 switch (VT.getSimpleVT().SimpleTy) { 4098 default: return; 4099 case MVT::v8i8: Opc = ARM::VTRNd8; break; 4100 case MVT::v4f16: 4101 case MVT::v4i16: Opc = ARM::VTRNd16; break; 4102 case MVT::v2f32: 4103 case MVT::v2i32: Opc = ARM::VTRNd32; break; 4104 case MVT::v16i8: Opc = ARM::VTRNq8; break; 4105 case MVT::v8f16: 4106 case MVT::v8i16: Opc = ARM::VTRNq16; break; 4107 case MVT::v4f32: 4108 case MVT::v4i32: Opc = ARM::VTRNq32; break; 4109 } 4110 SDValue Pred = getAL(CurDAG, dl); 4111 SDValue PredReg = CurDAG->getRegister(0, 
MVT::i32); 4112 SDValue Ops[] = { N->getOperand(0), N->getOperand(1), Pred, PredReg }; 4113 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops)); 4114 return; 4115 } 4116 case ARMISD::BUILD_VECTOR: { 4117 EVT VecVT = N->getValueType(0); 4118 EVT EltVT = VecVT.getVectorElementType(); 4119 unsigned NumElts = VecVT.getVectorNumElements(); 4120 if (EltVT == MVT::f64) { 4121 assert(NumElts == 2 && "unexpected type for BUILD_VECTOR"); 4122 ReplaceNode( 4123 N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4124 return; 4125 } 4126 assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR"); 4127 if (NumElts == 2) { 4128 ReplaceNode( 4129 N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1))); 4130 return; 4131 } 4132 assert(NumElts == 4 && "unexpected type for BUILD_VECTOR"); 4133 ReplaceNode(N, 4134 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1), 4135 N->getOperand(2), N->getOperand(3))); 4136 return; 4137 } 4138 4139 case ARMISD::VLD1DUP: { 4140 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16, 4141 ARM::VLD1DUPd32 }; 4142 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16, 4143 ARM::VLD1DUPq32 }; 4144 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes); 4145 return; 4146 } 4147 4148 case ARMISD::VLD2DUP: { 4149 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4150 ARM::VLD2DUPd32 }; 4151 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes); 4152 return; 4153 } 4154 4155 case ARMISD::VLD3DUP: { 4156 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo, 4157 ARM::VLD3DUPd16Pseudo, 4158 ARM::VLD3DUPd32Pseudo }; 4159 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes); 4160 return; 4161 } 4162 4163 case ARMISD::VLD4DUP: { 4164 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo, 4165 ARM::VLD4DUPd16Pseudo, 4166 ARM::VLD4DUPd32Pseudo }; 4167 SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes); 4168 return; 
4169 } 4170 4171 case ARMISD::VLD1DUP_UPD: { 4172 static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed, 4173 ARM::VLD1DUPd16wb_fixed, 4174 ARM::VLD1DUPd32wb_fixed }; 4175 static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed, 4176 ARM::VLD1DUPq16wb_fixed, 4177 ARM::VLD1DUPq32wb_fixed }; 4178 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes); 4179 return; 4180 } 4181 4182 case ARMISD::VLD2DUP_UPD: { 4183 static const uint16_t Opcodes[] = { ARM::VLD2DUPd8wb_fixed, 4184 ARM::VLD2DUPd16wb_fixed, 4185 ARM::VLD2DUPd32wb_fixed }; 4186 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, Opcodes); 4187 return; 4188 } 4189 4190 case ARMISD::VLD3DUP_UPD: { 4191 static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo_UPD, 4192 ARM::VLD3DUPd16Pseudo_UPD, 4193 ARM::VLD3DUPd32Pseudo_UPD }; 4194 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, Opcodes); 4195 return; 4196 } 4197 4198 case ARMISD::VLD4DUP_UPD: { 4199 static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo_UPD, 4200 ARM::VLD4DUPd16Pseudo_UPD, 4201 ARM::VLD4DUPd32Pseudo_UPD }; 4202 SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, Opcodes); 4203 return; 4204 } 4205 4206 case ARMISD::VLD1_UPD: { 4207 static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed, 4208 ARM::VLD1d16wb_fixed, 4209 ARM::VLD1d32wb_fixed, 4210 ARM::VLD1d64wb_fixed }; 4211 static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed, 4212 ARM::VLD1q16wb_fixed, 4213 ARM::VLD1q32wb_fixed, 4214 ARM::VLD1q64wb_fixed }; 4215 SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr); 4216 return; 4217 } 4218 4219 case ARMISD::VLD2_UPD: { 4220 if (Subtarget->hasNEON()) { 4221 static const uint16_t DOpcodes[] = { 4222 ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed, 4223 ARM::VLD1q64wb_fixed}; 4224 static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed, 4225 ARM::VLD2q16PseudoWB_fixed, 4226 ARM::VLD2q32PseudoWB_fixed}; 4227 SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr); 4228 } else { 4229 static const 
uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, 4230 ARM::MVE_VLD21_8_wb}; 4231 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4232 ARM::MVE_VLD21_16_wb}; 4233 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4234 ARM::MVE_VLD21_32_wb}; 4235 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4236 SelectMVE_VLD(N, 2, Opcodes, true); 4237 } 4238 return; 4239 } 4240 4241 case ARMISD::VLD3_UPD: { 4242 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD, 4243 ARM::VLD3d16Pseudo_UPD, 4244 ARM::VLD3d32Pseudo_UPD, 4245 ARM::VLD1d64TPseudoWB_fixed}; 4246 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4247 ARM::VLD3q16Pseudo_UPD, 4248 ARM::VLD3q32Pseudo_UPD }; 4249 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD, 4250 ARM::VLD3q16oddPseudo_UPD, 4251 ARM::VLD3q32oddPseudo_UPD }; 4252 SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4253 return; 4254 } 4255 4256 case ARMISD::VLD4_UPD: { 4257 if (Subtarget->hasNEON()) { 4258 static const uint16_t DOpcodes[] = { 4259 ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD, 4260 ARM::VLD1d64QPseudoWB_fixed}; 4261 static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD, 4262 ARM::VLD4q16Pseudo_UPD, 4263 ARM::VLD4q32Pseudo_UPD}; 4264 static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD, 4265 ARM::VLD4q16oddPseudo_UPD, 4266 ARM::VLD4q32oddPseudo_UPD}; 4267 SelectVLD(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4268 } else { 4269 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4270 ARM::MVE_VLD42_8, 4271 ARM::MVE_VLD43_8_wb}; 4272 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4273 ARM::MVE_VLD42_16, 4274 ARM::MVE_VLD43_16_wb}; 4275 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4276 ARM::MVE_VLD42_32, 4277 ARM::MVE_VLD43_32_wb}; 4278 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4279 SelectMVE_VLD(N, 4, Opcodes, true); 4280 } 
4281 return; 4282 } 4283 4284 case ARMISD::VLD2LN_UPD: { 4285 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD, 4286 ARM::VLD2LNd16Pseudo_UPD, 4287 ARM::VLD2LNd32Pseudo_UPD }; 4288 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD, 4289 ARM::VLD2LNq32Pseudo_UPD }; 4290 SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes); 4291 return; 4292 } 4293 4294 case ARMISD::VLD3LN_UPD: { 4295 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD, 4296 ARM::VLD3LNd16Pseudo_UPD, 4297 ARM::VLD3LNd32Pseudo_UPD }; 4298 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD, 4299 ARM::VLD3LNq32Pseudo_UPD }; 4300 SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes); 4301 return; 4302 } 4303 4304 case ARMISD::VLD4LN_UPD: { 4305 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD, 4306 ARM::VLD4LNd16Pseudo_UPD, 4307 ARM::VLD4LNd32Pseudo_UPD }; 4308 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD, 4309 ARM::VLD4LNq32Pseudo_UPD }; 4310 SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes); 4311 return; 4312 } 4313 4314 case ARMISD::VST1_UPD: { 4315 static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed, 4316 ARM::VST1d16wb_fixed, 4317 ARM::VST1d32wb_fixed, 4318 ARM::VST1d64wb_fixed }; 4319 static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed, 4320 ARM::VST1q16wb_fixed, 4321 ARM::VST1q32wb_fixed, 4322 ARM::VST1q64wb_fixed }; 4323 SelectVST(N, true, 1, DOpcodes, QOpcodes, nullptr); 4324 return; 4325 } 4326 4327 case ARMISD::VST2_UPD: { 4328 if (Subtarget->hasNEON()) { 4329 static const uint16_t DOpcodes[] = { 4330 ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed, 4331 ARM::VST1q64wb_fixed}; 4332 static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed, 4333 ARM::VST2q16PseudoWB_fixed, 4334 ARM::VST2q32PseudoWB_fixed}; 4335 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4336 return; 4337 } 4338 break; 4339 } 4340 4341 case ARMISD::VST3_UPD: { 4342 static const uint16_t DOpcodes[] = { 
ARM::VST3d8Pseudo_UPD, 4343 ARM::VST3d16Pseudo_UPD, 4344 ARM::VST3d32Pseudo_UPD, 4345 ARM::VST1d64TPseudoWB_fixed}; 4346 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4347 ARM::VST3q16Pseudo_UPD, 4348 ARM::VST3q32Pseudo_UPD }; 4349 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD, 4350 ARM::VST3q16oddPseudo_UPD, 4351 ARM::VST3q32oddPseudo_UPD }; 4352 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4353 return; 4354 } 4355 4356 case ARMISD::VST4_UPD: { 4357 if (Subtarget->hasNEON()) { 4358 static const uint16_t DOpcodes[] = { 4359 ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD, 4360 ARM::VST1d64QPseudoWB_fixed}; 4361 static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD, 4362 ARM::VST4q16Pseudo_UPD, 4363 ARM::VST4q32Pseudo_UPD}; 4364 static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD, 4365 ARM::VST4q16oddPseudo_UPD, 4366 ARM::VST4q32oddPseudo_UPD}; 4367 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4368 return; 4369 } 4370 break; 4371 } 4372 4373 case ARMISD::VST1x2_UPD: { 4374 if (Subtarget->hasNEON()) { 4375 static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed, 4376 ARM::VST1q16wb_fixed, 4377 ARM::VST1q32wb_fixed, 4378 ARM::VST1q64wb_fixed}; 4379 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4380 ARM::VST1d16QPseudoWB_fixed, 4381 ARM::VST1d32QPseudoWB_fixed, 4382 ARM::VST1d64QPseudoWB_fixed }; 4383 SelectVST(N, true, 2, DOpcodes, QOpcodes, nullptr); 4384 return; 4385 } 4386 break; 4387 } 4388 4389 case ARMISD::VST1x3_UPD: { 4390 if (Subtarget->hasNEON()) { 4391 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed, 4392 ARM::VST1d16TPseudoWB_fixed, 4393 ARM::VST1d32TPseudoWB_fixed, 4394 ARM::VST1d64TPseudoWB_fixed }; 4395 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4396 ARM::VST1q16LowTPseudo_UPD, 4397 ARM::VST1q32LowTPseudo_UPD, 4398 ARM::VST1q64LowTPseudo_UPD }; 4399 static const uint16_t QOpcodes1[] = { 
ARM::VST1q8HighTPseudo_UPD, 4400 ARM::VST1q16HighTPseudo_UPD, 4401 ARM::VST1q32HighTPseudo_UPD, 4402 ARM::VST1q64HighTPseudo_UPD }; 4403 SelectVST(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1); 4404 return; 4405 } 4406 break; 4407 } 4408 4409 case ARMISD::VST1x4_UPD: { 4410 if (Subtarget->hasNEON()) { 4411 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed, 4412 ARM::VST1d16QPseudoWB_fixed, 4413 ARM::VST1d32QPseudoWB_fixed, 4414 ARM::VST1d64QPseudoWB_fixed }; 4415 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4416 ARM::VST1q16LowQPseudo_UPD, 4417 ARM::VST1q32LowQPseudo_UPD, 4418 ARM::VST1q64LowQPseudo_UPD }; 4419 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD, 4420 ARM::VST1q16HighQPseudo_UPD, 4421 ARM::VST1q32HighQPseudo_UPD, 4422 ARM::VST1q64HighQPseudo_UPD }; 4423 SelectVST(N, true, 4, DOpcodes, QOpcodes0, QOpcodes1); 4424 return; 4425 } 4426 break; 4427 } 4428 case ARMISD::VST2LN_UPD: { 4429 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD, 4430 ARM::VST2LNd16Pseudo_UPD, 4431 ARM::VST2LNd32Pseudo_UPD }; 4432 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD, 4433 ARM::VST2LNq32Pseudo_UPD }; 4434 SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes); 4435 return; 4436 } 4437 4438 case ARMISD::VST3LN_UPD: { 4439 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD, 4440 ARM::VST3LNd16Pseudo_UPD, 4441 ARM::VST3LNd32Pseudo_UPD }; 4442 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD, 4443 ARM::VST3LNq32Pseudo_UPD }; 4444 SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes); 4445 return; 4446 } 4447 4448 case ARMISD::VST4LN_UPD: { 4449 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD, 4450 ARM::VST4LNd16Pseudo_UPD, 4451 ARM::VST4LNd32Pseudo_UPD }; 4452 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD, 4453 ARM::VST4LNq32Pseudo_UPD }; 4454 SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes); 4455 return; 4456 } 4457 4458 case 
ISD::INTRINSIC_VOID: 4459 case ISD::INTRINSIC_W_CHAIN: { 4460 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 4461 switch (IntNo) { 4462 default: 4463 break; 4464 4465 case Intrinsic::arm_mrrc: 4466 case Intrinsic::arm_mrrc2: { 4467 SDLoc dl(N); 4468 SDValue Chain = N->getOperand(0); 4469 unsigned Opc; 4470 4471 if (Subtarget->isThumb()) 4472 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2); 4473 else 4474 Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2); 4475 4476 SmallVector<SDValue, 5> Ops; 4477 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(), dl)); /* coproc */ 4478 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(), dl)); /* opc */ 4479 Ops.push_back(getI32Imm(cast<ConstantSDNode>(N->getOperand(4))->getZExtValue(), dl)); /* CRm */ 4480 4481 // The mrrc2 instruction in ARM doesn't allow predicates, the top 4 bits of the encoded 4482 // instruction will always be '1111' but it is possible in assembly language to specify 4483 // AL as a predicate to mrrc2 but it doesn't make any difference to the encoded instruction. 4484 if (Opc != ARM::MRRC2) { 4485 Ops.push_back(getAL(CurDAG, dl)); 4486 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4487 } 4488 4489 Ops.push_back(Chain); 4490 4491 // Writes to two registers. 4492 const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other}; 4493 4494 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops)); 4495 return; 4496 } 4497 case Intrinsic::arm_ldaexd: 4498 case Intrinsic::arm_ldrexd: { 4499 SDLoc dl(N); 4500 SDValue Chain = N->getOperand(0); 4501 SDValue MemAddr = N->getOperand(2); 4502 bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps(); 4503 4504 bool IsAcquire = IntNo == Intrinsic::arm_ldaexd; 4505 unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD) 4506 : (IsAcquire ? 
ARM::LDAEXD : ARM::LDREXD); 4507 4508 // arm_ldrexd returns a i64 value in {i32, i32} 4509 std::vector<EVT> ResTys; 4510 if (isThumb) { 4511 ResTys.push_back(MVT::i32); 4512 ResTys.push_back(MVT::i32); 4513 } else 4514 ResTys.push_back(MVT::Untyped); 4515 ResTys.push_back(MVT::Other); 4516 4517 // Place arguments in the right order. 4518 SDValue Ops[] = {MemAddr, getAL(CurDAG, dl), 4519 CurDAG->getRegister(0, MVT::i32), Chain}; 4520 SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4521 // Transfer memoperands. 4522 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4523 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 4524 4525 // Remap uses. 4526 SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1); 4527 if (!SDValue(N, 0).use_empty()) { 4528 SDValue Result; 4529 if (isThumb) 4530 Result = SDValue(Ld, 0); 4531 else { 4532 SDValue SubRegIdx = 4533 CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32); 4534 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4535 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4536 Result = SDValue(ResNode,0); 4537 } 4538 ReplaceUses(SDValue(N, 0), Result); 4539 } 4540 if (!SDValue(N, 1).use_empty()) { 4541 SDValue Result; 4542 if (isThumb) 4543 Result = SDValue(Ld, 1); 4544 else { 4545 SDValue SubRegIdx = 4546 CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32); 4547 SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 4548 dl, MVT::i32, SDValue(Ld, 0), SubRegIdx); 4549 Result = SDValue(ResNode,0); 4550 } 4551 ReplaceUses(SDValue(N, 1), Result); 4552 } 4553 ReplaceUses(SDValue(N, 2), OutChain); 4554 CurDAG->RemoveDeadNode(N); 4555 return; 4556 } 4557 case Intrinsic::arm_stlexd: 4558 case Intrinsic::arm_strexd: { 4559 SDLoc dl(N); 4560 SDValue Chain = N->getOperand(0); 4561 SDValue Val0 = N->getOperand(2); 4562 SDValue Val1 = N->getOperand(3); 4563 SDValue MemAddr = N->getOperand(4); 4564 4565 // Store exclusive double return a i32 value which is the 
return status 4566 // of the issued store. 4567 const EVT ResTys[] = {MVT::i32, MVT::Other}; 4568 4569 bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); 4570 // Place arguments in the right order. 4571 SmallVector<SDValue, 7> Ops; 4572 if (isThumb) { 4573 Ops.push_back(Val0); 4574 Ops.push_back(Val1); 4575 } else 4576 // arm_strexd uses GPRPair. 4577 Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0)); 4578 Ops.push_back(MemAddr); 4579 Ops.push_back(getAL(CurDAG, dl)); 4580 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 4581 Ops.push_back(Chain); 4582 4583 bool IsRelease = IntNo == Intrinsic::arm_stlexd; 4584 unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD) 4585 : (IsRelease ? ARM::STLEXD : ARM::STREXD); 4586 4587 SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops); 4588 // Transfer memoperands. 4589 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 4590 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 4591 4592 ReplaceNode(N, St); 4593 return; 4594 } 4595 4596 case Intrinsic::arm_neon_vld1: { 4597 static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16, 4598 ARM::VLD1d32, ARM::VLD1d64 }; 4599 static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4600 ARM::VLD1q32, ARM::VLD1q64}; 4601 SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr); 4602 return; 4603 } 4604 4605 case Intrinsic::arm_neon_vld1x2: { 4606 static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16, 4607 ARM::VLD1q32, ARM::VLD1q64 }; 4608 static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo, 4609 ARM::VLD1d16QPseudo, 4610 ARM::VLD1d32QPseudo, 4611 ARM::VLD1d64QPseudo }; 4612 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4613 return; 4614 } 4615 4616 case Intrinsic::arm_neon_vld1x3: { 4617 static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo, 4618 ARM::VLD1d16TPseudo, 4619 ARM::VLD1d32TPseudo, 4620 ARM::VLD1d64TPseudo }; 4621 static const uint16_t QOpcodes0[] = { 
ARM::VLD1q8LowTPseudo_UPD, 4622 ARM::VLD1q16LowTPseudo_UPD, 4623 ARM::VLD1q32LowTPseudo_UPD, 4624 ARM::VLD1q64LowTPseudo_UPD }; 4625 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo, 4626 ARM::VLD1q16HighTPseudo, 4627 ARM::VLD1q32HighTPseudo, 4628 ARM::VLD1q64HighTPseudo }; 4629 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4630 return; 4631 } 4632 4633 case Intrinsic::arm_neon_vld1x4: { 4634 static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo, 4635 ARM::VLD1d16QPseudo, 4636 ARM::VLD1d32QPseudo, 4637 ARM::VLD1d64QPseudo }; 4638 static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD, 4639 ARM::VLD1q16LowQPseudo_UPD, 4640 ARM::VLD1q32LowQPseudo_UPD, 4641 ARM::VLD1q64LowQPseudo_UPD }; 4642 static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo, 4643 ARM::VLD1q16HighQPseudo, 4644 ARM::VLD1q32HighQPseudo, 4645 ARM::VLD1q64HighQPseudo }; 4646 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4647 return; 4648 } 4649 4650 case Intrinsic::arm_neon_vld2: { 4651 static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16, 4652 ARM::VLD2d32, ARM::VLD1q64 }; 4653 static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo, 4654 ARM::VLD2q32Pseudo }; 4655 SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr); 4656 return; 4657 } 4658 4659 case Intrinsic::arm_neon_vld3: { 4660 static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo, 4661 ARM::VLD3d16Pseudo, 4662 ARM::VLD3d32Pseudo, 4663 ARM::VLD1d64TPseudo }; 4664 static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD, 4665 ARM::VLD3q16Pseudo_UPD, 4666 ARM::VLD3q32Pseudo_UPD }; 4667 static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo, 4668 ARM::VLD3q16oddPseudo, 4669 ARM::VLD3q32oddPseudo }; 4670 SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4671 return; 4672 } 4673 4674 case Intrinsic::arm_neon_vld4: { 4675 static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo, 4676 ARM::VLD4d16Pseudo, 4677 ARM::VLD4d32Pseudo, 4678 ARM::VLD1d64QPseudo }; 4679 
static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD, 4680 ARM::VLD4q16Pseudo_UPD, 4681 ARM::VLD4q32Pseudo_UPD }; 4682 static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo, 4683 ARM::VLD4q16oddPseudo, 4684 ARM::VLD4q32oddPseudo }; 4685 SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4686 return; 4687 } 4688 4689 case Intrinsic::arm_neon_vld2dup: { 4690 static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16, 4691 ARM::VLD2DUPd32, ARM::VLD1q64 }; 4692 static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo, 4693 ARM::VLD2DUPq16EvenPseudo, 4694 ARM::VLD2DUPq32EvenPseudo }; 4695 static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo, 4696 ARM::VLD2DUPq16OddPseudo, 4697 ARM::VLD2DUPq32OddPseudo }; 4698 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2, 4699 DOpcodes, QOpcodes0, QOpcodes1); 4700 return; 4701 } 4702 4703 case Intrinsic::arm_neon_vld3dup: { 4704 static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo, 4705 ARM::VLD3DUPd16Pseudo, 4706 ARM::VLD3DUPd32Pseudo, 4707 ARM::VLD1d64TPseudo }; 4708 static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo, 4709 ARM::VLD3DUPq16EvenPseudo, 4710 ARM::VLD3DUPq32EvenPseudo }; 4711 static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo, 4712 ARM::VLD3DUPq16OddPseudo, 4713 ARM::VLD3DUPq32OddPseudo }; 4714 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3, 4715 DOpcodes, QOpcodes0, QOpcodes1); 4716 return; 4717 } 4718 4719 case Intrinsic::arm_neon_vld4dup: { 4720 static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo, 4721 ARM::VLD4DUPd16Pseudo, 4722 ARM::VLD4DUPd32Pseudo, 4723 ARM::VLD1d64QPseudo }; 4724 static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo, 4725 ARM::VLD4DUPq16EvenPseudo, 4726 ARM::VLD4DUPq32EvenPseudo }; 4727 static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo, 4728 ARM::VLD4DUPq16OddPseudo, 4729 ARM::VLD4DUPq32OddPseudo }; 4730 SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4, 4731 DOpcodes, QOpcodes0, QOpcodes1); 4732 
return; 4733 } 4734 4735 case Intrinsic::arm_neon_vld2lane: { 4736 static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo, 4737 ARM::VLD2LNd16Pseudo, 4738 ARM::VLD2LNd32Pseudo }; 4739 static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo, 4740 ARM::VLD2LNq32Pseudo }; 4741 SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes); 4742 return; 4743 } 4744 4745 case Intrinsic::arm_neon_vld3lane: { 4746 static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo, 4747 ARM::VLD3LNd16Pseudo, 4748 ARM::VLD3LNd32Pseudo }; 4749 static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo, 4750 ARM::VLD3LNq32Pseudo }; 4751 SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes); 4752 return; 4753 } 4754 4755 case Intrinsic::arm_neon_vld4lane: { 4756 static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo, 4757 ARM::VLD4LNd16Pseudo, 4758 ARM::VLD4LNd32Pseudo }; 4759 static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo, 4760 ARM::VLD4LNq32Pseudo }; 4761 SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes); 4762 return; 4763 } 4764 4765 case Intrinsic::arm_neon_vst1: { 4766 static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16, 4767 ARM::VST1d32, ARM::VST1d64 }; 4768 static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4769 ARM::VST1q32, ARM::VST1q64 }; 4770 SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr); 4771 return; 4772 } 4773 4774 case Intrinsic::arm_neon_vst1x2: { 4775 static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16, 4776 ARM::VST1q32, ARM::VST1q64 }; 4777 static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo, 4778 ARM::VST1d16QPseudo, 4779 ARM::VST1d32QPseudo, 4780 ARM::VST1d64QPseudo }; 4781 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4782 return; 4783 } 4784 4785 case Intrinsic::arm_neon_vst1x3: { 4786 static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo, 4787 ARM::VST1d16TPseudo, 4788 ARM::VST1d32TPseudo, 4789 ARM::VST1d64TPseudo }; 4790 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD, 4791 
ARM::VST1q16LowTPseudo_UPD, 4792 ARM::VST1q32LowTPseudo_UPD, 4793 ARM::VST1q64LowTPseudo_UPD }; 4794 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo, 4795 ARM::VST1q16HighTPseudo, 4796 ARM::VST1q32HighTPseudo, 4797 ARM::VST1q64HighTPseudo }; 4798 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4799 return; 4800 } 4801 4802 case Intrinsic::arm_neon_vst1x4: { 4803 static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo, 4804 ARM::VST1d16QPseudo, 4805 ARM::VST1d32QPseudo, 4806 ARM::VST1d64QPseudo }; 4807 static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD, 4808 ARM::VST1q16LowQPseudo_UPD, 4809 ARM::VST1q32LowQPseudo_UPD, 4810 ARM::VST1q64LowQPseudo_UPD }; 4811 static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo, 4812 ARM::VST1q16HighQPseudo, 4813 ARM::VST1q32HighQPseudo, 4814 ARM::VST1q64HighQPseudo }; 4815 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4816 return; 4817 } 4818 4819 case Intrinsic::arm_neon_vst2: { 4820 static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16, 4821 ARM::VST2d32, ARM::VST1q64 }; 4822 static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo, 4823 ARM::VST2q32Pseudo }; 4824 SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr); 4825 return; 4826 } 4827 4828 case Intrinsic::arm_neon_vst3: { 4829 static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo, 4830 ARM::VST3d16Pseudo, 4831 ARM::VST3d32Pseudo, 4832 ARM::VST1d64TPseudo }; 4833 static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD, 4834 ARM::VST3q16Pseudo_UPD, 4835 ARM::VST3q32Pseudo_UPD }; 4836 static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo, 4837 ARM::VST3q16oddPseudo, 4838 ARM::VST3q32oddPseudo }; 4839 SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1); 4840 return; 4841 } 4842 4843 case Intrinsic::arm_neon_vst4: { 4844 static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo, 4845 ARM::VST4d16Pseudo, 4846 ARM::VST4d32Pseudo, 4847 ARM::VST1d64QPseudo }; 4848 static const uint16_t QOpcodes0[] 
= { ARM::VST4q8Pseudo_UPD, 4849 ARM::VST4q16Pseudo_UPD, 4850 ARM::VST4q32Pseudo_UPD }; 4851 static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo, 4852 ARM::VST4q16oddPseudo, 4853 ARM::VST4q32oddPseudo }; 4854 SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1); 4855 return; 4856 } 4857 4858 case Intrinsic::arm_neon_vst2lane: { 4859 static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo, 4860 ARM::VST2LNd16Pseudo, 4861 ARM::VST2LNd32Pseudo }; 4862 static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo, 4863 ARM::VST2LNq32Pseudo }; 4864 SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes); 4865 return; 4866 } 4867 4868 case Intrinsic::arm_neon_vst3lane: { 4869 static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo, 4870 ARM::VST3LNd16Pseudo, 4871 ARM::VST3LNd32Pseudo }; 4872 static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo, 4873 ARM::VST3LNq32Pseudo }; 4874 SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes); 4875 return; 4876 } 4877 4878 case Intrinsic::arm_neon_vst4lane: { 4879 static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo, 4880 ARM::VST4LNd16Pseudo, 4881 ARM::VST4LNd32Pseudo }; 4882 static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo, 4883 ARM::VST4LNq32Pseudo }; 4884 SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes); 4885 return; 4886 } 4887 4888 case Intrinsic::arm_mve_vldr_gather_base_wb: 4889 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: { 4890 static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre, 4891 ARM::MVE_VLDRDU64_qi_pre}; 4892 SelectMVE_WB(N, Opcodes, 4893 IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated); 4894 return; 4895 } 4896 4897 case Intrinsic::arm_mve_vld2q: { 4898 static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8}; 4899 static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16, 4900 ARM::MVE_VLD21_16}; 4901 static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32, 4902 ARM::MVE_VLD21_32}; 4903 static const uint16_t *const Opcodes[] = {Opcodes8, 
Opcodes16, Opcodes32}; 4904 SelectMVE_VLD(N, 2, Opcodes, false); 4905 return; 4906 } 4907 4908 case Intrinsic::arm_mve_vld4q: { 4909 static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8, 4910 ARM::MVE_VLD42_8, ARM::MVE_VLD43_8}; 4911 static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16, 4912 ARM::MVE_VLD42_16, 4913 ARM::MVE_VLD43_16}; 4914 static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32, 4915 ARM::MVE_VLD42_32, 4916 ARM::MVE_VLD43_32}; 4917 static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32}; 4918 SelectMVE_VLD(N, 4, Opcodes, false); 4919 return; 4920 } 4921 } 4922 break; 4923 } 4924 4925 case ISD::INTRINSIC_WO_CHAIN: { 4926 unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue(); 4927 switch (IntNo) { 4928 default: 4929 break; 4930 4931 // Scalar f32 -> bf16 4932 case Intrinsic::arm_neon_vcvtbfp2bf: { 4933 SDLoc dl(N); 4934 const SDValue &Src = N->getOperand(1); 4935 llvm::EVT DestTy = N->getValueType(0); 4936 SDValue Pred = getAL(CurDAG, dl); 4937 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 4938 SDValue Ops[] = { Src, Src, Pred, Reg0 }; 4939 CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops); 4940 return; 4941 } 4942 4943 // Vector v4f32 -> v4bf16 4944 case Intrinsic::arm_neon_vcvtfp2bf: { 4945 SDLoc dl(N); 4946 const SDValue &Src = N->getOperand(1); 4947 SDValue Pred = getAL(CurDAG, dl); 4948 SDValue Reg0 = CurDAG->getRegister(0, MVT::i32); 4949 SDValue Ops[] = { Src, Pred, Reg0 }; 4950 CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops); 4951 return; 4952 } 4953 4954 case Intrinsic::arm_mve_urshrl: 4955 SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false); 4956 return; 4957 case Intrinsic::arm_mve_uqshll: 4958 SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false); 4959 return; 4960 case Intrinsic::arm_mve_srshrl: 4961 SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false); 4962 return; 4963 case Intrinsic::arm_mve_sqshll: 4964 SelectMVE_LongShift(N, 
ARM::MVE_SQSHLL, true, false); 4965 return; 4966 case Intrinsic::arm_mve_uqrshll: 4967 SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true); 4968 return; 4969 case Intrinsic::arm_mve_sqrshrl: 4970 SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true); 4971 return; 4972 4973 case Intrinsic::arm_mve_vadc: 4974 case Intrinsic::arm_mve_vadc_predicated: 4975 SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true, 4976 IntNo == Intrinsic::arm_mve_vadc_predicated); 4977 return; 4978 case Intrinsic::arm_mve_vsbc: 4979 case Intrinsic::arm_mve_vsbc_predicated: 4980 SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, true, 4981 IntNo == Intrinsic::arm_mve_vsbc_predicated); 4982 return; 4983 case Intrinsic::arm_mve_vshlc: 4984 case Intrinsic::arm_mve_vshlc_predicated: 4985 SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated); 4986 return; 4987 4988 case Intrinsic::arm_mve_vmlldava: 4989 case Intrinsic::arm_mve_vmlldava_predicated: { 4990 static const uint16_t OpcodesU[] = { 4991 ARM::MVE_VMLALDAVu16, ARM::MVE_VMLALDAVu32, 4992 ARM::MVE_VMLALDAVau16, ARM::MVE_VMLALDAVau32, 4993 }; 4994 static const uint16_t OpcodesS[] = { 4995 ARM::MVE_VMLALDAVs16, ARM::MVE_VMLALDAVs32, 4996 ARM::MVE_VMLALDAVas16, ARM::MVE_VMLALDAVas32, 4997 ARM::MVE_VMLALDAVxs16, ARM::MVE_VMLALDAVxs32, 4998 ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32, 4999 ARM::MVE_VMLSLDAVs16, ARM::MVE_VMLSLDAVs32, 5000 ARM::MVE_VMLSLDAVas16, ARM::MVE_VMLSLDAVas32, 5001 ARM::MVE_VMLSLDAVxs16, ARM::MVE_VMLSLDAVxs32, 5002 ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32, 5003 }; 5004 SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated, 5005 OpcodesS, OpcodesU); 5006 return; 5007 } 5008 5009 case Intrinsic::arm_mve_vrmlldavha: 5010 case Intrinsic::arm_mve_vrmlldavha_predicated: { 5011 static const uint16_t OpcodesU[] = { 5012 ARM::MVE_VRMLALDAVHu32, ARM::MVE_VRMLALDAVHau32, 5013 }; 5014 static const uint16_t OpcodesS[] = { 5015 ARM::MVE_VRMLALDAVHs32, ARM::MVE_VRMLALDAVHas32, 5016 
ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32, 5017 ARM::MVE_VRMLSLDAVHs32, ARM::MVE_VRMLSLDAVHas32, 5018 ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32, 5019 }; 5020 SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated, 5021 OpcodesS, OpcodesU); 5022 return; 5023 } 5024 5025 case Intrinsic::arm_mve_vidup: 5026 case Intrinsic::arm_mve_vidup_predicated: { 5027 static const uint16_t Opcodes[] = { 5028 ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32, 5029 }; 5030 SelectMVE_VxDUP(N, Opcodes, false, 5031 IntNo == Intrinsic::arm_mve_vidup_predicated); 5032 return; 5033 } 5034 5035 case Intrinsic::arm_mve_vddup: 5036 case Intrinsic::arm_mve_vddup_predicated: { 5037 static const uint16_t Opcodes[] = { 5038 ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32, 5039 }; 5040 SelectMVE_VxDUP(N, Opcodes, false, 5041 IntNo == Intrinsic::arm_mve_vddup_predicated); 5042 return; 5043 } 5044 5045 case Intrinsic::arm_mve_viwdup: 5046 case Intrinsic::arm_mve_viwdup_predicated: { 5047 static const uint16_t Opcodes[] = { 5048 ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32, 5049 }; 5050 SelectMVE_VxDUP(N, Opcodes, true, 5051 IntNo == Intrinsic::arm_mve_viwdup_predicated); 5052 return; 5053 } 5054 5055 case Intrinsic::arm_mve_vdwdup: 5056 case Intrinsic::arm_mve_vdwdup_predicated: { 5057 static const uint16_t Opcodes[] = { 5058 ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32, 5059 }; 5060 SelectMVE_VxDUP(N, Opcodes, true, 5061 IntNo == Intrinsic::arm_mve_vdwdup_predicated); 5062 return; 5063 } 5064 5065 case Intrinsic::arm_cde_cx1d: 5066 case Intrinsic::arm_cde_cx1da: 5067 case Intrinsic::arm_cde_cx2d: 5068 case Intrinsic::arm_cde_cx2da: 5069 case Intrinsic::arm_cde_cx3d: 5070 case Intrinsic::arm_cde_cx3da: { 5071 bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da || 5072 IntNo == Intrinsic::arm_cde_cx2da || 5073 IntNo == Intrinsic::arm_cde_cx3da; 5074 size_t NumExtraOps; 5075 uint16_t Opcode; 5076 switch (IntNo) { 5077 case 
Intrinsic::arm_cde_cx1d: 5078 case Intrinsic::arm_cde_cx1da: 5079 NumExtraOps = 0; 5080 Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D; 5081 break; 5082 case Intrinsic::arm_cde_cx2d: 5083 case Intrinsic::arm_cde_cx2da: 5084 NumExtraOps = 1; 5085 Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D; 5086 break; 5087 case Intrinsic::arm_cde_cx3d: 5088 case Intrinsic::arm_cde_cx3da: 5089 NumExtraOps = 2; 5090 Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D; 5091 break; 5092 default: 5093 llvm_unreachable("Unexpected opcode"); 5094 } 5095 SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum); 5096 return; 5097 } 5098 } 5099 break; 5100 } 5101 5102 case ISD::ATOMIC_CMP_SWAP: 5103 SelectCMP_SWAP(N); 5104 return; 5105 } 5106 5107 SelectCode(N); 5108 } 5109 5110 // Inspect a register string of the form 5111 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or 5112 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string 5113 // and obtain the integer operands from them, adding these operands to the 5114 // provided vector. 5115 static void getIntOperandsFromRegisterString(StringRef RegString, 5116 SelectionDAG *CurDAG, 5117 const SDLoc &DL, 5118 std::vector<SDValue> &Ops) { 5119 SmallVector<StringRef, 5> Fields; 5120 RegString.split(Fields, ':'); 5121 5122 if (Fields.size() > 1) { 5123 bool AllIntFields = true; 5124 5125 for (StringRef Field : Fields) { 5126 // Need to trim out leading 'cp' characters and get the integer field. 5127 unsigned IntField; 5128 AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField); 5129 Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32)); 5130 } 5131 5132 assert(AllIntFields && 5133 "Unexpected non-integer value in special register string."); 5134 } 5135 } 5136 5137 // Maps a Banked Register string to its mask value. The mask value returned is 5138 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register 5139 // mask operand, which expresses which register is to be used, e.g. 
r8, and in 5140 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string 5141 // was invalid. 5142 static inline int getBankedRegisterMask(StringRef RegString) { 5143 auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower()); 5144 if (!TheReg) 5145 return -1; 5146 return TheReg->Encoding; 5147 } 5148 5149 // The flags here are common to those allowed for apsr in the A class cores and 5150 // those allowed for the special registers in the M class cores. Returns a 5151 // value representing which flags were present, -1 if invalid. 5152 static inline int getMClassFlagsMask(StringRef Flags) { 5153 return StringSwitch<int>(Flags) 5154 .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is 5155 // correct when flags are not permitted 5156 .Case("g", 0x1) 5157 .Case("nzcvq", 0x2) 5158 .Case("nzcvqg", 0x3) 5159 .Default(-1); 5160 } 5161 5162 // Maps MClass special registers string to its value for use in the 5163 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand. 5164 // Returns -1 to signify that the string was invalid. 5165 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) { 5166 auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg); 5167 const FeatureBitset &FeatureBits = Subtarget->getFeatureBits(); 5168 if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits)) 5169 return -1; 5170 return (int)(TheReg->Encoding & 0xFFF); // SYSm value 5171 } 5172 5173 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) { 5174 // The mask operand contains the special register (R Bit) in bit 4, whether 5175 // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and 5176 // bits 3-0 contains the fields to be accessed in the special register, set by 5177 // the flags provided with the register. 5178 int Mask = 0; 5179 if (Reg == "apsr") { 5180 // The flags permitted for apsr are the same flags that are allowed in 5181 // M class registers. 
We get the flag value and then shift the flags into 5182 // the correct place to combine with the mask. 5183 Mask = getMClassFlagsMask(Flags); 5184 if (Mask == -1) 5185 return -1; 5186 return Mask << 2; 5187 } 5188 5189 if (Reg != "cpsr" && Reg != "spsr") { 5190 return -1; 5191 } 5192 5193 // This is the same as if the flags were "fc" 5194 if (Flags.empty() || Flags == "all") 5195 return Mask | 0x9; 5196 5197 // Inspect the supplied flags string and set the bits in the mask for 5198 // the relevant and valid flags allowed for cpsr and spsr. 5199 for (char Flag : Flags) { 5200 int FlagVal; 5201 switch (Flag) { 5202 case 'c': 5203 FlagVal = 0x1; 5204 break; 5205 case 'x': 5206 FlagVal = 0x2; 5207 break; 5208 case 's': 5209 FlagVal = 0x4; 5210 break; 5211 case 'f': 5212 FlagVal = 0x8; 5213 break; 5214 default: 5215 FlagVal = 0; 5216 } 5217 5218 // This avoids allowing strings where the same flag bit appears twice. 5219 if (!FlagVal || (Mask & FlagVal)) 5220 return -1; 5221 Mask |= FlagVal; 5222 } 5223 5224 // If the register is spsr then we need to set the R bit. 5225 if (Reg == "spsr") 5226 Mask |= 0x10; 5227 5228 return Mask; 5229 } 5230 5231 // Lower the read_register intrinsic to ARM specific DAG nodes 5232 // using the supplied metadata string to select the instruction node to use 5233 // and the registers/masks to construct as operands for the node. 
5234 bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){ 5235 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 5236 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 5237 bool IsThumb2 = Subtarget->isThumb2(); 5238 SDLoc DL(N); 5239 5240 std::vector<SDValue> Ops; 5241 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5242 5243 if (!Ops.empty()) { 5244 // If the special register string was constructed of fields (as defined 5245 // in the ACLE) then need to lower to MRC node (32 bit) or 5246 // MRRC node(64 bit), we can make the distinction based on the number of 5247 // operands we have. 5248 unsigned Opcode; 5249 SmallVector<EVT, 3> ResTypes; 5250 if (Ops.size() == 5){ 5251 Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC; 5252 ResTypes.append({ MVT::i32, MVT::Other }); 5253 } else { 5254 assert(Ops.size() == 3 && 5255 "Invalid number of fields in special register string."); 5256 Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC; 5257 ResTypes.append({ MVT::i32, MVT::i32, MVT::Other }); 5258 } 5259 5260 Ops.push_back(getAL(CurDAG, DL)); 5261 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5262 Ops.push_back(N->getOperand(0)); 5263 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops)); 5264 return true; 5265 } 5266 5267 std::string SpecialReg = RegString->getString().lower(); 5268 5269 int BankedReg = getBankedRegisterMask(SpecialReg); 5270 if (BankedReg != -1) { 5271 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), 5272 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5273 N->getOperand(0) }; 5274 ReplaceNode( 5275 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked, 5276 DL, MVT::i32, MVT::Other, Ops)); 5277 return true; 5278 } 5279 5280 // The VFP registers are read by creating SelectionDAG nodes with opcodes 5281 // corresponding to the register that is being read from. So we switch on the 5282 // string to find which opcode we need to use. 
5283 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5284 .Case("fpscr", ARM::VMRS) 5285 .Case("fpexc", ARM::VMRS_FPEXC) 5286 .Case("fpsid", ARM::VMRS_FPSID) 5287 .Case("mvfr0", ARM::VMRS_MVFR0) 5288 .Case("mvfr1", ARM::VMRS_MVFR1) 5289 .Case("mvfr2", ARM::VMRS_MVFR2) 5290 .Case("fpinst", ARM::VMRS_FPINST) 5291 .Case("fpinst2", ARM::VMRS_FPINST2) 5292 .Default(0); 5293 5294 // If an opcode was found then we can lower the read to a VFP instruction. 5295 if (Opcode) { 5296 if (!Subtarget->hasVFP2Base()) 5297 return false; 5298 if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base()) 5299 return false; 5300 5301 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5302 N->getOperand(0) }; 5303 ReplaceNode(N, 5304 CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops)); 5305 return true; 5306 } 5307 5308 // If the target is M Class then need to validate that the register string 5309 // is an acceptable value, so check that a mask can be constructed from the 5310 // string. 5311 if (Subtarget->isMClass()) { 5312 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5313 if (SYSmValue == -1) 5314 return false; 5315 5316 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5317 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5318 N->getOperand(0) }; 5319 ReplaceNode( 5320 N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops)); 5321 return true; 5322 } 5323 5324 // Here we know the target is not M Class so we need to check if it is one 5325 // of the remaining possible values which are apsr, cpsr or spsr. 5326 if (SpecialReg == "apsr" || SpecialReg == "cpsr") { 5327 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5328 N->getOperand(0) }; 5329 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? 
ARM::t2MRS_AR : ARM::MRS, 5330 DL, MVT::i32, MVT::Other, Ops)); 5331 return true; 5332 } 5333 5334 if (SpecialReg == "spsr") { 5335 Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5336 N->getOperand(0) }; 5337 ReplaceNode( 5338 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL, 5339 MVT::i32, MVT::Other, Ops)); 5340 return true; 5341 } 5342 5343 return false; 5344 } 5345 5346 // Lower the write_register intrinsic to ARM specific DAG nodes 5347 // using the supplied metadata string to select the instruction node to use 5348 // and the registers/masks to use in the nodes 5349 bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){ 5350 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 5351 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 5352 bool IsThumb2 = Subtarget->isThumb2(); 5353 SDLoc DL(N); 5354 5355 std::vector<SDValue> Ops; 5356 getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops); 5357 5358 if (!Ops.empty()) { 5359 // If the special register string was constructed of fields (as defined 5360 // in the ACLE) then need to lower to MCR node (32 bit) or 5361 // MCRR node(64 bit), we can make the distinction based on the number of 5362 // operands we have. 5363 unsigned Opcode; 5364 if (Ops.size() == 5) { 5365 Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR; 5366 Ops.insert(Ops.begin()+2, N->getOperand(2)); 5367 } else { 5368 assert(Ops.size() == 3 && 5369 "Invalid number of fields in special register string."); 5370 Opcode = IsThumb2 ? 
ARM::t2MCRR : ARM::MCRR; 5371 SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) }; 5372 Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2); 5373 } 5374 5375 Ops.push_back(getAL(CurDAG, DL)); 5376 Ops.push_back(CurDAG->getRegister(0, MVT::i32)); 5377 Ops.push_back(N->getOperand(0)); 5378 5379 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5380 return true; 5381 } 5382 5383 std::string SpecialReg = RegString->getString().lower(); 5384 int BankedReg = getBankedRegisterMask(SpecialReg); 5385 if (BankedReg != -1) { 5386 Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2), 5387 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5388 N->getOperand(0) }; 5389 ReplaceNode( 5390 N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked, 5391 DL, MVT::Other, Ops)); 5392 return true; 5393 } 5394 5395 // The VFP registers are written to by creating SelectionDAG nodes with 5396 // opcodes corresponding to the register that is being written. So we switch 5397 // on the string to find which opcode we need to use. 5398 unsigned Opcode = StringSwitch<unsigned>(SpecialReg) 5399 .Case("fpscr", ARM::VMSR) 5400 .Case("fpexc", ARM::VMSR_FPEXC) 5401 .Case("fpsid", ARM::VMSR_FPSID) 5402 .Case("fpinst", ARM::VMSR_FPINST) 5403 .Case("fpinst2", ARM::VMSR_FPINST2) 5404 .Default(0); 5405 5406 if (Opcode) { 5407 if (!Subtarget->hasVFP2Base()) 5408 return false; 5409 Ops = { N->getOperand(2), getAL(CurDAG, DL), 5410 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5411 ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops)); 5412 return true; 5413 } 5414 5415 std::pair<StringRef, StringRef> Fields; 5416 Fields = StringRef(SpecialReg).rsplit('_'); 5417 std::string Reg = Fields.first.str(); 5418 StringRef Flags = Fields.second; 5419 5420 // If the target was M Class then need to validate the special register value 5421 // and retrieve the mask for use in the instruction node. 
5422 if (Subtarget->isMClass()) { 5423 int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget); 5424 if (SYSmValue == -1) 5425 return false; 5426 5427 SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32), 5428 N->getOperand(2), getAL(CurDAG, DL), 5429 CurDAG->getRegister(0, MVT::i32), N->getOperand(0) }; 5430 ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops)); 5431 return true; 5432 } 5433 5434 // We then check to see if a valid mask can be constructed for one of the 5435 // register string values permitted for the A and R class cores. These values 5436 // are apsr, spsr and cpsr; these are also valid on older cores. 5437 int Mask = getARClassRegisterMask(Reg, Flags); 5438 if (Mask != -1) { 5439 Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2), 5440 getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32), 5441 N->getOperand(0) }; 5442 ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR, 5443 DL, MVT::Other, Ops)); 5444 return true; 5445 } 5446 5447 return false; 5448 } 5449 5450 bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){ 5451 std::vector<SDValue> AsmNodeOperands; 5452 unsigned Flag, Kind; 5453 bool Changed = false; 5454 unsigned NumOps = N->getNumOperands(); 5455 5456 // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. 5457 // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require 5458 // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs 5459 // respectively. Since there is no constraint to explicitly specify a 5460 // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, 5461 // the 64-bit data may be referred by H, Q, R modifiers, so we still pack 5462 // them into a GPRPair. 5463 5464 SDLoc dl(N); 5465 SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) 5466 : SDValue(nullptr,0); 5467 5468 SmallVector<bool, 8> OpChanged; 5469 // Glue node will be appended late. 
5470 for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { 5471 SDValue op = N->getOperand(i); 5472 AsmNodeOperands.push_back(op); 5473 5474 if (i < InlineAsm::Op_FirstOperand) 5475 continue; 5476 5477 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(i))) { 5478 Flag = C->getZExtValue(); 5479 Kind = InlineAsm::getKind(Flag); 5480 } 5481 else 5482 continue; 5483 5484 // Immediate operands to inline asm in the SelectionDAG are modeled with 5485 // two operands. The first is a constant of value InlineAsm::Kind_Imm, and 5486 // the second is a constant with the value of the immediate. If we get here 5487 // and we have a Kind_Imm, skip the next operand, and continue. 5488 if (Kind == InlineAsm::Kind_Imm) { 5489 SDValue op = N->getOperand(++i); 5490 AsmNodeOperands.push_back(op); 5491 continue; 5492 } 5493 5494 unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); 5495 if (NumRegs) 5496 OpChanged.push_back(false); 5497 5498 unsigned DefIdx = 0; 5499 bool IsTiedToChangedOp = false; 5500 // If it's a use that is tied with a previous def, it has no 5501 // reg class constraint. 5502 if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) 5503 IsTiedToChangedOp = OpChanged[DefIdx]; 5504 5505 // Memory operands to inline asm in the SelectionDAG are modeled with two 5506 // operands: a constant of value InlineAsm::Kind_Mem followed by the input 5507 // operand. If we get here and we have a Kind_Mem, skip the next operand (so 5508 // it doesn't get misinterpreted), and continue. We do this here because 5509 // it's important to update the OpChanged array correctly before moving on. 
5510 if (Kind == InlineAsm::Kind_Mem) { 5511 SDValue op = N->getOperand(++i); 5512 AsmNodeOperands.push_back(op); 5513 continue; 5514 } 5515 5516 if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef 5517 && Kind != InlineAsm::Kind_RegDefEarlyClobber) 5518 continue; 5519 5520 unsigned RC; 5521 bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); 5522 if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) 5523 || NumRegs != 2) 5524 continue; 5525 5526 assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); 5527 SDValue V0 = N->getOperand(i+1); 5528 SDValue V1 = N->getOperand(i+2); 5529 unsigned Reg0 = cast<RegisterSDNode>(V0)->getReg(); 5530 unsigned Reg1 = cast<RegisterSDNode>(V1)->getReg(); 5531 SDValue PairedReg; 5532 MachineRegisterInfo &MRI = MF->getRegInfo(); 5533 5534 if (Kind == InlineAsm::Kind_RegDef || 5535 Kind == InlineAsm::Kind_RegDefEarlyClobber) { 5536 // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to 5537 // the original GPRs. 5538 5539 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5540 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5541 SDValue Chain = SDValue(N,0); 5542 5543 SDNode *GU = N->getGluedUser(); 5544 SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped, 5545 Chain.getValue(1)); 5546 5547 // Extract values from a GPRPair reg and copy to the original GPR reg. 5548 SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32, 5549 RegCopy); 5550 SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32, 5551 RegCopy); 5552 SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0, 5553 RegCopy.getValue(1)); 5554 SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1)); 5555 5556 // Update the original glue user. 
5557 std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1); 5558 Ops.push_back(T1.getValue(1)); 5559 CurDAG->UpdateNodeOperands(GU, Ops); 5560 } 5561 else { 5562 // For Kind == InlineAsm::Kind_RegUse, we first copy two GPRs into a 5563 // GPRPair and then pass the GPRPair to the inline asm. 5564 SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain]; 5565 5566 // As REG_SEQ doesn't take RegisterSDNode, we copy them first. 5567 SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32, 5568 Chain.getValue(1)); 5569 SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32, 5570 T0.getValue(1)); 5571 SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0); 5572 5573 // Copy REG_SEQ into a GPRPair-typed VR and replace the original two 5574 // i32 VRs of inline asm with it. 5575 Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass); 5576 PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped); 5577 Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1)); 5578 5579 AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; 5580 Glue = Chain.getValue(1); 5581 } 5582 5583 Changed = true; 5584 5585 if(PairedReg.getNode()) { 5586 OpChanged[OpChanged.size() -1 ] = true; 5587 Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); 5588 if (IsTiedToChangedOp) 5589 Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); 5590 else 5591 Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); 5592 // Replace the current flag. 5593 AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( 5594 Flag, dl, MVT::i32); 5595 // Add the new register node and skip the original two GPRs. 5596 AsmNodeOperands.push_back(PairedReg); 5597 // Skip the next two GPRs. 
5598 i += 2; 5599 } 5600 } 5601 5602 if (Glue.getNode()) 5603 AsmNodeOperands.push_back(Glue); 5604 if (!Changed) 5605 return false; 5606 5607 SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N), 5608 CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands); 5609 New->setNodeId(-1); 5610 ReplaceNode(N, New.getNode()); 5611 return true; 5612 } 5613 5614 5615 bool ARMDAGToDAGISel:: 5616 SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID, 5617 std::vector<SDValue> &OutOps) { 5618 switch(ConstraintID) { 5619 default: 5620 llvm_unreachable("Unexpected asm memory constraint"); 5621 case InlineAsm::Constraint_m: 5622 case InlineAsm::Constraint_o: 5623 case InlineAsm::Constraint_Q: 5624 case InlineAsm::Constraint_Um: 5625 case InlineAsm::Constraint_Un: 5626 case InlineAsm::Constraint_Uq: 5627 case InlineAsm::Constraint_Us: 5628 case InlineAsm::Constraint_Ut: 5629 case InlineAsm::Constraint_Uv: 5630 case InlineAsm::Constraint_Uy: 5631 // Require the address to be in a register. That is safe for all ARM 5632 // variants and it is hard to do anything much smarter without knowing 5633 // how the operand is used. 5634 OutOps.push_back(Op); 5635 return false; 5636 } 5637 return true; 5638 } 5639 5640 /// createARMISelDag - This pass converts a legalized DAG into a 5641 /// ARM-specific DAG, ready for instruction scheduling. 5642 /// 5643 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM, 5644 CodeGenOpt::Level OptLevel) { 5645 return new ARMDAGToDAGISel(TM, OptLevel); 5646 } 5647