//===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the AArch64 target.
//
//===----------------------------------------------------------------------===//

#include "AArch64MachineFunctionInfo.h"
#include "AArch64TargetMachine.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/ADT/APSInt.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/IR/Function.h" // To access function attributes.
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-isel"

//===--------------------------------------------------------------------===//
/// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
/// instructions for SelectionDAG operations.
///
namespace {

class AArch64DAGToDAGISel : public SelectionDAGISel {

  /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
  /// make the right decision when generating code for different targets.
  /// It is null after construction and is (re)assigned at the start of every
  /// runOnMachineFunction call.
  const AArch64Subtarget *Subtarget;

public:
  explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
                               CodeGenOpt::Level OptLevel)
      : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}

  StringRef getPassName() const override {
    return "AArch64 Instruction Selection";
  }

  /// Cache the subtarget of \p MF, then defer to the generic
  /// SelectionDAGISel implementation.
  bool runOnMachineFunction(MachineFunction &MF) override {
    Subtarget = &MF.getSubtarget<AArch64Subtarget>();
    return SelectionDAGISel::runOnMachineFunction(MF);
  }

  void Select(SDNode *Node) override;

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    unsigned ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  template <signed Low, signed High, signed Scale>
  bool SelectRDVLImm(SDValue N, SDValue &Imm);

  bool tryMLAV64LaneV128(SDNode *N);
  bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
  bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
  bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
  bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);

  // Shifted-register operands. Both wrappers forward to
  // SelectShiftedRegister and differ only in the AllowROR flag: false for the
  // arithmetic form, true for the logical form.
  bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, false, Reg, Shift);
  }
  bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
    return SelectShiftedRegister(N, true, Reg, Shift);
  }

  // Signed 7-bit scaled-immediate addressing. The numeric suffix is the
  // access width; the value forwarded as Size is that width divided by 8
  // (1, 2, 4, 8 or 16).
  bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
  }

  // Signed 9-bit and unsigned 6-bit immediates, both with a Size of 16.
  bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
  }
  bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
  }

  // Scaled-immediate addressing (see SelectAddrModeIndexed); the forwarded
  // Size is 1, 2, 4, 8 or 16.
  bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 1, Base, OffImm);
  }
  bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 2, Base, OffImm);
  }
  bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 4, Base, OffImm);
  }
  bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 8, Base, OffImm);
  }
  bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeIndexed(N, 16, Base, OffImm);
  }

  // Unscaled-immediate addressing (see SelectAddrModeUnscaled); the forwarded
  // Size is 1, 2, 4, 8 or 16.
  bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 1, Base, OffImm);
  }
  bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 2, Base, OffImm);
  }
  bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 4, Base, OffImm);
  }
  bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 8, Base, OffImm);
  }
  bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    return SelectAddrModeUnscaled(N, 16, Base, OffImm);
  }

  // Register-offset addressing. The template argument Width is divided by 8
  // before being passed on as the Size argument of the non-template overload.
  template<int Width>
  bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  template<int Width>
  bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
                         SDValue &SignExtend, SDValue &DoShift) {
    return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
  }

  /// Returns true if \p N is UNDEF, or is a DUP / SPLAT_VECTOR whose operand 0
  /// is an integer constant for which isNullValue() holds or a floating-point
  /// constant for which isZero() holds.
  bool SelectDupZeroOrUndef(SDValue N) {
    switch(N->getOpcode()) {
    case ISD::UNDEF:
      return true;
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isNullValue())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    default:
      break;
    }

    return false;
  }

  /// Same test as SelectDupZeroOrUndef, except that UNDEF is not accepted.
  bool SelectDupZero(SDValue N) {
    switch(N->getOpcode()) {
    case AArch64ISD::DUP:
    case ISD::SPLAT_VECTOR: {
      auto Opnd0 = N->getOperand(0);
      if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
        if (CN->isNullValue())
          return true;
      if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
        if (CN->isZero())
          return true;
      break;
    }
    }

    return false;
  }

  // The SVE immediate selectors below turn their template arguments into
  // ordinary arguments of the private non-template overloads of the same
  // name.
  template<MVT::SimpleValueType VT>
  bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    return SelectSVEAddSubImm(N, VT, Imm, Shift);
  }

  template <MVT::SimpleValueType VT, bool Invert = false>
  bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    return SelectSVELogicalImm(N, VT, Imm, Invert);
  }

  template <MVT::SimpleValueType VT>
  bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    return SelectSVEArithImm(N, VT, Imm);
  }

  template <unsigned Low, unsigned High, bool AllowSaturation = false>
  bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
  }

  // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
  // \p N must be a constant. When Shift is set the constant is treated as a
  // shift amount and replaced by (1 << constant). The match succeeds only if
  // the resulting value is a multiple of |Scale| and, after division by
  // Scale, lies in [Min, Max]; \p Imm then receives it as an i32 target
  // constant.
  template<signed Min, signed Max, signed Scale, bool Shift>
  bool SelectCntImm(SDValue N, SDValue &Imm) {
    if (!isa<ConstantSDNode>(N))
      return false;

    int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    if (Shift)
      MulImm = 1LL << MulImm;

    if ((MulImm % std::abs(Scale)) != 0)
      return false;

    MulImm /= Scale;
    if ((MulImm >= Min) && (MulImm <= Max)) {
      Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
      return true;
    }

    return false;
  }

  /// Form sequences of consecutive 64/128-bit registers for use in NEON
  /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
  /// between 1 and 4 elements. If it contains a single element that is returned
  /// unchanged; otherwise a REG_SEQUENCE value is returned.
  SDValue createDTuple(ArrayRef<SDValue> Vecs);
  SDValue createQTuple(ArrayRef<SDValue> Vecs);
  // Form a sequence of SVE registers for instructions using list of vectors,
  // e.g. structured loads and stores (ldN, stN).
  SDValue createZTuple(ArrayRef<SDValue> Vecs);

  /// Generic helper for the createDTuple/createQTuple
  /// functions. Those should almost always be called instead.
  SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
                      const unsigned SubRegs[]);

  void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);

  bool tryIndexedLoad(SDNode *N);

  bool trySelectStackSlotTagP(SDNode *N);
  void SelectTagP(SDNode *N);

  void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                  unsigned SubRegIdx);
  void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
                      unsigned SubRegIdx);
  void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
                            unsigned Opc_rr, unsigned Opc_ri);

  bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
  /// SVE Reg+Imm addressing mode.
  template <int64_t Min, int64_t Max>
  bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
                                SDValue &OffImm);
  /// SVE Reg+Reg address mode.
  template <unsigned Scale>
  bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
  }

  void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
  void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
                             unsigned Opc_rr, unsigned Opc_ri);
  std::tuple<unsigned, SDValue, SDValue>
  findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
                           const SDValue &OldBase, const SDValue &OldOffset,
                           unsigned Scale);

  bool tryBitfieldExtractOp(SDNode *N);
  bool tryBitfieldExtractOpFromSExt(SDNode *N);
  bool tryBitfieldInsertOp(SDNode *N);
  bool tryBitfieldInsertInZeroOp(SDNode *N);
  bool tryShiftAmountMod(SDNode *N);
  bool tryHighFPExt(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

// Include the pieces autogenerated from the target description.
#include "AArch64GenDAGISel.inc"

private:
  bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
                             SDValue &Shift);
  // Signed 7-bit immediate form of SelectAddrModeIndexedBitWidth.
  bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
                               SDValue &OffImm) {
    return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
  }
  bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
                                     unsigned Size, SDValue &Base,
                                     SDValue &OffImm);
  bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
                             SDValue &OffImm);
  bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
                              SDValue &OffImm);
  bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
                         SDValue &Offset, SDValue &SignExtend,
                         SDValue &DoShift);
  bool isWorthFolding(SDValue V) const;
  bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
                         SDValue &Offset, SDValue &SignExtend);

  // Passes the RegWidth template argument on as the Width argument of the
  // non-template overload declared just below.
  template<unsigned RegWidth>
  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
  }

  bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);

  bool SelectCMP_SWAP(SDNode *N);

  bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);

  bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);

  bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);

  bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
  bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
                         bool AllowSaturation, SDValue &Imm);

  bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
  bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
                               SDValue &Offset);

  bool SelectAllActivePredicate(SDValue N);
};
} // end anonymous namespace
end anonymous namespace 342 343 /// isIntImmediate - This method tests to see if the node is a constant 344 /// operand. If so Imm will receive the 32-bit value. 345 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) { 346 if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) { 347 Imm = C->getZExtValue(); 348 return true; 349 } 350 return false; 351 } 352 353 // isIntImmediate - This method tests to see if a constant operand. 354 // If so Imm will receive the value. 355 static bool isIntImmediate(SDValue N, uint64_t &Imm) { 356 return isIntImmediate(N.getNode(), Imm); 357 } 358 359 // isOpcWithIntImmediate - This method tests to see if the node is a specific 360 // opcode and that it has a immediate integer right operand. 361 // If so Imm will receive the 32 bit value. 362 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc, 363 uint64_t &Imm) { 364 return N->getOpcode() == Opc && 365 isIntImmediate(N->getOperand(1).getNode(), Imm); 366 } 367 368 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand( 369 const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) { 370 switch(ConstraintID) { 371 default: 372 llvm_unreachable("Unexpected asm memory constraint"); 373 case InlineAsm::Constraint_m: 374 case InlineAsm::Constraint_o: 375 case InlineAsm::Constraint_Q: 376 // We need to make sure that this one operand does not end up in XZR, thus 377 // require the address to be in a PointerRegClass register. 
378 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo(); 379 const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF); 380 SDLoc dl(Op); 381 SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64); 382 SDValue NewOp = 383 SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, 384 dl, Op.getValueType(), 385 Op, RC), 0); 386 OutOps.push_back(NewOp); 387 return false; 388 } 389 return true; 390 } 391 392 /// SelectArithImmed - Select an immediate value that can be represented as 393 /// a 12-bit value shifted left by either 0 or 12. If so, return true with 394 /// Val set to the 12-bit value and Shift set to the shifter operand. 395 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val, 396 SDValue &Shift) { 397 // This function is called from the addsub_shifted_imm ComplexPattern, 398 // which lists [imm] as the list of opcode it's interested in, however 399 // we still need to check whether the operand is actually an immediate 400 // here because the ComplexPattern opcode list is only used in 401 // root-level opcode matching. 402 if (!isa<ConstantSDNode>(N.getNode())) 403 return false; 404 405 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 406 unsigned ShiftAmt; 407 408 if (Immed >> 12 == 0) { 409 ShiftAmt = 0; 410 } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) { 411 ShiftAmt = 12; 412 Immed = Immed >> 12; 413 } else 414 return false; 415 416 unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt); 417 SDLoc dl(N); 418 Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32); 419 Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32); 420 return true; 421 } 422 423 /// SelectNegArithImmed - As above, but negates the value before trying to 424 /// select it. 
425 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val, 426 SDValue &Shift) { 427 // This function is called from the addsub_shifted_imm ComplexPattern, 428 // which lists [imm] as the list of opcode it's interested in, however 429 // we still need to check whether the operand is actually an immediate 430 // here because the ComplexPattern opcode list is only used in 431 // root-level opcode matching. 432 if (!isa<ConstantSDNode>(N.getNode())) 433 return false; 434 435 // The immediate operand must be a 24-bit zero-extended immediate. 436 uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue(); 437 438 // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0" 439 // have the opposite effect on the C flag, so this pattern mustn't match under 440 // those circumstances. 441 if (Immed == 0) 442 return false; 443 444 if (N.getValueType() == MVT::i32) 445 Immed = ~((uint32_t)Immed) + 1; 446 else 447 Immed = ~Immed + 1ULL; 448 if (Immed & 0xFFFFFFFFFF000000ULL) 449 return false; 450 451 Immed &= 0xFFFFFFULL; 452 return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val, 453 Shift); 454 } 455 456 /// getShiftTypeForNode - Translate a shift node to the corresponding 457 /// ShiftType value. 458 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) { 459 switch (N.getOpcode()) { 460 default: 461 return AArch64_AM::InvalidShiftExtend; 462 case ISD::SHL: 463 return AArch64_AM::LSL; 464 case ISD::SRL: 465 return AArch64_AM::LSR; 466 case ISD::SRA: 467 return AArch64_AM::ASR; 468 case ISD::ROTR: 469 return AArch64_AM::ROR; 470 } 471 } 472 473 /// Determine whether it is worth it to fold SHL into the addressing 474 /// mode. 475 static bool isWorthFoldingSHL(SDValue V) { 476 assert(V.getOpcode() == ISD::SHL && "invalid opcode"); 477 // It is worth folding logical shift of up to three places. 
478 auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1)); 479 if (!CSD) 480 return false; 481 unsigned ShiftVal = CSD->getZExtValue(); 482 if (ShiftVal > 3) 483 return false; 484 485 // Check if this particular node is reused in any non-memory related 486 // operation. If yes, do not try to fold this node into the address 487 // computation, since the computation will be kept. 488 const SDNode *Node = V.getNode(); 489 for (SDNode *UI : Node->uses()) 490 if (!isa<MemSDNode>(*UI)) 491 for (SDNode *UII : UI->uses()) 492 if (!isa<MemSDNode>(*UII)) 493 return false; 494 return true; 495 } 496 497 /// Determine whether it is worth to fold V into an extended register. 498 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const { 499 // Trivial if we are optimizing for code size or if there is only 500 // one use of the value. 501 if (CurDAG->shouldOptForSize() || V.hasOneUse()) 502 return true; 503 // If a subtarget has a fastpath LSL we can fold a logical shift into 504 // the addressing mode and save a cycle. 505 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL && 506 isWorthFoldingSHL(V)) 507 return true; 508 if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) { 509 const SDValue LHS = V.getOperand(0); 510 const SDValue RHS = V.getOperand(1); 511 if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) 512 return true; 513 if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS)) 514 return true; 515 } 516 517 // It hurts otherwise, since the value will be reused. 518 return false; 519 } 520 521 /// SelectShiftedRegister - Select a "shifted register" operand. If the value 522 /// is not shifted, set the Shift operand to default of "LSL 0". The logical 523 /// instructions allow the shifted register to be rotated, but the arithmetic 524 /// instructions do not. The AllowROR parameter specifies whether ROR is 525 /// supported. 
526 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR, 527 SDValue &Reg, SDValue &Shift) { 528 AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N); 529 if (ShType == AArch64_AM::InvalidShiftExtend) 530 return false; 531 if (!AllowROR && ShType == AArch64_AM::ROR) 532 return false; 533 534 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 535 unsigned BitSize = N.getValueSizeInBits(); 536 unsigned Val = RHS->getZExtValue() & (BitSize - 1); 537 unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val); 538 539 Reg = N.getOperand(0); 540 Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32); 541 return isWorthFolding(N); 542 } 543 544 return false; 545 } 546 547 /// getExtendTypeForNode - Translate an extend node to the corresponding 548 /// ExtendType value. 549 static AArch64_AM::ShiftExtendType 550 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) { 551 if (N.getOpcode() == ISD::SIGN_EXTEND || 552 N.getOpcode() == ISD::SIGN_EXTEND_INREG) { 553 EVT SrcVT; 554 if (N.getOpcode() == ISD::SIGN_EXTEND_INREG) 555 SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT(); 556 else 557 SrcVT = N.getOperand(0).getValueType(); 558 559 if (!IsLoadStore && SrcVT == MVT::i8) 560 return AArch64_AM::SXTB; 561 else if (!IsLoadStore && SrcVT == MVT::i16) 562 return AArch64_AM::SXTH; 563 else if (SrcVT == MVT::i32) 564 return AArch64_AM::SXTW; 565 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 566 567 return AArch64_AM::InvalidShiftExtend; 568 } else if (N.getOpcode() == ISD::ZERO_EXTEND || 569 N.getOpcode() == ISD::ANY_EXTEND) { 570 EVT SrcVT = N.getOperand(0).getValueType(); 571 if (!IsLoadStore && SrcVT == MVT::i8) 572 return AArch64_AM::UXTB; 573 else if (!IsLoadStore && SrcVT == MVT::i16) 574 return AArch64_AM::UXTH; 575 else if (SrcVT == MVT::i32) 576 return AArch64_AM::UXTW; 577 assert(SrcVT != MVT::i64 && "extend from 64-bits?"); 578 579 return AArch64_AM::InvalidShiftExtend; 580 } else if (N.getOpcode() == 
ISD::AND) { 581 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 582 if (!CSD) 583 return AArch64_AM::InvalidShiftExtend; 584 uint64_t AndMask = CSD->getZExtValue(); 585 586 switch (AndMask) { 587 default: 588 return AArch64_AM::InvalidShiftExtend; 589 case 0xFF: 590 return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend; 591 case 0xFFFF: 592 return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend; 593 case 0xFFFFFFFF: 594 return AArch64_AM::UXTW; 595 } 596 } 597 598 return AArch64_AM::InvalidShiftExtend; 599 } 600 601 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts. 602 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) { 603 if (DL->getOpcode() != AArch64ISD::DUPLANE16 && 604 DL->getOpcode() != AArch64ISD::DUPLANE32) 605 return false; 606 607 SDValue SV = DL->getOperand(0); 608 if (SV.getOpcode() != ISD::INSERT_SUBVECTOR) 609 return false; 610 611 SDValue EV = SV.getOperand(1); 612 if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR) 613 return false; 614 615 ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode()); 616 ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode()); 617 LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue(); 618 LaneOp = EV.getOperand(0); 619 620 return true; 621 } 622 623 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a 624 // high lane extract. 625 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp, 626 SDValue &LaneOp, int &LaneIdx) { 627 628 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) { 629 std::swap(Op0, Op1); 630 if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) 631 return false; 632 } 633 StdOp = Op1; 634 return true; 635 } 636 637 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand 638 /// is a lane in the upper half of a 128-bit vector. 
Recognize and select this 639 /// so that we don't emit unnecessary lane extracts. 640 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) { 641 SDLoc dl(N); 642 SDValue Op0 = N->getOperand(0); 643 SDValue Op1 = N->getOperand(1); 644 SDValue MLAOp1; // Will hold ordinary multiplicand for MLA. 645 SDValue MLAOp2; // Will hold lane-accessed multiplicand for MLA. 646 int LaneIdx = -1; // Will hold the lane index. 647 648 if (Op1.getOpcode() != ISD::MUL || 649 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 650 LaneIdx)) { 651 std::swap(Op0, Op1); 652 if (Op1.getOpcode() != ISD::MUL || 653 !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2, 654 LaneIdx)) 655 return false; 656 } 657 658 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 659 660 SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal }; 661 662 unsigned MLAOpc = ~0U; 663 664 switch (N->getSimpleValueType(0).SimpleTy) { 665 default: 666 llvm_unreachable("Unrecognized MLA."); 667 case MVT::v4i16: 668 MLAOpc = AArch64::MLAv4i16_indexed; 669 break; 670 case MVT::v8i16: 671 MLAOpc = AArch64::MLAv8i16_indexed; 672 break; 673 case MVT::v2i32: 674 MLAOpc = AArch64::MLAv2i32_indexed; 675 break; 676 case MVT::v4i32: 677 MLAOpc = AArch64::MLAv4i32_indexed; 678 break; 679 } 680 681 ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops)); 682 return true; 683 } 684 685 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) { 686 SDLoc dl(N); 687 SDValue SMULLOp0; 688 SDValue SMULLOp1; 689 int LaneIdx; 690 691 if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1, 692 LaneIdx)) 693 return false; 694 695 SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64); 696 697 SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal }; 698 699 unsigned SMULLOpc = ~0U; 700 701 if (IntNo == Intrinsic::aarch64_neon_smull) { 702 switch (N->getSimpleValueType(0).SimpleTy) { 703 default: 704 
llvm_unreachable("Unrecognized SMULL."); 705 case MVT::v4i32: 706 SMULLOpc = AArch64::SMULLv4i16_indexed; 707 break; 708 case MVT::v2i64: 709 SMULLOpc = AArch64::SMULLv2i32_indexed; 710 break; 711 } 712 } else if (IntNo == Intrinsic::aarch64_neon_umull) { 713 switch (N->getSimpleValueType(0).SimpleTy) { 714 default: 715 llvm_unreachable("Unrecognized SMULL."); 716 case MVT::v4i32: 717 SMULLOpc = AArch64::UMULLv4i16_indexed; 718 break; 719 case MVT::v2i64: 720 SMULLOpc = AArch64::UMULLv2i32_indexed; 721 break; 722 } 723 } else 724 llvm_unreachable("Unrecognized intrinsic."); 725 726 ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops)); 727 return true; 728 } 729 730 /// Instructions that accept extend modifiers like UXTW expect the register 731 /// being extended to be a GPR32, but the incoming DAG might be acting on a 732 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if 733 /// this is the case. 734 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) { 735 if (N.getValueType() == MVT::i32) 736 return N; 737 738 SDLoc dl(N); 739 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 740 MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, 741 dl, MVT::i32, N, SubReg); 742 return SDValue(Node, 0); 743 } 744 745 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. 746 template<signed Low, signed High, signed Scale> 747 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) { 748 if (!isa<ConstantSDNode>(N)) 749 return false; 750 751 int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue(); 752 if ((MulImm % std::abs(Scale)) == 0) { 753 int64_t RDVLImm = MulImm / Scale; 754 if ((RDVLImm >= Low) && (RDVLImm <= High)) { 755 Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32); 756 return true; 757 } 758 } 759 760 return false; 761 } 762 763 /// SelectArithExtendedRegister - Select a "extended register" operand. 
This 764 /// operand folds in an extend followed by an optional left shift. 765 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg, 766 SDValue &Shift) { 767 unsigned ShiftVal = 0; 768 AArch64_AM::ShiftExtendType Ext; 769 770 if (N.getOpcode() == ISD::SHL) { 771 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 772 if (!CSD) 773 return false; 774 ShiftVal = CSD->getZExtValue(); 775 if (ShiftVal > 4) 776 return false; 777 778 Ext = getExtendTypeForNode(N.getOperand(0)); 779 if (Ext == AArch64_AM::InvalidShiftExtend) 780 return false; 781 782 Reg = N.getOperand(0).getOperand(0); 783 } else { 784 Ext = getExtendTypeForNode(N); 785 if (Ext == AArch64_AM::InvalidShiftExtend) 786 return false; 787 788 Reg = N.getOperand(0); 789 790 // Don't match if free 32-bit -> 64-bit zext can be used instead. 791 if (Ext == AArch64_AM::UXTW && 792 Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode())) 793 return false; 794 } 795 796 // AArch64 mandates that the RHS of the operation must use the smallest 797 // register class that could contain the size being extended from. Thus, 798 // if we're folding a (sext i8), we need the RHS to be a GPR32, even though 799 // there might not be an actual 32-bit value in the program. We can 800 // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here. 801 assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX); 802 Reg = narrowIfNeeded(CurDAG, Reg); 803 Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N), 804 MVT::i32); 805 return isWorthFolding(N); 806 } 807 808 /// If there's a use of this ADDlow that's not itself a load/store then we'll 809 /// need to create a real ADD instruction from it anyway and there's no point in 810 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's 811 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding 812 /// leads to duplicated ADRP instructions. 
813 static bool isWorthFoldingADDlow(SDValue N) { 814 for (auto Use : N->uses()) { 815 if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE && 816 Use->getOpcode() != ISD::ATOMIC_LOAD && 817 Use->getOpcode() != ISD::ATOMIC_STORE) 818 return false; 819 820 // ldar and stlr have much more restrictive addressing modes (just a 821 // register). 822 if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering())) 823 return false; 824 } 825 826 return true; 827 } 828 829 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit 830 /// immediate" address. The "Size" argument is the size in bytes of the memory 831 /// reference, which determines the scale. 832 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, 833 unsigned BW, unsigned Size, 834 SDValue &Base, 835 SDValue &OffImm) { 836 SDLoc dl(N); 837 const DataLayout &DL = CurDAG->getDataLayout(); 838 const TargetLowering *TLI = getTargetLowering(); 839 if (N.getOpcode() == ISD::FrameIndex) { 840 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 841 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 842 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 843 return true; 844 } 845 846 // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed 847 // selected here doesn't support labels/immediates, only base+offset. 
848 if (CurDAG->isBaseWithConstantOffset(N)) { 849 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 850 if (IsSignedImm) { 851 int64_t RHSC = RHS->getSExtValue(); 852 unsigned Scale = Log2_32(Size); 853 int64_t Range = 0x1LL << (BW - 1); 854 855 if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) && 856 RHSC < (Range << Scale)) { 857 Base = N.getOperand(0); 858 if (Base.getOpcode() == ISD::FrameIndex) { 859 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 860 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 861 } 862 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 863 return true; 864 } 865 } else { 866 // unsigned Immediate 867 uint64_t RHSC = RHS->getZExtValue(); 868 unsigned Scale = Log2_32(Size); 869 uint64_t Range = 0x1ULL << BW; 870 871 if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) { 872 Base = N.getOperand(0); 873 if (Base.getOpcode() == ISD::FrameIndex) { 874 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 875 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 876 } 877 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 878 return true; 879 } 880 } 881 } 882 } 883 // Base only. The address will be materialized into a register before 884 // the memory is accessed. 885 // add x0, Xbase, #offset 886 // stp x1, x2, [x0] 887 Base = N; 888 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 889 return true; 890 } 891 892 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit 893 /// immediate" address. The "Size" argument is the size in bytes of the memory 894 /// reference, which determines the scale. 
895 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size, 896 SDValue &Base, SDValue &OffImm) { 897 SDLoc dl(N); 898 const DataLayout &DL = CurDAG->getDataLayout(); 899 const TargetLowering *TLI = getTargetLowering(); 900 if (N.getOpcode() == ISD::FrameIndex) { 901 int FI = cast<FrameIndexSDNode>(N)->getIndex(); 902 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 903 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 904 return true; 905 } 906 907 if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) { 908 GlobalAddressSDNode *GAN = 909 dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode()); 910 Base = N.getOperand(0); 911 OffImm = N.getOperand(1); 912 if (!GAN) 913 return true; 914 915 if (GAN->getOffset() % Size == 0 && 916 GAN->getGlobal()->getPointerAlignment(DL) >= Size) 917 return true; 918 } 919 920 if (CurDAG->isBaseWithConstantOffset(N)) { 921 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 922 int64_t RHSC = (int64_t)RHS->getZExtValue(); 923 unsigned Scale = Log2_32(Size); 924 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) { 925 Base = N.getOperand(0); 926 if (Base.getOpcode() == ISD::FrameIndex) { 927 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 928 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 929 } 930 OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64); 931 return true; 932 } 933 } 934 } 935 936 // Before falling back to our general case, check if the unscaled 937 // instructions can handle this. If so, that's preferable. 938 if (SelectAddrModeUnscaled(N, Size, Base, OffImm)) 939 return false; 940 941 // Base only. The address will be materialized into a register before 942 // the memory is accessed. 
943 // add x0, Xbase, #offset 944 // ldr x0, [x0] 945 Base = N; 946 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 947 return true; 948 } 949 950 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit 951 /// immediate" address. This should only match when there is an offset that 952 /// is not valid for a scaled immediate addressing mode. The "Size" argument 953 /// is the size in bytes of the memory reference, which is needed here to know 954 /// what is valid for a scaled immediate. 955 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size, 956 SDValue &Base, 957 SDValue &OffImm) { 958 if (!CurDAG->isBaseWithConstantOffset(N)) 959 return false; 960 if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) { 961 int64_t RHSC = RHS->getSExtValue(); 962 // If the offset is valid as a scaled immediate, don't match here. 963 if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && 964 RHSC < (0x1000 << Log2_32(Size))) 965 return false; 966 if (RHSC >= -256 && RHSC < 256) { 967 Base = N.getOperand(0); 968 if (Base.getOpcode() == ISD::FrameIndex) { 969 int FI = cast<FrameIndexSDNode>(Base)->getIndex(); 970 const TargetLowering *TLI = getTargetLowering(); 971 Base = CurDAG->getTargetFrameIndex( 972 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 973 } 974 OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64); 975 return true; 976 } 977 } 978 return false; 979 } 980 981 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) { 982 SDLoc dl(N); 983 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 984 SDValue ImpDef = SDValue( 985 CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0); 986 MachineSDNode *Node = CurDAG->getMachineNode( 987 TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg); 988 return SDValue(Node, 0); 989 } 990 991 /// Check if the given SHL node (\p N), can be used to form an 992 /// extended register for an addressing mode. 
993 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, 994 bool WantExtend, SDValue &Offset, 995 SDValue &SignExtend) { 996 assert(N.getOpcode() == ISD::SHL && "Invalid opcode."); 997 ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1)); 998 if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue()) 999 return false; 1000 1001 SDLoc dl(N); 1002 if (WantExtend) { 1003 AArch64_AM::ShiftExtendType Ext = 1004 getExtendTypeForNode(N.getOperand(0), true); 1005 if (Ext == AArch64_AM::InvalidShiftExtend) 1006 return false; 1007 1008 Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0)); 1009 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1010 MVT::i32); 1011 } else { 1012 Offset = N.getOperand(0); 1013 SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32); 1014 } 1015 1016 unsigned LegalShiftVal = Log2_32(Size); 1017 unsigned ShiftVal = CSD->getZExtValue(); 1018 1019 if (ShiftVal != 0 && ShiftVal != LegalShiftVal) 1020 return false; 1021 1022 return isWorthFolding(N); 1023 } 1024 1025 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, 1026 SDValue &Base, SDValue &Offset, 1027 SDValue &SignExtend, 1028 SDValue &DoShift) { 1029 if (N.getOpcode() != ISD::ADD) 1030 return false; 1031 SDValue LHS = N.getOperand(0); 1032 SDValue RHS = N.getOperand(1); 1033 SDLoc dl(N); 1034 1035 // We don't want to match immediate adds here, because they are better lowered 1036 // to the register-immediate addressing modes. 1037 if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS)) 1038 return false; 1039 1040 // Check if this particular node is reused in any non-memory related 1041 // operation. If yes, do not try to fold this node into the address 1042 // computation, since the computation will be kept. 
1043 const SDNode *Node = N.getNode(); 1044 for (SDNode *UI : Node->uses()) { 1045 if (!isa<MemSDNode>(*UI)) 1046 return false; 1047 } 1048 1049 // Remember if it is worth folding N when it produces extended register. 1050 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1051 1052 // Try to match a shifted extend on the RHS. 1053 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1054 SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) { 1055 Base = LHS; 1056 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1057 return true; 1058 } 1059 1060 // Try to match a shifted extend on the LHS. 1061 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1062 SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) { 1063 Base = RHS; 1064 DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32); 1065 return true; 1066 } 1067 1068 // There was no shift, whatever else we find. 1069 DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32); 1070 1071 AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend; 1072 // Try to match an unshifted extend on the LHS. 1073 if (IsExtendedRegisterWorthFolding && 1074 (Ext = getExtendTypeForNode(LHS, true)) != 1075 AArch64_AM::InvalidShiftExtend) { 1076 Base = RHS; 1077 Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); 1078 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1079 MVT::i32); 1080 if (isWorthFolding(LHS)) 1081 return true; 1082 } 1083 1084 // Try to match an unshifted extend on the RHS. 1085 if (IsExtendedRegisterWorthFolding && 1086 (Ext = getExtendTypeForNode(RHS, true)) != 1087 AArch64_AM::InvalidShiftExtend) { 1088 Base = LHS; 1089 Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); 1090 SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, 1091 MVT::i32); 1092 if (isWorthFolding(RHS)) 1093 return true; 1094 } 1095 1096 return false; 1097 } 1098 1099 // Check if the given immediate is preferred by ADD. 
If an immediate can be 1100 // encoded in an ADD, or it can be encoded in an "ADD LSL #12" and can not be 1101 // encoded by one MOVZ, return true. 1102 static bool isPreferredADD(int64_t ImmOff) { 1103 // Constant in [0x0, 0xfff] can be encoded in ADD. 1104 if ((ImmOff & 0xfffffffffffff000LL) == 0x0LL) 1105 return true; 1106 // Check if it can be encoded in an "ADD LSL #12". 1107 if ((ImmOff & 0xffffffffff000fffLL) == 0x0LL) 1108 // As a single MOVZ is faster than a "ADD of LSL #12", ignore such constant. 1109 return (ImmOff & 0xffffffffff00ffffLL) != 0x0LL && 1110 (ImmOff & 0xffffffffffff0fffLL) != 0x0LL; 1111 return false; 1112 } 1113 1114 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, 1115 SDValue &Base, SDValue &Offset, 1116 SDValue &SignExtend, 1117 SDValue &DoShift) { 1118 if (N.getOpcode() != ISD::ADD) 1119 return false; 1120 SDValue LHS = N.getOperand(0); 1121 SDValue RHS = N.getOperand(1); 1122 SDLoc DL(N); 1123 1124 // Check if this particular node is reused in any non-memory related 1125 // operation. If yes, do not try to fold this node into the address 1126 // computation, since the computation will be kept. 1127 const SDNode *Node = N.getNode(); 1128 for (SDNode *UI : Node->uses()) { 1129 if (!isa<MemSDNode>(*UI)) 1130 return false; 1131 } 1132 1133 // Watch out if RHS is a wide immediate, it can not be selected into 1134 // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into 1135 // ADD/SUB. 
Instead it will use [BaseReg + 0] address mode and generate 1136 // instructions like: 1137 // MOV X0, WideImmediate 1138 // ADD X1, BaseReg, X0 1139 // LDR X2, [X1, 0] 1140 // For such situation, using [BaseReg, XReg] addressing mode can save one 1141 // ADD/SUB: 1142 // MOV X0, WideImmediate 1143 // LDR X2, [BaseReg, X0] 1144 if (isa<ConstantSDNode>(RHS)) { 1145 int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue(); 1146 unsigned Scale = Log2_32(Size); 1147 // Skip the immediate can be selected by load/store addressing mode. 1148 // Also skip the immediate can be encoded by a single ADD (SUB is also 1149 // checked by using -ImmOff). 1150 if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) || 1151 isPreferredADD(ImmOff) || isPreferredADD(-ImmOff)) 1152 return false; 1153 1154 SDValue Ops[] = { RHS }; 1155 SDNode *MOVI = 1156 CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops); 1157 SDValue MOVIV = SDValue(MOVI, 0); 1158 // This ADD of two X register will be selected into [Reg+Reg] mode. 1159 N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV); 1160 } 1161 1162 // Remember if it is worth folding N when it produces extended register. 1163 bool IsExtendedRegisterWorthFolding = isWorthFolding(N); 1164 1165 // Try to match a shifted extend on the RHS. 1166 if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && 1167 SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) { 1168 Base = LHS; 1169 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1170 return true; 1171 } 1172 1173 // Try to match a shifted extend on the LHS. 1174 if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL && 1175 SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) { 1176 Base = RHS; 1177 DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32); 1178 return true; 1179 } 1180 1181 // Match any non-shifted, non-extend, non-immediate add expression. 
1182 Base = LHS; 1183 Offset = RHS; 1184 SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32); 1185 DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32); 1186 // Reg1 + Reg2 is free: no check needed. 1187 return true; 1188 } 1189 1190 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) { 1191 static const unsigned RegClassIDs[] = { 1192 AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID}; 1193 static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1, 1194 AArch64::dsub2, AArch64::dsub3}; 1195 1196 return createTuple(Regs, RegClassIDs, SubRegs); 1197 } 1198 1199 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) { 1200 static const unsigned RegClassIDs[] = { 1201 AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID}; 1202 static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1, 1203 AArch64::qsub2, AArch64::qsub3}; 1204 1205 return createTuple(Regs, RegClassIDs, SubRegs); 1206 } 1207 1208 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) { 1209 static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID, 1210 AArch64::ZPR3RegClassID, 1211 AArch64::ZPR4RegClassID}; 1212 static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1, 1213 AArch64::zsub2, AArch64::zsub3}; 1214 1215 return createTuple(Regs, RegClassIDs, SubRegs); 1216 } 1217 1218 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs, 1219 const unsigned RegClassIDs[], 1220 const unsigned SubRegs[]) { 1221 // There's no special register-class for a vector-list of 1 element: it's just 1222 // a vector. 1223 if (Regs.size() == 1) 1224 return Regs[0]; 1225 1226 assert(Regs.size() >= 2 && Regs.size() <= 4); 1227 1228 SDLoc DL(Regs[0]); 1229 1230 SmallVector<SDValue, 4> Ops; 1231 1232 // First operand of REG_SEQUENCE is the desired RegClass. 
1233 Ops.push_back( 1234 CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32)); 1235 1236 // Then we get pairs of source & subregister-position for the components. 1237 for (unsigned i = 0; i < Regs.size(); ++i) { 1238 Ops.push_back(Regs[i]); 1239 Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32)); 1240 } 1241 1242 SDNode *N = 1243 CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); 1244 return SDValue(N, 0); 1245 } 1246 1247 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, 1248 bool isExt) { 1249 SDLoc dl(N); 1250 EVT VT = N->getValueType(0); 1251 1252 unsigned ExtOff = isExt; 1253 1254 // Form a REG_SEQUENCE to force register allocation. 1255 unsigned Vec0Off = ExtOff + 1; 1256 SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off, 1257 N->op_begin() + Vec0Off + NumVecs); 1258 SDValue RegSeq = createQTuple(Regs); 1259 1260 SmallVector<SDValue, 6> Ops; 1261 if (isExt) 1262 Ops.push_back(N->getOperand(1)); 1263 Ops.push_back(RegSeq); 1264 Ops.push_back(N->getOperand(NumVecs + ExtOff + 1)); 1265 ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops)); 1266 } 1267 1268 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) { 1269 LoadSDNode *LD = cast<LoadSDNode>(N); 1270 if (LD->isUnindexed()) 1271 return false; 1272 EVT VT = LD->getMemoryVT(); 1273 EVT DstVT = N->getValueType(0); 1274 ISD::MemIndexedMode AM = LD->getAddressingMode(); 1275 bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC; 1276 1277 // We're not doing validity checking here. That was done when checking 1278 // if we should mark the load as indexed or not. We're just selecting 1279 // the right instruction. 1280 unsigned Opcode = 0; 1281 1282 ISD::LoadExtType ExtType = LD->getExtensionType(); 1283 bool InsertTo64 = false; 1284 if (VT == MVT::i64) 1285 Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost; 1286 else if (VT == MVT::i32) { 1287 if (ExtType == ISD::NON_EXTLOAD) 1288 Opcode = IsPre ? 
AArch64::LDRWpre : AArch64::LDRWpost; 1289 else if (ExtType == ISD::SEXTLOAD) 1290 Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost; 1291 else { 1292 Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost; 1293 InsertTo64 = true; 1294 // The result of the load is only i32. It's the subreg_to_reg that makes 1295 // it into an i64. 1296 DstVT = MVT::i32; 1297 } 1298 } else if (VT == MVT::i16) { 1299 if (ExtType == ISD::SEXTLOAD) { 1300 if (DstVT == MVT::i64) 1301 Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost; 1302 else 1303 Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost; 1304 } else { 1305 Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost; 1306 InsertTo64 = DstVT == MVT::i64; 1307 // The result of the load is only i32. It's the subreg_to_reg that makes 1308 // it into an i64. 1309 DstVT = MVT::i32; 1310 } 1311 } else if (VT == MVT::i8) { 1312 if (ExtType == ISD::SEXTLOAD) { 1313 if (DstVT == MVT::i64) 1314 Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost; 1315 else 1316 Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost; 1317 } else { 1318 Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost; 1319 InsertTo64 = DstVT == MVT::i64; 1320 // The result of the load is only i32. It's the subreg_to_reg that makes 1321 // it into an i64. 1322 DstVT = MVT::i32; 1323 } 1324 } else if (VT == MVT::f16) { 1325 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1326 } else if (VT == MVT::bf16) { 1327 Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost; 1328 } else if (VT == MVT::f32) { 1329 Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost; 1330 } else if (VT == MVT::f64 || VT.is64BitVector()) { 1331 Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost; 1332 } else if (VT.is128BitVector()) { 1333 Opcode = IsPre ? 
AArch64::LDRQpre : AArch64::LDRQpost; 1334 } else 1335 return false; 1336 SDValue Chain = LD->getChain(); 1337 SDValue Base = LD->getBasePtr(); 1338 ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset()); 1339 int OffsetVal = (int)OffsetOp->getZExtValue(); 1340 SDLoc dl(N); 1341 SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64); 1342 SDValue Ops[] = { Base, Offset, Chain }; 1343 SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT, 1344 MVT::Other, Ops); 1345 1346 // Transfer memoperands. 1347 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 1348 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp}); 1349 1350 // Either way, we're replacing the node, so tell the caller that. 1351 SDValue LoadedVal = SDValue(Res, 1); 1352 if (InsertTo64) { 1353 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 1354 LoadedVal = 1355 SDValue(CurDAG->getMachineNode( 1356 AArch64::SUBREG_TO_REG, dl, MVT::i64, 1357 CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal, 1358 SubReg), 1359 0); 1360 } 1361 1362 ReplaceUses(SDValue(N, 0), LoadedVal); 1363 ReplaceUses(SDValue(N, 1), SDValue(Res, 0)); 1364 ReplaceUses(SDValue(N, 2), SDValue(Res, 2)); 1365 CurDAG->RemoveDeadNode(N); 1366 return true; 1367 } 1368 1369 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc, 1370 unsigned SubRegIdx) { 1371 SDLoc dl(N); 1372 EVT VT = N->getValueType(0); 1373 SDValue Chain = N->getOperand(0); 1374 1375 SDValue Ops[] = {N->getOperand(2), // Mem operand; 1376 Chain}; 1377 1378 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1379 1380 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1381 SDValue SuperReg = SDValue(Ld, 0); 1382 for (unsigned i = 0; i < NumVecs; ++i) 1383 ReplaceUses(SDValue(N, i), 1384 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1385 1386 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1387 1388 // Transfer memoperands. 
In the case of AArch64::LD64B, there won't be one, 1389 // because it's too simple to have needed special treatment during lowering. 1390 if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) { 1391 MachineMemOperand *MemOp = MemIntr->getMemOperand(); 1392 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 1393 } 1394 1395 CurDAG->RemoveDeadNode(N); 1396 } 1397 1398 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs, 1399 unsigned Opc, unsigned SubRegIdx) { 1400 SDLoc dl(N); 1401 EVT VT = N->getValueType(0); 1402 SDValue Chain = N->getOperand(0); 1403 1404 SDValue Ops[] = {N->getOperand(1), // Mem operand 1405 N->getOperand(2), // Incremental 1406 Chain}; 1407 1408 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1409 MVT::Untyped, MVT::Other}; 1410 1411 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1412 1413 // Update uses of write back register 1414 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1415 1416 // Update uses of vector list 1417 SDValue SuperReg = SDValue(Ld, 1); 1418 if (NumVecs == 1) 1419 ReplaceUses(SDValue(N, 0), SuperReg); 1420 else 1421 for (unsigned i = 0; i < NumVecs; ++i) 1422 ReplaceUses(SDValue(N, i), 1423 CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg)); 1424 1425 // Update the chain 1426 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1427 CurDAG->RemoveDeadNode(N); 1428 } 1429 1430 /// Optimize \param OldBase and \param OldOffset selecting the best addressing 1431 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the 1432 /// new Base and an SDValue representing the new offset. 1433 std::tuple<unsigned, SDValue, SDValue> 1434 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, 1435 unsigned Opc_ri, 1436 const SDValue &OldBase, 1437 const SDValue &OldOffset, 1438 unsigned Scale) { 1439 SDValue NewBase = OldBase; 1440 SDValue NewOffset = OldOffset; 1441 // Detect a possible Reg+Imm addressing mode. 
1442 const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>( 1443 N, OldBase, NewBase, NewOffset); 1444 1445 // Detect a possible reg+reg addressing mode, but only if we haven't already 1446 // detected a Reg+Imm one. 1447 const bool IsRegReg = 1448 !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset); 1449 1450 // Select the instruction. 1451 return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset); 1452 } 1453 1454 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs, 1455 unsigned Scale, unsigned Opc_ri, 1456 unsigned Opc_rr) { 1457 assert(Scale < 4 && "Invalid scaling value."); 1458 SDLoc DL(N); 1459 EVT VT = N->getValueType(0); 1460 SDValue Chain = N->getOperand(0); 1461 1462 // Optimize addressing mode. 1463 SDValue Base, Offset; 1464 unsigned Opc; 1465 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1466 N, Opc_rr, Opc_ri, N->getOperand(2), 1467 CurDAG->getTargetConstant(0, DL, MVT::i64), Scale); 1468 1469 SDValue Ops[] = {N->getOperand(1), // Predicate 1470 Base, // Memory operand 1471 Offset, Chain}; 1472 1473 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1474 1475 SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops); 1476 SDValue SuperReg = SDValue(Load, 0); 1477 for (unsigned i = 0; i < NumVecs; ++i) 1478 ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg( 1479 AArch64::zsub0 + i, DL, VT, SuperReg)); 1480 1481 // Copy chain 1482 unsigned ChainIdx = NumVecs; 1483 ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1)); 1484 CurDAG->RemoveDeadNode(N); 1485 } 1486 1487 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs, 1488 unsigned Opc) { 1489 SDLoc dl(N); 1490 EVT VT = N->getOperand(2)->getValueType(0); 1491 1492 // Form a REG_SEQUENCE to force register allocation. 1493 bool Is128Bit = VT.getSizeInBits() == 128; 1494 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1495 SDValue RegSeq = Is128Bit ? 
createQTuple(Regs) : createDTuple(Regs); 1496 1497 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)}; 1498 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1499 1500 // Transfer memoperands. 1501 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1502 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1503 1504 ReplaceNode(N, St); 1505 } 1506 1507 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs, 1508 unsigned Scale, unsigned Opc_rr, 1509 unsigned Opc_ri) { 1510 SDLoc dl(N); 1511 1512 // Form a REG_SEQUENCE to force register allocation. 1513 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1514 SDValue RegSeq = createZTuple(Regs); 1515 1516 // Optimize addressing mode. 1517 unsigned Opc; 1518 SDValue Offset, Base; 1519 std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore( 1520 N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3), 1521 CurDAG->getTargetConstant(0, dl, MVT::i64), Scale); 1522 1523 SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate 1524 Base, // address 1525 Offset, // offset 1526 N->getOperand(0)}; // chain 1527 SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops); 1528 1529 ReplaceNode(N, St); 1530 } 1531 1532 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, 1533 SDValue &OffImm) { 1534 SDLoc dl(N); 1535 const DataLayout &DL = CurDAG->getDataLayout(); 1536 const TargetLowering *TLI = getTargetLowering(); 1537 1538 // Try to match it for the frame address 1539 if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) { 1540 int FI = FINode->getIndex(); 1541 Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL)); 1542 OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64); 1543 return true; 1544 } 1545 1546 return false; 1547 } 1548 1549 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs, 1550 unsigned Opc) { 1551 SDLoc dl(N); 1552 EVT VT = 
N->getOperand(2)->getValueType(0); 1553 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1554 MVT::Other}; // Type for the Chain 1555 1556 // Form a REG_SEQUENCE to force register allocation. 1557 bool Is128Bit = VT.getSizeInBits() == 128; 1558 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1559 SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs); 1560 1561 SDValue Ops[] = {RegSeq, 1562 N->getOperand(NumVecs + 1), // base register 1563 N->getOperand(NumVecs + 2), // Incremental 1564 N->getOperand(0)}; // Chain 1565 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1566 1567 ReplaceNode(N, St); 1568 } 1569 1570 namespace { 1571 /// WidenVector - Given a value in the V64 register class, produce the 1572 /// equivalent value in the V128 register class. 1573 class WidenVector { 1574 SelectionDAG &DAG; 1575 1576 public: 1577 WidenVector(SelectionDAG &DAG) : DAG(DAG) {} 1578 1579 SDValue operator()(SDValue V64Reg) { 1580 EVT VT = V64Reg.getValueType(); 1581 unsigned NarrowSize = VT.getVectorNumElements(); 1582 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1583 MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize); 1584 SDLoc DL(V64Reg); 1585 1586 SDValue Undef = 1587 SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0); 1588 return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg); 1589 } 1590 }; 1591 } // namespace 1592 1593 /// NarrowVector - Given a value in the V128 register class, produce the 1594 /// equivalent value in the V64 register class. 
1595 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) { 1596 EVT VT = V128Reg.getValueType(); 1597 unsigned WideSize = VT.getVectorNumElements(); 1598 MVT EltTy = VT.getVectorElementType().getSimpleVT(); 1599 MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2); 1600 1601 return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy, 1602 V128Reg); 1603 } 1604 1605 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs, 1606 unsigned Opc) { 1607 SDLoc dl(N); 1608 EVT VT = N->getValueType(0); 1609 bool Narrow = VT.getSizeInBits() == 64; 1610 1611 // Form a REG_SEQUENCE to force register allocation. 1612 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1613 1614 if (Narrow) 1615 transform(Regs, Regs.begin(), 1616 WidenVector(*CurDAG)); 1617 1618 SDValue RegSeq = createQTuple(Regs); 1619 1620 const EVT ResTys[] = {MVT::Untyped, MVT::Other}; 1621 1622 unsigned LaneNo = 1623 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1624 1625 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1626 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1627 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1628 SDValue SuperReg = SDValue(Ld, 0); 1629 1630 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1631 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1632 AArch64::qsub2, AArch64::qsub3 }; 1633 for (unsigned i = 0; i < NumVecs; ++i) { 1634 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg); 1635 if (Narrow) 1636 NV = NarrowVector(NV, *CurDAG); 1637 ReplaceUses(SDValue(N, i), NV); 1638 } 1639 1640 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1)); 1641 CurDAG->RemoveDeadNode(N); 1642 } 1643 1644 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs, 1645 unsigned Opc) { 1646 SDLoc dl(N); 1647 EVT VT = N->getValueType(0); 1648 bool Narrow = VT.getSizeInBits() == 64; 1649 1650 // Form a REG_SEQUENCE to force 
register allocation. 1651 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1652 1653 if (Narrow) 1654 transform(Regs, Regs.begin(), 1655 WidenVector(*CurDAG)); 1656 1657 SDValue RegSeq = createQTuple(Regs); 1658 1659 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1660 RegSeq->getValueType(0), MVT::Other}; 1661 1662 unsigned LaneNo = 1663 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1664 1665 SDValue Ops[] = {RegSeq, 1666 CurDAG->getTargetConstant(LaneNo, dl, 1667 MVT::i64), // Lane Number 1668 N->getOperand(NumVecs + 2), // Base register 1669 N->getOperand(NumVecs + 3), // Incremental 1670 N->getOperand(0)}; 1671 SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1672 1673 // Update uses of the write back register 1674 ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0)); 1675 1676 // Update uses of the vector list 1677 SDValue SuperReg = SDValue(Ld, 1); 1678 if (NumVecs == 1) { 1679 ReplaceUses(SDValue(N, 0), 1680 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg); 1681 } else { 1682 EVT WideVT = RegSeq.getOperand(1)->getValueType(0); 1683 static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1, 1684 AArch64::qsub2, AArch64::qsub3 }; 1685 for (unsigned i = 0; i < NumVecs; ++i) { 1686 SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, 1687 SuperReg); 1688 if (Narrow) 1689 NV = NarrowVector(NV, *CurDAG); 1690 ReplaceUses(SDValue(N, i), NV); 1691 } 1692 } 1693 1694 // Update the Chain 1695 ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2)); 1696 CurDAG->RemoveDeadNode(N); 1697 } 1698 1699 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs, 1700 unsigned Opc) { 1701 SDLoc dl(N); 1702 EVT VT = N->getOperand(2)->getValueType(0); 1703 bool Narrow = VT.getSizeInBits() == 64; 1704 1705 // Form a REG_SEQUENCE to force register allocation. 
1706 SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs); 1707 1708 if (Narrow) 1709 transform(Regs, Regs.begin(), 1710 WidenVector(*CurDAG)); 1711 1712 SDValue RegSeq = createQTuple(Regs); 1713 1714 unsigned LaneNo = 1715 cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue(); 1716 1717 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1718 N->getOperand(NumVecs + 3), N->getOperand(0)}; 1719 SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops); 1720 1721 // Transfer memoperands. 1722 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1723 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1724 1725 ReplaceNode(N, St); 1726 } 1727 1728 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs, 1729 unsigned Opc) { 1730 SDLoc dl(N); 1731 EVT VT = N->getOperand(2)->getValueType(0); 1732 bool Narrow = VT.getSizeInBits() == 64; 1733 1734 // Form a REG_SEQUENCE to force register allocation. 1735 SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs); 1736 1737 if (Narrow) 1738 transform(Regs, Regs.begin(), 1739 WidenVector(*CurDAG)); 1740 1741 SDValue RegSeq = createQTuple(Regs); 1742 1743 const EVT ResTys[] = {MVT::i64, // Type of the write back register 1744 MVT::Other}; 1745 1746 unsigned LaneNo = 1747 cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue(); 1748 1749 SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64), 1750 N->getOperand(NumVecs + 2), // Base Register 1751 N->getOperand(NumVecs + 3), // Incremental 1752 N->getOperand(0)}; 1753 SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); 1754 1755 // Transfer memoperands. 
1756 MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand(); 1757 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 1758 1759 ReplaceNode(N, St); 1760 } 1761 1762 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N, 1763 unsigned &Opc, SDValue &Opd0, 1764 unsigned &LSB, unsigned &MSB, 1765 unsigned NumberOfIgnoredLowBits, 1766 bool BiggerPattern) { 1767 assert(N->getOpcode() == ISD::AND && 1768 "N must be a AND operation to call this function"); 1769 1770 EVT VT = N->getValueType(0); 1771 1772 // Here we can test the type of VT and return false when the type does not 1773 // match, but since it is done prior to that call in the current context 1774 // we turned that into an assert to avoid redundant code. 1775 assert((VT == MVT::i32 || VT == MVT::i64) && 1776 "Type checking must have been done before calling this function"); 1777 1778 // FIXME: simplify-demanded-bits in DAGCombine will probably have 1779 // changed the AND node to a 32-bit mask operation. We'll have to 1780 // undo that as part of the transform here if we want to catch all 1781 // the opportunities. 1782 // Currently the NumberOfIgnoredLowBits argument helps to recover 1783 // form these situations when matching bigger pattern (bitfield insert). 1784 1785 // For unsigned extracts, check for a shift right and mask 1786 uint64_t AndImm = 0; 1787 if (!isOpcWithIntImmediate(N, ISD::AND, AndImm)) 1788 return false; 1789 1790 const SDNode *Op0 = N->getOperand(0).getNode(); 1791 1792 // Because of simplify-demanded-bits in DAGCombine, the mask may have been 1793 // simplified. Try to undo that 1794 AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits); 1795 1796 // The immediate is a mask of the low bits iff imm & (imm+1) == 0 1797 if (AndImm & (AndImm + 1)) 1798 return false; 1799 1800 bool ClampMSB = false; 1801 uint64_t SrlImm = 0; 1802 // Handle the SRL + ANY_EXTEND case. 
1803 if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND && 1804 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) { 1805 // Extend the incoming operand of the SRL to 64-bit. 1806 Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0)); 1807 // Make sure to clamp the MSB so that we preserve the semantics of the 1808 // original operations. 1809 ClampMSB = true; 1810 } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE && 1811 isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, 1812 SrlImm)) { 1813 // If the shift result was truncated, we can still combine them. 1814 Opd0 = Op0->getOperand(0).getOperand(0); 1815 1816 // Use the type of SRL node. 1817 VT = Opd0->getValueType(0); 1818 } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) { 1819 Opd0 = Op0->getOperand(0); 1820 } else if (BiggerPattern) { 1821 // Let's pretend a 0 shift right has been performed. 1822 // The resulting code will be at least as good as the original one 1823 // plus it may expose more opportunities for bitfield insert pattern. 1824 // FIXME: Currently we limit this to the bigger pattern, because 1825 // some optimizations expect AND and not UBFM. 1826 Opd0 = N->getOperand(0); 1827 } else 1828 return false; 1829 1830 // Bail out on large immediates. This happens when no proper 1831 // combining/constant folding was performed. 1832 if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) { 1833 LLVM_DEBUG( 1834 (dbgs() << N 1835 << ": Found large shift immediate, this should not happen\n")); 1836 return false; 1837 } 1838 1839 LSB = SrlImm; 1840 MSB = SrlImm + (VT == MVT::i32 ? 
countTrailingOnes<uint32_t>(AndImm) 1841 : countTrailingOnes<uint64_t>(AndImm)) - 1842 1; 1843 if (ClampMSB) 1844 // Since we're moving the extend before the right shift operation, we need 1845 // to clamp the MSB to make sure we don't shift in undefined bits instead of 1846 // the zeros which would get shifted in with the original right shift 1847 // operation. 1848 MSB = MSB > 31 ? 31 : MSB; 1849 1850 Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri; 1851 return true; 1852 } 1853 1854 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc, 1855 SDValue &Opd0, unsigned &Immr, 1856 unsigned &Imms) { 1857 assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG); 1858 1859 EVT VT = N->getValueType(0); 1860 unsigned BitWidth = VT.getSizeInBits(); 1861 assert((VT == MVT::i32 || VT == MVT::i64) && 1862 "Type checking must have been done before calling this function"); 1863 1864 SDValue Op = N->getOperand(0); 1865 if (Op->getOpcode() == ISD::TRUNCATE) { 1866 Op = Op->getOperand(0); 1867 VT = Op->getValueType(0); 1868 BitWidth = VT.getSizeInBits(); 1869 } 1870 1871 uint64_t ShiftImm; 1872 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) && 1873 !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 1874 return false; 1875 1876 unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits(); 1877 if (ShiftImm + Width > BitWidth) 1878 return false; 1879 1880 Opc = (VT == MVT::i32) ? 
AArch64::SBFMWri : AArch64::SBFMXri; 1881 Opd0 = Op.getOperand(0); 1882 Immr = ShiftImm; 1883 Imms = ShiftImm + Width - 1; 1884 return true; 1885 } 1886 1887 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc, 1888 SDValue &Opd0, unsigned &LSB, 1889 unsigned &MSB) { 1890 // We are looking for the following pattern which basically extracts several 1891 // continuous bits from the source value and places it from the LSB of the 1892 // destination value, all other bits of the destination value or set to zero: 1893 // 1894 // Value2 = AND Value, MaskImm 1895 // SRL Value2, ShiftImm 1896 // 1897 // with MaskImm >> ShiftImm to search for the bit width. 1898 // 1899 // This gets selected into a single UBFM: 1900 // 1901 // UBFM Value, ShiftImm, BitWide + SrlImm -1 1902 // 1903 1904 if (N->getOpcode() != ISD::SRL) 1905 return false; 1906 1907 uint64_t AndMask = 0; 1908 if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask)) 1909 return false; 1910 1911 Opd0 = N->getOperand(0).getOperand(0); 1912 1913 uint64_t SrlImm = 0; 1914 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1915 return false; 1916 1917 // Check whether we really have several bits extract here. 
1918 unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm)); 1919 if (BitWide && isMask_64(AndMask >> SrlImm)) { 1920 if (N->getValueType(0) == MVT::i32) 1921 Opc = AArch64::UBFMWri; 1922 else 1923 Opc = AArch64::UBFMXri; 1924 1925 LSB = SrlImm; 1926 MSB = BitWide + SrlImm - 1; 1927 return true; 1928 } 1929 1930 return false; 1931 } 1932 1933 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0, 1934 unsigned &Immr, unsigned &Imms, 1935 bool BiggerPattern) { 1936 assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) && 1937 "N must be a SHR/SRA operation to call this function"); 1938 1939 EVT VT = N->getValueType(0); 1940 1941 // Here we can test the type of VT and return false when the type does not 1942 // match, but since it is done prior to that call in the current context 1943 // we turned that into an assert to avoid redundant code. 1944 assert((VT == MVT::i32 || VT == MVT::i64) && 1945 "Type checking must have been done before calling this function"); 1946 1947 // Check for AND + SRL doing several bits extract. 1948 if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms)) 1949 return true; 1950 1951 // We're looking for a shift of a shift. 1952 uint64_t ShlImm = 0; 1953 uint64_t TruncBits = 0; 1954 if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) { 1955 Opd0 = N->getOperand(0).getOperand(0); 1956 } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL && 1957 N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) { 1958 // We are looking for a shift of truncate. Truncate from i64 to i32 could 1959 // be considered as setting high 32 bits as zero. Our strategy here is to 1960 // always generate 64bit UBFM. This consistency will help the CSE pass 1961 // later find more redundancy. 
1962 Opd0 = N->getOperand(0).getOperand(0); 1963 TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits(); 1964 VT = Opd0.getValueType(); 1965 assert(VT == MVT::i64 && "the promoted type should be i64"); 1966 } else if (BiggerPattern) { 1967 // Let's pretend a 0 shift left has been performed. 1968 // FIXME: Currently we limit this to the bigger pattern case, 1969 // because some optimizations expect AND and not UBFM 1970 Opd0 = N->getOperand(0); 1971 } else 1972 return false; 1973 1974 // Missing combines/constant folding may have left us with strange 1975 // constants. 1976 if (ShlImm >= VT.getSizeInBits()) { 1977 LLVM_DEBUG( 1978 (dbgs() << N 1979 << ": Found large shift immediate, this should not happen\n")); 1980 return false; 1981 } 1982 1983 uint64_t SrlImm = 0; 1984 if (!isIntImmediate(N->getOperand(1), SrlImm)) 1985 return false; 1986 1987 assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() && 1988 "bad amount in shift node!"); 1989 int immr = SrlImm - ShlImm; 1990 Immr = immr < 0 ? immr + VT.getSizeInBits() : immr; 1991 Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1; 1992 // SRA requires a signed extraction 1993 if (VT == MVT::i32) 1994 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri; 1995 else 1996 Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri; 1997 return true; 1998 } 1999 2000 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) { 2001 assert(N->getOpcode() == ISD::SIGN_EXTEND); 2002 2003 EVT VT = N->getValueType(0); 2004 EVT NarrowVT = N->getOperand(0)->getValueType(0); 2005 if (VT != MVT::i64 || NarrowVT != MVT::i32) 2006 return false; 2007 2008 uint64_t ShiftImm; 2009 SDValue Op = N->getOperand(0); 2010 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm)) 2011 return false; 2012 2013 SDLoc dl(N); 2014 // Extend the incoming operand of the shift to 64-bits. 
2015 SDValue Opd0 = Widen(CurDAG, Op.getOperand(0)); 2016 unsigned Immr = ShiftImm; 2017 unsigned Imms = NarrowVT.getSizeInBits() - 1; 2018 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2019 CurDAG->getTargetConstant(Imms, dl, VT)}; 2020 CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops); 2021 return true; 2022 } 2023 2024 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half 2025 /// extract of a subvector. 2026 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) { 2027 assert(N->getOpcode() == ISD::FP_EXTEND); 2028 2029 // There are 2 forms of fcvtl2 - extend to double or extend to float. 2030 SDValue Extract = N->getOperand(0); 2031 EVT VT = N->getValueType(0); 2032 EVT NarrowVT = Extract.getValueType(); 2033 if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) && 2034 (VT != MVT::v4f32 || NarrowVT != MVT::v4f16)) 2035 return false; 2036 2037 // Optionally look past a bitcast. 2038 Extract = peekThroughBitcasts(Extract); 2039 if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR) 2040 return false; 2041 2042 // Match extract from start of high half index. 2043 // Example: v8i16 -> v4i16 means the extract must begin at index 4. 2044 unsigned ExtractIndex = Extract.getConstantOperandVal(1); 2045 if (ExtractIndex != Extract.getValueType().getVectorNumElements()) 2046 return false; 2047 2048 auto Opcode = VT == MVT::v2f64 ? 
AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16; 2049 CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0)); 2050 return true; 2051 } 2052 2053 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc, 2054 SDValue &Opd0, unsigned &Immr, unsigned &Imms, 2055 unsigned NumberOfIgnoredLowBits = 0, 2056 bool BiggerPattern = false) { 2057 if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64) 2058 return false; 2059 2060 switch (N->getOpcode()) { 2061 default: 2062 if (!N->isMachineOpcode()) 2063 return false; 2064 break; 2065 case ISD::AND: 2066 return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms, 2067 NumberOfIgnoredLowBits, BiggerPattern); 2068 case ISD::SRL: 2069 case ISD::SRA: 2070 return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern); 2071 2072 case ISD::SIGN_EXTEND_INREG: 2073 return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms); 2074 } 2075 2076 unsigned NOpc = N->getMachineOpcode(); 2077 switch (NOpc) { 2078 default: 2079 return false; 2080 case AArch64::SBFMWri: 2081 case AArch64::UBFMWri: 2082 case AArch64::SBFMXri: 2083 case AArch64::UBFMXri: 2084 Opc = NOpc; 2085 Opd0 = N->getOperand(0); 2086 Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue(); 2087 Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue(); 2088 return true; 2089 } 2090 // Unreachable 2091 return false; 2092 } 2093 2094 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) { 2095 unsigned Opc, Immr, Imms; 2096 SDValue Opd0; 2097 if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms)) 2098 return false; 2099 2100 EVT VT = N->getValueType(0); 2101 SDLoc dl(N); 2102 2103 // If the bit extract operation is 64bit but the original type is 32bit, we 2104 // need to add one EXTRACT_SUBREG. 
2105 if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) { 2106 SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64), 2107 CurDAG->getTargetConstant(Imms, dl, MVT::i64)}; 2108 2109 SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64); 2110 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32); 2111 ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl, 2112 MVT::i32, SDValue(BFM, 0), SubReg)); 2113 return true; 2114 } 2115 2116 SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT), 2117 CurDAG->getTargetConstant(Imms, dl, VT)}; 2118 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2119 return true; 2120 } 2121 2122 /// Does DstMask form a complementary pair with the mask provided by 2123 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking, 2124 /// this asks whether DstMask zeroes precisely those bits that will be set by 2125 /// the other half. 2126 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted, 2127 unsigned NumberOfIgnoredHighBits, EVT VT) { 2128 assert((VT == MVT::i32 || VT == MVT::i64) && 2129 "i32 or i64 mask type expected!"); 2130 unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits; 2131 2132 APInt SignificantDstMask = APInt(BitWidth, DstMask); 2133 APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth); 2134 2135 return (SignificantDstMask & SignificantBitsToBeInserted) == 0 && 2136 (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue(); 2137 } 2138 2139 // Look for bits that will be useful for later uses. 2140 // A bit is consider useless as soon as it is dropped and never used 2141 // before it as been dropped. 2142 // E.g., looking for useful bit of x 2143 // 1. y = x & 0x7 2144 // 2. z = y >> 2 2145 // After #1, x useful bits are 0x7, then the useful bits of x, live through 2146 // y. 2147 // After #2, the useful bits of x are 0x4. 
2148 // However, if x is used on an unpredicatable instruction, then all its bits 2149 // are useful. 2150 // E.g. 2151 // 1. y = x & 0x7 2152 // 2. z = y >> 2 2153 // 3. str x, [@x] 2154 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0); 2155 2156 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits, 2157 unsigned Depth) { 2158 uint64_t Imm = 2159 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2160 Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth()); 2161 UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm); 2162 getUsefulBits(Op, UsefulBits, Depth + 1); 2163 } 2164 2165 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits, 2166 uint64_t Imm, uint64_t MSB, 2167 unsigned Depth) { 2168 // inherit the bitwidth value 2169 APInt OpUsefulBits(UsefulBits); 2170 OpUsefulBits = 1; 2171 2172 if (MSB >= Imm) { 2173 OpUsefulBits <<= MSB - Imm + 1; 2174 --OpUsefulBits; 2175 // The interesting part will be in the lower part of the result 2176 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2177 // The interesting part was starting at Imm in the argument 2178 OpUsefulBits <<= Imm; 2179 } else { 2180 OpUsefulBits <<= MSB + 1; 2181 --OpUsefulBits; 2182 // The interesting part will be shifted in the result 2183 OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm; 2184 getUsefulBits(Op, OpUsefulBits, Depth + 1); 2185 // The interesting part was at zero in the argument 2186 OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm); 2187 } 2188 2189 UsefulBits &= OpUsefulBits; 2190 } 2191 2192 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits, 2193 unsigned Depth) { 2194 uint64_t Imm = 2195 cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue(); 2196 uint64_t MSB = 2197 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2198 2199 getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth); 2200 } 2201 2202 static void 
getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits, 2203 unsigned Depth) { 2204 uint64_t ShiftTypeAndValue = 2205 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2206 APInt Mask(UsefulBits); 2207 Mask.clearAllBits(); 2208 Mask.flipAllBits(); 2209 2210 if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) { 2211 // Shift Left 2212 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2213 Mask <<= ShiftAmt; 2214 getUsefulBits(Op, Mask, Depth + 1); 2215 Mask.lshrInPlace(ShiftAmt); 2216 } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) { 2217 // Shift Right 2218 // We do not handle AArch64_AM::ASR, because the sign will change the 2219 // number of useful bits 2220 uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue); 2221 Mask.lshrInPlace(ShiftAmt); 2222 getUsefulBits(Op, Mask, Depth + 1); 2223 Mask <<= ShiftAmt; 2224 } else 2225 return; 2226 2227 UsefulBits &= Mask; 2228 } 2229 2230 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits, 2231 unsigned Depth) { 2232 uint64_t Imm = 2233 cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue(); 2234 uint64_t MSB = 2235 cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue(); 2236 2237 APInt OpUsefulBits(UsefulBits); 2238 OpUsefulBits = 1; 2239 2240 APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0); 2241 ResultUsefulBits.flipAllBits(); 2242 APInt Mask(UsefulBits.getBitWidth(), 0); 2243 2244 getUsefulBits(Op, ResultUsefulBits, Depth + 1); 2245 2246 if (MSB >= Imm) { 2247 // The instruction is a BFXIL. 2248 uint64_t Width = MSB - Imm + 1; 2249 uint64_t LSB = Imm; 2250 2251 OpUsefulBits <<= Width; 2252 --OpUsefulBits; 2253 2254 if (Op.getOperand(1) == Orig) { 2255 // Copy the low bits from the result to bits starting from LSB. 
2256 Mask = ResultUsefulBits & OpUsefulBits; 2257 Mask <<= LSB; 2258 } 2259 2260 if (Op.getOperand(0) == Orig) 2261 // Bits starting from LSB in the input contribute to the result. 2262 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2263 } else { 2264 // The instruction is a BFI. 2265 uint64_t Width = MSB + 1; 2266 uint64_t LSB = UsefulBits.getBitWidth() - Imm; 2267 2268 OpUsefulBits <<= Width; 2269 --OpUsefulBits; 2270 OpUsefulBits <<= LSB; 2271 2272 if (Op.getOperand(1) == Orig) { 2273 // Copy the bits from the result to the zero bits. 2274 Mask = ResultUsefulBits & OpUsefulBits; 2275 Mask.lshrInPlace(LSB); 2276 } 2277 2278 if (Op.getOperand(0) == Orig) 2279 Mask |= (ResultUsefulBits & ~OpUsefulBits); 2280 } 2281 2282 UsefulBits &= Mask; 2283 } 2284 2285 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits, 2286 SDValue Orig, unsigned Depth) { 2287 2288 // Users of this node should have already been instruction selected 2289 // FIXME: Can we turn that into an assert? 2290 if (!UserNode->isMachineOpcode()) 2291 return; 2292 2293 switch (UserNode->getMachineOpcode()) { 2294 default: 2295 return; 2296 case AArch64::ANDSWri: 2297 case AArch64::ANDSXri: 2298 case AArch64::ANDWri: 2299 case AArch64::ANDXri: 2300 // We increment Depth only when we call the getUsefulBits 2301 return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits, 2302 Depth); 2303 case AArch64::UBFMWri: 2304 case AArch64::UBFMXri: 2305 return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth); 2306 2307 case AArch64::ORRWrs: 2308 case AArch64::ORRXrs: 2309 if (UserNode->getOperand(1) != Orig) 2310 return; 2311 return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits, 2312 Depth); 2313 case AArch64::BFMWri: 2314 case AArch64::BFMXri: 2315 return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth); 2316 2317 case AArch64::STRBBui: 2318 case AArch64::STURBBi: 2319 if (UserNode->getOperand(0) != Orig) 2320 return; 2321 UsefulBits &= 
APInt(UsefulBits.getBitWidth(), 0xff); 2322 return; 2323 2324 case AArch64::STRHHui: 2325 case AArch64::STURHHi: 2326 if (UserNode->getOperand(0) != Orig) 2327 return; 2328 UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff); 2329 return; 2330 } 2331 } 2332 2333 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) { 2334 if (Depth >= SelectionDAG::MaxRecursionDepth) 2335 return; 2336 // Initialize UsefulBits 2337 if (!Depth) { 2338 unsigned Bitwidth = Op.getScalarValueSizeInBits(); 2339 // At the beginning, assume every produced bits is useful 2340 UsefulBits = APInt(Bitwidth, 0); 2341 UsefulBits.flipAllBits(); 2342 } 2343 APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0); 2344 2345 for (SDNode *Node : Op.getNode()->uses()) { 2346 // A use cannot produce useful bits 2347 APInt UsefulBitsForUse = APInt(UsefulBits); 2348 getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth); 2349 UsersUsefulBits |= UsefulBitsForUse; 2350 } 2351 // UsefulBits contains the produced bits that are meaningful for the 2352 // current definition, thus a user cannot make a bit meaningful at 2353 // this point 2354 UsefulBits &= UsersUsefulBits; 2355 } 2356 2357 /// Create a machine node performing a notional SHL of Op by ShlAmount. If 2358 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is 2359 /// 0, return Op unchanged. 2360 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) { 2361 if (ShlAmount == 0) 2362 return Op; 2363 2364 EVT VT = Op.getValueType(); 2365 SDLoc dl(Op); 2366 unsigned BitWidth = VT.getSizeInBits(); 2367 unsigned UBFMOpc = BitWidth == 32 ? 
AArch64::UBFMWri : AArch64::UBFMXri; 2368 2369 SDNode *ShiftNode; 2370 if (ShlAmount > 0) { 2371 // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt 2372 ShiftNode = CurDAG->getMachineNode( 2373 UBFMOpc, dl, VT, Op, 2374 CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT), 2375 CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT)); 2376 } else { 2377 // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1 2378 assert(ShlAmount < 0 && "expected right shift"); 2379 int ShrAmount = -ShlAmount; 2380 ShiftNode = CurDAG->getMachineNode( 2381 UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT), 2382 CurDAG->getTargetConstant(BitWidth - 1, dl, VT)); 2383 } 2384 2385 return SDValue(ShiftNode, 0); 2386 } 2387 2388 /// Does this tree qualify as an attempt to move a bitfield into position, 2389 /// essentially "(and (shl VAL, N), Mask)". 2390 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op, 2391 bool BiggerPattern, 2392 SDValue &Src, int &ShiftAmount, 2393 int &MaskWidth) { 2394 EVT VT = Op.getValueType(); 2395 unsigned BitWidth = VT.getSizeInBits(); 2396 (void)BitWidth; 2397 assert(BitWidth == 32 || BitWidth == 64); 2398 2399 KnownBits Known = CurDAG->computeKnownBits(Op); 2400 2401 // Non-zero in the sense that they're not provably zero, which is the key 2402 // point if we want to use this value 2403 uint64_t NonZeroBits = (~Known.Zero).getZExtValue(); 2404 2405 // Discard a constant AND mask if present. It's safe because the node will 2406 // already have been factored into the computeKnownBits calculation above. 2407 uint64_t AndImm; 2408 if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) { 2409 assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0); 2410 Op = Op.getOperand(0); 2411 } 2412 2413 // Don't match if the SHL has more than one use, since then we'll end up 2414 // generating SHL+UBFIZ instead of just keeping SHL+AND. 
2415 if (!BiggerPattern && !Op.hasOneUse()) 2416 return false; 2417 2418 uint64_t ShlImm; 2419 if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm)) 2420 return false; 2421 Op = Op.getOperand(0); 2422 2423 if (!isShiftedMask_64(NonZeroBits)) 2424 return false; 2425 2426 ShiftAmount = countTrailingZeros(NonZeroBits); 2427 MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount); 2428 2429 // BFI encompasses sufficiently many nodes that it's worth inserting an extra 2430 // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL 2431 // amount. BiggerPattern is true when this pattern is being matched for BFI, 2432 // BiggerPattern is false when this pattern is being matched for UBFIZ, in 2433 // which case it is not profitable to insert an extra shift. 2434 if (ShlImm - ShiftAmount != 0 && !BiggerPattern) 2435 return false; 2436 Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount); 2437 2438 return true; 2439 } 2440 2441 static bool isShiftedMask(uint64_t Mask, EVT VT) { 2442 assert(VT == MVT::i32 || VT == MVT::i64); 2443 if (VT == MVT::i32) 2444 return isShiftedMask_32(Mask); 2445 return isShiftedMask_64(Mask); 2446 } 2447 2448 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being 2449 // inserted only sets known zero bits. 2450 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) { 2451 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2452 2453 EVT VT = N->getValueType(0); 2454 if (VT != MVT::i32 && VT != MVT::i64) 2455 return false; 2456 2457 unsigned BitWidth = VT.getSizeInBits(); 2458 2459 uint64_t OrImm; 2460 if (!isOpcWithIntImmediate(N, ISD::OR, OrImm)) 2461 return false; 2462 2463 // Skip this transformation if the ORR immediate can be encoded in the ORR. 2464 // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely 2465 // performance neutral. 
2466 if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth)) 2467 return false; 2468 2469 uint64_t MaskImm; 2470 SDValue And = N->getOperand(0); 2471 // Must be a single use AND with an immediate operand. 2472 if (!And.hasOneUse() || 2473 !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm)) 2474 return false; 2475 2476 // Compute the Known Zero for the AND as this allows us to catch more general 2477 // cases than just looking for AND with imm. 2478 KnownBits Known = CurDAG->computeKnownBits(And); 2479 2480 // Non-zero in the sense that they're not provably zero, which is the key 2481 // point if we want to use this value. 2482 uint64_t NotKnownZero = (~Known.Zero).getZExtValue(); 2483 2484 // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00). 2485 if (!isShiftedMask(Known.Zero.getZExtValue(), VT)) 2486 return false; 2487 2488 // The bits being inserted must only set those bits that are known to be zero. 2489 if ((OrImm & NotKnownZero) != 0) { 2490 // FIXME: It's okay if the OrImm sets NotKnownZero bits to 1, but we don't 2491 // currently handle this case. 2492 return false; 2493 } 2494 2495 // BFI/BFXIL dst, src, #lsb, #width. 2496 int LSB = countTrailingOnes(NotKnownZero); 2497 int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation(); 2498 2499 // BFI/BFXIL is an alias of BFM, so translate to BFM operands. 2500 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2501 unsigned ImmS = Width - 1; 2502 2503 // If we're creating a BFI instruction avoid cases where we need more 2504 // instructions to materialize the BFI constant as compared to the original 2505 // ORR. A BFXIL will use the same constant as the original ORR, so the code 2506 // should be no worse in this case. 
2507 bool IsBFI = LSB != 0; 2508 uint64_t BFIImm = OrImm >> LSB; 2509 if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) { 2510 // We have a BFI instruction and we know the constant can't be materialized 2511 // with a ORR-immediate with the zero register. 2512 unsigned OrChunks = 0, BFIChunks = 0; 2513 for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) { 2514 if (((OrImm >> Shift) & 0xFFFF) != 0) 2515 ++OrChunks; 2516 if (((BFIImm >> Shift) & 0xFFFF) != 0) 2517 ++BFIChunks; 2518 } 2519 if (BFIChunks > OrChunks) 2520 return false; 2521 } 2522 2523 // Materialize the constant to be inserted. 2524 SDLoc DL(N); 2525 unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm; 2526 SDNode *MOVI = CurDAG->getMachineNode( 2527 MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT)); 2528 2529 // Create the BFI/BFXIL instruction. 2530 SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0), 2531 CurDAG->getTargetConstant(ImmR, DL, VT), 2532 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2533 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2534 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2535 return true; 2536 } 2537 2538 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits, 2539 SelectionDAG *CurDAG) { 2540 assert(N->getOpcode() == ISD::OR && "Expect a OR operation"); 2541 2542 EVT VT = N->getValueType(0); 2543 if (VT != MVT::i32 && VT != MVT::i64) 2544 return false; 2545 2546 unsigned BitWidth = VT.getSizeInBits(); 2547 2548 // Because of simplify-demanded-bits in DAGCombine, involved masks may not 2549 // have the expected shape. Try to undo that. 
2550 2551 unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros(); 2552 unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros(); 2553 2554 // Given a OR operation, check if we have the following pattern 2555 // ubfm c, b, imm, imm2 (or something that does the same jobs, see 2556 // isBitfieldExtractOp) 2557 // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and 2558 // countTrailingZeros(mask2) == imm2 - imm + 1 2559 // f = d | c 2560 // if yes, replace the OR instruction with: 2561 // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2 2562 2563 // OR is commutative, check all combinations of operand order and values of 2564 // BiggerPattern, i.e. 2565 // Opd0, Opd1, BiggerPattern=false 2566 // Opd1, Opd0, BiggerPattern=false 2567 // Opd0, Opd1, BiggerPattern=true 2568 // Opd1, Opd0, BiggerPattern=true 2569 // Several of these combinations may match, so check with BiggerPattern=false 2570 // first since that will produce better results by matching more instructions 2571 // and/or inserting fewer extra instructions. 
2572 for (int I = 0; I < 4; ++I) { 2573 2574 SDValue Dst, Src; 2575 unsigned ImmR, ImmS; 2576 bool BiggerPattern = I / 2; 2577 SDValue OrOpd0Val = N->getOperand(I % 2); 2578 SDNode *OrOpd0 = OrOpd0Val.getNode(); 2579 SDValue OrOpd1Val = N->getOperand((I + 1) % 2); 2580 SDNode *OrOpd1 = OrOpd1Val.getNode(); 2581 2582 unsigned BFXOpc; 2583 int DstLSB, Width; 2584 if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS, 2585 NumberOfIgnoredLowBits, BiggerPattern)) { 2586 // Check that the returned opcode is compatible with the pattern, 2587 // i.e., same type and zero extended (U and not S) 2588 if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) || 2589 (BFXOpc != AArch64::UBFMWri && VT == MVT::i32)) 2590 continue; 2591 2592 // Compute the width of the bitfield insertion 2593 DstLSB = 0; 2594 Width = ImmS - ImmR + 1; 2595 // FIXME: This constraint is to catch bitfield insertion we may 2596 // want to widen the pattern if we want to grab general bitfied 2597 // move case 2598 if (Width <= 0) 2599 continue; 2600 2601 // If the mask on the insertee is correct, we have a BFXIL operation. We 2602 // can share the ImmR and ImmS values from the already-computed UBFM. 2603 } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val, 2604 BiggerPattern, 2605 Src, DstLSB, Width)) { 2606 ImmR = (BitWidth - DstLSB) % BitWidth; 2607 ImmS = Width - 1; 2608 } else 2609 continue; 2610 2611 // Check the second part of the pattern 2612 EVT VT = OrOpd1Val.getValueType(); 2613 assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand"); 2614 2615 // Compute the Known Zero for the candidate of the first operand. 2616 // This allows to catch more general case than just looking for 2617 // AND with imm. Indeed, simplify-demanded-bits may have removed 2618 // the AND instruction because it proves it was useless. 
2619 KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val); 2620 2621 // Check if there is enough room for the second operand to appear 2622 // in the first one 2623 APInt BitsToBeInserted = 2624 APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width); 2625 2626 if ((BitsToBeInserted & ~Known.Zero) != 0) 2627 continue; 2628 2629 // Set the first operand 2630 uint64_t Imm; 2631 if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) && 2632 isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT)) 2633 // In that case, we can eliminate the AND 2634 Dst = OrOpd1->getOperand(0); 2635 else 2636 // Maybe the AND has been removed by simplify-demanded-bits 2637 // or is useful because it discards more bits 2638 Dst = OrOpd1Val; 2639 2640 // both parts match 2641 SDLoc DL(N); 2642 SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT), 2643 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2644 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2645 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2646 return true; 2647 } 2648 2649 // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff 2650 // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted 2651 // mask (e.g., 0x000ffff0). 2652 uint64_t Mask0Imm, Mask1Imm; 2653 SDValue And0 = N->getOperand(0); 2654 SDValue And1 = N->getOperand(1); 2655 if (And0.hasOneUse() && And1.hasOneUse() && 2656 isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) && 2657 isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) && 2658 APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) && 2659 (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) { 2660 2661 // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm), 2662 // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the 2663 // bits to be inserted. 
2664 if (isShiftedMask(Mask0Imm, VT)) { 2665 std::swap(And0, And1); 2666 std::swap(Mask0Imm, Mask1Imm); 2667 } 2668 2669 SDValue Src = And1->getOperand(0); 2670 SDValue Dst = And0->getOperand(0); 2671 unsigned LSB = countTrailingZeros(Mask1Imm); 2672 int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation(); 2673 2674 // The BFXIL inserts the low-order bits from a source register, so right 2675 // shift the needed bits into place. 2676 SDLoc DL(N); 2677 unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2678 SDNode *LSR = CurDAG->getMachineNode( 2679 ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT), 2680 CurDAG->getTargetConstant(BitWidth - 1, DL, VT)); 2681 2682 // BFXIL is an alias of BFM, so translate to BFM operands. 2683 unsigned ImmR = (BitWidth - LSB) % BitWidth; 2684 unsigned ImmS = Width - 1; 2685 2686 // Create the BFXIL instruction. 2687 SDValue Ops[] = {Dst, SDValue(LSR, 0), 2688 CurDAG->getTargetConstant(ImmR, DL, VT), 2689 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2690 unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri; 2691 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2692 return true; 2693 } 2694 2695 return false; 2696 } 2697 2698 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) { 2699 if (N->getOpcode() != ISD::OR) 2700 return false; 2701 2702 APInt NUsefulBits; 2703 getUsefulBits(SDValue(N, 0), NUsefulBits); 2704 2705 // If all bits are not useful, just return UNDEF. 2706 if (!NUsefulBits) { 2707 CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0)); 2708 return true; 2709 } 2710 2711 if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG)) 2712 return true; 2713 2714 return tryBitfieldInsertOpFromOrAndImm(N, CurDAG); 2715 } 2716 2717 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the 2718 /// equivalent of a left shift by a constant amount followed by an and masking 2719 /// out a contiguous set of bits. 
2720 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) { 2721 if (N->getOpcode() != ISD::AND) 2722 return false; 2723 2724 EVT VT = N->getValueType(0); 2725 if (VT != MVT::i32 && VT != MVT::i64) 2726 return false; 2727 2728 SDValue Op0; 2729 int DstLSB, Width; 2730 if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false, 2731 Op0, DstLSB, Width)) 2732 return false; 2733 2734 // ImmR is the rotate right amount. 2735 unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits(); 2736 // ImmS is the most significant bit of the source to be moved. 2737 unsigned ImmS = Width - 1; 2738 2739 SDLoc DL(N); 2740 SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT), 2741 CurDAG->getTargetConstant(ImmS, DL, VT)}; 2742 unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri; 2743 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2744 return true; 2745 } 2746 2747 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in 2748 /// variable shift/rotate instructions. 2749 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) { 2750 EVT VT = N->getValueType(0); 2751 2752 unsigned Opc; 2753 switch (N->getOpcode()) { 2754 case ISD::ROTR: 2755 Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr; 2756 break; 2757 case ISD::SHL: 2758 Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr; 2759 break; 2760 case ISD::SRL: 2761 Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr; 2762 break; 2763 case ISD::SRA: 2764 Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr; 2765 break; 2766 default: 2767 return false; 2768 } 2769 2770 uint64_t Size; 2771 uint64_t Bits; 2772 if (VT == MVT::i32) { 2773 Bits = 5; 2774 Size = 32; 2775 } else if (VT == MVT::i64) { 2776 Bits = 6; 2777 Size = 64; 2778 } else 2779 return false; 2780 2781 SDValue ShiftAmt = N->getOperand(1); 2782 SDLoc DL(N); 2783 SDValue NewShiftAmt; 2784 2785 // Skip over an extend of the shift amount. 
2786 if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND || 2787 ShiftAmt->getOpcode() == ISD::ANY_EXTEND) 2788 ShiftAmt = ShiftAmt->getOperand(0); 2789 2790 if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { 2791 SDValue Add0 = ShiftAmt->getOperand(0); 2792 SDValue Add1 = ShiftAmt->getOperand(1); 2793 uint64_t Add0Imm; 2794 uint64_t Add1Imm; 2795 // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X 2796 // to avoid the ADD/SUB. 2797 if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0)) 2798 NewShiftAmt = Add0; 2799 // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to 2800 // generate a NEG instead of a SUB of a constant. 2801 else if (ShiftAmt->getOpcode() == ISD::SUB && 2802 isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 && 2803 (Add0Imm % Size == 0)) { 2804 unsigned NegOpc; 2805 unsigned ZeroReg; 2806 EVT SubVT = ShiftAmt->getValueType(0); 2807 if (SubVT == MVT::i32) { 2808 NegOpc = AArch64::SUBWrr; 2809 ZeroReg = AArch64::WZR; 2810 } else { 2811 assert(SubVT == MVT::i64); 2812 NegOpc = AArch64::SUBXrr; 2813 ZeroReg = AArch64::XZR; 2814 } 2815 SDValue Zero = 2816 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT); 2817 MachineSDNode *Neg = 2818 CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1); 2819 NewShiftAmt = SDValue(Neg, 0); 2820 } else 2821 return false; 2822 } else { 2823 // If the shift amount is masked with an AND, check that the mask covers the 2824 // bits that are implicitly ANDed off by the above opcodes and if so, skip 2825 // the AND. 2826 uint64_t MaskImm; 2827 if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) && 2828 !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm)) 2829 return false; 2830 2831 if (countTrailingOnes(MaskImm) < Bits) 2832 return false; 2833 2834 NewShiftAmt = ShiftAmt->getOperand(0); 2835 } 2836 2837 // Narrow/widen the shift amount to match the size of the shift operation. 
2838 if (VT == MVT::i32) 2839 NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt); 2840 else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) { 2841 SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32); 2842 MachineSDNode *Ext = CurDAG->getMachineNode( 2843 AArch64::SUBREG_TO_REG, DL, VT, 2844 CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg); 2845 NewShiftAmt = SDValue(Ext, 0); 2846 } 2847 2848 SDValue Ops[] = {N->getOperand(0), NewShiftAmt}; 2849 CurDAG->SelectNodeTo(N, Opc, VT, Ops); 2850 return true; 2851 } 2852 2853 bool 2854 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, 2855 unsigned RegWidth) { 2856 APFloat FVal(0.0); 2857 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 2858 FVal = CN->getValueAPF(); 2859 else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) { 2860 // Some otherwise illegal constants are allowed in this case. 2861 if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow || 2862 !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1))) 2863 return false; 2864 2865 ConstantPoolSDNode *CN = 2866 dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)); 2867 FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF(); 2868 } else 2869 return false; 2870 2871 // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits 2872 // is between 1 and 32 for a destination w-register, or 1 and 64 for an 2873 // x-register. 2874 // 2875 // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we 2876 // want THIS_NODE to be 2^fbits. This is much easier to deal with using 2877 // integers. 2878 bool IsExact; 2879 2880 // fbits is between 1 and 64 in the worst-case, which means the fmul 2881 // could have 2^64 as an actual operand. Need 65 bits of precision. 2882 APSInt IntVal(65, true); 2883 FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact); 2884 2885 // N.b. isPowerOf2 also checks for > 0. 
2886 if (!IsExact || !IntVal.isPowerOf2()) return false; 2887 unsigned FBits = IntVal.logBase2(); 2888 2889 // Checks above should have guaranteed that we haven't lost information in 2890 // finding FBits, but it must still be in range. 2891 if (FBits == 0 || FBits > RegWidth) return false; 2892 2893 FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32); 2894 return true; 2895 } 2896 2897 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields 2898 // of the string and obtains the integer values from them and combines these 2899 // into a single value to be used in the MRS/MSR instruction. 2900 static int getIntOperandFromRegisterString(StringRef RegString) { 2901 SmallVector<StringRef, 5> Fields; 2902 RegString.split(Fields, ':'); 2903 2904 if (Fields.size() == 1) 2905 return -1; 2906 2907 assert(Fields.size() == 5 2908 && "Invalid number of fields in read register string"); 2909 2910 SmallVector<int, 5> Ops; 2911 bool AllIntFields = true; 2912 2913 for (StringRef Field : Fields) { 2914 unsigned IntField; 2915 AllIntFields &= !Field.getAsInteger(10, IntField); 2916 Ops.push_back(IntField); 2917 } 2918 2919 assert(AllIntFields && 2920 "Unexpected non-integer value in special register string."); 2921 2922 // Need to combine the integer fields of the string into a single value 2923 // based on the bit encoding of MRS/MSR instruction. 2924 return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) | 2925 (Ops[3] << 3) | (Ops[4]); 2926 } 2927 2928 // Lower the read_register intrinsic to an MRS instruction node if the special 2929 // register string argument is either of the form detailed in the ALCE (the 2930 // form described in getIntOperandsFromRegsterString) or is a named register 2931 // known by the MRS SysReg mapper. 
2932 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) { 2933 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2934 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2935 SDLoc DL(N); 2936 2937 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2938 if (Reg != -1) { 2939 ReplaceNode(N, CurDAG->getMachineNode( 2940 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 2941 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2942 N->getOperand(0))); 2943 return true; 2944 } 2945 2946 // Use the sysreg mapper to map the remaining possible strings to the 2947 // value for the register to be used for the instruction operand. 2948 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 2949 if (TheReg && TheReg->Readable && 2950 TheReg->haveFeatures(Subtarget->getFeatureBits())) 2951 Reg = TheReg->Encoding; 2952 else 2953 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 2954 2955 if (Reg != -1) { 2956 ReplaceNode(N, CurDAG->getMachineNode( 2957 AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other, 2958 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2959 N->getOperand(0))); 2960 return true; 2961 } 2962 2963 if (RegString->getString() == "pc") { 2964 ReplaceNode(N, CurDAG->getMachineNode( 2965 AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other, 2966 CurDAG->getTargetConstant(0, DL, MVT::i32), 2967 N->getOperand(0))); 2968 return true; 2969 } 2970 2971 return false; 2972 } 2973 2974 // Lower the write_register intrinsic to an MSR instruction node if the special 2975 // register string argument is either of the form detailed in the ALCE (the 2976 // form described in getIntOperandsFromRegsterString) or is a named register 2977 // known by the MSR SysReg mapper. 
2978 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) { 2979 const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1)); 2980 const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0)); 2981 SDLoc DL(N); 2982 2983 int Reg = getIntOperandFromRegisterString(RegString->getString()); 2984 if (Reg != -1) { 2985 ReplaceNode( 2986 N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other, 2987 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 2988 N->getOperand(2), N->getOperand(0))); 2989 return true; 2990 } 2991 2992 // Check if the register was one of those allowed as the pstatefield value in 2993 // the MSR (immediate) instruction. To accept the values allowed in the 2994 // pstatefield for the MSR (immediate) instruction, we also require that an 2995 // immediate value has been provided as an argument, we know that this is 2996 // the case as it has been ensured by semantic checking. 2997 auto PMapper = AArch64PState::lookupPStateByName(RegString->getString()); 2998 if (PMapper) { 2999 assert (isa<ConstantSDNode>(N->getOperand(2)) 3000 && "Expected a constant integer expression."); 3001 unsigned Reg = PMapper->Encoding; 3002 uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue(); 3003 unsigned State; 3004 if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) { 3005 assert(Immed < 2 && "Bad imm"); 3006 State = AArch64::MSRpstateImm1; 3007 } else { 3008 assert(Immed < 16 && "Bad imm"); 3009 State = AArch64::MSRpstateImm4; 3010 } 3011 ReplaceNode(N, CurDAG->getMachineNode( 3012 State, DL, MVT::Other, 3013 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3014 CurDAG->getTargetConstant(Immed, DL, MVT::i16), 3015 N->getOperand(0))); 3016 return true; 3017 } 3018 3019 // Use the sysreg mapper to attempt to map the remaining possible strings 3020 // to the value for the register to be used for the MSR (register) 3021 // instruction operand. 
3022 auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString()); 3023 if (TheReg && TheReg->Writeable && 3024 TheReg->haveFeatures(Subtarget->getFeatureBits())) 3025 Reg = TheReg->Encoding; 3026 else 3027 Reg = AArch64SysReg::parseGenericRegister(RegString->getString()); 3028 if (Reg != -1) { 3029 ReplaceNode(N, CurDAG->getMachineNode( 3030 AArch64::MSR, DL, MVT::Other, 3031 CurDAG->getTargetConstant(Reg, DL, MVT::i32), 3032 N->getOperand(2), N->getOperand(0))); 3033 return true; 3034 } 3035 3036 return false; 3037 } 3038 3039 /// We've got special pseudo-instructions for these 3040 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) { 3041 unsigned Opcode; 3042 EVT MemTy = cast<MemSDNode>(N)->getMemoryVT(); 3043 3044 // Leave IR for LSE if subtarget supports it. 3045 if (Subtarget->hasLSE()) return false; 3046 3047 if (MemTy == MVT::i8) 3048 Opcode = AArch64::CMP_SWAP_8; 3049 else if (MemTy == MVT::i16) 3050 Opcode = AArch64::CMP_SWAP_16; 3051 else if (MemTy == MVT::i32) 3052 Opcode = AArch64::CMP_SWAP_32; 3053 else if (MemTy == MVT::i64) 3054 Opcode = AArch64::CMP_SWAP_64; 3055 else 3056 llvm_unreachable("Unknown AtomicCmpSwap type"); 3057 3058 MVT RegTy = MemTy == MVT::i64 ? 
MVT::i64 : MVT::i32; 3059 SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3), 3060 N->getOperand(0)}; 3061 SDNode *CmpSwap = CurDAG->getMachineNode( 3062 Opcode, SDLoc(N), 3063 CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops); 3064 3065 MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand(); 3066 CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp}); 3067 3068 ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0)); 3069 ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2)); 3070 CurDAG->RemoveDeadNode(N); 3071 3072 return true; 3073 } 3074 3075 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base, 3076 SDValue &Offset) { 3077 auto C = dyn_cast<ConstantSDNode>(N); 3078 if (!C) 3079 return false; 3080 3081 auto Ty = N->getValueType(0); 3082 3083 int64_t Imm = C->getSExtValue(); 3084 SDLoc DL(N); 3085 3086 if ((Imm >= -128) && (Imm <= 127)) { 3087 Base = CurDAG->getTargetConstant(Imm, DL, Ty); 3088 Offset = CurDAG->getTargetConstant(0, DL, Ty); 3089 return true; 3090 } 3091 3092 if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) { 3093 Base = CurDAG->getTargetConstant(Imm/256, DL, Ty); 3094 Offset = CurDAG->getTargetConstant(8, DL, Ty); 3095 return true; 3096 } 3097 3098 return false; 3099 } 3100 3101 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) { 3102 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3103 const int64_t ImmVal = CNode->getSExtValue(); 3104 SDLoc DL(N); 3105 3106 switch (VT.SimpleTy) { 3107 case MVT::i8: 3108 // Can always select i8s, no shift, mask the immediate value to 3109 // deal with sign-extended value from lowering. 3110 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3111 Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i32); 3112 return true; 3113 case MVT::i16: 3114 // i16 values get sign-extended to 32-bits during lowering. 
3115 if ((ImmVal & 0xFF) == ImmVal) { 3116 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3117 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3118 return true; 3119 } else if ((ImmVal & 0xFF) == 0) { 3120 assert((ImmVal >= -32768) && (ImmVal <= 32512)); 3121 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3122 Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32); 3123 return true; 3124 } 3125 break; 3126 case MVT::i32: 3127 case MVT::i64: 3128 // Range of immediate won't trigger signedness problems for 32/64b. 3129 if ((ImmVal & 0xFF) == ImmVal) { 3130 Shift = CurDAG->getTargetConstant(0, DL, MVT::i32); 3131 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3132 return true; 3133 } else if ((ImmVal & 0xFF00) == ImmVal) { 3134 Shift = CurDAG->getTargetConstant(8, DL, MVT::i32); 3135 Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32); 3136 return true; 3137 } 3138 break; 3139 default: 3140 break; 3141 } 3142 } 3143 3144 return false; 3145 } 3146 3147 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) { 3148 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3149 int64_t ImmVal = CNode->getSExtValue(); 3150 SDLoc DL(N); 3151 if (ImmVal >= -128 && ImmVal < 128) { 3152 Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); 3153 return true; 3154 } 3155 } 3156 return false; 3157 } 3158 3159 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) { 3160 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3161 uint64_t ImmVal = CNode->getZExtValue(); 3162 3163 switch (VT.SimpleTy) { 3164 case MVT::i8: 3165 ImmVal &= 0xFF; 3166 break; 3167 case MVT::i16: 3168 ImmVal &= 0xFFFF; 3169 break; 3170 case MVT::i32: 3171 ImmVal &= 0xFFFFFFFF; 3172 break; 3173 case MVT::i64: 3174 break; 3175 default: 3176 llvm_unreachable("Unexpected type"); 3177 } 3178 3179 if (ImmVal < 256) { 3180 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3181 return true; 3182 } 3183 } 3184 return false; 3185 } 
3186 3187 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, 3188 bool Invert) { 3189 if (auto CNode = dyn_cast<ConstantSDNode>(N)) { 3190 uint64_t ImmVal = CNode->getZExtValue(); 3191 SDLoc DL(N); 3192 3193 if (Invert) 3194 ImmVal = ~ImmVal; 3195 3196 // Shift mask depending on type size. 3197 switch (VT.SimpleTy) { 3198 case MVT::i8: 3199 ImmVal &= 0xFF; 3200 ImmVal |= ImmVal << 8; 3201 ImmVal |= ImmVal << 16; 3202 ImmVal |= ImmVal << 32; 3203 break; 3204 case MVT::i16: 3205 ImmVal &= 0xFFFF; 3206 ImmVal |= ImmVal << 16; 3207 ImmVal |= ImmVal << 32; 3208 break; 3209 case MVT::i32: 3210 ImmVal &= 0xFFFFFFFF; 3211 ImmVal |= ImmVal << 32; 3212 break; 3213 case MVT::i64: 3214 break; 3215 default: 3216 llvm_unreachable("Unexpected type"); 3217 } 3218 3219 uint64_t encoding; 3220 if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) { 3221 Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64); 3222 return true; 3223 } 3224 } 3225 return false; 3226 } 3227 3228 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth. 3229 // Rather than attempt to normalise everything we can sometimes saturate the 3230 // shift amount during selection. This function also allows for consistent 3231 // isel patterns by ensuring the resulting "Imm" node is of the i32 type 3232 // required by the instructions. 3233 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, 3234 uint64_t High, bool AllowSaturation, 3235 SDValue &Imm) { 3236 if (auto *CN = dyn_cast<ConstantSDNode>(N)) { 3237 uint64_t ImmVal = CN->getZExtValue(); 3238 3239 // Reject shift amounts that are too small. 3240 if (ImmVal < Low) 3241 return false; 3242 3243 // Reject or saturate shift amounts that are too big. 
3244 if (ImmVal > High) { 3245 if (!AllowSaturation) 3246 return false; 3247 ImmVal = High; 3248 } 3249 3250 Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); 3251 return true; 3252 } 3253 3254 return false; 3255 } 3256 3257 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) { 3258 // tagp(FrameIndex, IRGstack, tag_offset): 3259 // since the offset between FrameIndex and IRGstack is a compile-time 3260 // constant, this can be lowered to a single ADDG instruction. 3261 if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) { 3262 return false; 3263 } 3264 3265 SDValue IRG_SP = N->getOperand(2); 3266 if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN || 3267 cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() != 3268 Intrinsic::aarch64_irg_sp) { 3269 return false; 3270 } 3271 3272 const TargetLowering *TLI = getTargetLowering(); 3273 SDLoc DL(N); 3274 int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex(); 3275 SDValue FiOp = CurDAG->getTargetFrameIndex( 3276 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3277 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3278 3279 SDNode *Out = CurDAG->getMachineNode( 3280 AArch64::TAGPstack, DL, MVT::i64, 3281 {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2), 3282 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3283 ReplaceNode(N, Out); 3284 return true; 3285 } 3286 3287 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) { 3288 assert(isa<ConstantSDNode>(N->getOperand(3)) && 3289 "llvm.aarch64.tagp third argument must be an immediate"); 3290 if (trySelectStackSlotTagP(N)) 3291 return; 3292 // FIXME: above applies in any case when offset between Op1 and Op2 is a 3293 // compile-time constant, not just for stack allocations. 3294 3295 // General case for unrelated pointers in Op1 and Op2. 
3296 SDLoc DL(N); 3297 int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue(); 3298 SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64, 3299 {N->getOperand(1), N->getOperand(2)}); 3300 SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64, 3301 {SDValue(N1, 0), N->getOperand(2)}); 3302 SDNode *N3 = CurDAG->getMachineNode( 3303 AArch64::ADDG, DL, MVT::i64, 3304 {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64), 3305 CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)}); 3306 ReplaceNode(N, N3); 3307 } 3308 3309 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length 3310 // vector types larger than NEON don't have a matching SubRegIndex. 3311 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 3312 assert(V.getValueType().isScalableVector() && 3313 V.getValueType().getSizeInBits().getKnownMinSize() == 3314 AArch64::SVEBitsPerBlock && 3315 "Expected to extract from a packed scalable vector!"); 3316 assert(VT.isFixedLengthVector() && 3317 "Expected to extract a fixed length vector!"); 3318 3319 SDLoc DL(V); 3320 switch (VT.getSizeInBits()) { 3321 case 64: { 3322 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 3323 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3324 } 3325 case 128: { 3326 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 3327 return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg); 3328 } 3329 default: { 3330 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 3331 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 3332 } 3333 } 3334 } 3335 3336 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length 3337 // vector types larger than NEON don't have a matching SubRegIndex. 
3338 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) { 3339 assert(VT.isScalableVector() && 3340 VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock && 3341 "Expected to insert into a packed scalable vector!"); 3342 assert(V.getValueType().isFixedLengthVector() && 3343 "Expected to insert a fixed length vector!"); 3344 3345 SDLoc DL(V); 3346 switch (V.getValueType().getSizeInBits()) { 3347 case 64: { 3348 auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32); 3349 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 3350 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 3351 SDValue(Container, 0), V, SubReg); 3352 } 3353 case 128: { 3354 auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32); 3355 auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT); 3356 return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT, 3357 SDValue(Container, 0), V, SubReg); 3358 } 3359 default: { 3360 auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64); 3361 return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC); 3362 } 3363 } 3364 } 3365 3366 void AArch64DAGToDAGISel::Select(SDNode *Node) { 3367 // If we have a custom node, we already have selected! 3368 if (Node->isMachineOpcode()) { 3369 LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n"); 3370 Node->setNodeId(-1); 3371 return; 3372 } 3373 3374 // Few custom selection stuff. 
3375 EVT VT = Node->getValueType(0); 3376 3377 switch (Node->getOpcode()) { 3378 default: 3379 break; 3380 3381 case ISD::ATOMIC_CMP_SWAP: 3382 if (SelectCMP_SWAP(Node)) 3383 return; 3384 break; 3385 3386 case ISD::READ_REGISTER: 3387 if (tryReadRegister(Node)) 3388 return; 3389 break; 3390 3391 case ISD::WRITE_REGISTER: 3392 if (tryWriteRegister(Node)) 3393 return; 3394 break; 3395 3396 case ISD::ADD: 3397 if (tryMLAV64LaneV128(Node)) 3398 return; 3399 break; 3400 3401 case ISD::LOAD: { 3402 // Try to select as an indexed load. Fall through to normal processing 3403 // if we can't. 3404 if (tryIndexedLoad(Node)) 3405 return; 3406 break; 3407 } 3408 3409 case ISD::SRL: 3410 case ISD::AND: 3411 case ISD::SRA: 3412 case ISD::SIGN_EXTEND_INREG: 3413 if (tryBitfieldExtractOp(Node)) 3414 return; 3415 if (tryBitfieldInsertInZeroOp(Node)) 3416 return; 3417 LLVM_FALLTHROUGH; 3418 case ISD::ROTR: 3419 case ISD::SHL: 3420 if (tryShiftAmountMod(Node)) 3421 return; 3422 break; 3423 3424 case ISD::SIGN_EXTEND: 3425 if (tryBitfieldExtractOpFromSExt(Node)) 3426 return; 3427 break; 3428 3429 case ISD::FP_EXTEND: 3430 if (tryHighFPExt(Node)) 3431 return; 3432 break; 3433 3434 case ISD::OR: 3435 if (tryBitfieldInsertOp(Node)) 3436 return; 3437 break; 3438 3439 case ISD::EXTRACT_SUBVECTOR: { 3440 // Bail when not a "cast" like extract_subvector. 3441 if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0) 3442 break; 3443 3444 // Bail when normal isel can do the job. 3445 EVT InVT = Node->getOperand(0).getValueType(); 3446 if (VT.isScalableVector() || InVT.isFixedLengthVector()) 3447 break; 3448 3449 // NOTE: We can only get here when doing fixed length SVE code generation. 3450 // We do manual selection because the types involved are not linked to real 3451 // registers (despite being legal) and must be coerced into SVE registers. 
3452 // 3453 // NOTE: If the above changes, be aware that selection will still not work 3454 // because the td definition of extract_vector does not support extracting 3455 // a fixed length vector from a scalable vector. 3456 3457 ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0))); 3458 return; 3459 } 3460 3461 case ISD::INSERT_SUBVECTOR: { 3462 // Bail when not a "cast" like insert_subvector. 3463 if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0) 3464 break; 3465 if (!Node->getOperand(0).isUndef()) 3466 break; 3467 3468 // Bail when normal isel should do the job. 3469 EVT InVT = Node->getOperand(1).getValueType(); 3470 if (VT.isFixedLengthVector() || InVT.isScalableVector()) 3471 break; 3472 3473 // NOTE: We can only get here when doing fixed length SVE code generation. 3474 // We do manual selection because the types involved are not linked to real 3475 // registers (despite being legal) and must be coerced into SVE registers. 3476 // 3477 // NOTE: If the above changes, be aware that selection will still not work 3478 // because the td definition of insert_vector does not support inserting a 3479 // fixed length vector into a scalable vector. 3480 3481 ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1))); 3482 return; 3483 } 3484 3485 case ISD::Constant: { 3486 // Materialize zero constants as copies from WZR/XZR. This allows 3487 // the coalescer to propagate these into other instructions. 
3488 ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node); 3489 if (ConstNode->isNullValue()) { 3490 if (VT == MVT::i32) { 3491 SDValue New = CurDAG->getCopyFromReg( 3492 CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32); 3493 ReplaceNode(Node, New.getNode()); 3494 return; 3495 } else if (VT == MVT::i64) { 3496 SDValue New = CurDAG->getCopyFromReg( 3497 CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64); 3498 ReplaceNode(Node, New.getNode()); 3499 return; 3500 } 3501 } 3502 break; 3503 } 3504 3505 case ISD::FrameIndex: { 3506 // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm. 3507 int FI = cast<FrameIndexSDNode>(Node)->getIndex(); 3508 unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0); 3509 const TargetLowering *TLI = getTargetLowering(); 3510 SDValue TFI = CurDAG->getTargetFrameIndex( 3511 FI, TLI->getPointerTy(CurDAG->getDataLayout())); 3512 SDLoc DL(Node); 3513 SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32), 3514 CurDAG->getTargetConstant(Shifter, DL, MVT::i32) }; 3515 CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops); 3516 return; 3517 } 3518 case ISD::INTRINSIC_W_CHAIN: { 3519 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3520 switch (IntNo) { 3521 default: 3522 break; 3523 case Intrinsic::aarch64_ldaxp: 3524 case Intrinsic::aarch64_ldxp: { 3525 unsigned Op = 3526 IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX; 3527 SDValue MemAddr = Node->getOperand(2); 3528 SDLoc DL(Node); 3529 SDValue Chain = Node->getOperand(0); 3530 3531 SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64, 3532 MVT::Other, MemAddr, Chain); 3533 3534 // Transfer memoperands. 
3535 MachineMemOperand *MemOp = 3536 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3537 CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp}); 3538 ReplaceNode(Node, Ld); 3539 return; 3540 } 3541 case Intrinsic::aarch64_stlxp: 3542 case Intrinsic::aarch64_stxp: { 3543 unsigned Op = 3544 IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX; 3545 SDLoc DL(Node); 3546 SDValue Chain = Node->getOperand(0); 3547 SDValue ValLo = Node->getOperand(2); 3548 SDValue ValHi = Node->getOperand(3); 3549 SDValue MemAddr = Node->getOperand(4); 3550 3551 // Place arguments in the right order. 3552 SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain}; 3553 3554 SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops); 3555 // Transfer memoperands. 3556 MachineMemOperand *MemOp = 3557 cast<MemIntrinsicSDNode>(Node)->getMemOperand(); 3558 CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp}); 3559 3560 ReplaceNode(Node, St); 3561 return; 3562 } 3563 case Intrinsic::aarch64_neon_ld1x2: 3564 if (VT == MVT::v8i8) { 3565 SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0); 3566 return; 3567 } else if (VT == MVT::v16i8) { 3568 SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0); 3569 return; 3570 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3571 SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0); 3572 return; 3573 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3574 SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0); 3575 return; 3576 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3577 SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0); 3578 return; 3579 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3580 SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0); 3581 return; 3582 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3583 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3584 return; 3585 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3586 
SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0); 3587 return; 3588 } 3589 break; 3590 case Intrinsic::aarch64_neon_ld1x3: 3591 if (VT == MVT::v8i8) { 3592 SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0); 3593 return; 3594 } else if (VT == MVT::v16i8) { 3595 SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0); 3596 return; 3597 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3598 SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0); 3599 return; 3600 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3601 SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0); 3602 return; 3603 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3604 SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0); 3605 return; 3606 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3607 SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0); 3608 return; 3609 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3610 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3611 return; 3612 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3613 SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0); 3614 return; 3615 } 3616 break; 3617 case Intrinsic::aarch64_neon_ld1x4: 3618 if (VT == MVT::v8i8) { 3619 SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0); 3620 return; 3621 } else if (VT == MVT::v16i8) { 3622 SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0); 3623 return; 3624 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3625 SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0); 3626 return; 3627 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3628 SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0); 3629 return; 3630 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3631 SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0); 3632 return; 3633 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3634 SelectLoad(Node, 4, 
AArch64::LD1Fourv4s, AArch64::qsub0); 3635 return; 3636 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3637 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3638 return; 3639 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3640 SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0); 3641 return; 3642 } 3643 break; 3644 case Intrinsic::aarch64_neon_ld2: 3645 if (VT == MVT::v8i8) { 3646 SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0); 3647 return; 3648 } else if (VT == MVT::v16i8) { 3649 SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0); 3650 return; 3651 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3652 SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0); 3653 return; 3654 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3655 SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0); 3656 return; 3657 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3658 SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0); 3659 return; 3660 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3661 SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0); 3662 return; 3663 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3664 SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0); 3665 return; 3666 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3667 SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0); 3668 return; 3669 } 3670 break; 3671 case Intrinsic::aarch64_neon_ld3: 3672 if (VT == MVT::v8i8) { 3673 SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0); 3674 return; 3675 } else if (VT == MVT::v16i8) { 3676 SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0); 3677 return; 3678 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3679 SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0); 3680 return; 3681 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3682 SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0); 3683 
return; 3684 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3685 SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0); 3686 return; 3687 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3688 SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0); 3689 return; 3690 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3691 SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0); 3692 return; 3693 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3694 SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0); 3695 return; 3696 } 3697 break; 3698 case Intrinsic::aarch64_neon_ld4: 3699 if (VT == MVT::v8i8) { 3700 SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0); 3701 return; 3702 } else if (VT == MVT::v16i8) { 3703 SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0); 3704 return; 3705 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3706 SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0); 3707 return; 3708 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3709 SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0); 3710 return; 3711 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3712 SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0); 3713 return; 3714 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3715 SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0); 3716 return; 3717 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3718 SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0); 3719 return; 3720 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3721 SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0); 3722 return; 3723 } 3724 break; 3725 case Intrinsic::aarch64_neon_ld2r: 3726 if (VT == MVT::v8i8) { 3727 SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0); 3728 return; 3729 } else if (VT == MVT::v16i8) { 3730 SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0); 3731 return; 3732 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 
3733 SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0); 3734 return; 3735 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3736 SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0); 3737 return; 3738 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3739 SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0); 3740 return; 3741 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3742 SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0); 3743 return; 3744 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3745 SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0); 3746 return; 3747 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3748 SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0); 3749 return; 3750 } 3751 break; 3752 case Intrinsic::aarch64_neon_ld3r: 3753 if (VT == MVT::v8i8) { 3754 SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0); 3755 return; 3756 } else if (VT == MVT::v16i8) { 3757 SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0); 3758 return; 3759 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3760 SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0); 3761 return; 3762 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3763 SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0); 3764 return; 3765 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3766 SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0); 3767 return; 3768 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3769 SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0); 3770 return; 3771 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3772 SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0); 3773 return; 3774 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3775 SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0); 3776 return; 3777 } 3778 break; 3779 case Intrinsic::aarch64_neon_ld4r: 3780 if (VT == MVT::v8i8) { 3781 SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0); 3782 return; 3783 } else if 
(VT == MVT::v16i8) { 3784 SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0); 3785 return; 3786 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 3787 SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0); 3788 return; 3789 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 3790 SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0); 3791 return; 3792 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3793 SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0); 3794 return; 3795 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3796 SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0); 3797 return; 3798 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3799 SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0); 3800 return; 3801 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3802 SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0); 3803 return; 3804 } 3805 break; 3806 case Intrinsic::aarch64_neon_ld2lane: 3807 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3808 SelectLoadLane(Node, 2, AArch64::LD2i8); 3809 return; 3810 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3811 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3812 SelectLoadLane(Node, 2, AArch64::LD2i16); 3813 return; 3814 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3815 VT == MVT::v2f32) { 3816 SelectLoadLane(Node, 2, AArch64::LD2i32); 3817 return; 3818 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3819 VT == MVT::v1f64) { 3820 SelectLoadLane(Node, 2, AArch64::LD2i64); 3821 return; 3822 } 3823 break; 3824 case Intrinsic::aarch64_neon_ld3lane: 3825 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3826 SelectLoadLane(Node, 3, AArch64::LD3i8); 3827 return; 3828 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3829 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3830 SelectLoadLane(Node, 3, AArch64::LD3i16); 3831 return; 3832 } else if (VT == 
MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3833 VT == MVT::v2f32) { 3834 SelectLoadLane(Node, 3, AArch64::LD3i32); 3835 return; 3836 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3837 VT == MVT::v1f64) { 3838 SelectLoadLane(Node, 3, AArch64::LD3i64); 3839 return; 3840 } 3841 break; 3842 case Intrinsic::aarch64_neon_ld4lane: 3843 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 3844 SelectLoadLane(Node, 4, AArch64::LD4i8); 3845 return; 3846 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 3847 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 3848 SelectLoadLane(Node, 4, AArch64::LD4i16); 3849 return; 3850 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 3851 VT == MVT::v2f32) { 3852 SelectLoadLane(Node, 4, AArch64::LD4i32); 3853 return; 3854 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 3855 VT == MVT::v1f64) { 3856 SelectLoadLane(Node, 4, AArch64::LD4i64); 3857 return; 3858 } 3859 break; 3860 case Intrinsic::aarch64_ld64b: 3861 SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0); 3862 return; 3863 } 3864 } break; 3865 case ISD::INTRINSIC_WO_CHAIN: { 3866 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue(); 3867 switch (IntNo) { 3868 default: 3869 break; 3870 case Intrinsic::aarch64_tagp: 3871 SelectTagP(Node); 3872 return; 3873 case Intrinsic::aarch64_neon_tbl2: 3874 SelectTable(Node, 2, 3875 VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two, 3876 false); 3877 return; 3878 case Intrinsic::aarch64_neon_tbl3: 3879 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three 3880 : AArch64::TBLv16i8Three, 3881 false); 3882 return; 3883 case Intrinsic::aarch64_neon_tbl4: 3884 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four 3885 : AArch64::TBLv16i8Four, 3886 false); 3887 return; 3888 case Intrinsic::aarch64_neon_tbx2: 3889 SelectTable(Node, 2, 3890 VT == MVT::v8i8 ? 
AArch64::TBXv8i8Two : AArch64::TBXv16i8Two, 3891 true); 3892 return; 3893 case Intrinsic::aarch64_neon_tbx3: 3894 SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three 3895 : AArch64::TBXv16i8Three, 3896 true); 3897 return; 3898 case Intrinsic::aarch64_neon_tbx4: 3899 SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four 3900 : AArch64::TBXv16i8Four, 3901 true); 3902 return; 3903 case Intrinsic::aarch64_neon_smull: 3904 case Intrinsic::aarch64_neon_umull: 3905 if (tryMULLV64LaneV128(IntNo, Node)) 3906 return; 3907 break; 3908 case Intrinsic::swift_async_context_addr: { 3909 SDLoc DL(Node); 3910 CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64, 3911 CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, 3912 AArch64::FP, MVT::i64), 3913 CurDAG->getTargetConstant(8, DL, MVT::i32), 3914 CurDAG->getTargetConstant(0, DL, MVT::i32)); 3915 auto &MF = CurDAG->getMachineFunction(); 3916 MF.getFrameInfo().setFrameAddressIsTaken(true); 3917 MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true); 3918 return; 3919 } 3920 } 3921 break; 3922 } 3923 case ISD::INTRINSIC_VOID: { 3924 unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue(); 3925 if (Node->getNumOperands() >= 3) 3926 VT = Node->getOperand(2)->getValueType(0); 3927 switch (IntNo) { 3928 default: 3929 break; 3930 case Intrinsic::aarch64_neon_st1x2: { 3931 if (VT == MVT::v8i8) { 3932 SelectStore(Node, 2, AArch64::ST1Twov8b); 3933 return; 3934 } else if (VT == MVT::v16i8) { 3935 SelectStore(Node, 2, AArch64::ST1Twov16b); 3936 return; 3937 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3938 VT == MVT::v4bf16) { 3939 SelectStore(Node, 2, AArch64::ST1Twov4h); 3940 return; 3941 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3942 VT == MVT::v8bf16) { 3943 SelectStore(Node, 2, AArch64::ST1Twov8h); 3944 return; 3945 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3946 SelectStore(Node, 2, AArch64::ST1Twov2s); 3947 return; 3948 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) 
{ 3949 SelectStore(Node, 2, AArch64::ST1Twov4s); 3950 return; 3951 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3952 SelectStore(Node, 2, AArch64::ST1Twov2d); 3953 return; 3954 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3955 SelectStore(Node, 2, AArch64::ST1Twov1d); 3956 return; 3957 } 3958 break; 3959 } 3960 case Intrinsic::aarch64_neon_st1x3: { 3961 if (VT == MVT::v8i8) { 3962 SelectStore(Node, 3, AArch64::ST1Threev8b); 3963 return; 3964 } else if (VT == MVT::v16i8) { 3965 SelectStore(Node, 3, AArch64::ST1Threev16b); 3966 return; 3967 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3968 VT == MVT::v4bf16) { 3969 SelectStore(Node, 3, AArch64::ST1Threev4h); 3970 return; 3971 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 3972 VT == MVT::v8bf16) { 3973 SelectStore(Node, 3, AArch64::ST1Threev8h); 3974 return; 3975 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 3976 SelectStore(Node, 3, AArch64::ST1Threev2s); 3977 return; 3978 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 3979 SelectStore(Node, 3, AArch64::ST1Threev4s); 3980 return; 3981 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 3982 SelectStore(Node, 3, AArch64::ST1Threev2d); 3983 return; 3984 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 3985 SelectStore(Node, 3, AArch64::ST1Threev1d); 3986 return; 3987 } 3988 break; 3989 } 3990 case Intrinsic::aarch64_neon_st1x4: { 3991 if (VT == MVT::v8i8) { 3992 SelectStore(Node, 4, AArch64::ST1Fourv8b); 3993 return; 3994 } else if (VT == MVT::v16i8) { 3995 SelectStore(Node, 4, AArch64::ST1Fourv16b); 3996 return; 3997 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 3998 VT == MVT::v4bf16) { 3999 SelectStore(Node, 4, AArch64::ST1Fourv4h); 4000 return; 4001 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4002 VT == MVT::v8bf16) { 4003 SelectStore(Node, 4, AArch64::ST1Fourv8h); 4004 return; 4005 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4006 SelectStore(Node, 4, AArch64::ST1Fourv2s); 4007 return; 4008 } else if (VT == 
MVT::v4i32 || VT == MVT::v4f32) { 4009 SelectStore(Node, 4, AArch64::ST1Fourv4s); 4010 return; 4011 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4012 SelectStore(Node, 4, AArch64::ST1Fourv2d); 4013 return; 4014 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4015 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4016 return; 4017 } 4018 break; 4019 } 4020 case Intrinsic::aarch64_neon_st2: { 4021 if (VT == MVT::v8i8) { 4022 SelectStore(Node, 2, AArch64::ST2Twov8b); 4023 return; 4024 } else if (VT == MVT::v16i8) { 4025 SelectStore(Node, 2, AArch64::ST2Twov16b); 4026 return; 4027 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4028 VT == MVT::v4bf16) { 4029 SelectStore(Node, 2, AArch64::ST2Twov4h); 4030 return; 4031 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4032 VT == MVT::v8bf16) { 4033 SelectStore(Node, 2, AArch64::ST2Twov8h); 4034 return; 4035 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4036 SelectStore(Node, 2, AArch64::ST2Twov2s); 4037 return; 4038 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4039 SelectStore(Node, 2, AArch64::ST2Twov4s); 4040 return; 4041 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4042 SelectStore(Node, 2, AArch64::ST2Twov2d); 4043 return; 4044 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4045 SelectStore(Node, 2, AArch64::ST1Twov1d); 4046 return; 4047 } 4048 break; 4049 } 4050 case Intrinsic::aarch64_neon_st3: { 4051 if (VT == MVT::v8i8) { 4052 SelectStore(Node, 3, AArch64::ST3Threev8b); 4053 return; 4054 } else if (VT == MVT::v16i8) { 4055 SelectStore(Node, 3, AArch64::ST3Threev16b); 4056 return; 4057 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4058 VT == MVT::v4bf16) { 4059 SelectStore(Node, 3, AArch64::ST3Threev4h); 4060 return; 4061 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4062 VT == MVT::v8bf16) { 4063 SelectStore(Node, 3, AArch64::ST3Threev8h); 4064 return; 4065 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4066 SelectStore(Node, 3, AArch64::ST3Threev2s); 4067 return; 4068 } 
else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4069 SelectStore(Node, 3, AArch64::ST3Threev4s); 4070 return; 4071 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4072 SelectStore(Node, 3, AArch64::ST3Threev2d); 4073 return; 4074 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4075 SelectStore(Node, 3, AArch64::ST1Threev1d); 4076 return; 4077 } 4078 break; 4079 } 4080 case Intrinsic::aarch64_neon_st4: { 4081 if (VT == MVT::v8i8) { 4082 SelectStore(Node, 4, AArch64::ST4Fourv8b); 4083 return; 4084 } else if (VT == MVT::v16i8) { 4085 SelectStore(Node, 4, AArch64::ST4Fourv16b); 4086 return; 4087 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || 4088 VT == MVT::v4bf16) { 4089 SelectStore(Node, 4, AArch64::ST4Fourv4h); 4090 return; 4091 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || 4092 VT == MVT::v8bf16) { 4093 SelectStore(Node, 4, AArch64::ST4Fourv8h); 4094 return; 4095 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4096 SelectStore(Node, 4, AArch64::ST4Fourv2s); 4097 return; 4098 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4099 SelectStore(Node, 4, AArch64::ST4Fourv4s); 4100 return; 4101 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4102 SelectStore(Node, 4, AArch64::ST4Fourv2d); 4103 return; 4104 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4105 SelectStore(Node, 4, AArch64::ST1Fourv1d); 4106 return; 4107 } 4108 break; 4109 } 4110 case Intrinsic::aarch64_neon_st2lane: { 4111 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4112 SelectStoreLane(Node, 2, AArch64::ST2i8); 4113 return; 4114 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4115 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4116 SelectStoreLane(Node, 2, AArch64::ST2i16); 4117 return; 4118 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4119 VT == MVT::v2f32) { 4120 SelectStoreLane(Node, 2, AArch64::ST2i32); 4121 return; 4122 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4123 VT == MVT::v1f64) 
{ 4124 SelectStoreLane(Node, 2, AArch64::ST2i64); 4125 return; 4126 } 4127 break; 4128 } 4129 case Intrinsic::aarch64_neon_st3lane: { 4130 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4131 SelectStoreLane(Node, 3, AArch64::ST3i8); 4132 return; 4133 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4134 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4135 SelectStoreLane(Node, 3, AArch64::ST3i16); 4136 return; 4137 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4138 VT == MVT::v2f32) { 4139 SelectStoreLane(Node, 3, AArch64::ST3i32); 4140 return; 4141 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4142 VT == MVT::v1f64) { 4143 SelectStoreLane(Node, 3, AArch64::ST3i64); 4144 return; 4145 } 4146 break; 4147 } 4148 case Intrinsic::aarch64_neon_st4lane: { 4149 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4150 SelectStoreLane(Node, 4, AArch64::ST4i8); 4151 return; 4152 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4153 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4154 SelectStoreLane(Node, 4, AArch64::ST4i16); 4155 return; 4156 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4157 VT == MVT::v2f32) { 4158 SelectStoreLane(Node, 4, AArch64::ST4i32); 4159 return; 4160 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4161 VT == MVT::v1f64) { 4162 SelectStoreLane(Node, 4, AArch64::ST4i64); 4163 return; 4164 } 4165 break; 4166 } 4167 case Intrinsic::aarch64_sve_st2: { 4168 if (VT == MVT::nxv16i8) { 4169 SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM); 4170 return; 4171 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4172 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4173 SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM); 4174 return; 4175 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4176 SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM); 
4177 return; 4178 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4179 SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM); 4180 return; 4181 } 4182 break; 4183 } 4184 case Intrinsic::aarch64_sve_st3: { 4185 if (VT == MVT::nxv16i8) { 4186 SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM); 4187 return; 4188 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4189 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4190 SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM); 4191 return; 4192 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4193 SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM); 4194 return; 4195 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4196 SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM); 4197 return; 4198 } 4199 break; 4200 } 4201 case Intrinsic::aarch64_sve_st4: { 4202 if (VT == MVT::nxv16i8) { 4203 SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM); 4204 return; 4205 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4206 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4207 SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM); 4208 return; 4209 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4210 SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM); 4211 return; 4212 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4213 SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM); 4214 return; 4215 } 4216 break; 4217 } 4218 } 4219 break; 4220 } 4221 case AArch64ISD::LD2post: { 4222 if (VT == MVT::v8i8) { 4223 SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0); 4224 return; 4225 } else if (VT == MVT::v16i8) { 4226 SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0); 4227 return; 4228 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4229 SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0); 4230 return; 
4231 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4232 SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0); 4233 return; 4234 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4235 SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0); 4236 return; 4237 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4238 SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0); 4239 return; 4240 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4241 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4242 return; 4243 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4244 SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0); 4245 return; 4246 } 4247 break; 4248 } 4249 case AArch64ISD::LD3post: { 4250 if (VT == MVT::v8i8) { 4251 SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0); 4252 return; 4253 } else if (VT == MVT::v16i8) { 4254 SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0); 4255 return; 4256 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4257 SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0); 4258 return; 4259 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4260 SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0); 4261 return; 4262 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4263 SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0); 4264 return; 4265 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4266 SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0); 4267 return; 4268 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4269 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4270 return; 4271 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4272 SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0); 4273 return; 4274 } 4275 break; 4276 } 4277 case AArch64ISD::LD4post: { 4278 if (VT == MVT::v8i8) { 
4279 SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0); 4280 return; 4281 } else if (VT == MVT::v16i8) { 4282 SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0); 4283 return; 4284 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4285 SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0); 4286 return; 4287 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4288 SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0); 4289 return; 4290 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4291 SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0); 4292 return; 4293 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4294 SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0); 4295 return; 4296 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4297 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4298 return; 4299 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4300 SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0); 4301 return; 4302 } 4303 break; 4304 } 4305 case AArch64ISD::LD1x2post: { 4306 if (VT == MVT::v8i8) { 4307 SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0); 4308 return; 4309 } else if (VT == MVT::v16i8) { 4310 SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0); 4311 return; 4312 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4313 SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0); 4314 return; 4315 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4316 SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0); 4317 return; 4318 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4319 SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0); 4320 return; 4321 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4322 SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0); 4323 return; 4324 } 
else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4325 SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0); 4326 return; 4327 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4328 SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0); 4329 return; 4330 } 4331 break; 4332 } 4333 case AArch64ISD::LD1x3post: { 4334 if (VT == MVT::v8i8) { 4335 SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0); 4336 return; 4337 } else if (VT == MVT::v16i8) { 4338 SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0); 4339 return; 4340 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4341 SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0); 4342 return; 4343 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4344 SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0); 4345 return; 4346 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4347 SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0); 4348 return; 4349 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4350 SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0); 4351 return; 4352 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4353 SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0); 4354 return; 4355 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4356 SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0); 4357 return; 4358 } 4359 break; 4360 } 4361 case AArch64ISD::LD1x4post: { 4362 if (VT == MVT::v8i8) { 4363 SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0); 4364 return; 4365 } else if (VT == MVT::v16i8) { 4366 SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0); 4367 return; 4368 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4369 SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0); 4370 return; 4371 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 
4372 SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0); 4373 return; 4374 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4375 SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0); 4376 return; 4377 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4378 SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0); 4379 return; 4380 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4381 SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0); 4382 return; 4383 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4384 SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0); 4385 return; 4386 } 4387 break; 4388 } 4389 case AArch64ISD::LD1DUPpost: { 4390 if (VT == MVT::v8i8) { 4391 SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0); 4392 return; 4393 } else if (VT == MVT::v16i8) { 4394 SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0); 4395 return; 4396 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4397 SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0); 4398 return; 4399 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4400 SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0); 4401 return; 4402 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4403 SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0); 4404 return; 4405 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4406 SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0); 4407 return; 4408 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4409 SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0); 4410 return; 4411 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4412 SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0); 4413 return; 4414 } 4415 break; 4416 } 4417 case AArch64ISD::LD2DUPpost: { 4418 if (VT == MVT::v8i8) { 4419 SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0); 4420 return; 4421 } else if 
(VT == MVT::v16i8) { 4422 SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0); 4423 return; 4424 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4425 SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0); 4426 return; 4427 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4428 SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0); 4429 return; 4430 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4431 SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0); 4432 return; 4433 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4434 SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0); 4435 return; 4436 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4437 SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0); 4438 return; 4439 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4440 SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0); 4441 return; 4442 } 4443 break; 4444 } 4445 case AArch64ISD::LD3DUPpost: { 4446 if (VT == MVT::v8i8) { 4447 SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0); 4448 return; 4449 } else if (VT == MVT::v16i8) { 4450 SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0); 4451 return; 4452 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4453 SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0); 4454 return; 4455 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4456 SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0); 4457 return; 4458 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4459 SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0); 4460 return; 4461 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4462 SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0); 4463 return; 4464 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4465 SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0); 4466 return; 
4467 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4468 SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0); 4469 return; 4470 } 4471 break; 4472 } 4473 case AArch64ISD::LD4DUPpost: { 4474 if (VT == MVT::v8i8) { 4475 SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0); 4476 return; 4477 } else if (VT == MVT::v16i8) { 4478 SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0); 4479 return; 4480 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4481 SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0); 4482 return; 4483 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4484 SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0); 4485 return; 4486 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4487 SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0); 4488 return; 4489 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4490 SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0); 4491 return; 4492 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4493 SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0); 4494 return; 4495 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4496 SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0); 4497 return; 4498 } 4499 break; 4500 } 4501 case AArch64ISD::LD1LANEpost: { 4502 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4503 SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST); 4504 return; 4505 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4506 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4507 SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST); 4508 return; 4509 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4510 VT == MVT::v2f32) { 4511 SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST); 4512 return; 4513 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4514 VT == MVT::v1f64) { 4515 SelectPostLoadLane(Node, 1, 
AArch64::LD1i64_POST); 4516 return; 4517 } 4518 break; 4519 } 4520 case AArch64ISD::LD2LANEpost: { 4521 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4522 SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST); 4523 return; 4524 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4525 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4526 SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST); 4527 return; 4528 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4529 VT == MVT::v2f32) { 4530 SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST); 4531 return; 4532 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4533 VT == MVT::v1f64) { 4534 SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST); 4535 return; 4536 } 4537 break; 4538 } 4539 case AArch64ISD::LD3LANEpost: { 4540 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4541 SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST); 4542 return; 4543 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4544 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4545 SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST); 4546 return; 4547 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4548 VT == MVT::v2f32) { 4549 SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST); 4550 return; 4551 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4552 VT == MVT::v1f64) { 4553 SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST); 4554 return; 4555 } 4556 break; 4557 } 4558 case AArch64ISD::LD4LANEpost: { 4559 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4560 SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST); 4561 return; 4562 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4563 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4564 SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST); 4565 return; 4566 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4567 VT == MVT::v2f32) { 4568 
SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST); 4569 return; 4570 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4571 VT == MVT::v1f64) { 4572 SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST); 4573 return; 4574 } 4575 break; 4576 } 4577 case AArch64ISD::ST2post: { 4578 VT = Node->getOperand(1).getValueType(); 4579 if (VT == MVT::v8i8) { 4580 SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST); 4581 return; 4582 } else if (VT == MVT::v16i8) { 4583 SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST); 4584 return; 4585 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4586 SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST); 4587 return; 4588 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4589 SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST); 4590 return; 4591 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4592 SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST); 4593 return; 4594 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4595 SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST); 4596 return; 4597 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4598 SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST); 4599 return; 4600 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4601 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4602 return; 4603 } 4604 break; 4605 } 4606 case AArch64ISD::ST3post: { 4607 VT = Node->getOperand(1).getValueType(); 4608 if (VT == MVT::v8i8) { 4609 SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST); 4610 return; 4611 } else if (VT == MVT::v16i8) { 4612 SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST); 4613 return; 4614 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4615 SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST); 4616 return; 4617 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4618 SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST); 4619 return; 4620 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 
4621 SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST); 4622 return; 4623 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4624 SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST); 4625 return; 4626 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4627 SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST); 4628 return; 4629 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4630 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4631 return; 4632 } 4633 break; 4634 } 4635 case AArch64ISD::ST4post: { 4636 VT = Node->getOperand(1).getValueType(); 4637 if (VT == MVT::v8i8) { 4638 SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST); 4639 return; 4640 } else if (VT == MVT::v16i8) { 4641 SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST); 4642 return; 4643 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4644 SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST); 4645 return; 4646 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4647 SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST); 4648 return; 4649 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4650 SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST); 4651 return; 4652 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4653 SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST); 4654 return; 4655 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4656 SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST); 4657 return; 4658 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4659 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4660 return; 4661 } 4662 break; 4663 } 4664 case AArch64ISD::ST1x2post: { 4665 VT = Node->getOperand(1).getValueType(); 4666 if (VT == MVT::v8i8) { 4667 SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST); 4668 return; 4669 } else if (VT == MVT::v16i8) { 4670 SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST); 4671 return; 4672 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4673 SelectPostStore(Node, 2, 
AArch64::ST1Twov4h_POST); 4674 return; 4675 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4676 SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST); 4677 return; 4678 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4679 SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST); 4680 return; 4681 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4682 SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST); 4683 return; 4684 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4685 SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST); 4686 return; 4687 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4688 SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST); 4689 return; 4690 } 4691 break; 4692 } 4693 case AArch64ISD::ST1x3post: { 4694 VT = Node->getOperand(1).getValueType(); 4695 if (VT == MVT::v8i8) { 4696 SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST); 4697 return; 4698 } else if (VT == MVT::v16i8) { 4699 SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST); 4700 return; 4701 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4702 SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST); 4703 return; 4704 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) { 4705 SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST); 4706 return; 4707 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4708 SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST); 4709 return; 4710 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4711 SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST); 4712 return; 4713 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4714 SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST); 4715 return; 4716 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4717 SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST); 4718 return; 4719 } 4720 break; 4721 } 4722 case AArch64ISD::ST1x4post: { 4723 VT = Node->getOperand(1).getValueType(); 4724 if (VT == MVT::v8i8) { 4725 SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST); 
4726 return; 4727 } else if (VT == MVT::v16i8) { 4728 SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST); 4729 return; 4730 } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) { 4731 SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST); 4732 return; 4733 } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) { 4734 SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST); 4735 return; 4736 } else if (VT == MVT::v2i32 || VT == MVT::v2f32) { 4737 SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST); 4738 return; 4739 } else if (VT == MVT::v4i32 || VT == MVT::v4f32) { 4740 SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST); 4741 return; 4742 } else if (VT == MVT::v1i64 || VT == MVT::v1f64) { 4743 SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST); 4744 return; 4745 } else if (VT == MVT::v2i64 || VT == MVT::v2f64) { 4746 SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST); 4747 return; 4748 } 4749 break; 4750 } 4751 case AArch64ISD::ST2LANEpost: { 4752 VT = Node->getOperand(1).getValueType(); 4753 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4754 SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST); 4755 return; 4756 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4757 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4758 SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST); 4759 return; 4760 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4761 VT == MVT::v2f32) { 4762 SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST); 4763 return; 4764 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4765 VT == MVT::v1f64) { 4766 SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST); 4767 return; 4768 } 4769 break; 4770 } 4771 case AArch64ISD::ST3LANEpost: { 4772 VT = Node->getOperand(1).getValueType(); 4773 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4774 SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST); 4775 return; 4776 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 
|| 4777 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4778 SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST); 4779 return; 4780 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4781 VT == MVT::v2f32) { 4782 SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST); 4783 return; 4784 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4785 VT == MVT::v1f64) { 4786 SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST); 4787 return; 4788 } 4789 break; 4790 } 4791 case AArch64ISD::ST4LANEpost: { 4792 VT = Node->getOperand(1).getValueType(); 4793 if (VT == MVT::v16i8 || VT == MVT::v8i8) { 4794 SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST); 4795 return; 4796 } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 || 4797 VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) { 4798 SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST); 4799 return; 4800 } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 || 4801 VT == MVT::v2f32) { 4802 SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST); 4803 return; 4804 } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 || 4805 VT == MVT::v1f64) { 4806 SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST); 4807 return; 4808 } 4809 break; 4810 } 4811 case AArch64ISD::SVE_LD2_MERGE_ZERO: { 4812 if (VT == MVT::nxv16i8) { 4813 SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B); 4814 return; 4815 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4816 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4817 SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H); 4818 return; 4819 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4820 SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W); 4821 return; 4822 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4823 SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D); 4824 return; 4825 } 4826 break; 4827 } 4828 case 
AArch64ISD::SVE_LD3_MERGE_ZERO: { 4829 if (VT == MVT::nxv16i8) { 4830 SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B); 4831 return; 4832 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4833 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4834 SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H); 4835 return; 4836 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4837 SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W); 4838 return; 4839 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4840 SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D); 4841 return; 4842 } 4843 break; 4844 } 4845 case AArch64ISD::SVE_LD4_MERGE_ZERO: { 4846 if (VT == MVT::nxv16i8) { 4847 SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B); 4848 return; 4849 } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 || 4850 (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) { 4851 SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H); 4852 return; 4853 } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) { 4854 SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W); 4855 return; 4856 } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) { 4857 SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D); 4858 return; 4859 } 4860 break; 4861 } 4862 } 4863 4864 // Select the default instruction 4865 SelectCode(Node); 4866 } 4867 4868 /// createAArch64ISelDag - This pass converts a legalized DAG into a 4869 /// AArch64-specific DAG, ready for instruction scheduling. 4870 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM, 4871 CodeGenOpt::Level OptLevel) { 4872 return new AArch64DAGToDAGISel(TM, OptLevel); 4873 } 4874 4875 /// When \p PredVT is a scalable vector predicate in the form 4876 /// MVT::nx<M>xi1, it builds the correspondent scalable vector of 4877 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. 
When targeting 4878 /// structured vectors (NumVec >1), the output data type is 4879 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input 4880 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid 4881 /// EVT. 4882 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT, 4883 unsigned NumVec) { 4884 assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors."); 4885 if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1) 4886 return EVT(); 4887 4888 if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 && 4889 PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1) 4890 return EVT(); 4891 4892 ElementCount EC = PredVT.getVectorElementCount(); 4893 EVT ScalarVT = 4894 EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue()); 4895 EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec); 4896 4897 return MemVT; 4898 } 4899 4900 /// Return the EVT of the data associated to a memory operation in \p 4901 /// Root. If such EVT cannot be retrived, it returns an invalid EVT. 4902 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) { 4903 if (isa<MemSDNode>(Root)) 4904 return cast<MemSDNode>(Root)->getMemoryVT(); 4905 4906 if (isa<MemIntrinsicSDNode>(Root)) 4907 return cast<MemIntrinsicSDNode>(Root)->getMemoryVT(); 4908 4909 const unsigned Opcode = Root->getOpcode(); 4910 // For custom ISD nodes, we have to look at them individually to extract the 4911 // type of the data moved to/from memory. 
4912 switch (Opcode) { 4913 case AArch64ISD::LD1_MERGE_ZERO: 4914 case AArch64ISD::LD1S_MERGE_ZERO: 4915 case AArch64ISD::LDNF1_MERGE_ZERO: 4916 case AArch64ISD::LDNF1S_MERGE_ZERO: 4917 return cast<VTSDNode>(Root->getOperand(3))->getVT(); 4918 case AArch64ISD::ST1_PRED: 4919 return cast<VTSDNode>(Root->getOperand(4))->getVT(); 4920 case AArch64ISD::SVE_LD2_MERGE_ZERO: 4921 return getPackedVectorTypeFromPredicateType( 4922 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2); 4923 case AArch64ISD::SVE_LD3_MERGE_ZERO: 4924 return getPackedVectorTypeFromPredicateType( 4925 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3); 4926 case AArch64ISD::SVE_LD4_MERGE_ZERO: 4927 return getPackedVectorTypeFromPredicateType( 4928 Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4); 4929 default: 4930 break; 4931 } 4932 4933 if (Opcode != ISD::INTRINSIC_VOID) 4934 return EVT(); 4935 4936 const unsigned IntNo = 4937 cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue(); 4938 if (IntNo != Intrinsic::aarch64_sve_prf) 4939 return EVT(); 4940 4941 // We are using an SVE prefetch intrinsic. Type must be inferred 4942 // from the width of the predicate. 4943 return getPackedVectorTypeFromPredicateType( 4944 Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1); 4945 } 4946 4947 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode: 4948 /// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max 4949 /// where Root is the memory access using N for its address. 
4950 template <int64_t Min, int64_t Max> 4951 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, 4952 SDValue &Base, 4953 SDValue &OffImm) { 4954 const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root); 4955 4956 if (MemVT == EVT()) 4957 return false; 4958 4959 if (N.getOpcode() != ISD::ADD) 4960 return false; 4961 4962 SDValue VScale = N.getOperand(1); 4963 if (VScale.getOpcode() != ISD::VSCALE) 4964 return false; 4965 4966 TypeSize TS = MemVT.getSizeInBits(); 4967 int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8; 4968 int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue(); 4969 4970 if ((MulImm % MemWidthBytes) != 0) 4971 return false; 4972 4973 int64_t Offset = MulImm / MemWidthBytes; 4974 if (Offset < Min || Offset > Max) 4975 return false; 4976 4977 Base = N.getOperand(0); 4978 OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64); 4979 return true; 4980 } 4981 4982 /// Select register plus register addressing mode for SVE, with scaled 4983 /// offset. 4984 bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale, 4985 SDValue &Base, 4986 SDValue &Offset) { 4987 if (N.getOpcode() != ISD::ADD) 4988 return false; 4989 4990 // Process an ADD node. 4991 const SDValue LHS = N.getOperand(0); 4992 const SDValue RHS = N.getOperand(1); 4993 4994 // 8 bit data does not come with the SHL node, so it is treated 4995 // separately. 4996 if (Scale == 0) { 4997 Base = LHS; 4998 Offset = RHS; 4999 return true; 5000 } 5001 5002 // Check if the RHS is a shift node with a constant. 
5003 if (RHS.getOpcode() != ISD::SHL) 5004 return false; 5005 5006 const SDValue ShiftRHS = RHS.getOperand(1); 5007 if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS)) 5008 if (C->getZExtValue() == Scale) { 5009 Base = LHS; 5010 Offset = RHS.getOperand(0); 5011 return true; 5012 } 5013 5014 return false; 5015 } 5016 5017 bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) { 5018 const AArch64TargetLowering *TLI = 5019 static_cast<const AArch64TargetLowering *>(getTargetLowering()); 5020 5021 return TLI->isAllActivePredicate(N); 5022 } 5023