Home | History | Annotate | Line # | Download | only in AArch64
      1 //===-- AArch64ISelDAGToDAG.cpp - A dag to dag inst selector for AArch64 --===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file defines an instruction selector for the AArch64 target.
     10 //
     11 //===----------------------------------------------------------------------===//
     12 
     13 #include "AArch64MachineFunctionInfo.h"
     14 #include "AArch64TargetMachine.h"
     15 #include "MCTargetDesc/AArch64AddressingModes.h"
     16 #include "llvm/ADT/APSInt.h"
     17 #include "llvm/CodeGen/SelectionDAGISel.h"
     18 #include "llvm/IR/Function.h" // To access function attributes.
     19 #include "llvm/IR/GlobalValue.h"
     20 #include "llvm/IR/Intrinsics.h"
     21 #include "llvm/IR/IntrinsicsAArch64.h"
     22 #include "llvm/Support/Debug.h"
     23 #include "llvm/Support/ErrorHandling.h"
     24 #include "llvm/Support/KnownBits.h"
     25 #include "llvm/Support/MathExtras.h"
     26 #include "llvm/Support/raw_ostream.h"
     27 
     28 using namespace llvm;
     29 
     30 #define DEBUG_TYPE "aarch64-isel"
     31 
     32 //===--------------------------------------------------------------------===//
     33 /// AArch64DAGToDAGISel - AArch64 specific code to select AArch64 machine
     34 /// instructions for SelectionDAG operations.
     35 ///
     36 namespace {
     37 
     38 class AArch64DAGToDAGISel : public SelectionDAGISel {
     39 
     40   /// Subtarget - Keep a pointer to the AArch64Subtarget around so that we can
     41   /// make the right decision when generating code for different targets.
     42   const AArch64Subtarget *Subtarget;
     43 
     44 public:
     45   explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm,
     46                                CodeGenOpt::Level OptLevel)
     47       : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr) {}
     48 
     49   StringRef getPassName() const override {
     50     return "AArch64 Instruction Selection";
     51   }
     52 
     53   bool runOnMachineFunction(MachineFunction &MF) override {
     54     Subtarget = &MF.getSubtarget<AArch64Subtarget>();
     55     return SelectionDAGISel::runOnMachineFunction(MF);
     56   }
     57 
     58   void Select(SDNode *Node) override;
     59 
     60   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
     61   /// inline asm expressions.
     62   bool SelectInlineAsmMemoryOperand(const SDValue &Op,
     63                                     unsigned ConstraintID,
     64                                     std::vector<SDValue> &OutOps) override;
     65 
     66   template <signed Low, signed High, signed Scale>
     67   bool SelectRDVLImm(SDValue N, SDValue &Imm);
     68 
     69   bool tryMLAV64LaneV128(SDNode *N);
     70   bool tryMULLV64LaneV128(unsigned IntNo, SDNode *N);
     71   bool SelectArithExtendedRegister(SDValue N, SDValue &Reg, SDValue &Shift);
     72   bool SelectArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
     73   bool SelectNegArithImmed(SDValue N, SDValue &Val, SDValue &Shift);
     74   bool SelectArithShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
     75     return SelectShiftedRegister(N, false, Reg, Shift);
     76   }
     77   bool SelectLogicalShiftedRegister(SDValue N, SDValue &Reg, SDValue &Shift) {
     78     return SelectShiftedRegister(N, true, Reg, Shift);
     79   }
     80   bool SelectAddrModeIndexed7S8(SDValue N, SDValue &Base, SDValue &OffImm) {
     81     return SelectAddrModeIndexed7S(N, 1, Base, OffImm);
     82   }
     83   bool SelectAddrModeIndexed7S16(SDValue N, SDValue &Base, SDValue &OffImm) {
     84     return SelectAddrModeIndexed7S(N, 2, Base, OffImm);
     85   }
     86   bool SelectAddrModeIndexed7S32(SDValue N, SDValue &Base, SDValue &OffImm) {
     87     return SelectAddrModeIndexed7S(N, 4, Base, OffImm);
     88   }
     89   bool SelectAddrModeIndexed7S64(SDValue N, SDValue &Base, SDValue &OffImm) {
     90     return SelectAddrModeIndexed7S(N, 8, Base, OffImm);
     91   }
     92   bool SelectAddrModeIndexed7S128(SDValue N, SDValue &Base, SDValue &OffImm) {
     93     return SelectAddrModeIndexed7S(N, 16, Base, OffImm);
     94   }
     95   bool SelectAddrModeIndexedS9S128(SDValue N, SDValue &Base, SDValue &OffImm) {
     96     return SelectAddrModeIndexedBitWidth(N, true, 9, 16, Base, OffImm);
     97   }
     98   bool SelectAddrModeIndexedU6S128(SDValue N, SDValue &Base, SDValue &OffImm) {
     99     return SelectAddrModeIndexedBitWidth(N, false, 6, 16, Base, OffImm);
    100   }
    101   bool SelectAddrModeIndexed8(SDValue N, SDValue &Base, SDValue &OffImm) {
    102     return SelectAddrModeIndexed(N, 1, Base, OffImm);
    103   }
    104   bool SelectAddrModeIndexed16(SDValue N, SDValue &Base, SDValue &OffImm) {
    105     return SelectAddrModeIndexed(N, 2, Base, OffImm);
    106   }
    107   bool SelectAddrModeIndexed32(SDValue N, SDValue &Base, SDValue &OffImm) {
    108     return SelectAddrModeIndexed(N, 4, Base, OffImm);
    109   }
    110   bool SelectAddrModeIndexed64(SDValue N, SDValue &Base, SDValue &OffImm) {
    111     return SelectAddrModeIndexed(N, 8, Base, OffImm);
    112   }
    113   bool SelectAddrModeIndexed128(SDValue N, SDValue &Base, SDValue &OffImm) {
    114     return SelectAddrModeIndexed(N, 16, Base, OffImm);
    115   }
    116   bool SelectAddrModeUnscaled8(SDValue N, SDValue &Base, SDValue &OffImm) {
    117     return SelectAddrModeUnscaled(N, 1, Base, OffImm);
    118   }
    119   bool SelectAddrModeUnscaled16(SDValue N, SDValue &Base, SDValue &OffImm) {
    120     return SelectAddrModeUnscaled(N, 2, Base, OffImm);
    121   }
    122   bool SelectAddrModeUnscaled32(SDValue N, SDValue &Base, SDValue &OffImm) {
    123     return SelectAddrModeUnscaled(N, 4, Base, OffImm);
    124   }
    125   bool SelectAddrModeUnscaled64(SDValue N, SDValue &Base, SDValue &OffImm) {
    126     return SelectAddrModeUnscaled(N, 8, Base, OffImm);
    127   }
    128   bool SelectAddrModeUnscaled128(SDValue N, SDValue &Base, SDValue &OffImm) {
    129     return SelectAddrModeUnscaled(N, 16, Base, OffImm);
    130   }
    131 
    132   template<int Width>
    133   bool SelectAddrModeWRO(SDValue N, SDValue &Base, SDValue &Offset,
    134                          SDValue &SignExtend, SDValue &DoShift) {
    135     return SelectAddrModeWRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
    136   }
    137 
    138   template<int Width>
    139   bool SelectAddrModeXRO(SDValue N, SDValue &Base, SDValue &Offset,
    140                          SDValue &SignExtend, SDValue &DoShift) {
    141     return SelectAddrModeXRO(N, Width / 8, Base, Offset, SignExtend, DoShift);
    142   }
    143 
    144   bool SelectDupZeroOrUndef(SDValue N) {
    145     switch(N->getOpcode()) {
    146     case ISD::UNDEF:
    147       return true;
    148     case AArch64ISD::DUP:
    149     case ISD::SPLAT_VECTOR: {
    150       auto Opnd0 = N->getOperand(0);
    151       if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
    152         if (CN->isNullValue())
    153           return true;
    154       if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
    155         if (CN->isZero())
    156           return true;
    157       break;
    158     }
    159     default:
    160       break;
    161     }
    162 
    163     return false;
    164   }
    165 
    166   bool SelectDupZero(SDValue N) {
    167     switch(N->getOpcode()) {
    168     case AArch64ISD::DUP:
    169     case ISD::SPLAT_VECTOR: {
    170       auto Opnd0 = N->getOperand(0);
    171       if (auto CN = dyn_cast<ConstantSDNode>(Opnd0))
    172         if (CN->isNullValue())
    173           return true;
    174       if (auto CN = dyn_cast<ConstantFPSDNode>(Opnd0))
    175         if (CN->isZero())
    176           return true;
    177       break;
    178     }
    179     }
    180 
    181     return false;
    182   }
    183 
    184   template<MVT::SimpleValueType VT>
    185   bool SelectSVEAddSubImm(SDValue N, SDValue &Imm, SDValue &Shift) {
    186     return SelectSVEAddSubImm(N, VT, Imm, Shift);
    187   }
    188 
    189   template <MVT::SimpleValueType VT, bool Invert = false>
    190   bool SelectSVELogicalImm(SDValue N, SDValue &Imm) {
    191     return SelectSVELogicalImm(N, VT, Imm, Invert);
    192   }
    193 
    194   template <MVT::SimpleValueType VT>
    195   bool SelectSVEArithImm(SDValue N, SDValue &Imm) {
    196     return SelectSVEArithImm(N, VT, Imm);
    197   }
    198 
    199   template <unsigned Low, unsigned High, bool AllowSaturation = false>
    200   bool SelectSVEShiftImm(SDValue N, SDValue &Imm) {
    201     return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm);
    202   }
    203 
    204   // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
    205   template<signed Min, signed Max, signed Scale, bool Shift>
    206   bool SelectCntImm(SDValue N, SDValue &Imm) {
    207     if (!isa<ConstantSDNode>(N))
    208       return false;
    209 
    210     int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    211     if (Shift)
    212       MulImm = 1LL << MulImm;
    213 
    214     if ((MulImm % std::abs(Scale)) != 0)
    215       return false;
    216 
    217     MulImm /= Scale;
    218     if ((MulImm >= Min) && (MulImm <= Max)) {
    219       Imm = CurDAG->getTargetConstant(MulImm, SDLoc(N), MVT::i32);
    220       return true;
    221     }
    222 
    223     return false;
    224   }
    225 
    226   /// Form sequences of consecutive 64/128-bit registers for use in NEON
    227   /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have
    228   /// between 1 and 4 elements. If it contains a single element that is returned
    229   /// unchanged; otherwise a REG_SEQUENCE value is returned.
    230   SDValue createDTuple(ArrayRef<SDValue> Vecs);
    231   SDValue createQTuple(ArrayRef<SDValue> Vecs);
    232   // Form a sequence of SVE registers for instructions using list of vectors,
    233   // e.g. structured loads and stores (ldN, stN).
    234   SDValue createZTuple(ArrayRef<SDValue> Vecs);
    235 
    236   /// Generic helper for the createDTuple/createQTuple
    237   /// functions. Those should almost always be called instead.
    238   SDValue createTuple(ArrayRef<SDValue> Vecs, const unsigned RegClassIDs[],
    239                       const unsigned SubRegs[]);
    240 
    241   void SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc, bool isExt);
    242 
    243   bool tryIndexedLoad(SDNode *N);
    244 
    245   bool trySelectStackSlotTagP(SDNode *N);
    246   void SelectTagP(SDNode *N);
    247 
    248   void SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
    249                      unsigned SubRegIdx);
    250   void SelectPostLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
    251                          unsigned SubRegIdx);
    252   void SelectLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    253   void SelectPostLoadLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    254   void SelectPredicatedLoad(SDNode *N, unsigned NumVecs, unsigned Scale,
    255                             unsigned Opc_rr, unsigned Opc_ri);
    256 
    257   bool SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base, SDValue &OffImm);
    258   /// SVE Reg+Imm addressing mode.
    259   template <int64_t Min, int64_t Max>
    260   bool SelectAddrModeIndexedSVE(SDNode *Root, SDValue N, SDValue &Base,
    261                                 SDValue &OffImm);
    262   /// SVE Reg+Reg address mode.
    263   template <unsigned Scale>
    264   bool SelectSVERegRegAddrMode(SDValue N, SDValue &Base, SDValue &Offset) {
    265     return SelectSVERegRegAddrMode(N, Scale, Base, Offset);
    266   }
    267 
    268   void SelectStore(SDNode *N, unsigned NumVecs, unsigned Opc);
    269   void SelectPostStore(SDNode *N, unsigned NumVecs, unsigned Opc);
    270   void SelectStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    271   void SelectPostStoreLane(SDNode *N, unsigned NumVecs, unsigned Opc);
    272   void SelectPredicatedStore(SDNode *N, unsigned NumVecs, unsigned Scale,
    273                              unsigned Opc_rr, unsigned Opc_ri);
    274   std::tuple<unsigned, SDValue, SDValue>
    275   findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr, unsigned Opc_ri,
    276                            const SDValue &OldBase, const SDValue &OldOffset,
    277                            unsigned Scale);
    278 
    279   bool tryBitfieldExtractOp(SDNode *N);
    280   bool tryBitfieldExtractOpFromSExt(SDNode *N);
    281   bool tryBitfieldInsertOp(SDNode *N);
    282   bool tryBitfieldInsertInZeroOp(SDNode *N);
    283   bool tryShiftAmountMod(SDNode *N);
    284   bool tryHighFPExt(SDNode *N);
    285 
    286   bool tryReadRegister(SDNode *N);
    287   bool tryWriteRegister(SDNode *N);
    288 
    289 // Include the pieces autogenerated from the target description.
    290 #include "AArch64GenDAGISel.inc"
    291 
    292 private:
    293   bool SelectShiftedRegister(SDValue N, bool AllowROR, SDValue &Reg,
    294                              SDValue &Shift);
    295   bool SelectAddrModeIndexed7S(SDValue N, unsigned Size, SDValue &Base,
    296                                SDValue &OffImm) {
    297     return SelectAddrModeIndexedBitWidth(N, true, 7, Size, Base, OffImm);
    298   }
    299   bool SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm, unsigned BW,
    300                                      unsigned Size, SDValue &Base,
    301                                      SDValue &OffImm);
    302   bool SelectAddrModeIndexed(SDValue N, unsigned Size, SDValue &Base,
    303                              SDValue &OffImm);
    304   bool SelectAddrModeUnscaled(SDValue N, unsigned Size, SDValue &Base,
    305                               SDValue &OffImm);
    306   bool SelectAddrModeWRO(SDValue N, unsigned Size, SDValue &Base,
    307                          SDValue &Offset, SDValue &SignExtend,
    308                          SDValue &DoShift);
    309   bool SelectAddrModeXRO(SDValue N, unsigned Size, SDValue &Base,
    310                          SDValue &Offset, SDValue &SignExtend,
    311                          SDValue &DoShift);
    312   bool isWorthFolding(SDValue V) const;
    313   bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend,
    314                          SDValue &Offset, SDValue &SignExtend);
    315 
    316   template<unsigned RegWidth>
    317   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos) {
    318     return SelectCVTFixedPosOperand(N, FixedPos, RegWidth);
    319   }
    320 
    321   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
    322 
    323   bool SelectCMP_SWAP(SDNode *N);
    324 
    325   bool SelectSVE8BitLslImm(SDValue N, SDValue &Imm, SDValue &Shift);
    326 
    327   bool SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift);
    328 
    329   bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm, bool Invert);
    330 
    331   bool SelectSVESignedArithImm(SDValue N, SDValue &Imm);
    332   bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High,
    333                          bool AllowSaturation, SDValue &Imm);
    334 
    335   bool SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm);
    336   bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base,
    337                                SDValue &Offset);
    338 
    339   bool SelectAllActivePredicate(SDValue N);
    340 };
    341 } // end anonymous namespace
    342 
    343 /// isIntImmediate - This method tests to see if the node is a constant
    344 /// operand. If so Imm will receive the 32-bit value.
    345 static bool isIntImmediate(const SDNode *N, uint64_t &Imm) {
    346   if (const ConstantSDNode *C = dyn_cast<const ConstantSDNode>(N)) {
    347     Imm = C->getZExtValue();
    348     return true;
    349   }
    350   return false;
    351 }
    352 
    353 // isIntImmediate - This method tests to see if a constant operand.
    354 // If so Imm will receive the value.
    355 static bool isIntImmediate(SDValue N, uint64_t &Imm) {
    356   return isIntImmediate(N.getNode(), Imm);
    357 }
    358 
    359 // isOpcWithIntImmediate - This method tests to see if the node is a specific
    360 // opcode and that it has a immediate integer right operand.
    361 // If so Imm will receive the 32 bit value.
    362 static bool isOpcWithIntImmediate(const SDNode *N, unsigned Opc,
    363                                   uint64_t &Imm) {
    364   return N->getOpcode() == Opc &&
    365          isIntImmediate(N->getOperand(1).getNode(), Imm);
    366 }
    367 
    368 bool AArch64DAGToDAGISel::SelectInlineAsmMemoryOperand(
    369     const SDValue &Op, unsigned ConstraintID, std::vector<SDValue> &OutOps) {
    370   switch(ConstraintID) {
    371   default:
    372     llvm_unreachable("Unexpected asm memory constraint");
    373   case InlineAsm::Constraint_m:
    374   case InlineAsm::Constraint_o:
    375   case InlineAsm::Constraint_Q:
    376     // We need to make sure that this one operand does not end up in XZR, thus
    377     // require the address to be in a PointerRegClass register.
    378     const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
    379     const TargetRegisterClass *TRC = TRI->getPointerRegClass(*MF);
    380     SDLoc dl(Op);
    381     SDValue RC = CurDAG->getTargetConstant(TRC->getID(), dl, MVT::i64);
    382     SDValue NewOp =
    383         SDValue(CurDAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS,
    384                                        dl, Op.getValueType(),
    385                                        Op, RC), 0);
    386     OutOps.push_back(NewOp);
    387     return false;
    388   }
    389   return true;
    390 }
    391 
    392 /// SelectArithImmed - Select an immediate value that can be represented as
    393 /// a 12-bit value shifted left by either 0 or 12.  If so, return true with
    394 /// Val set to the 12-bit value and Shift set to the shifter operand.
    395 bool AArch64DAGToDAGISel::SelectArithImmed(SDValue N, SDValue &Val,
    396                                            SDValue &Shift) {
    397   // This function is called from the addsub_shifted_imm ComplexPattern,
    398   // which lists [imm] as the list of opcode it's interested in, however
    399   // we still need to check whether the operand is actually an immediate
    400   // here because the ComplexPattern opcode list is only used in
    401   // root-level opcode matching.
    402   if (!isa<ConstantSDNode>(N.getNode()))
    403     return false;
    404 
    405   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
    406   unsigned ShiftAmt;
    407 
    408   if (Immed >> 12 == 0) {
    409     ShiftAmt = 0;
    410   } else if ((Immed & 0xfff) == 0 && Immed >> 24 == 0) {
    411     ShiftAmt = 12;
    412     Immed = Immed >> 12;
    413   } else
    414     return false;
    415 
    416   unsigned ShVal = AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftAmt);
    417   SDLoc dl(N);
    418   Val = CurDAG->getTargetConstant(Immed, dl, MVT::i32);
    419   Shift = CurDAG->getTargetConstant(ShVal, dl, MVT::i32);
    420   return true;
    421 }
    422 
    423 /// SelectNegArithImmed - As above, but negates the value before trying to
    424 /// select it.
    425 bool AArch64DAGToDAGISel::SelectNegArithImmed(SDValue N, SDValue &Val,
    426                                               SDValue &Shift) {
    427   // This function is called from the addsub_shifted_imm ComplexPattern,
    428   // which lists [imm] as the list of opcode it's interested in, however
    429   // we still need to check whether the operand is actually an immediate
    430   // here because the ComplexPattern opcode list is only used in
    431   // root-level opcode matching.
    432   if (!isa<ConstantSDNode>(N.getNode()))
    433     return false;
    434 
    435   // The immediate operand must be a 24-bit zero-extended immediate.
    436   uint64_t Immed = cast<ConstantSDNode>(N.getNode())->getZExtValue();
    437 
    438   // This negation is almost always valid, but "cmp wN, #0" and "cmn wN, #0"
    439   // have the opposite effect on the C flag, so this pattern mustn't match under
    440   // those circumstances.
    441   if (Immed == 0)
    442     return false;
    443 
    444   if (N.getValueType() == MVT::i32)
    445     Immed = ~((uint32_t)Immed) + 1;
    446   else
    447     Immed = ~Immed + 1ULL;
    448   if (Immed & 0xFFFFFFFFFF000000ULL)
    449     return false;
    450 
    451   Immed &= 0xFFFFFFULL;
    452   return SelectArithImmed(CurDAG->getConstant(Immed, SDLoc(N), MVT::i32), Val,
    453                           Shift);
    454 }
    455 
    456 /// getShiftTypeForNode - Translate a shift node to the corresponding
    457 /// ShiftType value.
    458 static AArch64_AM::ShiftExtendType getShiftTypeForNode(SDValue N) {
    459   switch (N.getOpcode()) {
    460   default:
    461     return AArch64_AM::InvalidShiftExtend;
    462   case ISD::SHL:
    463     return AArch64_AM::LSL;
    464   case ISD::SRL:
    465     return AArch64_AM::LSR;
    466   case ISD::SRA:
    467     return AArch64_AM::ASR;
    468   case ISD::ROTR:
    469     return AArch64_AM::ROR;
    470   }
    471 }
    472 
    473 /// Determine whether it is worth it to fold SHL into the addressing
    474 /// mode.
    475 static bool isWorthFoldingSHL(SDValue V) {
    476   assert(V.getOpcode() == ISD::SHL && "invalid opcode");
    477   // It is worth folding logical shift of up to three places.
    478   auto *CSD = dyn_cast<ConstantSDNode>(V.getOperand(1));
    479   if (!CSD)
    480     return false;
    481   unsigned ShiftVal = CSD->getZExtValue();
    482   if (ShiftVal > 3)
    483     return false;
    484 
    485   // Check if this particular node is reused in any non-memory related
    486   // operation.  If yes, do not try to fold this node into the address
    487   // computation, since the computation will be kept.
    488   const SDNode *Node = V.getNode();
    489   for (SDNode *UI : Node->uses())
    490     if (!isa<MemSDNode>(*UI))
    491       for (SDNode *UII : UI->uses())
    492         if (!isa<MemSDNode>(*UII))
    493           return false;
    494   return true;
    495 }
    496 
    497 /// Determine whether it is worth to fold V into an extended register.
    498 bool AArch64DAGToDAGISel::isWorthFolding(SDValue V) const {
    499   // Trivial if we are optimizing for code size or if there is only
    500   // one use of the value.
    501   if (CurDAG->shouldOptForSize() || V.hasOneUse())
    502     return true;
    503   // If a subtarget has a fastpath LSL we can fold a logical shift into
    504   // the addressing mode and save a cycle.
    505   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::SHL &&
    506       isWorthFoldingSHL(V))
    507     return true;
    508   if (Subtarget->hasLSLFast() && V.getOpcode() == ISD::ADD) {
    509     const SDValue LHS = V.getOperand(0);
    510     const SDValue RHS = V.getOperand(1);
    511     if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS))
    512       return true;
    513     if (RHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(RHS))
    514       return true;
    515   }
    516 
    517   // It hurts otherwise, since the value will be reused.
    518   return false;
    519 }
    520 
    521 /// SelectShiftedRegister - Select a "shifted register" operand.  If the value
    522 /// is not shifted, set the Shift operand to default of "LSL 0".  The logical
    523 /// instructions allow the shifted register to be rotated, but the arithmetic
    524 /// instructions do not.  The AllowROR parameter specifies whether ROR is
    525 /// supported.
    526 bool AArch64DAGToDAGISel::SelectShiftedRegister(SDValue N, bool AllowROR,
    527                                                 SDValue &Reg, SDValue &Shift) {
    528   AArch64_AM::ShiftExtendType ShType = getShiftTypeForNode(N);
    529   if (ShType == AArch64_AM::InvalidShiftExtend)
    530     return false;
    531   if (!AllowROR && ShType == AArch64_AM::ROR)
    532     return false;
    533 
    534   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    535     unsigned BitSize = N.getValueSizeInBits();
    536     unsigned Val = RHS->getZExtValue() & (BitSize - 1);
    537     unsigned ShVal = AArch64_AM::getShifterImm(ShType, Val);
    538 
    539     Reg = N.getOperand(0);
    540     Shift = CurDAG->getTargetConstant(ShVal, SDLoc(N), MVT::i32);
    541     return isWorthFolding(N);
    542   }
    543 
    544   return false;
    545 }
    546 
    547 /// getExtendTypeForNode - Translate an extend node to the corresponding
    548 /// ExtendType value.
    549 static AArch64_AM::ShiftExtendType
    550 getExtendTypeForNode(SDValue N, bool IsLoadStore = false) {
    551   if (N.getOpcode() == ISD::SIGN_EXTEND ||
    552       N.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    553     EVT SrcVT;
    554     if (N.getOpcode() == ISD::SIGN_EXTEND_INREG)
    555       SrcVT = cast<VTSDNode>(N.getOperand(1))->getVT();
    556     else
    557       SrcVT = N.getOperand(0).getValueType();
    558 
    559     if (!IsLoadStore && SrcVT == MVT::i8)
    560       return AArch64_AM::SXTB;
    561     else if (!IsLoadStore && SrcVT == MVT::i16)
    562       return AArch64_AM::SXTH;
    563     else if (SrcVT == MVT::i32)
    564       return AArch64_AM::SXTW;
    565     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
    566 
    567     return AArch64_AM::InvalidShiftExtend;
    568   } else if (N.getOpcode() == ISD::ZERO_EXTEND ||
    569              N.getOpcode() == ISD::ANY_EXTEND) {
    570     EVT SrcVT = N.getOperand(0).getValueType();
    571     if (!IsLoadStore && SrcVT == MVT::i8)
    572       return AArch64_AM::UXTB;
    573     else if (!IsLoadStore && SrcVT == MVT::i16)
    574       return AArch64_AM::UXTH;
    575     else if (SrcVT == MVT::i32)
    576       return AArch64_AM::UXTW;
    577     assert(SrcVT != MVT::i64 && "extend from 64-bits?");
    578 
    579     return AArch64_AM::InvalidShiftExtend;
    580   } else if (N.getOpcode() == ISD::AND) {
    581     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    582     if (!CSD)
    583       return AArch64_AM::InvalidShiftExtend;
    584     uint64_t AndMask = CSD->getZExtValue();
    585 
    586     switch (AndMask) {
    587     default:
    588       return AArch64_AM::InvalidShiftExtend;
    589     case 0xFF:
    590       return !IsLoadStore ? AArch64_AM::UXTB : AArch64_AM::InvalidShiftExtend;
    591     case 0xFFFF:
    592       return !IsLoadStore ? AArch64_AM::UXTH : AArch64_AM::InvalidShiftExtend;
    593     case 0xFFFFFFFF:
    594       return AArch64_AM::UXTW;
    595     }
    596   }
    597 
    598   return AArch64_AM::InvalidShiftExtend;
    599 }
    600 
    601 // Helper for SelectMLAV64LaneV128 - Recognize high lane extracts.
    602 static bool checkHighLaneIndex(SDNode *DL, SDValue &LaneOp, int &LaneIdx) {
    603   if (DL->getOpcode() != AArch64ISD::DUPLANE16 &&
    604       DL->getOpcode() != AArch64ISD::DUPLANE32)
    605     return false;
    606 
    607   SDValue SV = DL->getOperand(0);
    608   if (SV.getOpcode() != ISD::INSERT_SUBVECTOR)
    609     return false;
    610 
    611   SDValue EV = SV.getOperand(1);
    612   if (EV.getOpcode() != ISD::EXTRACT_SUBVECTOR)
    613     return false;
    614 
    615   ConstantSDNode *DLidx = cast<ConstantSDNode>(DL->getOperand(1).getNode());
    616   ConstantSDNode *EVidx = cast<ConstantSDNode>(EV.getOperand(1).getNode());
    617   LaneIdx = DLidx->getSExtValue() + EVidx->getSExtValue();
    618   LaneOp = EV.getOperand(0);
    619 
    620   return true;
    621 }
    622 
    623 // Helper for SelectOpcV64LaneV128 - Recognize operations where one operand is a
    624 // high lane extract.
    625 static bool checkV64LaneV128(SDValue Op0, SDValue Op1, SDValue &StdOp,
    626                              SDValue &LaneOp, int &LaneIdx) {
    627 
    628   if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx)) {
    629     std::swap(Op0, Op1);
    630     if (!checkHighLaneIndex(Op0.getNode(), LaneOp, LaneIdx))
    631       return false;
    632   }
    633   StdOp = Op1;
    634   return true;
    635 }
    636 
    637 /// SelectMLAV64LaneV128 - AArch64 supports vector MLAs where one multiplicand
    638 /// is a lane in the upper half of a 128-bit vector.  Recognize and select this
    639 /// so that we don't emit unnecessary lane extracts.
    640 bool AArch64DAGToDAGISel::tryMLAV64LaneV128(SDNode *N) {
    641   SDLoc dl(N);
    642   SDValue Op0 = N->getOperand(0);
    643   SDValue Op1 = N->getOperand(1);
    644   SDValue MLAOp1;   // Will hold ordinary multiplicand for MLA.
    645   SDValue MLAOp2;   // Will hold lane-accessed multiplicand for MLA.
    646   int LaneIdx = -1; // Will hold the lane index.
    647 
    648   if (Op1.getOpcode() != ISD::MUL ||
    649       !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
    650                         LaneIdx)) {
    651     std::swap(Op0, Op1);
    652     if (Op1.getOpcode() != ISD::MUL ||
    653         !checkV64LaneV128(Op1.getOperand(0), Op1.getOperand(1), MLAOp1, MLAOp2,
    654                           LaneIdx))
    655       return false;
    656   }
    657 
    658   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
    659 
    660   SDValue Ops[] = { Op0, MLAOp1, MLAOp2, LaneIdxVal };
    661 
    662   unsigned MLAOpc = ~0U;
    663 
    664   switch (N->getSimpleValueType(0).SimpleTy) {
    665   default:
    666     llvm_unreachable("Unrecognized MLA.");
    667   case MVT::v4i16:
    668     MLAOpc = AArch64::MLAv4i16_indexed;
    669     break;
    670   case MVT::v8i16:
    671     MLAOpc = AArch64::MLAv8i16_indexed;
    672     break;
    673   case MVT::v2i32:
    674     MLAOpc = AArch64::MLAv2i32_indexed;
    675     break;
    676   case MVT::v4i32:
    677     MLAOpc = AArch64::MLAv4i32_indexed;
    678     break;
    679   }
    680 
    681   ReplaceNode(N, CurDAG->getMachineNode(MLAOpc, dl, N->getValueType(0), Ops));
    682   return true;
    683 }
    684 
    685 bool AArch64DAGToDAGISel::tryMULLV64LaneV128(unsigned IntNo, SDNode *N) {
    686   SDLoc dl(N);
    687   SDValue SMULLOp0;
    688   SDValue SMULLOp1;
    689   int LaneIdx;
    690 
    691   if (!checkV64LaneV128(N->getOperand(1), N->getOperand(2), SMULLOp0, SMULLOp1,
    692                         LaneIdx))
    693     return false;
    694 
    695   SDValue LaneIdxVal = CurDAG->getTargetConstant(LaneIdx, dl, MVT::i64);
    696 
    697   SDValue Ops[] = { SMULLOp0, SMULLOp1, LaneIdxVal };
    698 
    699   unsigned SMULLOpc = ~0U;
    700 
    701   if (IntNo == Intrinsic::aarch64_neon_smull) {
    702     switch (N->getSimpleValueType(0).SimpleTy) {
    703     default:
    704       llvm_unreachable("Unrecognized SMULL.");
    705     case MVT::v4i32:
    706       SMULLOpc = AArch64::SMULLv4i16_indexed;
    707       break;
    708     case MVT::v2i64:
    709       SMULLOpc = AArch64::SMULLv2i32_indexed;
    710       break;
    711     }
    712   } else if (IntNo == Intrinsic::aarch64_neon_umull) {
    713     switch (N->getSimpleValueType(0).SimpleTy) {
    714     default:
    715       llvm_unreachable("Unrecognized SMULL.");
    716     case MVT::v4i32:
    717       SMULLOpc = AArch64::UMULLv4i16_indexed;
    718       break;
    719     case MVT::v2i64:
    720       SMULLOpc = AArch64::UMULLv2i32_indexed;
    721       break;
    722     }
    723   } else
    724     llvm_unreachable("Unrecognized intrinsic.");
    725 
    726   ReplaceNode(N, CurDAG->getMachineNode(SMULLOpc, dl, N->getValueType(0), Ops));
    727   return true;
    728 }
    729 
    730 /// Instructions that accept extend modifiers like UXTW expect the register
    731 /// being extended to be a GPR32, but the incoming DAG might be acting on a
    732 /// GPR64 (either via SEXT_INREG or AND). Extract the appropriate low bits if
    733 /// this is the case.
    734 static SDValue narrowIfNeeded(SelectionDAG *CurDAG, SDValue N) {
    735   if (N.getValueType() == MVT::i32)
    736     return N;
    737 
    738   SDLoc dl(N);
    739   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    740   MachineSDNode *Node = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
    741                                                dl, MVT::i32, N, SubReg);
    742   return SDValue(Node, 0);
    743 }
    744 
    745 // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N.
    746 template<signed Low, signed High, signed Scale>
    747 bool AArch64DAGToDAGISel::SelectRDVLImm(SDValue N, SDValue &Imm) {
    748   if (!isa<ConstantSDNode>(N))
    749     return false;
    750 
    751   int64_t MulImm = cast<ConstantSDNode>(N)->getSExtValue();
    752   if ((MulImm % std::abs(Scale)) == 0) {
    753     int64_t RDVLImm = MulImm / Scale;
    754     if ((RDVLImm >= Low) && (RDVLImm <= High)) {
    755       Imm = CurDAG->getTargetConstant(RDVLImm, SDLoc(N), MVT::i32);
    756       return true;
    757     }
    758   }
    759 
    760   return false;
    761 }
    762 
    763 /// SelectArithExtendedRegister - Select a "extended register" operand.  This
    764 /// operand folds in an extend followed by an optional left shift.
    765 bool AArch64DAGToDAGISel::SelectArithExtendedRegister(SDValue N, SDValue &Reg,
    766                                                       SDValue &Shift) {
    767   unsigned ShiftVal = 0;
    768   AArch64_AM::ShiftExtendType Ext;
    769 
    770   if (N.getOpcode() == ISD::SHL) {
    771     ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    772     if (!CSD)
    773       return false;
    774     ShiftVal = CSD->getZExtValue();
    775     if (ShiftVal > 4)
    776       return false;
    777 
    778     Ext = getExtendTypeForNode(N.getOperand(0));
    779     if (Ext == AArch64_AM::InvalidShiftExtend)
    780       return false;
    781 
    782     Reg = N.getOperand(0).getOperand(0);
    783   } else {
    784     Ext = getExtendTypeForNode(N);
    785     if (Ext == AArch64_AM::InvalidShiftExtend)
    786       return false;
    787 
    788     Reg = N.getOperand(0);
    789 
    790     // Don't match if free 32-bit -> 64-bit zext can be used instead.
    791     if (Ext == AArch64_AM::UXTW &&
    792         Reg->getValueType(0).getSizeInBits() == 32 && isDef32(*Reg.getNode()))
    793       return false;
    794   }
    795 
    796   // AArch64 mandates that the RHS of the operation must use the smallest
    797   // register class that could contain the size being extended from.  Thus,
    798   // if we're folding a (sext i8), we need the RHS to be a GPR32, even though
    799   // there might not be an actual 32-bit value in the program.  We can
    800   // (harmlessly) synthesize one by injected an EXTRACT_SUBREG here.
    801   assert(Ext != AArch64_AM::UXTX && Ext != AArch64_AM::SXTX);
    802   Reg = narrowIfNeeded(CurDAG, Reg);
    803   Shift = CurDAG->getTargetConstant(getArithExtendImm(Ext, ShiftVal), SDLoc(N),
    804                                     MVT::i32);
    805   return isWorthFolding(N);
    806 }
    807 
    808 /// If there's a use of this ADDlow that's not itself a load/store then we'll
    809 /// need to create a real ADD instruction from it anyway and there's no point in
    810 /// folding it into the mem op. Theoretically, it shouldn't matter, but there's
    811 /// a single pseudo-instruction for an ADRP/ADD pair so over-aggressive folding
    812 /// leads to duplicated ADRP instructions.
    813 static bool isWorthFoldingADDlow(SDValue N) {
    814   for (auto Use : N->uses()) {
    815     if (Use->getOpcode() != ISD::LOAD && Use->getOpcode() != ISD::STORE &&
    816         Use->getOpcode() != ISD::ATOMIC_LOAD &&
    817         Use->getOpcode() != ISD::ATOMIC_STORE)
    818       return false;
    819 
    820     // ldar and stlr have much more restrictive addressing modes (just a
    821     // register).
    822     if (isStrongerThanMonotonic(cast<MemSDNode>(Use)->getOrdering()))
    823       return false;
    824   }
    825 
    826   return true;
    827 }
    828 
    829 /// SelectAddrModeIndexedBitWidth - Select a "register plus scaled (un)signed BW-bit
    830 /// immediate" address.  The "Size" argument is the size in bytes of the memory
    831 /// reference, which determines the scale.
    832 bool AArch64DAGToDAGISel::SelectAddrModeIndexedBitWidth(SDValue N, bool IsSignedImm,
    833                                                         unsigned BW, unsigned Size,
    834                                                         SDValue &Base,
    835                                                         SDValue &OffImm) {
    836   SDLoc dl(N);
    837   const DataLayout &DL = CurDAG->getDataLayout();
    838   const TargetLowering *TLI = getTargetLowering();
    839   if (N.getOpcode() == ISD::FrameIndex) {
    840     int FI = cast<FrameIndexSDNode>(N)->getIndex();
    841     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    842     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    843     return true;
    844   }
    845 
    846   // As opposed to the (12-bit) Indexed addressing mode below, the 7/9-bit signed
    847   // selected here doesn't support labels/immediates, only base+offset.
    848   if (CurDAG->isBaseWithConstantOffset(N)) {
    849     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    850       if (IsSignedImm) {
    851         int64_t RHSC = RHS->getSExtValue();
    852         unsigned Scale = Log2_32(Size);
    853         int64_t Range = 0x1LL << (BW - 1);
    854 
    855         if ((RHSC & (Size - 1)) == 0 && RHSC >= -(Range << Scale) &&
    856             RHSC < (Range << Scale)) {
    857           Base = N.getOperand(0);
    858           if (Base.getOpcode() == ISD::FrameIndex) {
    859             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    860             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    861           }
    862           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
    863           return true;
    864         }
    865       } else {
    866         // unsigned Immediate
    867         uint64_t RHSC = RHS->getZExtValue();
    868         unsigned Scale = Log2_32(Size);
    869         uint64_t Range = 0x1ULL << BW;
    870 
    871         if ((RHSC & (Size - 1)) == 0 && RHSC < (Range << Scale)) {
    872           Base = N.getOperand(0);
    873           if (Base.getOpcode() == ISD::FrameIndex) {
    874             int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    875             Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    876           }
    877           OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
    878           return true;
    879         }
    880       }
    881     }
    882   }
    883   // Base only. The address will be materialized into a register before
    884   // the memory is accessed.
    885   //    add x0, Xbase, #offset
    886   //    stp x1, x2, [x0]
    887   Base = N;
    888   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    889   return true;
    890 }
    891 
    892 /// SelectAddrModeIndexed - Select a "register plus scaled unsigned 12-bit
    893 /// immediate" address.  The "Size" argument is the size in bytes of the memory
    894 /// reference, which determines the scale.
    895 bool AArch64DAGToDAGISel::SelectAddrModeIndexed(SDValue N, unsigned Size,
    896                                               SDValue &Base, SDValue &OffImm) {
    897   SDLoc dl(N);
    898   const DataLayout &DL = CurDAG->getDataLayout();
    899   const TargetLowering *TLI = getTargetLowering();
    900   if (N.getOpcode() == ISD::FrameIndex) {
    901     int FI = cast<FrameIndexSDNode>(N)->getIndex();
    902     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    903     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    904     return true;
    905   }
    906 
    907   if (N.getOpcode() == AArch64ISD::ADDlow && isWorthFoldingADDlow(N)) {
    908     GlobalAddressSDNode *GAN =
    909         dyn_cast<GlobalAddressSDNode>(N.getOperand(1).getNode());
    910     Base = N.getOperand(0);
    911     OffImm = N.getOperand(1);
    912     if (!GAN)
    913       return true;
    914 
    915     if (GAN->getOffset() % Size == 0 &&
    916         GAN->getGlobal()->getPointerAlignment(DL) >= Size)
    917       return true;
    918   }
    919 
    920   if (CurDAG->isBaseWithConstantOffset(N)) {
    921     if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    922       int64_t RHSC = (int64_t)RHS->getZExtValue();
    923       unsigned Scale = Log2_32(Size);
    924       if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 && RHSC < (0x1000 << Scale)) {
    925         Base = N.getOperand(0);
    926         if (Base.getOpcode() == ISD::FrameIndex) {
    927           int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    928           Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
    929         }
    930         OffImm = CurDAG->getTargetConstant(RHSC >> Scale, dl, MVT::i64);
    931         return true;
    932       }
    933     }
    934   }
    935 
    936   // Before falling back to our general case, check if the unscaled
    937   // instructions can handle this. If so, that's preferable.
    938   if (SelectAddrModeUnscaled(N, Size, Base, OffImm))
    939     return false;
    940 
    941   // Base only. The address will be materialized into a register before
    942   // the memory is accessed.
    943   //    add x0, Xbase, #offset
    944   //    ldr x0, [x0]
    945   Base = N;
    946   OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
    947   return true;
    948 }
    949 
    950 /// SelectAddrModeUnscaled - Select a "register plus unscaled signed 9-bit
    951 /// immediate" address.  This should only match when there is an offset that
    952 /// is not valid for a scaled immediate addressing mode.  The "Size" argument
    953 /// is the size in bytes of the memory reference, which is needed here to know
    954 /// what is valid for a scaled immediate.
    955 bool AArch64DAGToDAGISel::SelectAddrModeUnscaled(SDValue N, unsigned Size,
    956                                                  SDValue &Base,
    957                                                  SDValue &OffImm) {
    958   if (!CurDAG->isBaseWithConstantOffset(N))
    959     return false;
    960   if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    961     int64_t RHSC = RHS->getSExtValue();
    962     // If the offset is valid as a scaled immediate, don't match here.
    963     if ((RHSC & (Size - 1)) == 0 && RHSC >= 0 &&
    964         RHSC < (0x1000 << Log2_32(Size)))
    965       return false;
    966     if (RHSC >= -256 && RHSC < 256) {
    967       Base = N.getOperand(0);
    968       if (Base.getOpcode() == ISD::FrameIndex) {
    969         int FI = cast<FrameIndexSDNode>(Base)->getIndex();
    970         const TargetLowering *TLI = getTargetLowering();
    971         Base = CurDAG->getTargetFrameIndex(
    972             FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    973       }
    974       OffImm = CurDAG->getTargetConstant(RHSC, SDLoc(N), MVT::i64);
    975       return true;
    976     }
    977   }
    978   return false;
    979 }
    980 
    981 static SDValue Widen(SelectionDAG *CurDAG, SDValue N) {
    982   SDLoc dl(N);
    983   SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
    984   SDValue ImpDef = SDValue(
    985       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, MVT::i64), 0);
    986   MachineSDNode *Node = CurDAG->getMachineNode(
    987       TargetOpcode::INSERT_SUBREG, dl, MVT::i64, ImpDef, N, SubReg);
    988   return SDValue(Node, 0);
    989 }
    990 
    991 /// Check if the given SHL node (\p N), can be used to form an
    992 /// extended register for an addressing mode.
    993 bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size,
    994                                             bool WantExtend, SDValue &Offset,
    995                                             SDValue &SignExtend) {
    996   assert(N.getOpcode() == ISD::SHL && "Invalid opcode.");
    997   ConstantSDNode *CSD = dyn_cast<ConstantSDNode>(N.getOperand(1));
    998   if (!CSD || (CSD->getZExtValue() & 0x7) != CSD->getZExtValue())
    999     return false;
   1000 
   1001   SDLoc dl(N);
   1002   if (WantExtend) {
   1003     AArch64_AM::ShiftExtendType Ext =
   1004         getExtendTypeForNode(N.getOperand(0), true);
   1005     if (Ext == AArch64_AM::InvalidShiftExtend)
   1006       return false;
   1007 
   1008     Offset = narrowIfNeeded(CurDAG, N.getOperand(0).getOperand(0));
   1009     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
   1010                                            MVT::i32);
   1011   } else {
   1012     Offset = N.getOperand(0);
   1013     SignExtend = CurDAG->getTargetConstant(0, dl, MVT::i32);
   1014   }
   1015 
   1016   unsigned LegalShiftVal = Log2_32(Size);
   1017   unsigned ShiftVal = CSD->getZExtValue();
   1018 
   1019   if (ShiftVal != 0 && ShiftVal != LegalShiftVal)
   1020     return false;
   1021 
   1022   return isWorthFolding(N);
   1023 }
   1024 
   1025 bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size,
   1026                                             SDValue &Base, SDValue &Offset,
   1027                                             SDValue &SignExtend,
   1028                                             SDValue &DoShift) {
   1029   if (N.getOpcode() != ISD::ADD)
   1030     return false;
   1031   SDValue LHS = N.getOperand(0);
   1032   SDValue RHS = N.getOperand(1);
   1033   SDLoc dl(N);
   1034 
   1035   // We don't want to match immediate adds here, because they are better lowered
   1036   // to the register-immediate addressing modes.
   1037   if (isa<ConstantSDNode>(LHS) || isa<ConstantSDNode>(RHS))
   1038     return false;
   1039 
   1040   // Check if this particular node is reused in any non-memory related
   1041   // operation.  If yes, do not try to fold this node into the address
   1042   // computation, since the computation will be kept.
   1043   const SDNode *Node = N.getNode();
   1044   for (SDNode *UI : Node->uses()) {
   1045     if (!isa<MemSDNode>(*UI))
   1046       return false;
   1047   }
   1048 
   1049   // Remember if it is worth folding N when it produces extended register.
   1050   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
   1051 
   1052   // Try to match a shifted extend on the RHS.
   1053   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
   1054       SelectExtendedSHL(RHS, Size, true, Offset, SignExtend)) {
   1055     Base = LHS;
   1056     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
   1057     return true;
   1058   }
   1059 
   1060   // Try to match a shifted extend on the LHS.
   1061   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
   1062       SelectExtendedSHL(LHS, Size, true, Offset, SignExtend)) {
   1063     Base = RHS;
   1064     DoShift = CurDAG->getTargetConstant(true, dl, MVT::i32);
   1065     return true;
   1066   }
   1067 
   1068   // There was no shift, whatever else we find.
   1069   DoShift = CurDAG->getTargetConstant(false, dl, MVT::i32);
   1070 
   1071   AArch64_AM::ShiftExtendType Ext = AArch64_AM::InvalidShiftExtend;
   1072   // Try to match an unshifted extend on the LHS.
   1073   if (IsExtendedRegisterWorthFolding &&
   1074       (Ext = getExtendTypeForNode(LHS, true)) !=
   1075           AArch64_AM::InvalidShiftExtend) {
   1076     Base = RHS;
   1077     Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0));
   1078     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
   1079                                            MVT::i32);
   1080     if (isWorthFolding(LHS))
   1081       return true;
   1082   }
   1083 
   1084   // Try to match an unshifted extend on the RHS.
   1085   if (IsExtendedRegisterWorthFolding &&
   1086       (Ext = getExtendTypeForNode(RHS, true)) !=
   1087           AArch64_AM::InvalidShiftExtend) {
   1088     Base = LHS;
   1089     Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0));
   1090     SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl,
   1091                                            MVT::i32);
   1092     if (isWorthFolding(RHS))
   1093       return true;
   1094   }
   1095 
   1096   return false;
   1097 }
   1098 
// Tells whether ADD is the preferred way to apply the given immediate. That is
// the case when the immediate can be encoded directly in an ADD, or when it
// can be encoded in an "ADD LSL #12" while a single MOVZ cannot produce it.
static bool isPreferredADD(int64_t ImmOff) {
  const uint64_t Bits = static_cast<uint64_t>(ImmOff);
  const uint64_t Imm12 = 0xfffULL;

  // Anything in [0x0, 0xfff] is directly encodable in ADD.
  if ((Bits & ~Imm12) == 0)
    return true;

  // Otherwise only bits [12, 24) may be set for "ADD LSL #12" to apply.
  if ((Bits & ~(Imm12 << 12)) != 0)
    return false;

  // A single MOVZ is faster than an "ADD LSL #12", so constants that fit
  // entirely in one 16-bit MOVZ chunk are not preferred.
  const bool OnlyInLowChunk = (Bits & ~0xf000ULL) == 0;
  const bool OnlyInHighChunk = (Bits & ~0xff0000ULL) == 0;
  return !OnlyInHighChunk && !OnlyInLowChunk;
}
   1113 
   1114 bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size,
   1115                                             SDValue &Base, SDValue &Offset,
   1116                                             SDValue &SignExtend,
   1117                                             SDValue &DoShift) {
   1118   if (N.getOpcode() != ISD::ADD)
   1119     return false;
   1120   SDValue LHS = N.getOperand(0);
   1121   SDValue RHS = N.getOperand(1);
   1122   SDLoc DL(N);
   1123 
   1124   // Check if this particular node is reused in any non-memory related
   1125   // operation.  If yes, do not try to fold this node into the address
   1126   // computation, since the computation will be kept.
   1127   const SDNode *Node = N.getNode();
   1128   for (SDNode *UI : Node->uses()) {
   1129     if (!isa<MemSDNode>(*UI))
   1130       return false;
   1131   }
   1132 
   1133   // Watch out if RHS is a wide immediate, it can not be selected into
   1134   // [BaseReg+Imm] addressing mode. Also it may not be able to be encoded into
   1135   // ADD/SUB. Instead it will use [BaseReg + 0] address mode and generate
   1136   // instructions like:
   1137   //     MOV  X0, WideImmediate
   1138   //     ADD  X1, BaseReg, X0
   1139   //     LDR  X2, [X1, 0]
   1140   // For such situation, using [BaseReg, XReg] addressing mode can save one
   1141   // ADD/SUB:
   1142   //     MOV  X0, WideImmediate
   1143   //     LDR  X2, [BaseReg, X0]
   1144   if (isa<ConstantSDNode>(RHS)) {
   1145     int64_t ImmOff = (int64_t)cast<ConstantSDNode>(RHS)->getZExtValue();
   1146     unsigned Scale = Log2_32(Size);
   1147     // Skip the immediate can be selected by load/store addressing mode.
   1148     // Also skip the immediate can be encoded by a single ADD (SUB is also
   1149     // checked by using -ImmOff).
   1150     if ((ImmOff % Size == 0 && ImmOff >= 0 && ImmOff < (0x1000 << Scale)) ||
   1151         isPreferredADD(ImmOff) || isPreferredADD(-ImmOff))
   1152       return false;
   1153 
   1154     SDValue Ops[] = { RHS };
   1155     SDNode *MOVI =
   1156         CurDAG->getMachineNode(AArch64::MOVi64imm, DL, MVT::i64, Ops);
   1157     SDValue MOVIV = SDValue(MOVI, 0);
   1158     // This ADD of two X register will be selected into [Reg+Reg] mode.
   1159     N = CurDAG->getNode(ISD::ADD, DL, MVT::i64, LHS, MOVIV);
   1160   }
   1161 
   1162   // Remember if it is worth folding N when it produces extended register.
   1163   bool IsExtendedRegisterWorthFolding = isWorthFolding(N);
   1164 
   1165   // Try to match a shifted extend on the RHS.
   1166   if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL &&
   1167       SelectExtendedSHL(RHS, Size, false, Offset, SignExtend)) {
   1168     Base = LHS;
   1169     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
   1170     return true;
   1171   }
   1172 
   1173   // Try to match a shifted extend on the LHS.
   1174   if (IsExtendedRegisterWorthFolding && LHS.getOpcode() == ISD::SHL &&
   1175       SelectExtendedSHL(LHS, Size, false, Offset, SignExtend)) {
   1176     Base = RHS;
   1177     DoShift = CurDAG->getTargetConstant(true, DL, MVT::i32);
   1178     return true;
   1179   }
   1180 
   1181   // Match any non-shifted, non-extend, non-immediate add expression.
   1182   Base = LHS;
   1183   Offset = RHS;
   1184   SignExtend = CurDAG->getTargetConstant(false, DL, MVT::i32);
   1185   DoShift = CurDAG->getTargetConstant(false, DL, MVT::i32);
   1186   // Reg1 + Reg2 is free: no check needed.
   1187   return true;
   1188 }
   1189 
   1190 SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef<SDValue> Regs) {
   1191   static const unsigned RegClassIDs[] = {
   1192       AArch64::DDRegClassID, AArch64::DDDRegClassID, AArch64::DDDDRegClassID};
   1193   static const unsigned SubRegs[] = {AArch64::dsub0, AArch64::dsub1,
   1194                                      AArch64::dsub2, AArch64::dsub3};
   1195 
   1196   return createTuple(Regs, RegClassIDs, SubRegs);
   1197 }
   1198 
   1199 SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef<SDValue> Regs) {
   1200   static const unsigned RegClassIDs[] = {
   1201       AArch64::QQRegClassID, AArch64::QQQRegClassID, AArch64::QQQQRegClassID};
   1202   static const unsigned SubRegs[] = {AArch64::qsub0, AArch64::qsub1,
   1203                                      AArch64::qsub2, AArch64::qsub3};
   1204 
   1205   return createTuple(Regs, RegClassIDs, SubRegs);
   1206 }
   1207 
   1208 SDValue AArch64DAGToDAGISel::createZTuple(ArrayRef<SDValue> Regs) {
   1209   static const unsigned RegClassIDs[] = {AArch64::ZPR2RegClassID,
   1210                                          AArch64::ZPR3RegClassID,
   1211                                          AArch64::ZPR4RegClassID};
   1212   static const unsigned SubRegs[] = {AArch64::zsub0, AArch64::zsub1,
   1213                                      AArch64::zsub2, AArch64::zsub3};
   1214 
   1215   return createTuple(Regs, RegClassIDs, SubRegs);
   1216 }
   1217 
   1218 SDValue AArch64DAGToDAGISel::createTuple(ArrayRef<SDValue> Regs,
   1219                                          const unsigned RegClassIDs[],
   1220                                          const unsigned SubRegs[]) {
   1221   // There's no special register-class for a vector-list of 1 element: it's just
   1222   // a vector.
   1223   if (Regs.size() == 1)
   1224     return Regs[0];
   1225 
   1226   assert(Regs.size() >= 2 && Regs.size() <= 4);
   1227 
   1228   SDLoc DL(Regs[0]);
   1229 
   1230   SmallVector<SDValue, 4> Ops;
   1231 
   1232   // First operand of REG_SEQUENCE is the desired RegClass.
   1233   Ops.push_back(
   1234       CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], DL, MVT::i32));
   1235 
   1236   // Then we get pairs of source & subregister-position for the components.
   1237   for (unsigned i = 0; i < Regs.size(); ++i) {
   1238     Ops.push_back(Regs[i]);
   1239     Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], DL, MVT::i32));
   1240   }
   1241 
   1242   SDNode *N =
   1243       CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops);
   1244   return SDValue(N, 0);
   1245 }
   1246 
   1247 void AArch64DAGToDAGISel::SelectTable(SDNode *N, unsigned NumVecs, unsigned Opc,
   1248                                       bool isExt) {
   1249   SDLoc dl(N);
   1250   EVT VT = N->getValueType(0);
   1251 
   1252   unsigned ExtOff = isExt;
   1253 
   1254   // Form a REG_SEQUENCE to force register allocation.
   1255   unsigned Vec0Off = ExtOff + 1;
   1256   SmallVector<SDValue, 4> Regs(N->op_begin() + Vec0Off,
   1257                                N->op_begin() + Vec0Off + NumVecs);
   1258   SDValue RegSeq = createQTuple(Regs);
   1259 
   1260   SmallVector<SDValue, 6> Ops;
   1261   if (isExt)
   1262     Ops.push_back(N->getOperand(1));
   1263   Ops.push_back(RegSeq);
   1264   Ops.push_back(N->getOperand(NumVecs + ExtOff + 1));
   1265   ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
   1266 }
   1267 
   1268 bool AArch64DAGToDAGISel::tryIndexedLoad(SDNode *N) {
   1269   LoadSDNode *LD = cast<LoadSDNode>(N);
   1270   if (LD->isUnindexed())
   1271     return false;
   1272   EVT VT = LD->getMemoryVT();
   1273   EVT DstVT = N->getValueType(0);
   1274   ISD::MemIndexedMode AM = LD->getAddressingMode();
   1275   bool IsPre = AM == ISD::PRE_INC || AM == ISD::PRE_DEC;
   1276 
   1277   // We're not doing validity checking here. That was done when checking
   1278   // if we should mark the load as indexed or not. We're just selecting
   1279   // the right instruction.
   1280   unsigned Opcode = 0;
   1281 
   1282   ISD::LoadExtType ExtType = LD->getExtensionType();
   1283   bool InsertTo64 = false;
   1284   if (VT == MVT::i64)
   1285     Opcode = IsPre ? AArch64::LDRXpre : AArch64::LDRXpost;
   1286   else if (VT == MVT::i32) {
   1287     if (ExtType == ISD::NON_EXTLOAD)
   1288       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
   1289     else if (ExtType == ISD::SEXTLOAD)
   1290       Opcode = IsPre ? AArch64::LDRSWpre : AArch64::LDRSWpost;
   1291     else {
   1292       Opcode = IsPre ? AArch64::LDRWpre : AArch64::LDRWpost;
   1293       InsertTo64 = true;
   1294       // The result of the load is only i32. It's the subreg_to_reg that makes
   1295       // it into an i64.
   1296       DstVT = MVT::i32;
   1297     }
   1298   } else if (VT == MVT::i16) {
   1299     if (ExtType == ISD::SEXTLOAD) {
   1300       if (DstVT == MVT::i64)
   1301         Opcode = IsPre ? AArch64::LDRSHXpre : AArch64::LDRSHXpost;
   1302       else
   1303         Opcode = IsPre ? AArch64::LDRSHWpre : AArch64::LDRSHWpost;
   1304     } else {
   1305       Opcode = IsPre ? AArch64::LDRHHpre : AArch64::LDRHHpost;
   1306       InsertTo64 = DstVT == MVT::i64;
   1307       // The result of the load is only i32. It's the subreg_to_reg that makes
   1308       // it into an i64.
   1309       DstVT = MVT::i32;
   1310     }
   1311   } else if (VT == MVT::i8) {
   1312     if (ExtType == ISD::SEXTLOAD) {
   1313       if (DstVT == MVT::i64)
   1314         Opcode = IsPre ? AArch64::LDRSBXpre : AArch64::LDRSBXpost;
   1315       else
   1316         Opcode = IsPre ? AArch64::LDRSBWpre : AArch64::LDRSBWpost;
   1317     } else {
   1318       Opcode = IsPre ? AArch64::LDRBBpre : AArch64::LDRBBpost;
   1319       InsertTo64 = DstVT == MVT::i64;
   1320       // The result of the load is only i32. It's the subreg_to_reg that makes
   1321       // it into an i64.
   1322       DstVT = MVT::i32;
   1323     }
   1324   } else if (VT == MVT::f16) {
   1325     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
   1326   } else if (VT == MVT::bf16) {
   1327     Opcode = IsPre ? AArch64::LDRHpre : AArch64::LDRHpost;
   1328   } else if (VT == MVT::f32) {
   1329     Opcode = IsPre ? AArch64::LDRSpre : AArch64::LDRSpost;
   1330   } else if (VT == MVT::f64 || VT.is64BitVector()) {
   1331     Opcode = IsPre ? AArch64::LDRDpre : AArch64::LDRDpost;
   1332   } else if (VT.is128BitVector()) {
   1333     Opcode = IsPre ? AArch64::LDRQpre : AArch64::LDRQpost;
   1334   } else
   1335     return false;
   1336   SDValue Chain = LD->getChain();
   1337   SDValue Base = LD->getBasePtr();
   1338   ConstantSDNode *OffsetOp = cast<ConstantSDNode>(LD->getOffset());
   1339   int OffsetVal = (int)OffsetOp->getZExtValue();
   1340   SDLoc dl(N);
   1341   SDValue Offset = CurDAG->getTargetConstant(OffsetVal, dl, MVT::i64);
   1342   SDValue Ops[] = { Base, Offset, Chain };
   1343   SDNode *Res = CurDAG->getMachineNode(Opcode, dl, MVT::i64, DstVT,
   1344                                        MVT::Other, Ops);
   1345 
   1346   // Transfer memoperands.
   1347   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
   1348   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Res), {MemOp});
   1349 
   1350   // Either way, we're replacing the node, so tell the caller that.
   1351   SDValue LoadedVal = SDValue(Res, 1);
   1352   if (InsertTo64) {
   1353     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
   1354     LoadedVal =
   1355         SDValue(CurDAG->getMachineNode(
   1356                     AArch64::SUBREG_TO_REG, dl, MVT::i64,
   1357                     CurDAG->getTargetConstant(0, dl, MVT::i64), LoadedVal,
   1358                     SubReg),
   1359                 0);
   1360   }
   1361 
   1362   ReplaceUses(SDValue(N, 0), LoadedVal);
   1363   ReplaceUses(SDValue(N, 1), SDValue(Res, 0));
   1364   ReplaceUses(SDValue(N, 2), SDValue(Res, 2));
   1365   CurDAG->RemoveDeadNode(N);
   1366   return true;
   1367 }
   1368 
   1369 void AArch64DAGToDAGISel::SelectLoad(SDNode *N, unsigned NumVecs, unsigned Opc,
   1370                                      unsigned SubRegIdx) {
   1371   SDLoc dl(N);
   1372   EVT VT = N->getValueType(0);
   1373   SDValue Chain = N->getOperand(0);
   1374 
   1375   SDValue Ops[] = {N->getOperand(2), // Mem operand;
   1376                    Chain};
   1377 
   1378   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
   1379 
   1380   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1381   SDValue SuperReg = SDValue(Ld, 0);
   1382   for (unsigned i = 0; i < NumVecs; ++i)
   1383     ReplaceUses(SDValue(N, i),
   1384         CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
   1385 
   1386   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
   1387 
   1388   // Transfer memoperands. In the case of AArch64::LD64B, there won't be one,
   1389   // because it's too simple to have needed special treatment during lowering.
   1390   if (auto *MemIntr = dyn_cast<MemIntrinsicSDNode>(N)) {
   1391     MachineMemOperand *MemOp = MemIntr->getMemOperand();
   1392     CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
   1393   }
   1394 
   1395   CurDAG->RemoveDeadNode(N);
   1396 }
   1397 
   1398 void AArch64DAGToDAGISel::SelectPostLoad(SDNode *N, unsigned NumVecs,
   1399                                          unsigned Opc, unsigned SubRegIdx) {
   1400   SDLoc dl(N);
   1401   EVT VT = N->getValueType(0);
   1402   SDValue Chain = N->getOperand(0);
   1403 
   1404   SDValue Ops[] = {N->getOperand(1), // Mem operand
   1405                    N->getOperand(2), // Incremental
   1406                    Chain};
   1407 
   1408   const EVT ResTys[] = {MVT::i64, // Type of the write back register
   1409                         MVT::Untyped, MVT::Other};
   1410 
   1411   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1412 
   1413   // Update uses of write back register
   1414   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
   1415 
   1416   // Update uses of vector list
   1417   SDValue SuperReg = SDValue(Ld, 1);
   1418   if (NumVecs == 1)
   1419     ReplaceUses(SDValue(N, 0), SuperReg);
   1420   else
   1421     for (unsigned i = 0; i < NumVecs; ++i)
   1422       ReplaceUses(SDValue(N, i),
   1423           CurDAG->getTargetExtractSubreg(SubRegIdx + i, dl, VT, SuperReg));
   1424 
   1425   // Update the chain
   1426   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
   1427   CurDAG->RemoveDeadNode(N);
   1428 }
   1429 
   1430 /// Optimize \param OldBase and \param OldOffset selecting the best addressing
   1431 /// mode. Returns a tuple consisting of an Opcode, an SDValue representing the
   1432 /// new Base and an SDValue representing the new offset.
   1433 std::tuple<unsigned, SDValue, SDValue>
   1434 AArch64DAGToDAGISel::findAddrModeSVELoadStore(SDNode *N, unsigned Opc_rr,
   1435                                               unsigned Opc_ri,
   1436                                               const SDValue &OldBase,
   1437                                               const SDValue &OldOffset,
   1438                                               unsigned Scale) {
   1439   SDValue NewBase = OldBase;
   1440   SDValue NewOffset = OldOffset;
   1441   // Detect a possible Reg+Imm addressing mode.
   1442   const bool IsRegImm = SelectAddrModeIndexedSVE</*Min=*/-8, /*Max=*/7>(
   1443       N, OldBase, NewBase, NewOffset);
   1444 
   1445   // Detect a possible reg+reg addressing mode, but only if we haven't already
   1446   // detected a Reg+Imm one.
   1447   const bool IsRegReg =
   1448       !IsRegImm && SelectSVERegRegAddrMode(OldBase, Scale, NewBase, NewOffset);
   1449 
   1450   // Select the instruction.
   1451   return std::make_tuple(IsRegReg ? Opc_rr : Opc_ri, NewBase, NewOffset);
   1452 }
   1453 
   1454 void AArch64DAGToDAGISel::SelectPredicatedLoad(SDNode *N, unsigned NumVecs,
   1455                                                unsigned Scale, unsigned Opc_ri,
   1456                                                unsigned Opc_rr) {
   1457   assert(Scale < 4 && "Invalid scaling value.");
   1458   SDLoc DL(N);
   1459   EVT VT = N->getValueType(0);
   1460   SDValue Chain = N->getOperand(0);
   1461 
   1462   // Optimize addressing mode.
   1463   SDValue Base, Offset;
   1464   unsigned Opc;
   1465   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
   1466       N, Opc_rr, Opc_ri, N->getOperand(2),
   1467       CurDAG->getTargetConstant(0, DL, MVT::i64), Scale);
   1468 
   1469   SDValue Ops[] = {N->getOperand(1), // Predicate
   1470                    Base,             // Memory operand
   1471                    Offset, Chain};
   1472 
   1473   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
   1474 
   1475   SDNode *Load = CurDAG->getMachineNode(Opc, DL, ResTys, Ops);
   1476   SDValue SuperReg = SDValue(Load, 0);
   1477   for (unsigned i = 0; i < NumVecs; ++i)
   1478     ReplaceUses(SDValue(N, i), CurDAG->getTargetExtractSubreg(
   1479                                    AArch64::zsub0 + i, DL, VT, SuperReg));
   1480 
   1481   // Copy chain
   1482   unsigned ChainIdx = NumVecs;
   1483   ReplaceUses(SDValue(N, ChainIdx), SDValue(Load, 1));
   1484   CurDAG->RemoveDeadNode(N);
   1485 }
   1486 
   1487 void AArch64DAGToDAGISel::SelectStore(SDNode *N, unsigned NumVecs,
   1488                                       unsigned Opc) {
   1489   SDLoc dl(N);
   1490   EVT VT = N->getOperand(2)->getValueType(0);
   1491 
   1492   // Form a REG_SEQUENCE to force register allocation.
   1493   bool Is128Bit = VT.getSizeInBits() == 128;
   1494   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1495   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
   1496 
   1497   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), N->getOperand(0)};
   1498   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
   1499 
   1500   // Transfer memoperands.
   1501   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1502   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
   1503 
   1504   ReplaceNode(N, St);
   1505 }
   1506 
   1507 void AArch64DAGToDAGISel::SelectPredicatedStore(SDNode *N, unsigned NumVecs,
   1508                                                 unsigned Scale, unsigned Opc_rr,
   1509                                                 unsigned Opc_ri) {
   1510   SDLoc dl(N);
   1511 
   1512   // Form a REG_SEQUENCE to force register allocation.
   1513   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1514   SDValue RegSeq = createZTuple(Regs);
   1515 
   1516   // Optimize addressing mode.
   1517   unsigned Opc;
   1518   SDValue Offset, Base;
   1519   std::tie(Opc, Base, Offset) = findAddrModeSVELoadStore(
   1520       N, Opc_rr, Opc_ri, N->getOperand(NumVecs + 3),
   1521       CurDAG->getTargetConstant(0, dl, MVT::i64), Scale);
   1522 
   1523   SDValue Ops[] = {RegSeq, N->getOperand(NumVecs + 2), // predicate
   1524                    Base,                               // address
   1525                    Offset,                             // offset
   1526                    N->getOperand(0)};                  // chain
   1527   SDNode *St = CurDAG->getMachineNode(Opc, dl, N->getValueType(0), Ops);
   1528 
   1529   ReplaceNode(N, St);
   1530 }
   1531 
   1532 bool AArch64DAGToDAGISel::SelectAddrModeFrameIndexSVE(SDValue N, SDValue &Base,
   1533                                                       SDValue &OffImm) {
   1534   SDLoc dl(N);
   1535   const DataLayout &DL = CurDAG->getDataLayout();
   1536   const TargetLowering *TLI = getTargetLowering();
   1537 
   1538   // Try to match it for the frame address
   1539   if (auto FINode = dyn_cast<FrameIndexSDNode>(N)) {
   1540     int FI = FINode->getIndex();
   1541     Base = CurDAG->getTargetFrameIndex(FI, TLI->getPointerTy(DL));
   1542     OffImm = CurDAG->getTargetConstant(0, dl, MVT::i64);
   1543     return true;
   1544   }
   1545 
   1546   return false;
   1547 }
   1548 
   1549 void AArch64DAGToDAGISel::SelectPostStore(SDNode *N, unsigned NumVecs,
   1550                                           unsigned Opc) {
   1551   SDLoc dl(N);
   1552   EVT VT = N->getOperand(2)->getValueType(0);
   1553   const EVT ResTys[] = {MVT::i64,    // Type of the write back register
   1554                         MVT::Other}; // Type for the Chain
   1555 
   1556   // Form a REG_SEQUENCE to force register allocation.
   1557   bool Is128Bit = VT.getSizeInBits() == 128;
   1558   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1559   SDValue RegSeq = Is128Bit ? createQTuple(Regs) : createDTuple(Regs);
   1560 
   1561   SDValue Ops[] = {RegSeq,
   1562                    N->getOperand(NumVecs + 1), // base register
   1563                    N->getOperand(NumVecs + 2), // Incremental
   1564                    N->getOperand(0)};          // Chain
   1565   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1566 
   1567   ReplaceNode(N, St);
   1568 }
   1569 
   1570 namespace {
   1571 /// WidenVector - Given a value in the V64 register class, produce the
   1572 /// equivalent value in the V128 register class.
   1573 class WidenVector {
   1574   SelectionDAG &DAG;
   1575 
   1576 public:
   1577   WidenVector(SelectionDAG &DAG) : DAG(DAG) {}
   1578 
   1579   SDValue operator()(SDValue V64Reg) {
   1580     EVT VT = V64Reg.getValueType();
   1581     unsigned NarrowSize = VT.getVectorNumElements();
   1582     MVT EltTy = VT.getVectorElementType().getSimpleVT();
   1583     MVT WideTy = MVT::getVectorVT(EltTy, 2 * NarrowSize);
   1584     SDLoc DL(V64Reg);
   1585 
   1586     SDValue Undef =
   1587         SDValue(DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, WideTy), 0);
   1588     return DAG.getTargetInsertSubreg(AArch64::dsub, DL, WideTy, Undef, V64Reg);
   1589   }
   1590 };
   1591 } // namespace
   1592 
   1593 /// NarrowVector - Given a value in the V128 register class, produce the
   1594 /// equivalent value in the V64 register class.
   1595 static SDValue NarrowVector(SDValue V128Reg, SelectionDAG &DAG) {
   1596   EVT VT = V128Reg.getValueType();
   1597   unsigned WideSize = VT.getVectorNumElements();
   1598   MVT EltTy = VT.getVectorElementType().getSimpleVT();
   1599   MVT NarrowTy = MVT::getVectorVT(EltTy, WideSize / 2);
   1600 
   1601   return DAG.getTargetExtractSubreg(AArch64::dsub, SDLoc(V128Reg), NarrowTy,
   1602                                     V128Reg);
   1603 }
   1604 
   1605 void AArch64DAGToDAGISel::SelectLoadLane(SDNode *N, unsigned NumVecs,
   1606                                          unsigned Opc) {
   1607   SDLoc dl(N);
   1608   EVT VT = N->getValueType(0);
   1609   bool Narrow = VT.getSizeInBits() == 64;
   1610 
   1611   // Form a REG_SEQUENCE to force register allocation.
   1612   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1613 
   1614   if (Narrow)
   1615     transform(Regs, Regs.begin(),
   1616                    WidenVector(*CurDAG));
   1617 
   1618   SDValue RegSeq = createQTuple(Regs);
   1619 
   1620   const EVT ResTys[] = {MVT::Untyped, MVT::Other};
   1621 
   1622   unsigned LaneNo =
   1623       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
   1624 
   1625   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
   1626                    N->getOperand(NumVecs + 3), N->getOperand(0)};
   1627   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1628   SDValue SuperReg = SDValue(Ld, 0);
   1629 
   1630   EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
   1631   static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
   1632                                     AArch64::qsub2, AArch64::qsub3 };
   1633   for (unsigned i = 0; i < NumVecs; ++i) {
   1634     SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT, SuperReg);
   1635     if (Narrow)
   1636       NV = NarrowVector(NV, *CurDAG);
   1637     ReplaceUses(SDValue(N, i), NV);
   1638   }
   1639 
   1640   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 1));
   1641   CurDAG->RemoveDeadNode(N);
   1642 }
   1643 
   1644 void AArch64DAGToDAGISel::SelectPostLoadLane(SDNode *N, unsigned NumVecs,
   1645                                              unsigned Opc) {
   1646   SDLoc dl(N);
   1647   EVT VT = N->getValueType(0);
   1648   bool Narrow = VT.getSizeInBits() == 64;
   1649 
   1650   // Form a REG_SEQUENCE to force register allocation.
   1651   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1652 
   1653   if (Narrow)
   1654     transform(Regs, Regs.begin(),
   1655                    WidenVector(*CurDAG));
   1656 
   1657   SDValue RegSeq = createQTuple(Regs);
   1658 
   1659   const EVT ResTys[] = {MVT::i64, // Type of the write back register
   1660                         RegSeq->getValueType(0), MVT::Other};
   1661 
   1662   unsigned LaneNo =
   1663       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
   1664 
   1665   SDValue Ops[] = {RegSeq,
   1666                    CurDAG->getTargetConstant(LaneNo, dl,
   1667                                              MVT::i64),         // Lane Number
   1668                    N->getOperand(NumVecs + 2),                  // Base register
   1669                    N->getOperand(NumVecs + 3),                  // Incremental
   1670                    N->getOperand(0)};
   1671   SDNode *Ld = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1672 
   1673   // Update uses of the write back register
   1674   ReplaceUses(SDValue(N, NumVecs), SDValue(Ld, 0));
   1675 
   1676   // Update uses of the vector list
   1677   SDValue SuperReg = SDValue(Ld, 1);
   1678   if (NumVecs == 1) {
   1679     ReplaceUses(SDValue(N, 0),
   1680                 Narrow ? NarrowVector(SuperReg, *CurDAG) : SuperReg);
   1681   } else {
   1682     EVT WideVT = RegSeq.getOperand(1)->getValueType(0);
   1683     static const unsigned QSubs[] = { AArch64::qsub0, AArch64::qsub1,
   1684                                       AArch64::qsub2, AArch64::qsub3 };
   1685     for (unsigned i = 0; i < NumVecs; ++i) {
   1686       SDValue NV = CurDAG->getTargetExtractSubreg(QSubs[i], dl, WideVT,
   1687                                                   SuperReg);
   1688       if (Narrow)
   1689         NV = NarrowVector(NV, *CurDAG);
   1690       ReplaceUses(SDValue(N, i), NV);
   1691     }
   1692   }
   1693 
   1694   // Update the Chain
   1695   ReplaceUses(SDValue(N, NumVecs + 1), SDValue(Ld, 2));
   1696   CurDAG->RemoveDeadNode(N);
   1697 }
   1698 
   1699 void AArch64DAGToDAGISel::SelectStoreLane(SDNode *N, unsigned NumVecs,
   1700                                           unsigned Opc) {
   1701   SDLoc dl(N);
   1702   EVT VT = N->getOperand(2)->getValueType(0);
   1703   bool Narrow = VT.getSizeInBits() == 64;
   1704 
   1705   // Form a REG_SEQUENCE to force register allocation.
   1706   SmallVector<SDValue, 4> Regs(N->op_begin() + 2, N->op_begin() + 2 + NumVecs);
   1707 
   1708   if (Narrow)
   1709     transform(Regs, Regs.begin(),
   1710                    WidenVector(*CurDAG));
   1711 
   1712   SDValue RegSeq = createQTuple(Regs);
   1713 
   1714   unsigned LaneNo =
   1715       cast<ConstantSDNode>(N->getOperand(NumVecs + 2))->getZExtValue();
   1716 
   1717   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
   1718                    N->getOperand(NumVecs + 3), N->getOperand(0)};
   1719   SDNode *St = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
   1720 
   1721   // Transfer memoperands.
   1722   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1723   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
   1724 
   1725   ReplaceNode(N, St);
   1726 }
   1727 
   1728 void AArch64DAGToDAGISel::SelectPostStoreLane(SDNode *N, unsigned NumVecs,
   1729                                               unsigned Opc) {
   1730   SDLoc dl(N);
   1731   EVT VT = N->getOperand(2)->getValueType(0);
   1732   bool Narrow = VT.getSizeInBits() == 64;
   1733 
   1734   // Form a REG_SEQUENCE to force register allocation.
   1735   SmallVector<SDValue, 4> Regs(N->op_begin() + 1, N->op_begin() + 1 + NumVecs);
   1736 
   1737   if (Narrow)
   1738     transform(Regs, Regs.begin(),
   1739                    WidenVector(*CurDAG));
   1740 
   1741   SDValue RegSeq = createQTuple(Regs);
   1742 
   1743   const EVT ResTys[] = {MVT::i64, // Type of the write back register
   1744                         MVT::Other};
   1745 
   1746   unsigned LaneNo =
   1747       cast<ConstantSDNode>(N->getOperand(NumVecs + 1))->getZExtValue();
   1748 
   1749   SDValue Ops[] = {RegSeq, CurDAG->getTargetConstant(LaneNo, dl, MVT::i64),
   1750                    N->getOperand(NumVecs + 2), // Base Register
   1751                    N->getOperand(NumVecs + 3), // Incremental
   1752                    N->getOperand(0)};
   1753   SDNode *St = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
   1754 
   1755   // Transfer memoperands.
   1756   MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
   1757   CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
   1758 
   1759   ReplaceNode(N, St);
   1760 }
   1761 
   1762 static bool isBitfieldExtractOpFromAnd(SelectionDAG *CurDAG, SDNode *N,
   1763                                        unsigned &Opc, SDValue &Opd0,
   1764                                        unsigned &LSB, unsigned &MSB,
   1765                                        unsigned NumberOfIgnoredLowBits,
   1766                                        bool BiggerPattern) {
   1767   assert(N->getOpcode() == ISD::AND &&
   1768          "N must be a AND operation to call this function");
   1769 
   1770   EVT VT = N->getValueType(0);
   1771 
   1772   // Here we can test the type of VT and return false when the type does not
   1773   // match, but since it is done prior to that call in the current context
   1774   // we turned that into an assert to avoid redundant code.
   1775   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1776          "Type checking must have been done before calling this function");
   1777 
   1778   // FIXME: simplify-demanded-bits in DAGCombine will probably have
   1779   // changed the AND node to a 32-bit mask operation. We'll have to
   1780   // undo that as part of the transform here if we want to catch all
   1781   // the opportunities.
   1782   // Currently the NumberOfIgnoredLowBits argument helps to recover
   1783   // form these situations when matching bigger pattern (bitfield insert).
   1784 
   1785   // For unsigned extracts, check for a shift right and mask
   1786   uint64_t AndImm = 0;
   1787   if (!isOpcWithIntImmediate(N, ISD::AND, AndImm))
   1788     return false;
   1789 
   1790   const SDNode *Op0 = N->getOperand(0).getNode();
   1791 
   1792   // Because of simplify-demanded-bits in DAGCombine, the mask may have been
   1793   // simplified. Try to undo that
   1794   AndImm |= maskTrailingOnes<uint64_t>(NumberOfIgnoredLowBits);
   1795 
   1796   // The immediate is a mask of the low bits iff imm & (imm+1) == 0
   1797   if (AndImm & (AndImm + 1))
   1798     return false;
   1799 
   1800   bool ClampMSB = false;
   1801   uint64_t SrlImm = 0;
   1802   // Handle the SRL + ANY_EXTEND case.
   1803   if (VT == MVT::i64 && Op0->getOpcode() == ISD::ANY_EXTEND &&
   1804       isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL, SrlImm)) {
   1805     // Extend the incoming operand of the SRL to 64-bit.
   1806     Opd0 = Widen(CurDAG, Op0->getOperand(0).getOperand(0));
   1807     // Make sure to clamp the MSB so that we preserve the semantics of the
   1808     // original operations.
   1809     ClampMSB = true;
   1810   } else if (VT == MVT::i32 && Op0->getOpcode() == ISD::TRUNCATE &&
   1811              isOpcWithIntImmediate(Op0->getOperand(0).getNode(), ISD::SRL,
   1812                                    SrlImm)) {
   1813     // If the shift result was truncated, we can still combine them.
   1814     Opd0 = Op0->getOperand(0).getOperand(0);
   1815 
   1816     // Use the type of SRL node.
   1817     VT = Opd0->getValueType(0);
   1818   } else if (isOpcWithIntImmediate(Op0, ISD::SRL, SrlImm)) {
   1819     Opd0 = Op0->getOperand(0);
   1820   } else if (BiggerPattern) {
   1821     // Let's pretend a 0 shift right has been performed.
   1822     // The resulting code will be at least as good as the original one
   1823     // plus it may expose more opportunities for bitfield insert pattern.
   1824     // FIXME: Currently we limit this to the bigger pattern, because
   1825     // some optimizations expect AND and not UBFM.
   1826     Opd0 = N->getOperand(0);
   1827   } else
   1828     return false;
   1829 
   1830   // Bail out on large immediates. This happens when no proper
   1831   // combining/constant folding was performed.
   1832   if (!BiggerPattern && (SrlImm <= 0 || SrlImm >= VT.getSizeInBits())) {
   1833     LLVM_DEBUG(
   1834         (dbgs() << N
   1835                 << ": Found large shift immediate, this should not happen\n"));
   1836     return false;
   1837   }
   1838 
   1839   LSB = SrlImm;
   1840   MSB = SrlImm + (VT == MVT::i32 ? countTrailingOnes<uint32_t>(AndImm)
   1841                                  : countTrailingOnes<uint64_t>(AndImm)) -
   1842         1;
   1843   if (ClampMSB)
   1844     // Since we're moving the extend before the right shift operation, we need
   1845     // to clamp the MSB to make sure we don't shift in undefined bits instead of
   1846     // the zeros which would get shifted in with the original right shift
   1847     // operation.
   1848     MSB = MSB > 31 ? 31 : MSB;
   1849 
   1850   Opc = VT == MVT::i32 ? AArch64::UBFMWri : AArch64::UBFMXri;
   1851   return true;
   1852 }
   1853 
   1854 static bool isBitfieldExtractOpFromSExtInReg(SDNode *N, unsigned &Opc,
   1855                                              SDValue &Opd0, unsigned &Immr,
   1856                                              unsigned &Imms) {
   1857   assert(N->getOpcode() == ISD::SIGN_EXTEND_INREG);
   1858 
   1859   EVT VT = N->getValueType(0);
   1860   unsigned BitWidth = VT.getSizeInBits();
   1861   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1862          "Type checking must have been done before calling this function");
   1863 
   1864   SDValue Op = N->getOperand(0);
   1865   if (Op->getOpcode() == ISD::TRUNCATE) {
   1866     Op = Op->getOperand(0);
   1867     VT = Op->getValueType(0);
   1868     BitWidth = VT.getSizeInBits();
   1869   }
   1870 
   1871   uint64_t ShiftImm;
   1872   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRL, ShiftImm) &&
   1873       !isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
   1874     return false;
   1875 
   1876   unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
   1877   if (ShiftImm + Width > BitWidth)
   1878     return false;
   1879 
   1880   Opc = (VT == MVT::i32) ? AArch64::SBFMWri : AArch64::SBFMXri;
   1881   Opd0 = Op.getOperand(0);
   1882   Immr = ShiftImm;
   1883   Imms = ShiftImm + Width - 1;
   1884   return true;
   1885 }
   1886 
   1887 static bool isSeveralBitsExtractOpFromShr(SDNode *N, unsigned &Opc,
   1888                                           SDValue &Opd0, unsigned &LSB,
   1889                                           unsigned &MSB) {
   1890   // We are looking for the following pattern which basically extracts several
   1891   // continuous bits from the source value and places it from the LSB of the
   1892   // destination value, all other bits of the destination value or set to zero:
   1893   //
   1894   // Value2 = AND Value, MaskImm
   1895   // SRL Value2, ShiftImm
   1896   //
   1897   // with MaskImm >> ShiftImm to search for the bit width.
   1898   //
   1899   // This gets selected into a single UBFM:
   1900   //
   1901   // UBFM Value, ShiftImm, BitWide + SrlImm -1
   1902   //
   1903 
   1904   if (N->getOpcode() != ISD::SRL)
   1905     return false;
   1906 
   1907   uint64_t AndMask = 0;
   1908   if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, AndMask))
   1909     return false;
   1910 
   1911   Opd0 = N->getOperand(0).getOperand(0);
   1912 
   1913   uint64_t SrlImm = 0;
   1914   if (!isIntImmediate(N->getOperand(1), SrlImm))
   1915     return false;
   1916 
   1917   // Check whether we really have several bits extract here.
   1918   unsigned BitWide = 64 - countLeadingOnes(~(AndMask >> SrlImm));
   1919   if (BitWide && isMask_64(AndMask >> SrlImm)) {
   1920     if (N->getValueType(0) == MVT::i32)
   1921       Opc = AArch64::UBFMWri;
   1922     else
   1923       Opc = AArch64::UBFMXri;
   1924 
   1925     LSB = SrlImm;
   1926     MSB = BitWide + SrlImm - 1;
   1927     return true;
   1928   }
   1929 
   1930   return false;
   1931 }
   1932 
   1933 static bool isBitfieldExtractOpFromShr(SDNode *N, unsigned &Opc, SDValue &Opd0,
   1934                                        unsigned &Immr, unsigned &Imms,
   1935                                        bool BiggerPattern) {
   1936   assert((N->getOpcode() == ISD::SRA || N->getOpcode() == ISD::SRL) &&
   1937          "N must be a SHR/SRA operation to call this function");
   1938 
   1939   EVT VT = N->getValueType(0);
   1940 
   1941   // Here we can test the type of VT and return false when the type does not
   1942   // match, but since it is done prior to that call in the current context
   1943   // we turned that into an assert to avoid redundant code.
   1944   assert((VT == MVT::i32 || VT == MVT::i64) &&
   1945          "Type checking must have been done before calling this function");
   1946 
   1947   // Check for AND + SRL doing several bits extract.
   1948   if (isSeveralBitsExtractOpFromShr(N, Opc, Opd0, Immr, Imms))
   1949     return true;
   1950 
   1951   // We're looking for a shift of a shift.
   1952   uint64_t ShlImm = 0;
   1953   uint64_t TruncBits = 0;
   1954   if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, ShlImm)) {
   1955     Opd0 = N->getOperand(0).getOperand(0);
   1956   } else if (VT == MVT::i32 && N->getOpcode() == ISD::SRL &&
   1957              N->getOperand(0).getNode()->getOpcode() == ISD::TRUNCATE) {
   1958     // We are looking for a shift of truncate. Truncate from i64 to i32 could
   1959     // be considered as setting high 32 bits as zero. Our strategy here is to
   1960     // always generate 64bit UBFM. This consistency will help the CSE pass
   1961     // later find more redundancy.
   1962     Opd0 = N->getOperand(0).getOperand(0);
   1963     TruncBits = Opd0->getValueType(0).getSizeInBits() - VT.getSizeInBits();
   1964     VT = Opd0.getValueType();
   1965     assert(VT == MVT::i64 && "the promoted type should be i64");
   1966   } else if (BiggerPattern) {
   1967     // Let's pretend a 0 shift left has been performed.
   1968     // FIXME: Currently we limit this to the bigger pattern case,
   1969     // because some optimizations expect AND and not UBFM
   1970     Opd0 = N->getOperand(0);
   1971   } else
   1972     return false;
   1973 
   1974   // Missing combines/constant folding may have left us with strange
   1975   // constants.
   1976   if (ShlImm >= VT.getSizeInBits()) {
   1977     LLVM_DEBUG(
   1978         (dbgs() << N
   1979                 << ": Found large shift immediate, this should not happen\n"));
   1980     return false;
   1981   }
   1982 
   1983   uint64_t SrlImm = 0;
   1984   if (!isIntImmediate(N->getOperand(1), SrlImm))
   1985     return false;
   1986 
   1987   assert(SrlImm > 0 && SrlImm < VT.getSizeInBits() &&
   1988          "bad amount in shift node!");
   1989   int immr = SrlImm - ShlImm;
   1990   Immr = immr < 0 ? immr + VT.getSizeInBits() : immr;
   1991   Imms = VT.getSizeInBits() - ShlImm - TruncBits - 1;
   1992   // SRA requires a signed extraction
   1993   if (VT == MVT::i32)
   1994     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMWri : AArch64::UBFMWri;
   1995   else
   1996     Opc = N->getOpcode() == ISD::SRA ? AArch64::SBFMXri : AArch64::UBFMXri;
   1997   return true;
   1998 }
   1999 
   2000 bool AArch64DAGToDAGISel::tryBitfieldExtractOpFromSExt(SDNode *N) {
   2001   assert(N->getOpcode() == ISD::SIGN_EXTEND);
   2002 
   2003   EVT VT = N->getValueType(0);
   2004   EVT NarrowVT = N->getOperand(0)->getValueType(0);
   2005   if (VT != MVT::i64 || NarrowVT != MVT::i32)
   2006     return false;
   2007 
   2008   uint64_t ShiftImm;
   2009   SDValue Op = N->getOperand(0);
   2010   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SRA, ShiftImm))
   2011     return false;
   2012 
   2013   SDLoc dl(N);
   2014   // Extend the incoming operand of the shift to 64-bits.
   2015   SDValue Opd0 = Widen(CurDAG, Op.getOperand(0));
   2016   unsigned Immr = ShiftImm;
   2017   unsigned Imms = NarrowVT.getSizeInBits() - 1;
   2018   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
   2019                    CurDAG->getTargetConstant(Imms, dl, VT)};
   2020   CurDAG->SelectNodeTo(N, AArch64::SBFMXri, VT, Ops);
   2021   return true;
   2022 }
   2023 
   2024 /// Try to form fcvtl2 instructions from a floating-point extend of a high-half
   2025 /// extract of a subvector.
   2026 bool AArch64DAGToDAGISel::tryHighFPExt(SDNode *N) {
   2027   assert(N->getOpcode() == ISD::FP_EXTEND);
   2028 
   2029   // There are 2 forms of fcvtl2 - extend to double or extend to float.
   2030   SDValue Extract = N->getOperand(0);
   2031   EVT VT = N->getValueType(0);
   2032   EVT NarrowVT = Extract.getValueType();
   2033   if ((VT != MVT::v2f64 || NarrowVT != MVT::v2f32) &&
   2034       (VT != MVT::v4f32 || NarrowVT != MVT::v4f16))
   2035     return false;
   2036 
   2037   // Optionally look past a bitcast.
   2038   Extract = peekThroughBitcasts(Extract);
   2039   if (Extract.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   2040     return false;
   2041 
   2042   // Match extract from start of high half index.
   2043   // Example: v8i16 -> v4i16 means the extract must begin at index 4.
   2044   unsigned ExtractIndex = Extract.getConstantOperandVal(1);
   2045   if (ExtractIndex != Extract.getValueType().getVectorNumElements())
   2046     return false;
   2047 
   2048   auto Opcode = VT == MVT::v2f64 ? AArch64::FCVTLv4i32 : AArch64::FCVTLv8i16;
   2049   CurDAG->SelectNodeTo(N, Opcode, VT, Extract.getOperand(0));
   2050   return true;
   2051 }
   2052 
   2053 static bool isBitfieldExtractOp(SelectionDAG *CurDAG, SDNode *N, unsigned &Opc,
   2054                                 SDValue &Opd0, unsigned &Immr, unsigned &Imms,
   2055                                 unsigned NumberOfIgnoredLowBits = 0,
   2056                                 bool BiggerPattern = false) {
   2057   if (N->getValueType(0) != MVT::i32 && N->getValueType(0) != MVT::i64)
   2058     return false;
   2059 
   2060   switch (N->getOpcode()) {
   2061   default:
   2062     if (!N->isMachineOpcode())
   2063       return false;
   2064     break;
   2065   case ISD::AND:
   2066     return isBitfieldExtractOpFromAnd(CurDAG, N, Opc, Opd0, Immr, Imms,
   2067                                       NumberOfIgnoredLowBits, BiggerPattern);
   2068   case ISD::SRL:
   2069   case ISD::SRA:
   2070     return isBitfieldExtractOpFromShr(N, Opc, Opd0, Immr, Imms, BiggerPattern);
   2071 
   2072   case ISD::SIGN_EXTEND_INREG:
   2073     return isBitfieldExtractOpFromSExtInReg(N, Opc, Opd0, Immr, Imms);
   2074   }
   2075 
   2076   unsigned NOpc = N->getMachineOpcode();
   2077   switch (NOpc) {
   2078   default:
   2079     return false;
   2080   case AArch64::SBFMWri:
   2081   case AArch64::UBFMWri:
   2082   case AArch64::SBFMXri:
   2083   case AArch64::UBFMXri:
   2084     Opc = NOpc;
   2085     Opd0 = N->getOperand(0);
   2086     Immr = cast<ConstantSDNode>(N->getOperand(1).getNode())->getZExtValue();
   2087     Imms = cast<ConstantSDNode>(N->getOperand(2).getNode())->getZExtValue();
   2088     return true;
   2089   }
   2090   // Unreachable
   2091   return false;
   2092 }
   2093 
   2094 bool AArch64DAGToDAGISel::tryBitfieldExtractOp(SDNode *N) {
   2095   unsigned Opc, Immr, Imms;
   2096   SDValue Opd0;
   2097   if (!isBitfieldExtractOp(CurDAG, N, Opc, Opd0, Immr, Imms))
   2098     return false;
   2099 
   2100   EVT VT = N->getValueType(0);
   2101   SDLoc dl(N);
   2102 
   2103   // If the bit extract operation is 64bit but the original type is 32bit, we
   2104   // need to add one EXTRACT_SUBREG.
   2105   if ((Opc == AArch64::SBFMXri || Opc == AArch64::UBFMXri) && VT == MVT::i32) {
   2106     SDValue Ops64[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, MVT::i64),
   2107                        CurDAG->getTargetConstant(Imms, dl, MVT::i64)};
   2108 
   2109     SDNode *BFM = CurDAG->getMachineNode(Opc, dl, MVT::i64, Ops64);
   2110     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, dl, MVT::i32);
   2111     ReplaceNode(N, CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, dl,
   2112                                           MVT::i32, SDValue(BFM, 0), SubReg));
   2113     return true;
   2114   }
   2115 
   2116   SDValue Ops[] = {Opd0, CurDAG->getTargetConstant(Immr, dl, VT),
   2117                    CurDAG->getTargetConstant(Imms, dl, VT)};
   2118   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2119   return true;
   2120 }
   2121 
   2122 /// Does DstMask form a complementary pair with the mask provided by
   2123 /// BitsToBeInserted, suitable for use in a BFI instruction. Roughly speaking,
   2124 /// this asks whether DstMask zeroes precisely those bits that will be set by
   2125 /// the other half.
   2126 static bool isBitfieldDstMask(uint64_t DstMask, const APInt &BitsToBeInserted,
   2127                               unsigned NumberOfIgnoredHighBits, EVT VT) {
   2128   assert((VT == MVT::i32 || VT == MVT::i64) &&
   2129          "i32 or i64 mask type expected!");
   2130   unsigned BitWidth = VT.getSizeInBits() - NumberOfIgnoredHighBits;
   2131 
   2132   APInt SignificantDstMask = APInt(BitWidth, DstMask);
   2133   APInt SignificantBitsToBeInserted = BitsToBeInserted.zextOrTrunc(BitWidth);
   2134 
   2135   return (SignificantDstMask & SignificantBitsToBeInserted) == 0 &&
   2136          (SignificantDstMask | SignificantBitsToBeInserted).isAllOnesValue();
   2137 }
   2138 
// Look for bits that will be useful for later uses.
// A bit is considered useless as soon as it is dropped without having been
// used before it was dropped.
// E.g., looking for the useful bits of x
// 1. y = x & 0x7
// 2. z = y >> 2
// After #1, the useful bits of x are 0x7; they then live on through y.
// After #2, the useful bits of x are 0x4.
// However, if x is used by an unpredictable instruction, then all its bits
// are useful.
// E.g.
// 1. y = x & 0x7
// 2. z = y >> 2
// 3. str x, [@x]
// Here #3 makes every bit of x useful.
   2154 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth = 0);
   2155 
   2156 static void getUsefulBitsFromAndWithImmediate(SDValue Op, APInt &UsefulBits,
   2157                                               unsigned Depth) {
   2158   uint64_t Imm =
   2159       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
   2160   Imm = AArch64_AM::decodeLogicalImmediate(Imm, UsefulBits.getBitWidth());
   2161   UsefulBits &= APInt(UsefulBits.getBitWidth(), Imm);
   2162   getUsefulBits(Op, UsefulBits, Depth + 1);
   2163 }
   2164 
   2165 static void getUsefulBitsFromBitfieldMoveOpd(SDValue Op, APInt &UsefulBits,
   2166                                              uint64_t Imm, uint64_t MSB,
   2167                                              unsigned Depth) {
   2168   // inherit the bitwidth value
   2169   APInt OpUsefulBits(UsefulBits);
   2170   OpUsefulBits = 1;
   2171 
   2172   if (MSB >= Imm) {
   2173     OpUsefulBits <<= MSB - Imm + 1;
   2174     --OpUsefulBits;
   2175     // The interesting part will be in the lower part of the result
   2176     getUsefulBits(Op, OpUsefulBits, Depth + 1);
   2177     // The interesting part was starting at Imm in the argument
   2178     OpUsefulBits <<= Imm;
   2179   } else {
   2180     OpUsefulBits <<= MSB + 1;
   2181     --OpUsefulBits;
   2182     // The interesting part will be shifted in the result
   2183     OpUsefulBits <<= OpUsefulBits.getBitWidth() - Imm;
   2184     getUsefulBits(Op, OpUsefulBits, Depth + 1);
   2185     // The interesting part was at zero in the argument
   2186     OpUsefulBits.lshrInPlace(OpUsefulBits.getBitWidth() - Imm);
   2187   }
   2188 
   2189   UsefulBits &= OpUsefulBits;
   2190 }
   2191 
   2192 static void getUsefulBitsFromUBFM(SDValue Op, APInt &UsefulBits,
   2193                                   unsigned Depth) {
   2194   uint64_t Imm =
   2195       cast<const ConstantSDNode>(Op.getOperand(1).getNode())->getZExtValue();
   2196   uint64_t MSB =
   2197       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   2198 
   2199   getUsefulBitsFromBitfieldMoveOpd(Op, UsefulBits, Imm, MSB, Depth);
   2200 }
   2201 
   2202 static void getUsefulBitsFromOrWithShiftedReg(SDValue Op, APInt &UsefulBits,
   2203                                               unsigned Depth) {
   2204   uint64_t ShiftTypeAndValue =
   2205       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   2206   APInt Mask(UsefulBits);
   2207   Mask.clearAllBits();
   2208   Mask.flipAllBits();
   2209 
   2210   if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSL) {
   2211     // Shift Left
   2212     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
   2213     Mask <<= ShiftAmt;
   2214     getUsefulBits(Op, Mask, Depth + 1);
   2215     Mask.lshrInPlace(ShiftAmt);
   2216   } else if (AArch64_AM::getShiftType(ShiftTypeAndValue) == AArch64_AM::LSR) {
   2217     // Shift Right
   2218     // We do not handle AArch64_AM::ASR, because the sign will change the
   2219     // number of useful bits
   2220     uint64_t ShiftAmt = AArch64_AM::getShiftValue(ShiftTypeAndValue);
   2221     Mask.lshrInPlace(ShiftAmt);
   2222     getUsefulBits(Op, Mask, Depth + 1);
   2223     Mask <<= ShiftAmt;
   2224   } else
   2225     return;
   2226 
   2227   UsefulBits &= Mask;
   2228 }
   2229 
   2230 static void getUsefulBitsFromBFM(SDValue Op, SDValue Orig, APInt &UsefulBits,
   2231                                  unsigned Depth) {
   2232   uint64_t Imm =
   2233       cast<const ConstantSDNode>(Op.getOperand(2).getNode())->getZExtValue();
   2234   uint64_t MSB =
   2235       cast<const ConstantSDNode>(Op.getOperand(3).getNode())->getZExtValue();
   2236 
   2237   APInt OpUsefulBits(UsefulBits);
   2238   OpUsefulBits = 1;
   2239 
   2240   APInt ResultUsefulBits(UsefulBits.getBitWidth(), 0);
   2241   ResultUsefulBits.flipAllBits();
   2242   APInt Mask(UsefulBits.getBitWidth(), 0);
   2243 
   2244   getUsefulBits(Op, ResultUsefulBits, Depth + 1);
   2245 
   2246   if (MSB >= Imm) {
   2247     // The instruction is a BFXIL.
   2248     uint64_t Width = MSB - Imm + 1;
   2249     uint64_t LSB = Imm;
   2250 
   2251     OpUsefulBits <<= Width;
   2252     --OpUsefulBits;
   2253 
   2254     if (Op.getOperand(1) == Orig) {
   2255       // Copy the low bits from the result to bits starting from LSB.
   2256       Mask = ResultUsefulBits & OpUsefulBits;
   2257       Mask <<= LSB;
   2258     }
   2259 
   2260     if (Op.getOperand(0) == Orig)
   2261       // Bits starting from LSB in the input contribute to the result.
   2262       Mask |= (ResultUsefulBits & ~OpUsefulBits);
   2263   } else {
   2264     // The instruction is a BFI.
   2265     uint64_t Width = MSB + 1;
   2266     uint64_t LSB = UsefulBits.getBitWidth() - Imm;
   2267 
   2268     OpUsefulBits <<= Width;
   2269     --OpUsefulBits;
   2270     OpUsefulBits <<= LSB;
   2271 
   2272     if (Op.getOperand(1) == Orig) {
   2273       // Copy the bits from the result to the zero bits.
   2274       Mask = ResultUsefulBits & OpUsefulBits;
   2275       Mask.lshrInPlace(LSB);
   2276     }
   2277 
   2278     if (Op.getOperand(0) == Orig)
   2279       Mask |= (ResultUsefulBits & ~OpUsefulBits);
   2280   }
   2281 
   2282   UsefulBits &= Mask;
   2283 }
   2284 
   2285 static void getUsefulBitsForUse(SDNode *UserNode, APInt &UsefulBits,
   2286                                 SDValue Orig, unsigned Depth) {
   2287 
   2288   // Users of this node should have already been instruction selected
   2289   // FIXME: Can we turn that into an assert?
   2290   if (!UserNode->isMachineOpcode())
   2291     return;
   2292 
   2293   switch (UserNode->getMachineOpcode()) {
   2294   default:
   2295     return;
   2296   case AArch64::ANDSWri:
   2297   case AArch64::ANDSXri:
   2298   case AArch64::ANDWri:
   2299   case AArch64::ANDXri:
   2300     // We increment Depth only when we call the getUsefulBits
   2301     return getUsefulBitsFromAndWithImmediate(SDValue(UserNode, 0), UsefulBits,
   2302                                              Depth);
   2303   case AArch64::UBFMWri:
   2304   case AArch64::UBFMXri:
   2305     return getUsefulBitsFromUBFM(SDValue(UserNode, 0), UsefulBits, Depth);
   2306 
   2307   case AArch64::ORRWrs:
   2308   case AArch64::ORRXrs:
   2309     if (UserNode->getOperand(1) != Orig)
   2310       return;
   2311     return getUsefulBitsFromOrWithShiftedReg(SDValue(UserNode, 0), UsefulBits,
   2312                                              Depth);
   2313   case AArch64::BFMWri:
   2314   case AArch64::BFMXri:
   2315     return getUsefulBitsFromBFM(SDValue(UserNode, 0), Orig, UsefulBits, Depth);
   2316 
   2317   case AArch64::STRBBui:
   2318   case AArch64::STURBBi:
   2319     if (UserNode->getOperand(0) != Orig)
   2320       return;
   2321     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xff);
   2322     return;
   2323 
   2324   case AArch64::STRHHui:
   2325   case AArch64::STURHHi:
   2326     if (UserNode->getOperand(0) != Orig)
   2327       return;
   2328     UsefulBits &= APInt(UsefulBits.getBitWidth(), 0xffff);
   2329     return;
   2330   }
   2331 }
   2332 
   2333 static void getUsefulBits(SDValue Op, APInt &UsefulBits, unsigned Depth) {
   2334   if (Depth >= SelectionDAG::MaxRecursionDepth)
   2335     return;
   2336   // Initialize UsefulBits
   2337   if (!Depth) {
   2338     unsigned Bitwidth = Op.getScalarValueSizeInBits();
   2339     // At the beginning, assume every produced bits is useful
   2340     UsefulBits = APInt(Bitwidth, 0);
   2341     UsefulBits.flipAllBits();
   2342   }
   2343   APInt UsersUsefulBits(UsefulBits.getBitWidth(), 0);
   2344 
   2345   for (SDNode *Node : Op.getNode()->uses()) {
   2346     // A use cannot produce useful bits
   2347     APInt UsefulBitsForUse = APInt(UsefulBits);
   2348     getUsefulBitsForUse(Node, UsefulBitsForUse, Op, Depth);
   2349     UsersUsefulBits |= UsefulBitsForUse;
   2350   }
   2351   // UsefulBits contains the produced bits that are meaningful for the
   2352   // current definition, thus a user cannot make a bit meaningful at
   2353   // this point
   2354   UsefulBits &= UsersUsefulBits;
   2355 }
   2356 
   2357 /// Create a machine node performing a notional SHL of Op by ShlAmount. If
   2358 /// ShlAmount is negative, do a (logical) right-shift instead. If ShlAmount is
   2359 /// 0, return Op unchanged.
   2360 static SDValue getLeftShift(SelectionDAG *CurDAG, SDValue Op, int ShlAmount) {
   2361   if (ShlAmount == 0)
   2362     return Op;
   2363 
   2364   EVT VT = Op.getValueType();
   2365   SDLoc dl(Op);
   2366   unsigned BitWidth = VT.getSizeInBits();
   2367   unsigned UBFMOpc = BitWidth == 32 ? AArch64::UBFMWri : AArch64::UBFMXri;
   2368 
   2369   SDNode *ShiftNode;
   2370   if (ShlAmount > 0) {
   2371     // LSL wD, wN, #Amt == UBFM wD, wN, #32-Amt, #31-Amt
   2372     ShiftNode = CurDAG->getMachineNode(
   2373         UBFMOpc, dl, VT, Op,
   2374         CurDAG->getTargetConstant(BitWidth - ShlAmount, dl, VT),
   2375         CurDAG->getTargetConstant(BitWidth - 1 - ShlAmount, dl, VT));
   2376   } else {
   2377     // LSR wD, wN, #Amt == UBFM wD, wN, #Amt, #32-1
   2378     assert(ShlAmount < 0 && "expected right shift");
   2379     int ShrAmount = -ShlAmount;
   2380     ShiftNode = CurDAG->getMachineNode(
   2381         UBFMOpc, dl, VT, Op, CurDAG->getTargetConstant(ShrAmount, dl, VT),
   2382         CurDAG->getTargetConstant(BitWidth - 1, dl, VT));
   2383   }
   2384 
   2385   return SDValue(ShiftNode, 0);
   2386 }
   2387 
   2388 /// Does this tree qualify as an attempt to move a bitfield into position,
   2389 /// essentially "(and (shl VAL, N), Mask)".
   2390 static bool isBitfieldPositioningOp(SelectionDAG *CurDAG, SDValue Op,
   2391                                     bool BiggerPattern,
   2392                                     SDValue &Src, int &ShiftAmount,
   2393                                     int &MaskWidth) {
   2394   EVT VT = Op.getValueType();
   2395   unsigned BitWidth = VT.getSizeInBits();
   2396   (void)BitWidth;
   2397   assert(BitWidth == 32 || BitWidth == 64);
   2398 
   2399   KnownBits Known = CurDAG->computeKnownBits(Op);
   2400 
   2401   // Non-zero in the sense that they're not provably zero, which is the key
   2402   // point if we want to use this value
   2403   uint64_t NonZeroBits = (~Known.Zero).getZExtValue();
   2404 
   2405   // Discard a constant AND mask if present. It's safe because the node will
   2406   // already have been factored into the computeKnownBits calculation above.
   2407   uint64_t AndImm;
   2408   if (isOpcWithIntImmediate(Op.getNode(), ISD::AND, AndImm)) {
   2409     assert((~APInt(BitWidth, AndImm) & ~Known.Zero) == 0);
   2410     Op = Op.getOperand(0);
   2411   }
   2412 
   2413   // Don't match if the SHL has more than one use, since then we'll end up
   2414   // generating SHL+UBFIZ instead of just keeping SHL+AND.
   2415   if (!BiggerPattern && !Op.hasOneUse())
   2416     return false;
   2417 
   2418   uint64_t ShlImm;
   2419   if (!isOpcWithIntImmediate(Op.getNode(), ISD::SHL, ShlImm))
   2420     return false;
   2421   Op = Op.getOperand(0);
   2422 
   2423   if (!isShiftedMask_64(NonZeroBits))
   2424     return false;
   2425 
   2426   ShiftAmount = countTrailingZeros(NonZeroBits);
   2427   MaskWidth = countTrailingOnes(NonZeroBits >> ShiftAmount);
   2428 
   2429   // BFI encompasses sufficiently many nodes that it's worth inserting an extra
   2430   // LSL/LSR if the mask in NonZeroBits doesn't quite match up with the ISD::SHL
   2431   // amount.  BiggerPattern is true when this pattern is being matched for BFI,
   2432   // BiggerPattern is false when this pattern is being matched for UBFIZ, in
   2433   // which case it is not profitable to insert an extra shift.
   2434   if (ShlImm - ShiftAmount != 0 && !BiggerPattern)
   2435     return false;
   2436   Src = getLeftShift(CurDAG, Op, ShlImm - ShiftAmount);
   2437 
   2438   return true;
   2439 }
   2440 
   2441 static bool isShiftedMask(uint64_t Mask, EVT VT) {
   2442   assert(VT == MVT::i32 || VT == MVT::i64);
   2443   if (VT == MVT::i32)
   2444     return isShiftedMask_32(Mask);
   2445   return isShiftedMask_64(Mask);
   2446 }
   2447 
   2448 // Generate a BFI/BFXIL from 'or (and X, MaskImm), OrImm' iff the value being
   2449 // inserted only sets known zero bits.
   2450 static bool tryBitfieldInsertOpFromOrAndImm(SDNode *N, SelectionDAG *CurDAG) {
   2451   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
   2452 
   2453   EVT VT = N->getValueType(0);
   2454   if (VT != MVT::i32 && VT != MVT::i64)
   2455     return false;
   2456 
   2457   unsigned BitWidth = VT.getSizeInBits();
   2458 
   2459   uint64_t OrImm;
   2460   if (!isOpcWithIntImmediate(N, ISD::OR, OrImm))
   2461     return false;
   2462 
   2463   // Skip this transformation if the ORR immediate can be encoded in the ORR.
   2464   // Otherwise, we'll trade an AND+ORR for ORR+BFI/BFXIL, which is most likely
   2465   // performance neutral.
   2466   if (AArch64_AM::isLogicalImmediate(OrImm, BitWidth))
   2467     return false;
   2468 
   2469   uint64_t MaskImm;
   2470   SDValue And = N->getOperand(0);
   2471   // Must be a single use AND with an immediate operand.
   2472   if (!And.hasOneUse() ||
   2473       !isOpcWithIntImmediate(And.getNode(), ISD::AND, MaskImm))
   2474     return false;
   2475 
   2476   // Compute the Known Zero for the AND as this allows us to catch more general
   2477   // cases than just looking for AND with imm.
   2478   KnownBits Known = CurDAG->computeKnownBits(And);
   2479 
   2480   // Non-zero in the sense that they're not provably zero, which is the key
   2481   // point if we want to use this value.
   2482   uint64_t NotKnownZero = (~Known.Zero).getZExtValue();
   2483 
   2484   // The KnownZero mask must be a shifted mask (e.g., 1110..011, 11100..00).
   2485   if (!isShiftedMask(Known.Zero.getZExtValue(), VT))
   2486     return false;
   2487 
   2488   // The bits being inserted must only set those bits that are known to be zero.
   2489   if ((OrImm & NotKnownZero) != 0) {
   2490     // FIXME:  It's okay if the OrImm sets NotKnownZero bits to 1, but we don't
   2491     // currently handle this case.
   2492     return false;
   2493   }
   2494 
   2495   // BFI/BFXIL dst, src, #lsb, #width.
   2496   int LSB = countTrailingOnes(NotKnownZero);
   2497   int Width = BitWidth - APInt(BitWidth, NotKnownZero).countPopulation();
   2498 
   2499   // BFI/BFXIL is an alias of BFM, so translate to BFM operands.
   2500   unsigned ImmR = (BitWidth - LSB) % BitWidth;
   2501   unsigned ImmS = Width - 1;
   2502 
   2503   // If we're creating a BFI instruction avoid cases where we need more
   2504   // instructions to materialize the BFI constant as compared to the original
   2505   // ORR.  A BFXIL will use the same constant as the original ORR, so the code
   2506   // should be no worse in this case.
   2507   bool IsBFI = LSB != 0;
   2508   uint64_t BFIImm = OrImm >> LSB;
   2509   if (IsBFI && !AArch64_AM::isLogicalImmediate(BFIImm, BitWidth)) {
   2510     // We have a BFI instruction and we know the constant can't be materialized
   2511     // with a ORR-immediate with the zero register.
   2512     unsigned OrChunks = 0, BFIChunks = 0;
   2513     for (unsigned Shift = 0; Shift < BitWidth; Shift += 16) {
   2514       if (((OrImm >> Shift) & 0xFFFF) != 0)
   2515         ++OrChunks;
   2516       if (((BFIImm >> Shift) & 0xFFFF) != 0)
   2517         ++BFIChunks;
   2518     }
   2519     if (BFIChunks > OrChunks)
   2520       return false;
   2521   }
   2522 
   2523   // Materialize the constant to be inserted.
   2524   SDLoc DL(N);
   2525   unsigned MOVIOpc = VT == MVT::i32 ? AArch64::MOVi32imm : AArch64::MOVi64imm;
   2526   SDNode *MOVI = CurDAG->getMachineNode(
   2527       MOVIOpc, DL, VT, CurDAG->getTargetConstant(BFIImm, DL, VT));
   2528 
   2529   // Create the BFI/BFXIL instruction.
   2530   SDValue Ops[] = {And.getOperand(0), SDValue(MOVI, 0),
   2531                    CurDAG->getTargetConstant(ImmR, DL, VT),
   2532                    CurDAG->getTargetConstant(ImmS, DL, VT)};
   2533   unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
   2534   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2535   return true;
   2536 }
   2537 
   2538 static bool tryBitfieldInsertOpFromOr(SDNode *N, const APInt &UsefulBits,
   2539                                       SelectionDAG *CurDAG) {
   2540   assert(N->getOpcode() == ISD::OR && "Expect a OR operation");
   2541 
   2542   EVT VT = N->getValueType(0);
   2543   if (VT != MVT::i32 && VT != MVT::i64)
   2544     return false;
   2545 
   2546   unsigned BitWidth = VT.getSizeInBits();
   2547 
   2548   // Because of simplify-demanded-bits in DAGCombine, involved masks may not
   2549   // have the expected shape. Try to undo that.
   2550 
   2551   unsigned NumberOfIgnoredLowBits = UsefulBits.countTrailingZeros();
   2552   unsigned NumberOfIgnoredHighBits = UsefulBits.countLeadingZeros();
   2553 
   2554   // Given a OR operation, check if we have the following pattern
   2555   // ubfm c, b, imm, imm2 (or something that does the same jobs, see
   2556   //                       isBitfieldExtractOp)
   2557   // d = e & mask2 ; where mask is a binary sequence of 1..10..0 and
   2558   //                 countTrailingZeros(mask2) == imm2 - imm + 1
   2559   // f = d | c
   2560   // if yes, replace the OR instruction with:
   2561   // f = BFM Opd0, Opd1, LSB, MSB ; where LSB = imm, and MSB = imm2
   2562 
   2563   // OR is commutative, check all combinations of operand order and values of
   2564   // BiggerPattern, i.e.
   2565   //     Opd0, Opd1, BiggerPattern=false
   2566   //     Opd1, Opd0, BiggerPattern=false
   2567   //     Opd0, Opd1, BiggerPattern=true
   2568   //     Opd1, Opd0, BiggerPattern=true
   2569   // Several of these combinations may match, so check with BiggerPattern=false
   2570   // first since that will produce better results by matching more instructions
   2571   // and/or inserting fewer extra instructions.
   2572   for (int I = 0; I < 4; ++I) {
   2573 
   2574     SDValue Dst, Src;
   2575     unsigned ImmR, ImmS;
   2576     bool BiggerPattern = I / 2;
   2577     SDValue OrOpd0Val = N->getOperand(I % 2);
   2578     SDNode *OrOpd0 = OrOpd0Val.getNode();
   2579     SDValue OrOpd1Val = N->getOperand((I + 1) % 2);
   2580     SDNode *OrOpd1 = OrOpd1Val.getNode();
   2581 
   2582     unsigned BFXOpc;
   2583     int DstLSB, Width;
   2584     if (isBitfieldExtractOp(CurDAG, OrOpd0, BFXOpc, Src, ImmR, ImmS,
   2585                             NumberOfIgnoredLowBits, BiggerPattern)) {
   2586       // Check that the returned opcode is compatible with the pattern,
   2587       // i.e., same type and zero extended (U and not S)
   2588       if ((BFXOpc != AArch64::UBFMXri && VT == MVT::i64) ||
   2589           (BFXOpc != AArch64::UBFMWri && VT == MVT::i32))
   2590         continue;
   2591 
   2592       // Compute the width of the bitfield insertion
   2593       DstLSB = 0;
   2594       Width = ImmS - ImmR + 1;
   2595       // FIXME: This constraint is to catch bitfield insertion we may
   2596       // want to widen the pattern if we want to grab general bitfied
   2597       // move case
   2598       if (Width <= 0)
   2599         continue;
   2600 
   2601       // If the mask on the insertee is correct, we have a BFXIL operation. We
   2602       // can share the ImmR and ImmS values from the already-computed UBFM.
   2603     } else if (isBitfieldPositioningOp(CurDAG, OrOpd0Val,
   2604                                        BiggerPattern,
   2605                                        Src, DstLSB, Width)) {
   2606       ImmR = (BitWidth - DstLSB) % BitWidth;
   2607       ImmS = Width - 1;
   2608     } else
   2609       continue;
   2610 
   2611     // Check the second part of the pattern
   2612     EVT VT = OrOpd1Val.getValueType();
   2613     assert((VT == MVT::i32 || VT == MVT::i64) && "unexpected OR operand");
   2614 
   2615     // Compute the Known Zero for the candidate of the first operand.
   2616     // This allows to catch more general case than just looking for
   2617     // AND with imm. Indeed, simplify-demanded-bits may have removed
   2618     // the AND instruction because it proves it was useless.
   2619     KnownBits Known = CurDAG->computeKnownBits(OrOpd1Val);
   2620 
   2621     // Check if there is enough room for the second operand to appear
   2622     // in the first one
   2623     APInt BitsToBeInserted =
   2624         APInt::getBitsSet(Known.getBitWidth(), DstLSB, DstLSB + Width);
   2625 
   2626     if ((BitsToBeInserted & ~Known.Zero) != 0)
   2627       continue;
   2628 
   2629     // Set the first operand
   2630     uint64_t Imm;
   2631     if (isOpcWithIntImmediate(OrOpd1, ISD::AND, Imm) &&
   2632         isBitfieldDstMask(Imm, BitsToBeInserted, NumberOfIgnoredHighBits, VT))
   2633       // In that case, we can eliminate the AND
   2634       Dst = OrOpd1->getOperand(0);
   2635     else
   2636       // Maybe the AND has been removed by simplify-demanded-bits
   2637       // or is useful because it discards more bits
   2638       Dst = OrOpd1Val;
   2639 
   2640     // both parts match
   2641     SDLoc DL(N);
   2642     SDValue Ops[] = {Dst, Src, CurDAG->getTargetConstant(ImmR, DL, VT),
   2643                      CurDAG->getTargetConstant(ImmS, DL, VT)};
   2644     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
   2645     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2646     return true;
   2647   }
   2648 
   2649   // Generate a BFXIL from 'or (and X, Mask0Imm), (and Y, Mask1Imm)' iff
   2650   // Mask0Imm and ~Mask1Imm are equivalent and one of the MaskImms is a shifted
   2651   // mask (e.g., 0x000ffff0).
   2652   uint64_t Mask0Imm, Mask1Imm;
   2653   SDValue And0 = N->getOperand(0);
   2654   SDValue And1 = N->getOperand(1);
   2655   if (And0.hasOneUse() && And1.hasOneUse() &&
   2656       isOpcWithIntImmediate(And0.getNode(), ISD::AND, Mask0Imm) &&
   2657       isOpcWithIntImmediate(And1.getNode(), ISD::AND, Mask1Imm) &&
   2658       APInt(BitWidth, Mask0Imm) == ~APInt(BitWidth, Mask1Imm) &&
   2659       (isShiftedMask(Mask0Imm, VT) || isShiftedMask(Mask1Imm, VT))) {
   2660 
   2661     // ORR is commutative, so canonicalize to the form 'or (and X, Mask0Imm),
   2662     // (and Y, Mask1Imm)' where Mask1Imm is the shifted mask masking off the
   2663     // bits to be inserted.
   2664     if (isShiftedMask(Mask0Imm, VT)) {
   2665       std::swap(And0, And1);
   2666       std::swap(Mask0Imm, Mask1Imm);
   2667     }
   2668 
   2669     SDValue Src = And1->getOperand(0);
   2670     SDValue Dst = And0->getOperand(0);
   2671     unsigned LSB = countTrailingZeros(Mask1Imm);
   2672     int Width = BitWidth - APInt(BitWidth, Mask0Imm).countPopulation();
   2673 
   2674     // The BFXIL inserts the low-order bits from a source register, so right
   2675     // shift the needed bits into place.
   2676     SDLoc DL(N);
   2677     unsigned ShiftOpc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
   2678     SDNode *LSR = CurDAG->getMachineNode(
   2679         ShiftOpc, DL, VT, Src, CurDAG->getTargetConstant(LSB, DL, VT),
   2680         CurDAG->getTargetConstant(BitWidth - 1, DL, VT));
   2681 
   2682     // BFXIL is an alias of BFM, so translate to BFM operands.
   2683     unsigned ImmR = (BitWidth - LSB) % BitWidth;
   2684     unsigned ImmS = Width - 1;
   2685 
   2686     // Create the BFXIL instruction.
   2687     SDValue Ops[] = {Dst, SDValue(LSR, 0),
   2688                      CurDAG->getTargetConstant(ImmR, DL, VT),
   2689                      CurDAG->getTargetConstant(ImmS, DL, VT)};
   2690     unsigned Opc = (VT == MVT::i32) ? AArch64::BFMWri : AArch64::BFMXri;
   2691     CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2692     return true;
   2693   }
   2694 
   2695   return false;
   2696 }
   2697 
   2698 bool AArch64DAGToDAGISel::tryBitfieldInsertOp(SDNode *N) {
   2699   if (N->getOpcode() != ISD::OR)
   2700     return false;
   2701 
   2702   APInt NUsefulBits;
   2703   getUsefulBits(SDValue(N, 0), NUsefulBits);
   2704 
   2705   // If all bits are not useful, just return UNDEF.
   2706   if (!NUsefulBits) {
   2707     CurDAG->SelectNodeTo(N, TargetOpcode::IMPLICIT_DEF, N->getValueType(0));
   2708     return true;
   2709   }
   2710 
   2711   if (tryBitfieldInsertOpFromOr(N, NUsefulBits, CurDAG))
   2712     return true;
   2713 
   2714   return tryBitfieldInsertOpFromOrAndImm(N, CurDAG);
   2715 }
   2716 
   2717 /// SelectBitfieldInsertInZeroOp - Match a UBFIZ instruction that is the
   2718 /// equivalent of a left shift by a constant amount followed by an and masking
   2719 /// out a contiguous set of bits.
   2720 bool AArch64DAGToDAGISel::tryBitfieldInsertInZeroOp(SDNode *N) {
   2721   if (N->getOpcode() != ISD::AND)
   2722     return false;
   2723 
   2724   EVT VT = N->getValueType(0);
   2725   if (VT != MVT::i32 && VT != MVT::i64)
   2726     return false;
   2727 
   2728   SDValue Op0;
   2729   int DstLSB, Width;
   2730   if (!isBitfieldPositioningOp(CurDAG, SDValue(N, 0), /*BiggerPattern=*/false,
   2731                                Op0, DstLSB, Width))
   2732     return false;
   2733 
   2734   // ImmR is the rotate right amount.
   2735   unsigned ImmR = (VT.getSizeInBits() - DstLSB) % VT.getSizeInBits();
   2736   // ImmS is the most significant bit of the source to be moved.
   2737   unsigned ImmS = Width - 1;
   2738 
   2739   SDLoc DL(N);
   2740   SDValue Ops[] = {Op0, CurDAG->getTargetConstant(ImmR, DL, VT),
   2741                    CurDAG->getTargetConstant(ImmS, DL, VT)};
   2742   unsigned Opc = (VT == MVT::i32) ? AArch64::UBFMWri : AArch64::UBFMXri;
   2743   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2744   return true;
   2745 }
   2746 
   2747 /// tryShiftAmountMod - Take advantage of built-in mod of shift amount in
   2748 /// variable shift/rotate instructions.
   2749 bool AArch64DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
   2750   EVT VT = N->getValueType(0);
   2751 
   2752   unsigned Opc;
   2753   switch (N->getOpcode()) {
   2754   case ISD::ROTR:
   2755     Opc = (VT == MVT::i32) ? AArch64::RORVWr : AArch64::RORVXr;
   2756     break;
   2757   case ISD::SHL:
   2758     Opc = (VT == MVT::i32) ? AArch64::LSLVWr : AArch64::LSLVXr;
   2759     break;
   2760   case ISD::SRL:
   2761     Opc = (VT == MVT::i32) ? AArch64::LSRVWr : AArch64::LSRVXr;
   2762     break;
   2763   case ISD::SRA:
   2764     Opc = (VT == MVT::i32) ? AArch64::ASRVWr : AArch64::ASRVXr;
   2765     break;
   2766   default:
   2767     return false;
   2768   }
   2769 
   2770   uint64_t Size;
   2771   uint64_t Bits;
   2772   if (VT == MVT::i32) {
   2773     Bits = 5;
   2774     Size = 32;
   2775   } else if (VT == MVT::i64) {
   2776     Bits = 6;
   2777     Size = 64;
   2778   } else
   2779     return false;
   2780 
   2781   SDValue ShiftAmt = N->getOperand(1);
   2782   SDLoc DL(N);
   2783   SDValue NewShiftAmt;
   2784 
   2785   // Skip over an extend of the shift amount.
   2786   if (ShiftAmt->getOpcode() == ISD::ZERO_EXTEND ||
   2787       ShiftAmt->getOpcode() == ISD::ANY_EXTEND)
   2788     ShiftAmt = ShiftAmt->getOperand(0);
   2789 
   2790   if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
   2791     SDValue Add0 = ShiftAmt->getOperand(0);
   2792     SDValue Add1 = ShiftAmt->getOperand(1);
   2793     uint64_t Add0Imm;
   2794     uint64_t Add1Imm;
   2795     // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
   2796     // to avoid the ADD/SUB.
   2797     if (isIntImmediate(Add1, Add1Imm) && (Add1Imm % Size == 0))
   2798       NewShiftAmt = Add0;
   2799     // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
   2800     // generate a NEG instead of a SUB of a constant.
   2801     else if (ShiftAmt->getOpcode() == ISD::SUB &&
   2802              isIntImmediate(Add0, Add0Imm) && Add0Imm != 0 &&
   2803              (Add0Imm % Size == 0)) {
   2804       unsigned NegOpc;
   2805       unsigned ZeroReg;
   2806       EVT SubVT = ShiftAmt->getValueType(0);
   2807       if (SubVT == MVT::i32) {
   2808         NegOpc = AArch64::SUBWrr;
   2809         ZeroReg = AArch64::WZR;
   2810       } else {
   2811         assert(SubVT == MVT::i64);
   2812         NegOpc = AArch64::SUBXrr;
   2813         ZeroReg = AArch64::XZR;
   2814       }
   2815       SDValue Zero =
   2816           CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL, ZeroReg, SubVT);
   2817       MachineSDNode *Neg =
   2818           CurDAG->getMachineNode(NegOpc, DL, SubVT, Zero, Add1);
   2819       NewShiftAmt = SDValue(Neg, 0);
   2820     } else
   2821       return false;
   2822   } else {
   2823     // If the shift amount is masked with an AND, check that the mask covers the
   2824     // bits that are implicitly ANDed off by the above opcodes and if so, skip
   2825     // the AND.
   2826     uint64_t MaskImm;
   2827     if (!isOpcWithIntImmediate(ShiftAmt.getNode(), ISD::AND, MaskImm) &&
   2828         !isOpcWithIntImmediate(ShiftAmt.getNode(), AArch64ISD::ANDS, MaskImm))
   2829       return false;
   2830 
   2831     if (countTrailingOnes(MaskImm) < Bits)
   2832       return false;
   2833 
   2834     NewShiftAmt = ShiftAmt->getOperand(0);
   2835   }
   2836 
   2837   // Narrow/widen the shift amount to match the size of the shift operation.
   2838   if (VT == MVT::i32)
   2839     NewShiftAmt = narrowIfNeeded(CurDAG, NewShiftAmt);
   2840   else if (VT == MVT::i64 && NewShiftAmt->getValueType(0) == MVT::i32) {
   2841     SDValue SubReg = CurDAG->getTargetConstant(AArch64::sub_32, DL, MVT::i32);
   2842     MachineSDNode *Ext = CurDAG->getMachineNode(
   2843         AArch64::SUBREG_TO_REG, DL, VT,
   2844         CurDAG->getTargetConstant(0, DL, MVT::i64), NewShiftAmt, SubReg);
   2845     NewShiftAmt = SDValue(Ext, 0);
   2846   }
   2847 
   2848   SDValue Ops[] = {N->getOperand(0), NewShiftAmt};
   2849   CurDAG->SelectNodeTo(N, Opc, VT, Ops);
   2850   return true;
   2851 }
   2852 
   2853 bool
   2854 AArch64DAGToDAGISel::SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos,
   2855                                               unsigned RegWidth) {
   2856   APFloat FVal(0.0);
   2857   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
   2858     FVal = CN->getValueAPF();
   2859   else if (LoadSDNode *LN = dyn_cast<LoadSDNode>(N)) {
   2860     // Some otherwise illegal constants are allowed in this case.
   2861     if (LN->getOperand(1).getOpcode() != AArch64ISD::ADDlow ||
   2862         !isa<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1)))
   2863       return false;
   2864 
   2865     ConstantPoolSDNode *CN =
   2866         dyn_cast<ConstantPoolSDNode>(LN->getOperand(1)->getOperand(1));
   2867     FVal = cast<ConstantFP>(CN->getConstVal())->getValueAPF();
   2868   } else
   2869     return false;
   2870 
   2871   // An FCVT[SU] instruction performs: convertToInt(Val * 2^fbits) where fbits
   2872   // is between 1 and 32 for a destination w-register, or 1 and 64 for an
   2873   // x-register.
   2874   //
   2875   // By this stage, we've detected (fp_to_[su]int (fmul Val, THIS_NODE)) so we
   2876   // want THIS_NODE to be 2^fbits. This is much easier to deal with using
   2877   // integers.
   2878   bool IsExact;
   2879 
   2880   // fbits is between 1 and 64 in the worst-case, which means the fmul
   2881   // could have 2^64 as an actual operand. Need 65 bits of precision.
   2882   APSInt IntVal(65, true);
   2883   FVal.convertToInteger(IntVal, APFloat::rmTowardZero, &IsExact);
   2884 
   2885   // N.b. isPowerOf2 also checks for > 0.
   2886   if (!IsExact || !IntVal.isPowerOf2()) return false;
   2887   unsigned FBits = IntVal.logBase2();
   2888 
   2889   // Checks above should have guaranteed that we haven't lost information in
   2890   // finding FBits, but it must still be in range.
   2891   if (FBits == 0 || FBits > RegWidth) return false;
   2892 
   2893   FixedPos = CurDAG->getTargetConstant(FBits, SDLoc(N), MVT::i32);
   2894   return true;
   2895 }
   2896 
   2897 // Inspects a register string of the form o0:op1:CRn:CRm:op2 gets the fields
   2898 // of the string and obtains the integer values from them and combines these
   2899 // into a single value to be used in the MRS/MSR instruction.
   2900 static int getIntOperandFromRegisterString(StringRef RegString) {
   2901   SmallVector<StringRef, 5> Fields;
   2902   RegString.split(Fields, ':');
   2903 
   2904   if (Fields.size() == 1)
   2905     return -1;
   2906 
   2907   assert(Fields.size() == 5
   2908             && "Invalid number of fields in read register string");
   2909 
   2910   SmallVector<int, 5> Ops;
   2911   bool AllIntFields = true;
   2912 
   2913   for (StringRef Field : Fields) {
   2914     unsigned IntField;
   2915     AllIntFields &= !Field.getAsInteger(10, IntField);
   2916     Ops.push_back(IntField);
   2917   }
   2918 
   2919   assert(AllIntFields &&
   2920           "Unexpected non-integer value in special register string.");
   2921 
   2922   // Need to combine the integer fields of the string into a single value
   2923   // based on the bit encoding of MRS/MSR instruction.
   2924   return (Ops[0] << 14) | (Ops[1] << 11) | (Ops[2] << 7) |
   2925          (Ops[3] << 3) | (Ops[4]);
   2926 }
   2927 
   2928 // Lower the read_register intrinsic to an MRS instruction node if the special
   2929 // register string argument is either of the form detailed in the ALCE (the
   2930 // form described in getIntOperandsFromRegsterString) or is a named register
   2931 // known by the MRS SysReg mapper.
   2932 bool AArch64DAGToDAGISel::tryReadRegister(SDNode *N) {
   2933   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   2934   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   2935   SDLoc DL(N);
   2936 
   2937   int Reg = getIntOperandFromRegisterString(RegString->getString());
   2938   if (Reg != -1) {
   2939     ReplaceNode(N, CurDAG->getMachineNode(
   2940                        AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
   2941                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2942                        N->getOperand(0)));
   2943     return true;
   2944   }
   2945 
   2946   // Use the sysreg mapper to map the remaining possible strings to the
   2947   // value for the register to be used for the instruction operand.
   2948   auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
   2949   if (TheReg && TheReg->Readable &&
   2950       TheReg->haveFeatures(Subtarget->getFeatureBits()))
   2951     Reg = TheReg->Encoding;
   2952   else
   2953     Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
   2954 
   2955   if (Reg != -1) {
   2956     ReplaceNode(N, CurDAG->getMachineNode(
   2957                        AArch64::MRS, DL, N->getSimpleValueType(0), MVT::Other,
   2958                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2959                        N->getOperand(0)));
   2960     return true;
   2961   }
   2962 
   2963   if (RegString->getString() == "pc") {
   2964     ReplaceNode(N, CurDAG->getMachineNode(
   2965                        AArch64::ADR, DL, N->getSimpleValueType(0), MVT::Other,
   2966                        CurDAG->getTargetConstant(0, DL, MVT::i32),
   2967                        N->getOperand(0)));
   2968     return true;
   2969   }
   2970 
   2971   return false;
   2972 }
   2973 
   2974 // Lower the write_register intrinsic to an MSR instruction node if the special
   2975 // register string argument is either of the form detailed in the ALCE (the
   2976 // form described in getIntOperandsFromRegsterString) or is a named register
   2977 // known by the MSR SysReg mapper.
   2978 bool AArch64DAGToDAGISel::tryWriteRegister(SDNode *N) {
   2979   const MDNodeSDNode *MD = dyn_cast<MDNodeSDNode>(N->getOperand(1));
   2980   const MDString *RegString = dyn_cast<MDString>(MD->getMD()->getOperand(0));
   2981   SDLoc DL(N);
   2982 
   2983   int Reg = getIntOperandFromRegisterString(RegString->getString());
   2984   if (Reg != -1) {
   2985     ReplaceNode(
   2986         N, CurDAG->getMachineNode(AArch64::MSR, DL, MVT::Other,
   2987                                   CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   2988                                   N->getOperand(2), N->getOperand(0)));
   2989     return true;
   2990   }
   2991 
   2992   // Check if the register was one of those allowed as the pstatefield value in
   2993   // the MSR (immediate) instruction. To accept the values allowed in the
   2994   // pstatefield for the MSR (immediate) instruction, we also require that an
   2995   // immediate value has been provided as an argument, we know that this is
   2996   // the case as it has been ensured by semantic checking.
   2997   auto PMapper = AArch64PState::lookupPStateByName(RegString->getString());
   2998   if (PMapper) {
   2999     assert (isa<ConstantSDNode>(N->getOperand(2))
   3000               && "Expected a constant integer expression.");
   3001     unsigned Reg = PMapper->Encoding;
   3002     uint64_t Immed = cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
   3003     unsigned State;
   3004     if (Reg == AArch64PState::PAN || Reg == AArch64PState::UAO || Reg == AArch64PState::SSBS) {
   3005       assert(Immed < 2 && "Bad imm");
   3006       State = AArch64::MSRpstateImm1;
   3007     } else {
   3008       assert(Immed < 16 && "Bad imm");
   3009       State = AArch64::MSRpstateImm4;
   3010     }
   3011     ReplaceNode(N, CurDAG->getMachineNode(
   3012                        State, DL, MVT::Other,
   3013                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   3014                        CurDAG->getTargetConstant(Immed, DL, MVT::i16),
   3015                        N->getOperand(0)));
   3016     return true;
   3017   }
   3018 
   3019   // Use the sysreg mapper to attempt to map the remaining possible strings
   3020   // to the value for the register to be used for the MSR (register)
   3021   // instruction operand.
   3022   auto TheReg = AArch64SysReg::lookupSysRegByName(RegString->getString());
   3023   if (TheReg && TheReg->Writeable &&
   3024       TheReg->haveFeatures(Subtarget->getFeatureBits()))
   3025     Reg = TheReg->Encoding;
   3026   else
   3027     Reg = AArch64SysReg::parseGenericRegister(RegString->getString());
   3028   if (Reg != -1) {
   3029     ReplaceNode(N, CurDAG->getMachineNode(
   3030                        AArch64::MSR, DL, MVT::Other,
   3031                        CurDAG->getTargetConstant(Reg, DL, MVT::i32),
   3032                        N->getOperand(2), N->getOperand(0)));
   3033     return true;
   3034   }
   3035 
   3036   return false;
   3037 }
   3038 
   3039 /// We've got special pseudo-instructions for these
   3040 bool AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
   3041   unsigned Opcode;
   3042   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
   3043 
   3044   // Leave IR for LSE if subtarget supports it.
   3045   if (Subtarget->hasLSE()) return false;
   3046 
   3047   if (MemTy == MVT::i8)
   3048     Opcode = AArch64::CMP_SWAP_8;
   3049   else if (MemTy == MVT::i16)
   3050     Opcode = AArch64::CMP_SWAP_16;
   3051   else if (MemTy == MVT::i32)
   3052     Opcode = AArch64::CMP_SWAP_32;
   3053   else if (MemTy == MVT::i64)
   3054     Opcode = AArch64::CMP_SWAP_64;
   3055   else
   3056     llvm_unreachable("Unknown AtomicCmpSwap type");
   3057 
   3058   MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
   3059   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
   3060                    N->getOperand(0)};
   3061   SDNode *CmpSwap = CurDAG->getMachineNode(
   3062       Opcode, SDLoc(N),
   3063       CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
   3064 
   3065   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
   3066   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
   3067 
   3068   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
   3069   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
   3070   CurDAG->RemoveDeadNode(N);
   3071 
   3072   return true;
   3073 }
   3074 
   3075 bool AArch64DAGToDAGISel::SelectSVE8BitLslImm(SDValue N, SDValue &Base,
   3076                                                   SDValue &Offset) {
   3077   auto C = dyn_cast<ConstantSDNode>(N);
   3078   if (!C)
   3079     return false;
   3080 
   3081   auto Ty = N->getValueType(0);
   3082 
   3083   int64_t Imm = C->getSExtValue();
   3084   SDLoc DL(N);
   3085 
   3086   if ((Imm >= -128) && (Imm <= 127)) {
   3087     Base = CurDAG->getTargetConstant(Imm, DL, Ty);
   3088     Offset = CurDAG->getTargetConstant(0, DL, Ty);
   3089     return true;
   3090   }
   3091 
   3092   if (((Imm % 256) == 0) && (Imm >= -32768) && (Imm <= 32512)) {
   3093     Base = CurDAG->getTargetConstant(Imm/256, DL, Ty);
   3094     Offset = CurDAG->getTargetConstant(8, DL, Ty);
   3095     return true;
   3096   }
   3097 
   3098   return false;
   3099 }
   3100 
   3101 bool AArch64DAGToDAGISel::SelectSVEAddSubImm(SDValue N, MVT VT, SDValue &Imm, SDValue &Shift) {
   3102   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
   3103     const int64_t ImmVal = CNode->getSExtValue();
   3104     SDLoc DL(N);
   3105 
   3106     switch (VT.SimpleTy) {
   3107     case MVT::i8:
   3108       // Can always select i8s, no shift, mask the immediate value to
   3109       // deal with sign-extended value from lowering.
   3110       Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
   3111       Imm = CurDAG->getTargetConstant(ImmVal & 0xFF, DL, MVT::i32);
   3112       return true;
   3113     case MVT::i16:
   3114       // i16 values get sign-extended to 32-bits during lowering.
   3115       if ((ImmVal & 0xFF) == ImmVal) {
   3116         Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
   3117         Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
   3118         return true;
   3119       } else if ((ImmVal & 0xFF) == 0) {
   3120         assert((ImmVal >= -32768) && (ImmVal <= 32512));
   3121         Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
   3122         Imm = CurDAG->getTargetConstant((ImmVal >> 8) & 0xFF, DL, MVT::i32);
   3123         return true;
   3124       }
   3125       break;
   3126     case MVT::i32:
   3127     case MVT::i64:
   3128       // Range of immediate won't trigger signedness problems for 32/64b.
   3129       if ((ImmVal & 0xFF) == ImmVal) {
   3130         Shift = CurDAG->getTargetConstant(0, DL, MVT::i32);
   3131         Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
   3132         return true;
   3133       } else if ((ImmVal & 0xFF00) == ImmVal) {
   3134         Shift = CurDAG->getTargetConstant(8, DL, MVT::i32);
   3135         Imm = CurDAG->getTargetConstant(ImmVal >> 8, DL, MVT::i32);
   3136         return true;
   3137       }
   3138       break;
   3139     default:
   3140       break;
   3141     }
   3142   }
   3143 
   3144   return false;
   3145 }
   3146 
   3147 bool AArch64DAGToDAGISel::SelectSVESignedArithImm(SDValue N, SDValue &Imm) {
   3148   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
   3149     int64_t ImmVal = CNode->getSExtValue();
   3150     SDLoc DL(N);
   3151     if (ImmVal >= -128 && ImmVal < 128) {
   3152       Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32);
   3153       return true;
   3154     }
   3155   }
   3156   return false;
   3157 }
   3158 
   3159 bool AArch64DAGToDAGISel::SelectSVEArithImm(SDValue N, MVT VT, SDValue &Imm) {
   3160   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
   3161     uint64_t ImmVal = CNode->getZExtValue();
   3162 
   3163     switch (VT.SimpleTy) {
   3164     case MVT::i8:
   3165       ImmVal &= 0xFF;
   3166       break;
   3167     case MVT::i16:
   3168       ImmVal &= 0xFFFF;
   3169       break;
   3170     case MVT::i32:
   3171       ImmVal &= 0xFFFFFFFF;
   3172       break;
   3173     case MVT::i64:
   3174       break;
   3175     default:
   3176       llvm_unreachable("Unexpected type");
   3177     }
   3178 
   3179     if (ImmVal < 256) {
   3180       Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
   3181       return true;
   3182     }
   3183   }
   3184   return false;
   3185 }
   3186 
   3187 bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm,
   3188                                               bool Invert) {
   3189   if (auto CNode = dyn_cast<ConstantSDNode>(N)) {
   3190     uint64_t ImmVal = CNode->getZExtValue();
   3191     SDLoc DL(N);
   3192 
   3193     if (Invert)
   3194       ImmVal = ~ImmVal;
   3195 
   3196     // Shift mask depending on type size.
   3197     switch (VT.SimpleTy) {
   3198     case MVT::i8:
   3199       ImmVal &= 0xFF;
   3200       ImmVal |= ImmVal << 8;
   3201       ImmVal |= ImmVal << 16;
   3202       ImmVal |= ImmVal << 32;
   3203       break;
   3204     case MVT::i16:
   3205       ImmVal &= 0xFFFF;
   3206       ImmVal |= ImmVal << 16;
   3207       ImmVal |= ImmVal << 32;
   3208       break;
   3209     case MVT::i32:
   3210       ImmVal &= 0xFFFFFFFF;
   3211       ImmVal |= ImmVal << 32;
   3212       break;
   3213     case MVT::i64:
   3214       break;
   3215     default:
   3216       llvm_unreachable("Unexpected type");
   3217     }
   3218 
   3219     uint64_t encoding;
   3220     if (AArch64_AM::processLogicalImmediate(ImmVal, 64, encoding)) {
   3221       Imm = CurDAG->getTargetConstant(encoding, DL, MVT::i64);
   3222       return true;
   3223     }
   3224   }
   3225   return false;
   3226 }
   3227 
   3228 // SVE shift intrinsics allow shift amounts larger than the element's bitwidth.
   3229 // Rather than attempt to normalise everything we can sometimes saturate the
   3230 // shift amount during selection. This function also allows for consistent
   3231 // isel patterns by ensuring the resulting "Imm" node is of the i32 type
   3232 // required by the instructions.
   3233 bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low,
   3234                                             uint64_t High, bool AllowSaturation,
   3235                                             SDValue &Imm) {
   3236   if (auto *CN = dyn_cast<ConstantSDNode>(N)) {
   3237     uint64_t ImmVal = CN->getZExtValue();
   3238 
   3239     // Reject shift amounts that are too small.
   3240     if (ImmVal < Low)
   3241       return false;
   3242 
   3243     // Reject or saturate shift amounts that are too big.
   3244     if (ImmVal > High) {
   3245       if (!AllowSaturation)
   3246         return false;
   3247       ImmVal = High;
   3248     }
   3249 
   3250     Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32);
   3251     return true;
   3252   }
   3253 
   3254   return false;
   3255 }
   3256 
   3257 bool AArch64DAGToDAGISel::trySelectStackSlotTagP(SDNode *N) {
   3258   // tagp(FrameIndex, IRGstack, tag_offset):
   3259   // since the offset between FrameIndex and IRGstack is a compile-time
   3260   // constant, this can be lowered to a single ADDG instruction.
   3261   if (!(isa<FrameIndexSDNode>(N->getOperand(1)))) {
   3262     return false;
   3263   }
   3264 
   3265   SDValue IRG_SP = N->getOperand(2);
   3266   if (IRG_SP->getOpcode() != ISD::INTRINSIC_W_CHAIN ||
   3267       cast<ConstantSDNode>(IRG_SP->getOperand(1))->getZExtValue() !=
   3268           Intrinsic::aarch64_irg_sp) {
   3269     return false;
   3270   }
   3271 
   3272   const TargetLowering *TLI = getTargetLowering();
   3273   SDLoc DL(N);
   3274   int FI = cast<FrameIndexSDNode>(N->getOperand(1))->getIndex();
   3275   SDValue FiOp = CurDAG->getTargetFrameIndex(
   3276       FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   3277   int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
   3278 
   3279   SDNode *Out = CurDAG->getMachineNode(
   3280       AArch64::TAGPstack, DL, MVT::i64,
   3281       {FiOp, CurDAG->getTargetConstant(0, DL, MVT::i64), N->getOperand(2),
   3282        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
   3283   ReplaceNode(N, Out);
   3284   return true;
   3285 }
   3286 
   3287 void AArch64DAGToDAGISel::SelectTagP(SDNode *N) {
   3288   assert(isa<ConstantSDNode>(N->getOperand(3)) &&
   3289          "llvm.aarch64.tagp third argument must be an immediate");
   3290   if (trySelectStackSlotTagP(N))
   3291     return;
   3292   // FIXME: above applies in any case when offset between Op1 and Op2 is a
   3293   // compile-time constant, not just for stack allocations.
   3294 
   3295   // General case for unrelated pointers in Op1 and Op2.
   3296   SDLoc DL(N);
   3297   int TagOffset = cast<ConstantSDNode>(N->getOperand(3))->getZExtValue();
   3298   SDNode *N1 = CurDAG->getMachineNode(AArch64::SUBP, DL, MVT::i64,
   3299                                       {N->getOperand(1), N->getOperand(2)});
   3300   SDNode *N2 = CurDAG->getMachineNode(AArch64::ADDXrr, DL, MVT::i64,
   3301                                       {SDValue(N1, 0), N->getOperand(2)});
   3302   SDNode *N3 = CurDAG->getMachineNode(
   3303       AArch64::ADDG, DL, MVT::i64,
   3304       {SDValue(N2, 0), CurDAG->getTargetConstant(0, DL, MVT::i64),
   3305        CurDAG->getTargetConstant(TagOffset, DL, MVT::i64)});
   3306   ReplaceNode(N, N3);
   3307 }
   3308 
   3309 // NOTE: We cannot use EXTRACT_SUBREG in all cases because the fixed length
   3310 // vector types larger than NEON don't have a matching SubRegIndex.
   3311 static SDNode *extractSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
   3312   assert(V.getValueType().isScalableVector() &&
   3313          V.getValueType().getSizeInBits().getKnownMinSize() ==
   3314              AArch64::SVEBitsPerBlock &&
   3315          "Expected to extract from a packed scalable vector!");
   3316   assert(VT.isFixedLengthVector() &&
   3317          "Expected to extract a fixed length vector!");
   3318 
   3319   SDLoc DL(V);
   3320   switch (VT.getSizeInBits()) {
   3321   case 64: {
   3322     auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
   3323     return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
   3324   }
   3325   case 128: {
   3326     auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
   3327     return DAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT, V, SubReg);
   3328   }
   3329   default: {
   3330     auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
   3331     return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
   3332   }
   3333   }
   3334 }
   3335 
   3336 // NOTE: We cannot use INSERT_SUBREG in all cases because the fixed length
   3337 // vector types larger than NEON don't have a matching SubRegIndex.
   3338 static SDNode *insertSubReg(SelectionDAG *DAG, EVT VT, SDValue V) {
   3339   assert(VT.isScalableVector() &&
   3340          VT.getSizeInBits().getKnownMinSize() == AArch64::SVEBitsPerBlock &&
   3341          "Expected to insert into a packed scalable vector!");
   3342   assert(V.getValueType().isFixedLengthVector() &&
   3343          "Expected to insert a fixed length vector!");
   3344 
   3345   SDLoc DL(V);
   3346   switch (V.getValueType().getSizeInBits()) {
   3347   case 64: {
   3348     auto SubReg = DAG->getTargetConstant(AArch64::dsub, DL, MVT::i32);
   3349     auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
   3350     return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
   3351                                SDValue(Container, 0), V, SubReg);
   3352   }
   3353   case 128: {
   3354     auto SubReg = DAG->getTargetConstant(AArch64::zsub, DL, MVT::i32);
   3355     auto Container = DAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, VT);
   3356     return DAG->getMachineNode(TargetOpcode::INSERT_SUBREG, DL, VT,
   3357                                SDValue(Container, 0), V, SubReg);
   3358   }
   3359   default: {
   3360     auto RC = DAG->getTargetConstant(AArch64::ZPRRegClassID, DL, MVT::i64);
   3361     return DAG->getMachineNode(TargetOpcode::COPY_TO_REGCLASS, DL, VT, V, RC);
   3362   }
   3363   }
   3364 }
   3365 
   3366 void AArch64DAGToDAGISel::Select(SDNode *Node) {
   3367   // If we have a custom node, we already have selected!
   3368   if (Node->isMachineOpcode()) {
   3369     LLVM_DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
   3370     Node->setNodeId(-1);
   3371     return;
   3372   }
   3373 
   3374   // Few custom selection stuff.
   3375   EVT VT = Node->getValueType(0);
   3376 
   3377   switch (Node->getOpcode()) {
   3378   default:
   3379     break;
   3380 
   3381   case ISD::ATOMIC_CMP_SWAP:
   3382     if (SelectCMP_SWAP(Node))
   3383       return;
   3384     break;
   3385 
   3386   case ISD::READ_REGISTER:
   3387     if (tryReadRegister(Node))
   3388       return;
   3389     break;
   3390 
   3391   case ISD::WRITE_REGISTER:
   3392     if (tryWriteRegister(Node))
   3393       return;
   3394     break;
   3395 
   3396   case ISD::ADD:
   3397     if (tryMLAV64LaneV128(Node))
   3398       return;
   3399     break;
   3400 
   3401   case ISD::LOAD: {
   3402     // Try to select as an indexed load. Fall through to normal processing
   3403     // if we can't.
   3404     if (tryIndexedLoad(Node))
   3405       return;
   3406     break;
   3407   }
   3408 
   3409   case ISD::SRL:
   3410   case ISD::AND:
   3411   case ISD::SRA:
   3412   case ISD::SIGN_EXTEND_INREG:
   3413     if (tryBitfieldExtractOp(Node))
   3414       return;
   3415     if (tryBitfieldInsertInZeroOp(Node))
   3416       return;
   3417     LLVM_FALLTHROUGH;
   3418   case ISD::ROTR:
   3419   case ISD::SHL:
   3420     if (tryShiftAmountMod(Node))
   3421       return;
   3422     break;
   3423 
   3424   case ISD::SIGN_EXTEND:
   3425     if (tryBitfieldExtractOpFromSExt(Node))
   3426       return;
   3427     break;
   3428 
   3429   case ISD::FP_EXTEND:
   3430     if (tryHighFPExt(Node))
   3431       return;
   3432     break;
   3433 
   3434   case ISD::OR:
   3435     if (tryBitfieldInsertOp(Node))
   3436       return;
   3437     break;
   3438 
   3439   case ISD::EXTRACT_SUBVECTOR: {
   3440     // Bail when not a "cast" like extract_subvector.
   3441     if (cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue() != 0)
   3442       break;
   3443 
   3444     // Bail when normal isel can do the job.
   3445     EVT InVT = Node->getOperand(0).getValueType();
   3446     if (VT.isScalableVector() || InVT.isFixedLengthVector())
   3447       break;
   3448 
   3449     // NOTE: We can only get here when doing fixed length SVE code generation.
   3450     // We do manual selection because the types involved are not linked to real
   3451     // registers (despite being legal) and must be coerced into SVE registers.
   3452     //
   3453     // NOTE: If the above changes, be aware that selection will still not work
   3454     // because the td definition of extract_vector does not support extracting
   3455     // a fixed length vector from a scalable vector.
   3456 
   3457     ReplaceNode(Node, extractSubReg(CurDAG, VT, Node->getOperand(0)));
   3458     return;
   3459   }
   3460 
   3461   case ISD::INSERT_SUBVECTOR: {
   3462     // Bail when not a "cast" like insert_subvector.
   3463     if (cast<ConstantSDNode>(Node->getOperand(2))->getZExtValue() != 0)
   3464       break;
   3465     if (!Node->getOperand(0).isUndef())
   3466       break;
   3467 
   3468     // Bail when normal isel should do the job.
   3469     EVT InVT = Node->getOperand(1).getValueType();
   3470     if (VT.isFixedLengthVector() || InVT.isScalableVector())
   3471       break;
   3472 
   3473     // NOTE: We can only get here when doing fixed length SVE code generation.
   3474     // We do manual selection because the types involved are not linked to real
   3475     // registers (despite being legal) and must be coerced into SVE registers.
   3476     //
   3477     // NOTE: If the above changes, be aware that selection will still not work
   3478     // because the td definition of insert_vector does not support inserting a
   3479     // fixed length vector into a scalable vector.
   3480 
   3481     ReplaceNode(Node, insertSubReg(CurDAG, VT, Node->getOperand(1)));
   3482     return;
   3483   }
   3484 
   3485   case ISD::Constant: {
   3486     // Materialize zero constants as copies from WZR/XZR.  This allows
   3487     // the coalescer to propagate these into other instructions.
   3488     ConstantSDNode *ConstNode = cast<ConstantSDNode>(Node);
   3489     if (ConstNode->isNullValue()) {
   3490       if (VT == MVT::i32) {
   3491         SDValue New = CurDAG->getCopyFromReg(
   3492             CurDAG->getEntryNode(), SDLoc(Node), AArch64::WZR, MVT::i32);
   3493         ReplaceNode(Node, New.getNode());
   3494         return;
   3495       } else if (VT == MVT::i64) {
   3496         SDValue New = CurDAG->getCopyFromReg(
   3497             CurDAG->getEntryNode(), SDLoc(Node), AArch64::XZR, MVT::i64);
   3498         ReplaceNode(Node, New.getNode());
   3499         return;
   3500       }
   3501     }
   3502     break;
   3503   }
   3504 
   3505   case ISD::FrameIndex: {
   3506     // Selects to ADDXri FI, 0 which in turn will become ADDXri SP, imm.
   3507     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
   3508     unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 0);
   3509     const TargetLowering *TLI = getTargetLowering();
   3510     SDValue TFI = CurDAG->getTargetFrameIndex(
   3511         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
   3512     SDLoc DL(Node);
   3513     SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, DL, MVT::i32),
   3514                       CurDAG->getTargetConstant(Shifter, DL, MVT::i32) };
   3515     CurDAG->SelectNodeTo(Node, AArch64::ADDXri, MVT::i64, Ops);
   3516     return;
   3517   }
   3518   case ISD::INTRINSIC_W_CHAIN: {
   3519     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
   3520     switch (IntNo) {
   3521     default:
   3522       break;
   3523     case Intrinsic::aarch64_ldaxp:
   3524     case Intrinsic::aarch64_ldxp: {
   3525       unsigned Op =
   3526           IntNo == Intrinsic::aarch64_ldaxp ? AArch64::LDAXPX : AArch64::LDXPX;
   3527       SDValue MemAddr = Node->getOperand(2);
   3528       SDLoc DL(Node);
   3529       SDValue Chain = Node->getOperand(0);
   3530 
   3531       SDNode *Ld = CurDAG->getMachineNode(Op, DL, MVT::i64, MVT::i64,
   3532                                           MVT::Other, MemAddr, Chain);
   3533 
   3534       // Transfer memoperands.
   3535       MachineMemOperand *MemOp =
   3536           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
   3537       CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});
   3538       ReplaceNode(Node, Ld);
   3539       return;
   3540     }
   3541     case Intrinsic::aarch64_stlxp:
   3542     case Intrinsic::aarch64_stxp: {
   3543       unsigned Op =
   3544           IntNo == Intrinsic::aarch64_stlxp ? AArch64::STLXPX : AArch64::STXPX;
   3545       SDLoc DL(Node);
   3546       SDValue Chain = Node->getOperand(0);
   3547       SDValue ValLo = Node->getOperand(2);
   3548       SDValue ValHi = Node->getOperand(3);
   3549       SDValue MemAddr = Node->getOperand(4);
   3550 
   3551       // Place arguments in the right order.
   3552       SDValue Ops[] = {ValLo, ValHi, MemAddr, Chain};
   3553 
   3554       SDNode *St = CurDAG->getMachineNode(Op, DL, MVT::i32, MVT::Other, Ops);
   3555       // Transfer memoperands.
   3556       MachineMemOperand *MemOp =
   3557           cast<MemIntrinsicSDNode>(Node)->getMemOperand();
   3558       CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});
   3559 
   3560       ReplaceNode(Node, St);
   3561       return;
   3562     }
   3563     case Intrinsic::aarch64_neon_ld1x2:
   3564       if (VT == MVT::v8i8) {
   3565         SelectLoad(Node, 2, AArch64::LD1Twov8b, AArch64::dsub0);
   3566         return;
   3567       } else if (VT == MVT::v16i8) {
   3568         SelectLoad(Node, 2, AArch64::LD1Twov16b, AArch64::qsub0);
   3569         return;
   3570       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3571         SelectLoad(Node, 2, AArch64::LD1Twov4h, AArch64::dsub0);
   3572         return;
   3573       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3574         SelectLoad(Node, 2, AArch64::LD1Twov8h, AArch64::qsub0);
   3575         return;
   3576       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3577         SelectLoad(Node, 2, AArch64::LD1Twov2s, AArch64::dsub0);
   3578         return;
   3579       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3580         SelectLoad(Node, 2, AArch64::LD1Twov4s, AArch64::qsub0);
   3581         return;
   3582       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3583         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
   3584         return;
   3585       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3586         SelectLoad(Node, 2, AArch64::LD1Twov2d, AArch64::qsub0);
   3587         return;
   3588       }
   3589       break;
   3590     case Intrinsic::aarch64_neon_ld1x3:
   3591       if (VT == MVT::v8i8) {
   3592         SelectLoad(Node, 3, AArch64::LD1Threev8b, AArch64::dsub0);
   3593         return;
   3594       } else if (VT == MVT::v16i8) {
   3595         SelectLoad(Node, 3, AArch64::LD1Threev16b, AArch64::qsub0);
   3596         return;
   3597       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3598         SelectLoad(Node, 3, AArch64::LD1Threev4h, AArch64::dsub0);
   3599         return;
   3600       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3601         SelectLoad(Node, 3, AArch64::LD1Threev8h, AArch64::qsub0);
   3602         return;
   3603       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3604         SelectLoad(Node, 3, AArch64::LD1Threev2s, AArch64::dsub0);
   3605         return;
   3606       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3607         SelectLoad(Node, 3, AArch64::LD1Threev4s, AArch64::qsub0);
   3608         return;
   3609       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3610         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
   3611         return;
   3612       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3613         SelectLoad(Node, 3, AArch64::LD1Threev2d, AArch64::qsub0);
   3614         return;
   3615       }
   3616       break;
   3617     case Intrinsic::aarch64_neon_ld1x4:
   3618       if (VT == MVT::v8i8) {
   3619         SelectLoad(Node, 4, AArch64::LD1Fourv8b, AArch64::dsub0);
   3620         return;
   3621       } else if (VT == MVT::v16i8) {
   3622         SelectLoad(Node, 4, AArch64::LD1Fourv16b, AArch64::qsub0);
   3623         return;
   3624       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3625         SelectLoad(Node, 4, AArch64::LD1Fourv4h, AArch64::dsub0);
   3626         return;
   3627       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3628         SelectLoad(Node, 4, AArch64::LD1Fourv8h, AArch64::qsub0);
   3629         return;
   3630       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3631         SelectLoad(Node, 4, AArch64::LD1Fourv2s, AArch64::dsub0);
   3632         return;
   3633       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3634         SelectLoad(Node, 4, AArch64::LD1Fourv4s, AArch64::qsub0);
   3635         return;
   3636       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3637         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
   3638         return;
   3639       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3640         SelectLoad(Node, 4, AArch64::LD1Fourv2d, AArch64::qsub0);
   3641         return;
   3642       }
   3643       break;
   3644     case Intrinsic::aarch64_neon_ld2:
   3645       if (VT == MVT::v8i8) {
   3646         SelectLoad(Node, 2, AArch64::LD2Twov8b, AArch64::dsub0);
   3647         return;
   3648       } else if (VT == MVT::v16i8) {
   3649         SelectLoad(Node, 2, AArch64::LD2Twov16b, AArch64::qsub0);
   3650         return;
   3651       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3652         SelectLoad(Node, 2, AArch64::LD2Twov4h, AArch64::dsub0);
   3653         return;
   3654       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3655         SelectLoad(Node, 2, AArch64::LD2Twov8h, AArch64::qsub0);
   3656         return;
   3657       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3658         SelectLoad(Node, 2, AArch64::LD2Twov2s, AArch64::dsub0);
   3659         return;
   3660       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3661         SelectLoad(Node, 2, AArch64::LD2Twov4s, AArch64::qsub0);
   3662         return;
   3663       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3664         SelectLoad(Node, 2, AArch64::LD1Twov1d, AArch64::dsub0);
   3665         return;
   3666       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3667         SelectLoad(Node, 2, AArch64::LD2Twov2d, AArch64::qsub0);
   3668         return;
   3669       }
   3670       break;
   3671     case Intrinsic::aarch64_neon_ld3:
   3672       if (VT == MVT::v8i8) {
   3673         SelectLoad(Node, 3, AArch64::LD3Threev8b, AArch64::dsub0);
   3674         return;
   3675       } else if (VT == MVT::v16i8) {
   3676         SelectLoad(Node, 3, AArch64::LD3Threev16b, AArch64::qsub0);
   3677         return;
   3678       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3679         SelectLoad(Node, 3, AArch64::LD3Threev4h, AArch64::dsub0);
   3680         return;
   3681       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3682         SelectLoad(Node, 3, AArch64::LD3Threev8h, AArch64::qsub0);
   3683         return;
   3684       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3685         SelectLoad(Node, 3, AArch64::LD3Threev2s, AArch64::dsub0);
   3686         return;
   3687       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3688         SelectLoad(Node, 3, AArch64::LD3Threev4s, AArch64::qsub0);
   3689         return;
   3690       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3691         SelectLoad(Node, 3, AArch64::LD1Threev1d, AArch64::dsub0);
   3692         return;
   3693       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3694         SelectLoad(Node, 3, AArch64::LD3Threev2d, AArch64::qsub0);
   3695         return;
   3696       }
   3697       break;
   3698     case Intrinsic::aarch64_neon_ld4:
   3699       if (VT == MVT::v8i8) {
   3700         SelectLoad(Node, 4, AArch64::LD4Fourv8b, AArch64::dsub0);
   3701         return;
   3702       } else if (VT == MVT::v16i8) {
   3703         SelectLoad(Node, 4, AArch64::LD4Fourv16b, AArch64::qsub0);
   3704         return;
   3705       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3706         SelectLoad(Node, 4, AArch64::LD4Fourv4h, AArch64::dsub0);
   3707         return;
   3708       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3709         SelectLoad(Node, 4, AArch64::LD4Fourv8h, AArch64::qsub0);
   3710         return;
   3711       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3712         SelectLoad(Node, 4, AArch64::LD4Fourv2s, AArch64::dsub0);
   3713         return;
   3714       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3715         SelectLoad(Node, 4, AArch64::LD4Fourv4s, AArch64::qsub0);
   3716         return;
   3717       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3718         SelectLoad(Node, 4, AArch64::LD1Fourv1d, AArch64::dsub0);
   3719         return;
   3720       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3721         SelectLoad(Node, 4, AArch64::LD4Fourv2d, AArch64::qsub0);
   3722         return;
   3723       }
   3724       break;
   3725     case Intrinsic::aarch64_neon_ld2r:
   3726       if (VT == MVT::v8i8) {
   3727         SelectLoad(Node, 2, AArch64::LD2Rv8b, AArch64::dsub0);
   3728         return;
   3729       } else if (VT == MVT::v16i8) {
   3730         SelectLoad(Node, 2, AArch64::LD2Rv16b, AArch64::qsub0);
   3731         return;
   3732       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3733         SelectLoad(Node, 2, AArch64::LD2Rv4h, AArch64::dsub0);
   3734         return;
   3735       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3736         SelectLoad(Node, 2, AArch64::LD2Rv8h, AArch64::qsub0);
   3737         return;
   3738       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3739         SelectLoad(Node, 2, AArch64::LD2Rv2s, AArch64::dsub0);
   3740         return;
   3741       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3742         SelectLoad(Node, 2, AArch64::LD2Rv4s, AArch64::qsub0);
   3743         return;
   3744       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3745         SelectLoad(Node, 2, AArch64::LD2Rv1d, AArch64::dsub0);
   3746         return;
   3747       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3748         SelectLoad(Node, 2, AArch64::LD2Rv2d, AArch64::qsub0);
   3749         return;
   3750       }
   3751       break;
   3752     case Intrinsic::aarch64_neon_ld3r:
   3753       if (VT == MVT::v8i8) {
   3754         SelectLoad(Node, 3, AArch64::LD3Rv8b, AArch64::dsub0);
   3755         return;
   3756       } else if (VT == MVT::v16i8) {
   3757         SelectLoad(Node, 3, AArch64::LD3Rv16b, AArch64::qsub0);
   3758         return;
   3759       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3760         SelectLoad(Node, 3, AArch64::LD3Rv4h, AArch64::dsub0);
   3761         return;
   3762       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3763         SelectLoad(Node, 3, AArch64::LD3Rv8h, AArch64::qsub0);
   3764         return;
   3765       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3766         SelectLoad(Node, 3, AArch64::LD3Rv2s, AArch64::dsub0);
   3767         return;
   3768       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3769         SelectLoad(Node, 3, AArch64::LD3Rv4s, AArch64::qsub0);
   3770         return;
   3771       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3772         SelectLoad(Node, 3, AArch64::LD3Rv1d, AArch64::dsub0);
   3773         return;
   3774       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3775         SelectLoad(Node, 3, AArch64::LD3Rv2d, AArch64::qsub0);
   3776         return;
   3777       }
   3778       break;
   3779     case Intrinsic::aarch64_neon_ld4r:
   3780       if (VT == MVT::v8i8) {
   3781         SelectLoad(Node, 4, AArch64::LD4Rv8b, AArch64::dsub0);
   3782         return;
   3783       } else if (VT == MVT::v16i8) {
   3784         SelectLoad(Node, 4, AArch64::LD4Rv16b, AArch64::qsub0);
   3785         return;
   3786       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   3787         SelectLoad(Node, 4, AArch64::LD4Rv4h, AArch64::dsub0);
   3788         return;
   3789       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   3790         SelectLoad(Node, 4, AArch64::LD4Rv8h, AArch64::qsub0);
   3791         return;
   3792       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3793         SelectLoad(Node, 4, AArch64::LD4Rv2s, AArch64::dsub0);
   3794         return;
   3795       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3796         SelectLoad(Node, 4, AArch64::LD4Rv4s, AArch64::qsub0);
   3797         return;
   3798       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3799         SelectLoad(Node, 4, AArch64::LD4Rv1d, AArch64::dsub0);
   3800         return;
   3801       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3802         SelectLoad(Node, 4, AArch64::LD4Rv2d, AArch64::qsub0);
   3803         return;
   3804       }
   3805       break;
   3806     case Intrinsic::aarch64_neon_ld2lane:
   3807       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   3808         SelectLoadLane(Node, 2, AArch64::LD2i8);
   3809         return;
   3810       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   3811                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   3812         SelectLoadLane(Node, 2, AArch64::LD2i16);
   3813         return;
   3814       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   3815                  VT == MVT::v2f32) {
   3816         SelectLoadLane(Node, 2, AArch64::LD2i32);
   3817         return;
   3818       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   3819                  VT == MVT::v1f64) {
   3820         SelectLoadLane(Node, 2, AArch64::LD2i64);
   3821         return;
   3822       }
   3823       break;
   3824     case Intrinsic::aarch64_neon_ld3lane:
   3825       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   3826         SelectLoadLane(Node, 3, AArch64::LD3i8);
   3827         return;
   3828       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   3829                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   3830         SelectLoadLane(Node, 3, AArch64::LD3i16);
   3831         return;
   3832       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   3833                  VT == MVT::v2f32) {
   3834         SelectLoadLane(Node, 3, AArch64::LD3i32);
   3835         return;
   3836       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   3837                  VT == MVT::v1f64) {
   3838         SelectLoadLane(Node, 3, AArch64::LD3i64);
   3839         return;
   3840       }
   3841       break;
   3842     case Intrinsic::aarch64_neon_ld4lane:
   3843       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   3844         SelectLoadLane(Node, 4, AArch64::LD4i8);
   3845         return;
   3846       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   3847                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   3848         SelectLoadLane(Node, 4, AArch64::LD4i16);
   3849         return;
   3850       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   3851                  VT == MVT::v2f32) {
   3852         SelectLoadLane(Node, 4, AArch64::LD4i32);
   3853         return;
   3854       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   3855                  VT == MVT::v1f64) {
   3856         SelectLoadLane(Node, 4, AArch64::LD4i64);
   3857         return;
   3858       }
   3859       break;
   3860     case Intrinsic::aarch64_ld64b:
   3861       SelectLoad(Node, 8, AArch64::LD64B, AArch64::x8sub_0);
   3862       return;
   3863     }
   3864   } break;
   3865   case ISD::INTRINSIC_WO_CHAIN: {
   3866     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(0))->getZExtValue();
   3867     switch (IntNo) {
   3868     default:
   3869       break;
   3870     case Intrinsic::aarch64_tagp:
   3871       SelectTagP(Node);
   3872       return;
   3873     case Intrinsic::aarch64_neon_tbl2:
   3874       SelectTable(Node, 2,
   3875                   VT == MVT::v8i8 ? AArch64::TBLv8i8Two : AArch64::TBLv16i8Two,
   3876                   false);
   3877       return;
   3878     case Intrinsic::aarch64_neon_tbl3:
   3879       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBLv8i8Three
   3880                                            : AArch64::TBLv16i8Three,
   3881                   false);
   3882       return;
   3883     case Intrinsic::aarch64_neon_tbl4:
   3884       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBLv8i8Four
   3885                                            : AArch64::TBLv16i8Four,
   3886                   false);
   3887       return;
   3888     case Intrinsic::aarch64_neon_tbx2:
   3889       SelectTable(Node, 2,
   3890                   VT == MVT::v8i8 ? AArch64::TBXv8i8Two : AArch64::TBXv16i8Two,
   3891                   true);
   3892       return;
   3893     case Intrinsic::aarch64_neon_tbx3:
   3894       SelectTable(Node, 3, VT == MVT::v8i8 ? AArch64::TBXv8i8Three
   3895                                            : AArch64::TBXv16i8Three,
   3896                   true);
   3897       return;
   3898     case Intrinsic::aarch64_neon_tbx4:
   3899       SelectTable(Node, 4, VT == MVT::v8i8 ? AArch64::TBXv8i8Four
   3900                                            : AArch64::TBXv16i8Four,
   3901                   true);
   3902       return;
   3903     case Intrinsic::aarch64_neon_smull:
   3904     case Intrinsic::aarch64_neon_umull:
   3905       if (tryMULLV64LaneV128(IntNo, Node))
   3906         return;
   3907       break;
   3908     case Intrinsic::swift_async_context_addr: {
   3909       SDLoc DL(Node);
   3910       CurDAG->SelectNodeTo(Node, AArch64::SUBXri, MVT::i64,
   3911                            CurDAG->getCopyFromReg(CurDAG->getEntryNode(), DL,
   3912                                                   AArch64::FP, MVT::i64),
   3913                            CurDAG->getTargetConstant(8, DL, MVT::i32),
   3914                            CurDAG->getTargetConstant(0, DL, MVT::i32));
   3915       auto &MF = CurDAG->getMachineFunction();
   3916       MF.getFrameInfo().setFrameAddressIsTaken(true);
   3917       MF.getInfo<AArch64FunctionInfo>()->setHasSwiftAsyncContext(true);
   3918       return;
   3919     }
   3920     }
   3921     break;
   3922   }
   3923   case ISD::INTRINSIC_VOID: {
   3924     unsigned IntNo = cast<ConstantSDNode>(Node->getOperand(1))->getZExtValue();
   3925     if (Node->getNumOperands() >= 3)
   3926       VT = Node->getOperand(2)->getValueType(0);
   3927     switch (IntNo) {
   3928     default:
   3929       break;
   3930     case Intrinsic::aarch64_neon_st1x2: {
   3931       if (VT == MVT::v8i8) {
   3932         SelectStore(Node, 2, AArch64::ST1Twov8b);
   3933         return;
   3934       } else if (VT == MVT::v16i8) {
   3935         SelectStore(Node, 2, AArch64::ST1Twov16b);
   3936         return;
   3937       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
   3938                  VT == MVT::v4bf16) {
   3939         SelectStore(Node, 2, AArch64::ST1Twov4h);
   3940         return;
   3941       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
   3942                  VT == MVT::v8bf16) {
   3943         SelectStore(Node, 2, AArch64::ST1Twov8h);
   3944         return;
   3945       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3946         SelectStore(Node, 2, AArch64::ST1Twov2s);
   3947         return;
   3948       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3949         SelectStore(Node, 2, AArch64::ST1Twov4s);
   3950         return;
   3951       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3952         SelectStore(Node, 2, AArch64::ST1Twov2d);
   3953         return;
   3954       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3955         SelectStore(Node, 2, AArch64::ST1Twov1d);
   3956         return;
   3957       }
   3958       break;
   3959     }
   3960     case Intrinsic::aarch64_neon_st1x3: {
   3961       if (VT == MVT::v8i8) {
   3962         SelectStore(Node, 3, AArch64::ST1Threev8b);
   3963         return;
   3964       } else if (VT == MVT::v16i8) {
   3965         SelectStore(Node, 3, AArch64::ST1Threev16b);
   3966         return;
   3967       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
   3968                  VT == MVT::v4bf16) {
   3969         SelectStore(Node, 3, AArch64::ST1Threev4h);
   3970         return;
   3971       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
   3972                  VT == MVT::v8bf16) {
   3973         SelectStore(Node, 3, AArch64::ST1Threev8h);
   3974         return;
   3975       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   3976         SelectStore(Node, 3, AArch64::ST1Threev2s);
   3977         return;
   3978       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   3979         SelectStore(Node, 3, AArch64::ST1Threev4s);
   3980         return;
   3981       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   3982         SelectStore(Node, 3, AArch64::ST1Threev2d);
   3983         return;
   3984       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   3985         SelectStore(Node, 3, AArch64::ST1Threev1d);
   3986         return;
   3987       }
   3988       break;
   3989     }
   3990     case Intrinsic::aarch64_neon_st1x4: {
   3991       if (VT == MVT::v8i8) {
   3992         SelectStore(Node, 4, AArch64::ST1Fourv8b);
   3993         return;
   3994       } else if (VT == MVT::v16i8) {
   3995         SelectStore(Node, 4, AArch64::ST1Fourv16b);
   3996         return;
   3997       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
   3998                  VT == MVT::v4bf16) {
   3999         SelectStore(Node, 4, AArch64::ST1Fourv4h);
   4000         return;
   4001       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
   4002                  VT == MVT::v8bf16) {
   4003         SelectStore(Node, 4, AArch64::ST1Fourv8h);
   4004         return;
   4005       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4006         SelectStore(Node, 4, AArch64::ST1Fourv2s);
   4007         return;
   4008       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4009         SelectStore(Node, 4, AArch64::ST1Fourv4s);
   4010         return;
   4011       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4012         SelectStore(Node, 4, AArch64::ST1Fourv2d);
   4013         return;
   4014       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4015         SelectStore(Node, 4, AArch64::ST1Fourv1d);
   4016         return;
   4017       }
   4018       break;
   4019     }
   4020     case Intrinsic::aarch64_neon_st2: {
   4021       if (VT == MVT::v8i8) {
   4022         SelectStore(Node, 2, AArch64::ST2Twov8b);
   4023         return;
   4024       } else if (VT == MVT::v16i8) {
   4025         SelectStore(Node, 2, AArch64::ST2Twov16b);
   4026         return;
   4027       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4028                  VT == MVT::v4bf16) {
   4029         SelectStore(Node, 2, AArch64::ST2Twov4h);
   4030         return;
   4031       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
   4032                  VT == MVT::v8bf16) {
   4033         SelectStore(Node, 2, AArch64::ST2Twov8h);
   4034         return;
   4035       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4036         SelectStore(Node, 2, AArch64::ST2Twov2s);
   4037         return;
   4038       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4039         SelectStore(Node, 2, AArch64::ST2Twov4s);
   4040         return;
   4041       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4042         SelectStore(Node, 2, AArch64::ST2Twov2d);
   4043         return;
   4044       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4045         SelectStore(Node, 2, AArch64::ST1Twov1d);
   4046         return;
   4047       }
   4048       break;
   4049     }
   4050     case Intrinsic::aarch64_neon_st3: {
   4051       if (VT == MVT::v8i8) {
   4052         SelectStore(Node, 3, AArch64::ST3Threev8b);
   4053         return;
   4054       } else if (VT == MVT::v16i8) {
   4055         SelectStore(Node, 3, AArch64::ST3Threev16b);
   4056         return;
   4057       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4058                  VT == MVT::v4bf16) {
   4059         SelectStore(Node, 3, AArch64::ST3Threev4h);
   4060         return;
   4061       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
   4062                  VT == MVT::v8bf16) {
   4063         SelectStore(Node, 3, AArch64::ST3Threev8h);
   4064         return;
   4065       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4066         SelectStore(Node, 3, AArch64::ST3Threev2s);
   4067         return;
   4068       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4069         SelectStore(Node, 3, AArch64::ST3Threev4s);
   4070         return;
   4071       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4072         SelectStore(Node, 3, AArch64::ST3Threev2d);
   4073         return;
   4074       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4075         SelectStore(Node, 3, AArch64::ST1Threev1d);
   4076         return;
   4077       }
   4078       break;
   4079     }
   4080     case Intrinsic::aarch64_neon_st4: {
   4081       if (VT == MVT::v8i8) {
   4082         SelectStore(Node, 4, AArch64::ST4Fourv8b);
   4083         return;
   4084       } else if (VT == MVT::v16i8) {
   4085         SelectStore(Node, 4, AArch64::ST4Fourv16b);
   4086         return;
   4087       } else if (VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4088                  VT == MVT::v4bf16) {
   4089         SelectStore(Node, 4, AArch64::ST4Fourv4h);
   4090         return;
   4091       } else if (VT == MVT::v8i16 || VT == MVT::v8f16 ||
   4092                  VT == MVT::v8bf16) {
   4093         SelectStore(Node, 4, AArch64::ST4Fourv8h);
   4094         return;
   4095       } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4096         SelectStore(Node, 4, AArch64::ST4Fourv2s);
   4097         return;
   4098       } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4099         SelectStore(Node, 4, AArch64::ST4Fourv4s);
   4100         return;
   4101       } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4102         SelectStore(Node, 4, AArch64::ST4Fourv2d);
   4103         return;
   4104       } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4105         SelectStore(Node, 4, AArch64::ST1Fourv1d);
   4106         return;
   4107       }
   4108       break;
   4109     }
   4110     case Intrinsic::aarch64_neon_st2lane: {
   4111       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4112         SelectStoreLane(Node, 2, AArch64::ST2i8);
   4113         return;
   4114       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4115                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4116         SelectStoreLane(Node, 2, AArch64::ST2i16);
   4117         return;
   4118       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4119                  VT == MVT::v2f32) {
   4120         SelectStoreLane(Node, 2, AArch64::ST2i32);
   4121         return;
   4122       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4123                  VT == MVT::v1f64) {
   4124         SelectStoreLane(Node, 2, AArch64::ST2i64);
   4125         return;
   4126       }
   4127       break;
   4128     }
   4129     case Intrinsic::aarch64_neon_st3lane: {
   4130       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4131         SelectStoreLane(Node, 3, AArch64::ST3i8);
   4132         return;
   4133       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4134                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4135         SelectStoreLane(Node, 3, AArch64::ST3i16);
   4136         return;
   4137       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4138                  VT == MVT::v2f32) {
   4139         SelectStoreLane(Node, 3, AArch64::ST3i32);
   4140         return;
   4141       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4142                  VT == MVT::v1f64) {
   4143         SelectStoreLane(Node, 3, AArch64::ST3i64);
   4144         return;
   4145       }
   4146       break;
   4147     }
   4148     case Intrinsic::aarch64_neon_st4lane: {
   4149       if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4150         SelectStoreLane(Node, 4, AArch64::ST4i8);
   4151         return;
   4152       } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4153                  VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4154         SelectStoreLane(Node, 4, AArch64::ST4i16);
   4155         return;
   4156       } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4157                  VT == MVT::v2f32) {
   4158         SelectStoreLane(Node, 4, AArch64::ST4i32);
   4159         return;
   4160       } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4161                  VT == MVT::v1f64) {
   4162         SelectStoreLane(Node, 4, AArch64::ST4i64);
   4163         return;
   4164       }
   4165       break;
   4166     }
   4167     case Intrinsic::aarch64_sve_st2: {
   4168       if (VT == MVT::nxv16i8) {
   4169         SelectPredicatedStore(Node, 2, 0, AArch64::ST2B, AArch64::ST2B_IMM);
   4170         return;
   4171       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
   4172                  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
   4173         SelectPredicatedStore(Node, 2, 1, AArch64::ST2H, AArch64::ST2H_IMM);
   4174         return;
   4175       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
   4176         SelectPredicatedStore(Node, 2, 2, AArch64::ST2W, AArch64::ST2W_IMM);
   4177         return;
   4178       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
   4179         SelectPredicatedStore(Node, 2, 3, AArch64::ST2D, AArch64::ST2D_IMM);
   4180         return;
   4181       }
   4182       break;
   4183     }
   4184     case Intrinsic::aarch64_sve_st3: {
   4185       if (VT == MVT::nxv16i8) {
   4186         SelectPredicatedStore(Node, 3, 0, AArch64::ST3B, AArch64::ST3B_IMM);
   4187         return;
   4188       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
   4189                  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
   4190         SelectPredicatedStore(Node, 3, 1, AArch64::ST3H, AArch64::ST3H_IMM);
   4191         return;
   4192       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
   4193         SelectPredicatedStore(Node, 3, 2, AArch64::ST3W, AArch64::ST3W_IMM);
   4194         return;
   4195       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
   4196         SelectPredicatedStore(Node, 3, 3, AArch64::ST3D, AArch64::ST3D_IMM);
   4197         return;
   4198       }
   4199       break;
   4200     }
   4201     case Intrinsic::aarch64_sve_st4: {
   4202       if (VT == MVT::nxv16i8) {
   4203         SelectPredicatedStore(Node, 4, 0, AArch64::ST4B, AArch64::ST4B_IMM);
   4204         return;
   4205       } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
   4206                  (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
   4207         SelectPredicatedStore(Node, 4, 1, AArch64::ST4H, AArch64::ST4H_IMM);
   4208         return;
   4209       } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
   4210         SelectPredicatedStore(Node, 4, 2, AArch64::ST4W, AArch64::ST4W_IMM);
   4211         return;
   4212       } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
   4213         SelectPredicatedStore(Node, 4, 3, AArch64::ST4D, AArch64::ST4D_IMM);
   4214         return;
   4215       }
   4216       break;
   4217     }
   4218     }
   4219     break;
   4220   }
   4221   case AArch64ISD::LD2post: {
   4222     if (VT == MVT::v8i8) {
   4223       SelectPostLoad(Node, 2, AArch64::LD2Twov8b_POST, AArch64::dsub0);
   4224       return;
   4225     } else if (VT == MVT::v16i8) {
   4226       SelectPostLoad(Node, 2, AArch64::LD2Twov16b_POST, AArch64::qsub0);
   4227       return;
   4228     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4229       SelectPostLoad(Node, 2, AArch64::LD2Twov4h_POST, AArch64::dsub0);
   4230       return;
   4231     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4232       SelectPostLoad(Node, 2, AArch64::LD2Twov8h_POST, AArch64::qsub0);
   4233       return;
   4234     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4235       SelectPostLoad(Node, 2, AArch64::LD2Twov2s_POST, AArch64::dsub0);
   4236       return;
   4237     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4238       SelectPostLoad(Node, 2, AArch64::LD2Twov4s_POST, AArch64::qsub0);
   4239       return;
   4240     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4241       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
   4242       return;
   4243     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4244       SelectPostLoad(Node, 2, AArch64::LD2Twov2d_POST, AArch64::qsub0);
   4245       return;
   4246     }
   4247     break;
   4248   }
   4249   case AArch64ISD::LD3post: {
   4250     if (VT == MVT::v8i8) {
   4251       SelectPostLoad(Node, 3, AArch64::LD3Threev8b_POST, AArch64::dsub0);
   4252       return;
   4253     } else if (VT == MVT::v16i8) {
   4254       SelectPostLoad(Node, 3, AArch64::LD3Threev16b_POST, AArch64::qsub0);
   4255       return;
   4256     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4257       SelectPostLoad(Node, 3, AArch64::LD3Threev4h_POST, AArch64::dsub0);
   4258       return;
   4259     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4260       SelectPostLoad(Node, 3, AArch64::LD3Threev8h_POST, AArch64::qsub0);
   4261       return;
   4262     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4263       SelectPostLoad(Node, 3, AArch64::LD3Threev2s_POST, AArch64::dsub0);
   4264       return;
   4265     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4266       SelectPostLoad(Node, 3, AArch64::LD3Threev4s_POST, AArch64::qsub0);
   4267       return;
   4268     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4269       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
   4270       return;
   4271     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4272       SelectPostLoad(Node, 3, AArch64::LD3Threev2d_POST, AArch64::qsub0);
   4273       return;
   4274     }
   4275     break;
   4276   }
   4277   case AArch64ISD::LD4post: {
   4278     if (VT == MVT::v8i8) {
   4279       SelectPostLoad(Node, 4, AArch64::LD4Fourv8b_POST, AArch64::dsub0);
   4280       return;
   4281     } else if (VT == MVT::v16i8) {
   4282       SelectPostLoad(Node, 4, AArch64::LD4Fourv16b_POST, AArch64::qsub0);
   4283       return;
   4284     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4285       SelectPostLoad(Node, 4, AArch64::LD4Fourv4h_POST, AArch64::dsub0);
   4286       return;
   4287     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4288       SelectPostLoad(Node, 4, AArch64::LD4Fourv8h_POST, AArch64::qsub0);
   4289       return;
   4290     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4291       SelectPostLoad(Node, 4, AArch64::LD4Fourv2s_POST, AArch64::dsub0);
   4292       return;
   4293     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4294       SelectPostLoad(Node, 4, AArch64::LD4Fourv4s_POST, AArch64::qsub0);
   4295       return;
   4296     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4297       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
   4298       return;
   4299     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4300       SelectPostLoad(Node, 4, AArch64::LD4Fourv2d_POST, AArch64::qsub0);
   4301       return;
   4302     }
   4303     break;
   4304   }
   4305   case AArch64ISD::LD1x2post: {
   4306     if (VT == MVT::v8i8) {
   4307       SelectPostLoad(Node, 2, AArch64::LD1Twov8b_POST, AArch64::dsub0);
   4308       return;
   4309     } else if (VT == MVT::v16i8) {
   4310       SelectPostLoad(Node, 2, AArch64::LD1Twov16b_POST, AArch64::qsub0);
   4311       return;
   4312     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4313       SelectPostLoad(Node, 2, AArch64::LD1Twov4h_POST, AArch64::dsub0);
   4314       return;
   4315     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4316       SelectPostLoad(Node, 2, AArch64::LD1Twov8h_POST, AArch64::qsub0);
   4317       return;
   4318     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4319       SelectPostLoad(Node, 2, AArch64::LD1Twov2s_POST, AArch64::dsub0);
   4320       return;
   4321     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4322       SelectPostLoad(Node, 2, AArch64::LD1Twov4s_POST, AArch64::qsub0);
   4323       return;
   4324     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4325       SelectPostLoad(Node, 2, AArch64::LD1Twov1d_POST, AArch64::dsub0);
   4326       return;
   4327     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4328       SelectPostLoad(Node, 2, AArch64::LD1Twov2d_POST, AArch64::qsub0);
   4329       return;
   4330     }
   4331     break;
   4332   }
   4333   case AArch64ISD::LD1x3post: {
   4334     if (VT == MVT::v8i8) {
   4335       SelectPostLoad(Node, 3, AArch64::LD1Threev8b_POST, AArch64::dsub0);
   4336       return;
   4337     } else if (VT == MVT::v16i8) {
   4338       SelectPostLoad(Node, 3, AArch64::LD1Threev16b_POST, AArch64::qsub0);
   4339       return;
   4340     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4341       SelectPostLoad(Node, 3, AArch64::LD1Threev4h_POST, AArch64::dsub0);
   4342       return;
   4343     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4344       SelectPostLoad(Node, 3, AArch64::LD1Threev8h_POST, AArch64::qsub0);
   4345       return;
   4346     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4347       SelectPostLoad(Node, 3, AArch64::LD1Threev2s_POST, AArch64::dsub0);
   4348       return;
   4349     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4350       SelectPostLoad(Node, 3, AArch64::LD1Threev4s_POST, AArch64::qsub0);
   4351       return;
   4352     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4353       SelectPostLoad(Node, 3, AArch64::LD1Threev1d_POST, AArch64::dsub0);
   4354       return;
   4355     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4356       SelectPostLoad(Node, 3, AArch64::LD1Threev2d_POST, AArch64::qsub0);
   4357       return;
   4358     }
   4359     break;
   4360   }
   4361   case AArch64ISD::LD1x4post: {
   4362     if (VT == MVT::v8i8) {
   4363       SelectPostLoad(Node, 4, AArch64::LD1Fourv8b_POST, AArch64::dsub0);
   4364       return;
   4365     } else if (VT == MVT::v16i8) {
   4366       SelectPostLoad(Node, 4, AArch64::LD1Fourv16b_POST, AArch64::qsub0);
   4367       return;
   4368     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4369       SelectPostLoad(Node, 4, AArch64::LD1Fourv4h_POST, AArch64::dsub0);
   4370       return;
   4371     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4372       SelectPostLoad(Node, 4, AArch64::LD1Fourv8h_POST, AArch64::qsub0);
   4373       return;
   4374     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4375       SelectPostLoad(Node, 4, AArch64::LD1Fourv2s_POST, AArch64::dsub0);
   4376       return;
   4377     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4378       SelectPostLoad(Node, 4, AArch64::LD1Fourv4s_POST, AArch64::qsub0);
   4379       return;
   4380     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4381       SelectPostLoad(Node, 4, AArch64::LD1Fourv1d_POST, AArch64::dsub0);
   4382       return;
   4383     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4384       SelectPostLoad(Node, 4, AArch64::LD1Fourv2d_POST, AArch64::qsub0);
   4385       return;
   4386     }
   4387     break;
   4388   }
   4389   case AArch64ISD::LD1DUPpost: {
   4390     if (VT == MVT::v8i8) {
   4391       SelectPostLoad(Node, 1, AArch64::LD1Rv8b_POST, AArch64::dsub0);
   4392       return;
   4393     } else if (VT == MVT::v16i8) {
   4394       SelectPostLoad(Node, 1, AArch64::LD1Rv16b_POST, AArch64::qsub0);
   4395       return;
   4396     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4397       SelectPostLoad(Node, 1, AArch64::LD1Rv4h_POST, AArch64::dsub0);
   4398       return;
   4399     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4400       SelectPostLoad(Node, 1, AArch64::LD1Rv8h_POST, AArch64::qsub0);
   4401       return;
   4402     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4403       SelectPostLoad(Node, 1, AArch64::LD1Rv2s_POST, AArch64::dsub0);
   4404       return;
   4405     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4406       SelectPostLoad(Node, 1, AArch64::LD1Rv4s_POST, AArch64::qsub0);
   4407       return;
   4408     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4409       SelectPostLoad(Node, 1, AArch64::LD1Rv1d_POST, AArch64::dsub0);
   4410       return;
   4411     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4412       SelectPostLoad(Node, 1, AArch64::LD1Rv2d_POST, AArch64::qsub0);
   4413       return;
   4414     }
   4415     break;
   4416   }
   4417   case AArch64ISD::LD2DUPpost: {
   4418     if (VT == MVT::v8i8) {
   4419       SelectPostLoad(Node, 2, AArch64::LD2Rv8b_POST, AArch64::dsub0);
   4420       return;
   4421     } else if (VT == MVT::v16i8) {
   4422       SelectPostLoad(Node, 2, AArch64::LD2Rv16b_POST, AArch64::qsub0);
   4423       return;
   4424     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4425       SelectPostLoad(Node, 2, AArch64::LD2Rv4h_POST, AArch64::dsub0);
   4426       return;
   4427     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4428       SelectPostLoad(Node, 2, AArch64::LD2Rv8h_POST, AArch64::qsub0);
   4429       return;
   4430     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4431       SelectPostLoad(Node, 2, AArch64::LD2Rv2s_POST, AArch64::dsub0);
   4432       return;
   4433     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4434       SelectPostLoad(Node, 2, AArch64::LD2Rv4s_POST, AArch64::qsub0);
   4435       return;
   4436     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4437       SelectPostLoad(Node, 2, AArch64::LD2Rv1d_POST, AArch64::dsub0);
   4438       return;
   4439     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4440       SelectPostLoad(Node, 2, AArch64::LD2Rv2d_POST, AArch64::qsub0);
   4441       return;
   4442     }
   4443     break;
   4444   }
   4445   case AArch64ISD::LD3DUPpost: {
   4446     if (VT == MVT::v8i8) {
   4447       SelectPostLoad(Node, 3, AArch64::LD3Rv8b_POST, AArch64::dsub0);
   4448       return;
   4449     } else if (VT == MVT::v16i8) {
   4450       SelectPostLoad(Node, 3, AArch64::LD3Rv16b_POST, AArch64::qsub0);
   4451       return;
   4452     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4453       SelectPostLoad(Node, 3, AArch64::LD3Rv4h_POST, AArch64::dsub0);
   4454       return;
   4455     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4456       SelectPostLoad(Node, 3, AArch64::LD3Rv8h_POST, AArch64::qsub0);
   4457       return;
   4458     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4459       SelectPostLoad(Node, 3, AArch64::LD3Rv2s_POST, AArch64::dsub0);
   4460       return;
   4461     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4462       SelectPostLoad(Node, 3, AArch64::LD3Rv4s_POST, AArch64::qsub0);
   4463       return;
   4464     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4465       SelectPostLoad(Node, 3, AArch64::LD3Rv1d_POST, AArch64::dsub0);
   4466       return;
   4467     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4468       SelectPostLoad(Node, 3, AArch64::LD3Rv2d_POST, AArch64::qsub0);
   4469       return;
   4470     }
   4471     break;
   4472   }
   4473   case AArch64ISD::LD4DUPpost: {
   4474     if (VT == MVT::v8i8) {
   4475       SelectPostLoad(Node, 4, AArch64::LD4Rv8b_POST, AArch64::dsub0);
   4476       return;
   4477     } else if (VT == MVT::v16i8) {
   4478       SelectPostLoad(Node, 4, AArch64::LD4Rv16b_POST, AArch64::qsub0);
   4479       return;
   4480     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4481       SelectPostLoad(Node, 4, AArch64::LD4Rv4h_POST, AArch64::dsub0);
   4482       return;
   4483     } else if (VT == MVT::v8i16 || VT == MVT::v8f16  || VT == MVT::v8bf16) {
   4484       SelectPostLoad(Node, 4, AArch64::LD4Rv8h_POST, AArch64::qsub0);
   4485       return;
   4486     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4487       SelectPostLoad(Node, 4, AArch64::LD4Rv2s_POST, AArch64::dsub0);
   4488       return;
   4489     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4490       SelectPostLoad(Node, 4, AArch64::LD4Rv4s_POST, AArch64::qsub0);
   4491       return;
   4492     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4493       SelectPostLoad(Node, 4, AArch64::LD4Rv1d_POST, AArch64::dsub0);
   4494       return;
   4495     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4496       SelectPostLoad(Node, 4, AArch64::LD4Rv2d_POST, AArch64::qsub0);
   4497       return;
   4498     }
   4499     break;
   4500   }
   4501   case AArch64ISD::LD1LANEpost: {
   4502     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4503       SelectPostLoadLane(Node, 1, AArch64::LD1i8_POST);
   4504       return;
   4505     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4506                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4507       SelectPostLoadLane(Node, 1, AArch64::LD1i16_POST);
   4508       return;
   4509     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4510                VT == MVT::v2f32) {
   4511       SelectPostLoadLane(Node, 1, AArch64::LD1i32_POST);
   4512       return;
   4513     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4514                VT == MVT::v1f64) {
   4515       SelectPostLoadLane(Node, 1, AArch64::LD1i64_POST);
   4516       return;
   4517     }
   4518     break;
   4519   }
   4520   case AArch64ISD::LD2LANEpost: {
   4521     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4522       SelectPostLoadLane(Node, 2, AArch64::LD2i8_POST);
   4523       return;
   4524     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4525                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4526       SelectPostLoadLane(Node, 2, AArch64::LD2i16_POST);
   4527       return;
   4528     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4529                VT == MVT::v2f32) {
   4530       SelectPostLoadLane(Node, 2, AArch64::LD2i32_POST);
   4531       return;
   4532     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4533                VT == MVT::v1f64) {
   4534       SelectPostLoadLane(Node, 2, AArch64::LD2i64_POST);
   4535       return;
   4536     }
   4537     break;
   4538   }
   4539   case AArch64ISD::LD3LANEpost: {
   4540     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4541       SelectPostLoadLane(Node, 3, AArch64::LD3i8_POST);
   4542       return;
   4543     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4544                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4545       SelectPostLoadLane(Node, 3, AArch64::LD3i16_POST);
   4546       return;
   4547     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4548                VT == MVT::v2f32) {
   4549       SelectPostLoadLane(Node, 3, AArch64::LD3i32_POST);
   4550       return;
   4551     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4552                VT == MVT::v1f64) {
   4553       SelectPostLoadLane(Node, 3, AArch64::LD3i64_POST);
   4554       return;
   4555     }
   4556     break;
   4557   }
   4558   case AArch64ISD::LD4LANEpost: {
   4559     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4560       SelectPostLoadLane(Node, 4, AArch64::LD4i8_POST);
   4561       return;
   4562     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4563                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4564       SelectPostLoadLane(Node, 4, AArch64::LD4i16_POST);
   4565       return;
   4566     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4567                VT == MVT::v2f32) {
   4568       SelectPostLoadLane(Node, 4, AArch64::LD4i32_POST);
   4569       return;
   4570     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4571                VT == MVT::v1f64) {
   4572       SelectPostLoadLane(Node, 4, AArch64::LD4i64_POST);
   4573       return;
   4574     }
   4575     break;
   4576   }
   4577   case AArch64ISD::ST2post: {
   4578     VT = Node->getOperand(1).getValueType();
   4579     if (VT == MVT::v8i8) {
   4580       SelectPostStore(Node, 2, AArch64::ST2Twov8b_POST);
   4581       return;
   4582     } else if (VT == MVT::v16i8) {
   4583       SelectPostStore(Node, 2, AArch64::ST2Twov16b_POST);
   4584       return;
   4585     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4586       SelectPostStore(Node, 2, AArch64::ST2Twov4h_POST);
   4587       return;
   4588     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   4589       SelectPostStore(Node, 2, AArch64::ST2Twov8h_POST);
   4590       return;
   4591     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4592       SelectPostStore(Node, 2, AArch64::ST2Twov2s_POST);
   4593       return;
   4594     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4595       SelectPostStore(Node, 2, AArch64::ST2Twov4s_POST);
   4596       return;
   4597     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4598       SelectPostStore(Node, 2, AArch64::ST2Twov2d_POST);
   4599       return;
   4600     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4601       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
   4602       return;
   4603     }
   4604     break;
   4605   }
   4606   case AArch64ISD::ST3post: {
   4607     VT = Node->getOperand(1).getValueType();
   4608     if (VT == MVT::v8i8) {
   4609       SelectPostStore(Node, 3, AArch64::ST3Threev8b_POST);
   4610       return;
   4611     } else if (VT == MVT::v16i8) {
   4612       SelectPostStore(Node, 3, AArch64::ST3Threev16b_POST);
   4613       return;
   4614     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4615       SelectPostStore(Node, 3, AArch64::ST3Threev4h_POST);
   4616       return;
   4617     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   4618       SelectPostStore(Node, 3, AArch64::ST3Threev8h_POST);
   4619       return;
   4620     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4621       SelectPostStore(Node, 3, AArch64::ST3Threev2s_POST);
   4622       return;
   4623     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4624       SelectPostStore(Node, 3, AArch64::ST3Threev4s_POST);
   4625       return;
   4626     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4627       SelectPostStore(Node, 3, AArch64::ST3Threev2d_POST);
   4628       return;
   4629     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4630       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
   4631       return;
   4632     }
   4633     break;
   4634   }
   4635   case AArch64ISD::ST4post: {
   4636     VT = Node->getOperand(1).getValueType();
   4637     if (VT == MVT::v8i8) {
   4638       SelectPostStore(Node, 4, AArch64::ST4Fourv8b_POST);
   4639       return;
   4640     } else if (VT == MVT::v16i8) {
   4641       SelectPostStore(Node, 4, AArch64::ST4Fourv16b_POST);
   4642       return;
   4643     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4644       SelectPostStore(Node, 4, AArch64::ST4Fourv4h_POST);
   4645       return;
   4646     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   4647       SelectPostStore(Node, 4, AArch64::ST4Fourv8h_POST);
   4648       return;
   4649     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4650       SelectPostStore(Node, 4, AArch64::ST4Fourv2s_POST);
   4651       return;
   4652     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4653       SelectPostStore(Node, 4, AArch64::ST4Fourv4s_POST);
   4654       return;
   4655     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4656       SelectPostStore(Node, 4, AArch64::ST4Fourv2d_POST);
   4657       return;
   4658     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4659       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
   4660       return;
   4661     }
   4662     break;
   4663   }
   4664   case AArch64ISD::ST1x2post: {
   4665     VT = Node->getOperand(1).getValueType();
   4666     if (VT == MVT::v8i8) {
   4667       SelectPostStore(Node, 2, AArch64::ST1Twov8b_POST);
   4668       return;
   4669     } else if (VT == MVT::v16i8) {
   4670       SelectPostStore(Node, 2, AArch64::ST1Twov16b_POST);
   4671       return;
   4672     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4673       SelectPostStore(Node, 2, AArch64::ST1Twov4h_POST);
   4674       return;
   4675     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   4676       SelectPostStore(Node, 2, AArch64::ST1Twov8h_POST);
   4677       return;
   4678     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4679       SelectPostStore(Node, 2, AArch64::ST1Twov2s_POST);
   4680       return;
   4681     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4682       SelectPostStore(Node, 2, AArch64::ST1Twov4s_POST);
   4683       return;
   4684     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4685       SelectPostStore(Node, 2, AArch64::ST1Twov1d_POST);
   4686       return;
   4687     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4688       SelectPostStore(Node, 2, AArch64::ST1Twov2d_POST);
   4689       return;
   4690     }
   4691     break;
   4692   }
   4693   case AArch64ISD::ST1x3post: {
   4694     VT = Node->getOperand(1).getValueType();
   4695     if (VT == MVT::v8i8) {
   4696       SelectPostStore(Node, 3, AArch64::ST1Threev8b_POST);
   4697       return;
   4698     } else if (VT == MVT::v16i8) {
   4699       SelectPostStore(Node, 3, AArch64::ST1Threev16b_POST);
   4700       return;
   4701     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4702       SelectPostStore(Node, 3, AArch64::ST1Threev4h_POST);
   4703       return;
   4704     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16 ) {
   4705       SelectPostStore(Node, 3, AArch64::ST1Threev8h_POST);
   4706       return;
   4707     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4708       SelectPostStore(Node, 3, AArch64::ST1Threev2s_POST);
   4709       return;
   4710     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4711       SelectPostStore(Node, 3, AArch64::ST1Threev4s_POST);
   4712       return;
   4713     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4714       SelectPostStore(Node, 3, AArch64::ST1Threev1d_POST);
   4715       return;
   4716     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4717       SelectPostStore(Node, 3, AArch64::ST1Threev2d_POST);
   4718       return;
   4719     }
   4720     break;
   4721   }
   4722   case AArch64ISD::ST1x4post: {
   4723     VT = Node->getOperand(1).getValueType();
   4724     if (VT == MVT::v8i8) {
   4725       SelectPostStore(Node, 4, AArch64::ST1Fourv8b_POST);
   4726       return;
   4727     } else if (VT == MVT::v16i8) {
   4728       SelectPostStore(Node, 4, AArch64::ST1Fourv16b_POST);
   4729       return;
   4730     } else if (VT == MVT::v4i16 || VT == MVT::v4f16 || VT == MVT::v4bf16) {
   4731       SelectPostStore(Node, 4, AArch64::ST1Fourv4h_POST);
   4732       return;
   4733     } else if (VT == MVT::v8i16 || VT == MVT::v8f16 || VT == MVT::v8bf16) {
   4734       SelectPostStore(Node, 4, AArch64::ST1Fourv8h_POST);
   4735       return;
   4736     } else if (VT == MVT::v2i32 || VT == MVT::v2f32) {
   4737       SelectPostStore(Node, 4, AArch64::ST1Fourv2s_POST);
   4738       return;
   4739     } else if (VT == MVT::v4i32 || VT == MVT::v4f32) {
   4740       SelectPostStore(Node, 4, AArch64::ST1Fourv4s_POST);
   4741       return;
   4742     } else if (VT == MVT::v1i64 || VT == MVT::v1f64) {
   4743       SelectPostStore(Node, 4, AArch64::ST1Fourv1d_POST);
   4744       return;
   4745     } else if (VT == MVT::v2i64 || VT == MVT::v2f64) {
   4746       SelectPostStore(Node, 4, AArch64::ST1Fourv2d_POST);
   4747       return;
   4748     }
   4749     break;
   4750   }
   4751   case AArch64ISD::ST2LANEpost: {
   4752     VT = Node->getOperand(1).getValueType();
   4753     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4754       SelectPostStoreLane(Node, 2, AArch64::ST2i8_POST);
   4755       return;
   4756     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4757                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4758       SelectPostStoreLane(Node, 2, AArch64::ST2i16_POST);
   4759       return;
   4760     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4761                VT == MVT::v2f32) {
   4762       SelectPostStoreLane(Node, 2, AArch64::ST2i32_POST);
   4763       return;
   4764     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4765                VT == MVT::v1f64) {
   4766       SelectPostStoreLane(Node, 2, AArch64::ST2i64_POST);
   4767       return;
   4768     }
   4769     break;
   4770   }
   4771   case AArch64ISD::ST3LANEpost: {
   4772     VT = Node->getOperand(1).getValueType();
   4773     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4774       SelectPostStoreLane(Node, 3, AArch64::ST3i8_POST);
   4775       return;
   4776     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4777                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4778       SelectPostStoreLane(Node, 3, AArch64::ST3i16_POST);
   4779       return;
   4780     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4781                VT == MVT::v2f32) {
   4782       SelectPostStoreLane(Node, 3, AArch64::ST3i32_POST);
   4783       return;
   4784     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4785                VT == MVT::v1f64) {
   4786       SelectPostStoreLane(Node, 3, AArch64::ST3i64_POST);
   4787       return;
   4788     }
   4789     break;
   4790   }
   4791   case AArch64ISD::ST4LANEpost: {
   4792     VT = Node->getOperand(1).getValueType();
   4793     if (VT == MVT::v16i8 || VT == MVT::v8i8) {
   4794       SelectPostStoreLane(Node, 4, AArch64::ST4i8_POST);
   4795       return;
   4796     } else if (VT == MVT::v8i16 || VT == MVT::v4i16 || VT == MVT::v4f16 ||
   4797                VT == MVT::v8f16 || VT == MVT::v4bf16 || VT == MVT::v8bf16) {
   4798       SelectPostStoreLane(Node, 4, AArch64::ST4i16_POST);
   4799       return;
   4800     } else if (VT == MVT::v4i32 || VT == MVT::v2i32 || VT == MVT::v4f32 ||
   4801                VT == MVT::v2f32) {
   4802       SelectPostStoreLane(Node, 4, AArch64::ST4i32_POST);
   4803       return;
   4804     } else if (VT == MVT::v2i64 || VT == MVT::v1i64 || VT == MVT::v2f64 ||
   4805                VT == MVT::v1f64) {
   4806       SelectPostStoreLane(Node, 4, AArch64::ST4i64_POST);
   4807       return;
   4808     }
   4809     break;
   4810   }
   4811   case AArch64ISD::SVE_LD2_MERGE_ZERO: {
   4812     if (VT == MVT::nxv16i8) {
   4813       SelectPredicatedLoad(Node, 2, 0, AArch64::LD2B_IMM, AArch64::LD2B);
   4814       return;
   4815     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
   4816                (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
   4817       SelectPredicatedLoad(Node, 2, 1, AArch64::LD2H_IMM, AArch64::LD2H);
   4818       return;
   4819     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
   4820       SelectPredicatedLoad(Node, 2, 2, AArch64::LD2W_IMM, AArch64::LD2W);
   4821       return;
   4822     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
   4823       SelectPredicatedLoad(Node, 2, 3, AArch64::LD2D_IMM, AArch64::LD2D);
   4824       return;
   4825     }
   4826     break;
   4827   }
   4828   case AArch64ISD::SVE_LD3_MERGE_ZERO: {
   4829     if (VT == MVT::nxv16i8) {
   4830       SelectPredicatedLoad(Node, 3, 0, AArch64::LD3B_IMM, AArch64::LD3B);
   4831       return;
   4832     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
   4833                (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
   4834       SelectPredicatedLoad(Node, 3, 1, AArch64::LD3H_IMM, AArch64::LD3H);
   4835       return;
   4836     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
   4837       SelectPredicatedLoad(Node, 3, 2, AArch64::LD3W_IMM, AArch64::LD3W);
   4838       return;
   4839     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
   4840       SelectPredicatedLoad(Node, 3, 3, AArch64::LD3D_IMM, AArch64::LD3D);
   4841       return;
   4842     }
   4843     break;
   4844   }
   4845   case AArch64ISD::SVE_LD4_MERGE_ZERO: {
   4846     if (VT == MVT::nxv16i8) {
   4847       SelectPredicatedLoad(Node, 4, 0, AArch64::LD4B_IMM, AArch64::LD4B);
   4848       return;
   4849     } else if (VT == MVT::nxv8i16 || VT == MVT::nxv8f16 ||
   4850                (VT == MVT::nxv8bf16 && Subtarget->hasBF16())) {
   4851       SelectPredicatedLoad(Node, 4, 1, AArch64::LD4H_IMM, AArch64::LD4H);
   4852       return;
   4853     } else if (VT == MVT::nxv4i32 || VT == MVT::nxv4f32) {
   4854       SelectPredicatedLoad(Node, 4, 2, AArch64::LD4W_IMM, AArch64::LD4W);
   4855       return;
   4856     } else if (VT == MVT::nxv2i64 || VT == MVT::nxv2f64) {
   4857       SelectPredicatedLoad(Node, 4, 3, AArch64::LD4D_IMM, AArch64::LD4D);
   4858       return;
   4859     }
   4860     break;
   4861   }
   4862   }
   4863 
   4864   // Select the default instruction
   4865   SelectCode(Node);
   4866 }
   4867 
   4868 /// createAArch64ISelDag - Create the pass that converts a legalized DAG into
   4869 /// an AArch64-specific DAG, ready for instruction scheduling.
   4870 FunctionPass *llvm::createAArch64ISelDag(AArch64TargetMachine &TM,
   4871                                          CodeGenOpt::Level OptLevel) {
   4872   return new AArch64DAGToDAGISel(TM, OptLevel); // raw new; the pointer is handed to the caller
   4873 }
   4874 
   4875 /// When \p PredVT is a scalable vector predicate in the form
   4876 /// MVT::nx<M>xi1, it builds the correspondent scalable vector of
   4877 /// integers MVT::nx<M>xi<bits> s.t. M x bits = 128. When targeting
   4878 /// structured vectors (NumVec >1), the output data type is
   4879 /// MVT::nx<M*NumVec>xi<bits> s.t. M x bits = 128. If the input
   4880 /// PredVT is not in the form MVT::nx<M>xi1, it returns an invalid
   4881 /// EVT.
   4882 static EVT getPackedVectorTypeFromPredicateType(LLVMContext &Ctx, EVT PredVT,
   4883                                                 unsigned NumVec) {
   4884   assert(NumVec > 0 && NumVec < 5 && "Invalid number of vectors.");
   4885   if (!PredVT.isScalableVector() || PredVT.getVectorElementType() != MVT::i1)
   4886     return EVT();
   4887 
   4888   if (PredVT != MVT::nxv16i1 && PredVT != MVT::nxv8i1 &&
   4889       PredVT != MVT::nxv4i1 && PredVT != MVT::nxv2i1)
   4890     return EVT();
   4891 
   4892   ElementCount EC = PredVT.getVectorElementCount();
   4893   EVT ScalarVT =
   4894       EVT::getIntegerVT(Ctx, AArch64::SVEBitsPerBlock / EC.getKnownMinValue());
   4895   EVT MemVT = EVT::getVectorVT(Ctx, ScalarVT, EC * NumVec);
   4896 
   4897   return MemVT;
   4898 }
   4899 
   4900 /// Return the EVT of the data associated to a memory operation in \p
   4901 /// Root. If such EVT cannot be retrived, it returns an invalid EVT.
   4902 static EVT getMemVTFromNode(LLVMContext &Ctx, SDNode *Root) {
   4903   if (isa<MemSDNode>(Root))
   4904     return cast<MemSDNode>(Root)->getMemoryVT();
   4905 
   4906   if (isa<MemIntrinsicSDNode>(Root))
   4907     return cast<MemIntrinsicSDNode>(Root)->getMemoryVT();
   4908 
   4909   const unsigned Opcode = Root->getOpcode();
   4910   // For custom ISD nodes, we have to look at them individually to extract the
   4911   // type of the data moved to/from memory.
   4912   switch (Opcode) {
   4913   case AArch64ISD::LD1_MERGE_ZERO:
   4914   case AArch64ISD::LD1S_MERGE_ZERO:
   4915   case AArch64ISD::LDNF1_MERGE_ZERO:
   4916   case AArch64ISD::LDNF1S_MERGE_ZERO:
   4917     return cast<VTSDNode>(Root->getOperand(3))->getVT();
   4918   case AArch64ISD::ST1_PRED:
   4919     return cast<VTSDNode>(Root->getOperand(4))->getVT();
   4920   case AArch64ISD::SVE_LD2_MERGE_ZERO:
   4921     return getPackedVectorTypeFromPredicateType(
   4922         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/2);
   4923   case AArch64ISD::SVE_LD3_MERGE_ZERO:
   4924     return getPackedVectorTypeFromPredicateType(
   4925         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/3);
   4926   case AArch64ISD::SVE_LD4_MERGE_ZERO:
   4927     return getPackedVectorTypeFromPredicateType(
   4928         Ctx, Root->getOperand(1)->getValueType(0), /*NumVec=*/4);
   4929   default:
   4930     break;
   4931   }
   4932 
   4933   if (Opcode != ISD::INTRINSIC_VOID)
   4934     return EVT();
   4935 
   4936   const unsigned IntNo =
   4937       cast<ConstantSDNode>(Root->getOperand(1))->getZExtValue();
   4938   if (IntNo != Intrinsic::aarch64_sve_prf)
   4939     return EVT();
   4940 
   4941   // We are using an SVE prefetch intrinsic. Type must be inferred
   4942   // from the width of the predicate.
   4943   return getPackedVectorTypeFromPredicateType(
   4944       Ctx, Root->getOperand(2)->getValueType(0), /*NumVec=*/1);
   4945 }
   4946 
   4947 /// SelectAddrModeIndexedSVE - Attempt selection of the addressing mode:
   4948 /// Base + OffImm * sizeof(MemVT) for Min >= OffImm <= Max
   4949 /// where Root is the memory access using N for its address.
   4950 template <int64_t Min, int64_t Max>
   4951 bool AArch64DAGToDAGISel::SelectAddrModeIndexedSVE(SDNode *Root, SDValue N,
   4952                                                    SDValue &Base,
   4953                                                    SDValue &OffImm) {
   4954   const EVT MemVT = getMemVTFromNode(*(CurDAG->getContext()), Root);
   4955 
   4956   if (MemVT == EVT())
   4957     return false;
   4958 
   4959   if (N.getOpcode() != ISD::ADD)
   4960     return false;
   4961 
   4962   SDValue VScale = N.getOperand(1);
   4963   if (VScale.getOpcode() != ISD::VSCALE)
   4964     return false;
   4965 
   4966   TypeSize TS = MemVT.getSizeInBits();
   4967   int64_t MemWidthBytes = static_cast<int64_t>(TS.getKnownMinSize()) / 8;
   4968   int64_t MulImm = cast<ConstantSDNode>(VScale.getOperand(0))->getSExtValue();
   4969 
   4970   if ((MulImm % MemWidthBytes) != 0)
   4971     return false;
   4972 
   4973   int64_t Offset = MulImm / MemWidthBytes;
   4974   if (Offset < Min || Offset > Max)
   4975     return false;
   4976 
   4977   Base = N.getOperand(0);
   4978   OffImm = CurDAG->getTargetConstant(Offset, SDLoc(N), MVT::i64);
   4979   return true;
   4980 }
   4981 
   4982 /// Select register plus register addressing mode for SVE, with scaled
   4983 /// offset.
   4984 bool AArch64DAGToDAGISel::SelectSVERegRegAddrMode(SDValue N, unsigned Scale,
   4985                                                   SDValue &Base,
   4986                                                   SDValue &Offset) {
   4987   if (N.getOpcode() != ISD::ADD)
   4988     return false;
   4989 
   4990   // Process an ADD node.
   4991   const SDValue LHS = N.getOperand(0);
   4992   const SDValue RHS = N.getOperand(1);
   4993 
   4994   // 8 bit data does not come with the SHL node, so it is treated
   4995   // separately.
   4996   if (Scale == 0) {
   4997     Base = LHS;
   4998     Offset = RHS;
   4999     return true;
   5000   }
   5001 
   5002   // Check if the RHS is a shift node with a constant.
   5003   if (RHS.getOpcode() != ISD::SHL)
   5004     return false;
   5005 
   5006   const SDValue ShiftRHS = RHS.getOperand(1);
   5007   if (auto *C = dyn_cast<ConstantSDNode>(ShiftRHS))
   5008     if (C->getZExtValue() == Scale) {
   5009       Base = LHS;
   5010       Offset = RHS.getOperand(0);
   5011       return true;
   5012     }
   5013 
   5014   return false;
   5015 }
   5016 
   5017 bool AArch64DAGToDAGISel::SelectAllActivePredicate(SDValue N) {
   5018   const AArch64TargetLowering *TLI =
   5019       static_cast<const AArch64TargetLowering *>(getTargetLowering());
   5020 
   5021   return TLI->isAllActivePredicate(N);
   5022 }
   5023