//===-- RISCVISelLowering.cpp - RISCV DAG Lowering Implementation  --------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that RISCV uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "RISCVISelLowering.h"
#include "MCTargetDesc/RISCVMatInt.h"
#include "RISCV.h"
#include "RISCVMachineFunctionInfo.h"
#include "RISCVRegisterInfo.h"
#include "RISCVSubtarget.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-lower"

STATISTIC(NumTailCalls, "Number of tail calls");

RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                                         const RISCVSubtarget &STI)
    : TargetLowering(TM), Subtarget(STI) {

  if (Subtarget.isRV32E())
    report_fatal_error("Codegen not yet implemented for RV32E");

  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  assert(ABI != RISCVABI::ABI_Unknown && "Improperly initialised target ABI");

  if ((ABI == RISCVABI::ABI_ILP32F || ABI == RISCVABI::ABI_LP64F) &&
      !Subtarget.hasStdExtF()) {
    errs() << "Hard-float 'f' ABI can't be used for a target that "
              "doesn't support the F instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  } else if ((ABI == RISCVABI::ABI_ILP32D || ABI == RISCVABI::ABI_LP64D) &&
             !Subtarget.hasStdExtD()) {
    errs() << "Hard-float 'd' ABI can't be used for a target that "
              "doesn't support the D instruction set extension (ignoring "
              "target-abi)\n";
    ABI = Subtarget.is64Bit() ? RISCVABI::ABI_LP64 : RISCVABI::ABI_ILP32;
  }

  switch (ABI) {
  default:
    report_fatal_error("Don't know how to lower this ABI");
  case RISCVABI::ABI_ILP32:
  case RISCVABI::ABI_ILP32F:
  case RISCVABI::ABI_ILP32D:
  case RISCVABI::ABI_LP64:
  case RISCVABI::ABI_LP64F:
  case RISCVABI::ABI_LP64D:
    break;
  }

  MVT XLenVT = Subtarget.getXLenVT();

  // Set up the register classes.
  addRegisterClass(XLenVT, &RISCV::GPRRegClass);

  if (Subtarget.hasStdExtZfh())
    addRegisterClass(MVT::f16, &RISCV::FPR16RegClass);
  if (Subtarget.hasStdExtF())
    addRegisterClass(MVT::f32, &RISCV::FPR32RegClass);
  if (Subtarget.hasStdExtD())
    addRegisterClass(MVT::f64, &RISCV::FPR64RegClass);

  static const MVT::SimpleValueType BoolVecVTs[] = {
      MVT::nxv1i1,  MVT::nxv2i1,  MVT::nxv4i1, MVT::nxv8i1,
      MVT::nxv16i1, MVT::nxv32i1, MVT::nxv64i1};
  static const MVT::SimpleValueType IntVecVTs[] = {
      MVT::nxv1i8,  MVT::nxv2i8,   MVT::nxv4i8,   MVT::nxv8i8,  MVT::nxv16i8,
      MVT::nxv32i8, MVT::nxv64i8,  MVT::nxv1i16,  MVT::nxv2i16, MVT::nxv4i16,
      MVT::nxv8i16, MVT::nxv16i16, MVT::nxv32i16, MVT::nxv1i32, MVT::nxv2i32,
      MVT::nxv4i32, MVT::nxv8i32,  MVT::nxv16i32, MVT::nxv1i64, MVT::nxv2i64,
      MVT::nxv4i64, MVT::nxv8i64};
  static const MVT::SimpleValueType F16VecVTs[] = {
      MVT::nxv1f16, MVT::nxv2f16,  MVT::nxv4f16,
      MVT::nxv8f16, MVT::nxv16f16, MVT::nxv32f16};
  static const MVT::SimpleValueType F32VecVTs[] = {
      MVT::nxv1f32, MVT::nxv2f32, MVT::nxv4f32, MVT::nxv8f32, MVT::nxv16f32};
  static const MVT::SimpleValueType F64VecVTs[] = {
      MVT::nxv1f64, MVT::nxv2f64, MVT::nxv4f64, MVT::nxv8f64};

  if (Subtarget.hasStdExtV()) {
    auto addRegClassForRVV = [this](MVT VT) {
      unsigned Size = VT.getSizeInBits().getKnownMinValue();
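      // A known minimum size of up to 64 bits fits in a single vector
      // register (VR); 128/256/512 bits require the LMUL=2/4/8 register
      // groups (VRM2/VRM4/VRM8).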
      assert(Size <= 512 && isPowerOf2_32(Size));
      const TargetRegisterClass *RC;
      if (Size <= 64)
        RC = &RISCV::VRRegClass;
      else if (Size == 128)
        RC = &RISCV::VRM2RegClass;
      else if (Size == 256)
        RC = &RISCV::VRM4RegClass;
      else
        RC = &RISCV::VRM8RegClass;

      addRegisterClass(VT, RC);
    };

    for (MVT VT : BoolVecVTs)
      addRegClassForRVV(VT);
    for (MVT VT : IntVecVTs)
      addRegClassForRVV(VT);

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtF())
      for (MVT VT : F32VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.hasStdExtD())
      for (MVT VT : F64VecVTs)
        addRegClassForRVV(VT);

    if (Subtarget.useRVVForFixedLengthVectors()) {
      auto addRegClassForFixedVectors = [this](MVT VT) {
        MVT ContainerVT = getContainerForFixedLengthVector(VT);
        unsigned RCID = getRegClassIDForVecVT(ContainerVT);
        const RISCVRegisterInfo &TRI = *Subtarget.getRegisterInfo();
        addRegisterClass(VT, TRI.getRegClass(RCID));
      };
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes())
        if (useRVVForFixedLengthVectorVT(VT))
          addRegClassForFixedVectors(VT);
    }
  }

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(RISCV::X2);

  for (auto N : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
    setLoadExtAction(N, XLenVT, MVT::i1, Promote);

  // TODO: add all necessary setOperationAction calls.
  setOperationAction(ISD::DYNAMIC_STACKALLOC, XLenVT, Expand);

  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BR_CC, XLenVT, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);
  setOperationAction(ISD::SELECT_CC, XLenVT, Expand);

  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  setOperationAction(ISD::VASTART, MVT::Other, Custom);
  setOperationAction(ISD::VAARG, MVT::Other, Expand);
  setOperationAction(ISD::VACOPY, MVT::Other, Expand);
  setOperationAction(ISD::VAEND, MVT::Other, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::ADD, MVT::i32, Custom);
    setOperationAction(ISD::SUB, MVT::i32, Custom);
    setOperationAction(ISD::SHL, MVT::i32, Custom);
    setOperationAction(ISD::SRA, MVT::i32, Custom);
    setOperationAction(ISD::SRL, MVT::i32, Custom);

    setOperationAction(ISD::UADDO, MVT::i32, Custom);
    setOperationAction(ISD::USUBO, MVT::i32, Custom);
    setOperationAction(ISD::UADDSAT, MVT::i32, Custom);
    setOperationAction(ISD::USUBSAT, MVT::i32, Custom);
  }

  if (!Subtarget.hasStdExtM()) {
    setOperationAction(ISD::MUL, XLenVT, Expand);
    setOperationAction(ISD::MULHS, XLenVT, Expand);
    setOperationAction(ISD::MULHU, XLenVT, Expand);
    setOperationAction(ISD::SDIV, XLenVT, Expand);
    setOperationAction(ISD::UDIV, XLenVT, Expand);
    setOperationAction(ISD::SREM, XLenVT, Expand);
    setOperationAction(ISD::UREM, XLenVT, Expand);
  } else {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::MUL, MVT::i32, Custom);
      setOperationAction(ISD::MUL, MVT::i128, Custom);

      setOperationAction(ISD::SDIV, MVT::i8, Custom);
      setOperationAction(ISD::UDIV, MVT::i8, Custom);
      setOperationAction(ISD::UREM, MVT::i8, Custom);
      setOperationAction(ISD::SDIV, MVT::i16, Custom);
      setOperationAction(ISD::UDIV, MVT::i16, Custom);
      setOperationAction(ISD::UREM, MVT::i16, Custom);
      setOperationAction(ISD::SDIV, MVT::i32, Custom);
      setOperationAction(ISD::UDIV, MVT::i32, Custom);
      setOperationAction(ISD::UREM, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::MUL, MVT::i64, Custom);
    }
  }

  setOperationAction(ISD::SDIVREM, XLenVT, Expand);
  setOperationAction(ISD::UDIVREM, XLenVT, Expand);
  setOperationAction(ISD::SMUL_LOHI, XLenVT, Expand);
  setOperationAction(ISD::UMUL_LOHI, XLenVT, Expand);

  setOperationAction(ISD::SHL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRL_PARTS, XLenVT, Custom);
  setOperationAction(ISD::SRA_PARTS, XLenVT, Custom);

  if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::ROTL, MVT::i32, Custom);
      setOperationAction(ISD::ROTR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::ROTL, XLenVT, Expand);
    setOperationAction(ISD::ROTR, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbp()) {
    // Custom lower bswap/bitreverse so we can convert them to GREVI to enable
    // more combining.
    setOperationAction(ISD::BITREVERSE, XLenVT, Custom);
    setOperationAction(ISD::BSWAP, XLenVT, Custom);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::BITREVERSE, MVT::i32, Custom);
      setOperationAction(ISD::BSWAP, MVT::i32, Custom);
    }
  } else {
    // With Zbb we have an XLen rev8 instruction, but not GREVI. So we'll
    // pattern match it directly in isel.
    setOperationAction(ISD::BSWAP, XLenVT,
                       Subtarget.hasStdExtZbb() ? Legal : Expand);
  }

  if (Subtarget.hasStdExtZbb()) {
    setOperationAction(ISD::SMIN, XLenVT, Legal);
    setOperationAction(ISD::SMAX, XLenVT, Legal);
    setOperationAction(ISD::UMIN, XLenVT, Legal);
    setOperationAction(ISD::UMAX, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::CTTZ, MVT::i32, Custom);
      setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
      setOperationAction(ISD::CTLZ, MVT::i32, Custom);
      setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::CTTZ, XLenVT, Expand);
    setOperationAction(ISD::CTLZ, XLenVT, Expand);
    setOperationAction(ISD::CTPOP, XLenVT, Expand);
  }

  if (Subtarget.hasStdExtZbt()) {
    setOperationAction(ISD::FSHL, XLenVT, Custom);
    setOperationAction(ISD::FSHR, XLenVT, Custom);
    setOperationAction(ISD::SELECT, XLenVT, Legal);

    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::FSHL, MVT::i32, Custom);
      setOperationAction(ISD::FSHR, MVT::i32, Custom);
    }
  } else {
    setOperationAction(ISD::SELECT, XLenVT, Custom);
  }

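  // Scalar FP compares only have feq/flt/fle instructions, so mark every
  // condition code we can't pattern-match directly as Expand and let the
  // legalizer rewrite it (swapping operands or inverting the result).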
  ISD::CondCode FPCCToExpand[] = {
      ISD::SETOGT, ISD::SETOGE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
      ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUNE, ISD::SETGT,
      ISD::SETGE,  ISD::SETNE,  ISD::SETO,   ISD::SETUO};

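  // FP operations with no native RISC-V instructions; Expand turns them into
  // libcalls.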
  ISD::NodeType FPOpToExpand[] = {
      ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOW, ISD::FREM, ISD::FP16_TO_FP,
      ISD::FP_TO_FP16};

  if (Subtarget.hasStdExtZfh())
    setOperationAction(ISD::BITCAST, MVT::i16, Custom);

  if (Subtarget.hasStdExtZfh()) {
    setOperationAction(ISD::FMINNUM, MVT::f16, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f16, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f16, Expand);
    setOperationAction(ISD::SELECT, MVT::f16, Custom);
    setOperationAction(ISD::BR_CC, MVT::f16, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
    setOperationAction(ISD::SELECT, MVT::f32, Custom);
    setOperationAction(ISD::BR_CC, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f32, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f16, Expand);
    setTruncStoreAction(MVT::f32, MVT::f16, Expand);
  }

  if (Subtarget.hasStdExtF() && Subtarget.is64Bit())
    setOperationAction(ISD::BITCAST, MVT::i32, Custom);

  if (Subtarget.hasStdExtD()) {
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    for (auto CC : FPCCToExpand)
      setCondCodeAction(CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
    setOperationAction(ISD::SELECT, MVT::f64, Custom);
    setOperationAction(ISD::BR_CC, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand);
    setTruncStoreAction(MVT::f64, MVT::f32, Expand);
    for (auto Op : FPOpToExpand)
      setOperationAction(Op, MVT::f64, Expand);
    setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f16, Expand);
    setTruncStoreAction(MVT::f64, MVT::f16, Expand);
  }

  if (Subtarget.is64Bit()) {
    setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
  }

  if (Subtarget.hasStdExtF()) {
    setOperationAction(ISD::FLT_ROUNDS_, XLenVT, Custom);
    setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
  }

  setOperationAction(ISD::GlobalAddress, XLenVT, Custom);
  setOperationAction(ISD::BlockAddress, XLenVT, Custom);
  setOperationAction(ISD::ConstantPool, XLenVT, Custom);
  setOperationAction(ISD::JumpTable, XLenVT, Custom);

  setOperationAction(ISD::GlobalTLSAddress, XLenVT, Custom);

  // TODO: On M-mode only targets, the cycle[h] CSR may not be present.
  // Unfortunately this can't be determined just from the ISA naming string.
  setOperationAction(ISD::READCYCLECOUNTER, MVT::i64,
                     Subtarget.is64Bit() ? Legal : Custom);

  setOperationAction(ISD::TRAP, MVT::Other, Legal);
  setOperationAction(ISD::DEBUGTRAP, MVT::Other, Legal);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
  if (Subtarget.is64Bit())
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i32, Custom);

  if (Subtarget.hasStdExtA()) {
    setMaxAtomicSizeInBitsSupported(Subtarget.getXLen());
    setMinCmpXchgSizeInBits(32);
  } else {
    setMaxAtomicSizeInBitsSupported(0);
  }

  setBooleanContents(ZeroOrOneBooleanContent);

  if (Subtarget.hasStdExtV()) {
    setBooleanVectorContents(ZeroOrOneBooleanContent);

    setOperationAction(ISD::VSCALE, XLenVT, Custom);

    // RVV intrinsics may have illegal operands.
    // We also need to custom legalize vmv.x.s.
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i16, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i8, Custom);
    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i16, Custom);
    if (Subtarget.is64Bit()) {
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i32, Custom);
    } else {
      setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom);
      setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom);
    }

    setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);

    static unsigned IntegerVPOps[] = {
        ISD::VP_ADD,  ISD::VP_SUB,  ISD::VP_MUL, ISD::VP_SDIV, ISD::VP_UDIV,
        ISD::VP_SREM, ISD::VP_UREM, ISD::VP_AND, ISD::VP_OR,   ISD::VP_XOR,
        ISD::VP_ASHR, ISD::VP_LSHR, ISD::VP_SHL};

    if (!Subtarget.is64Bit()) {
      // We must custom-lower certain vXi64 operations on RV32 due to the vector
      // element type being illegal.
      setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::i64, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::i64, Custom);

      setOperationAction(ISD::VECREDUCE_ADD, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_AND, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_OR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, MVT::i64, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, MVT::i64, Custom);
    }

    for (MVT VT : BoolVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);

      // Mask VTs are custom-expanded into a series of standard nodes
      setOperationAction(ISD::TRUNCATE, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      setOperationAction(ISD::SELECT, VT, Expand);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);

      // Expand all extending loads to types larger than this, and truncating
      // stores from types larger than this.
      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(OtherVT, VT, Expand);
        setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
      }
    }

    for (MVT VT : IntVecVTs) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);

      setOperationAction(ISD::SMIN, VT, Legal);
      setOperationAction(ISD::SMAX, VT, Legal);
      setOperationAction(ISD::UMIN, VT, Legal);
      setOperationAction(ISD::UMAX, VT, Legal);

      setOperationAction(ISD::ROTL, VT, Expand);
      setOperationAction(ISD::ROTR, VT, Expand);

      // Custom-lower extensions and truncations from/to mask types.
      setOperationAction(ISD::ANY_EXTEND, VT, Custom);
      setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
      setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

      // RVV has native int->float & float->int conversions where the
      // element type sizes are within one power-of-two of each other. Any
      // wider distances between type sizes have to be lowered as sequences
      // which progressively narrow the gap in stages.
      setOperationAction(ISD::SINT_TO_FP, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
      setOperationAction(ISD::FP_TO_SINT, VT, Custom);
      setOperationAction(ISD::FP_TO_UINT, VT, Custom);

      // Integer VTs are lowered as a series of "RISCVISD::TRUNCATE_VECTOR_VL"
      // nodes which truncate by one power of two at a time.
      setOperationAction(ISD::TRUNCATE, VT, Custom);

      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

      // Custom-lower reduction operations to set up the corresponding custom
      // nodes' operands.
      setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
      setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
      setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

      for (unsigned VPOpc : IntegerVPOps) {
        setOperationAction(VPOpc, VT, Custom);
        // RV64 must custom-legalize the i32 EVL parameter.
        if (Subtarget.is64Bit())
          setOperationAction(VPOpc, MVT::i32, Custom);
      }

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::SELECT, VT, Expand);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction(ISD::STEP_VECTOR, VT, Custom);
      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);

      for (MVT OtherVT : MVT::integer_scalable_vector_valuetypes()) {
        setTruncStoreAction(VT, OtherVT, Expand);
        setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
        setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
      }
    }

    // Expand various CCs to best match the RVV ISA, which natively supports UNE
    // but no other unordered comparisons, and supports all ordered comparisons
    // except ONE. Additionally, we expand GT,OGT,GE,OGE for optimization
    // purposes; they are expanded to their swapped-operand CCs (LT,OLT,LE,OLE),
    // and we pattern-match those back to the "original", swapping operands once
    // more. This way we catch both operations and both "vf" and "fv" forms with
    // fewer patterns.
    ISD::CondCode VFPCCToExpand[] = {
        ISD::SETO,   ISD::SETONE, ISD::SETUEQ, ISD::SETUGT,
        ISD::SETUGE, ISD::SETULT, ISD::SETULE, ISD::SETUO,
        ISD::SETGT,  ISD::SETOGT, ISD::SETGE,  ISD::SETOGE,
    };

    // Sets common operation actions on RVV floating-point vector types.
    const auto SetCommonVFPActions = [&](MVT VT) {
      setOperationAction(ISD::SPLAT_VECTOR, VT, Legal);
      // RVV has native FP_ROUND & FP_EXTEND conversions where the element type
      // sizes are within one power-of-two of each other. Therefore conversions
      // between vXf16 and vXf64 must be lowered as sequences which convert via
      // vXf32.
      setOperationAction(ISD::FP_ROUND, VT, Custom);
      setOperationAction(ISD::FP_EXTEND, VT, Custom);
      // Custom-lower insert/extract operations to simplify patterns.
      setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
      setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);
      // Expand various condition codes (explained above).
      for (auto CC : VFPCCToExpand)
        setCondCodeAction(CC, VT, Expand);

      setOperationAction(ISD::FMINNUM, VT, Legal);
      setOperationAction(ISD::FMAXNUM, VT, Legal);

      setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
      setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
      setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
      setOperationAction(ISD::FCOPYSIGN, VT, Legal);

      setOperationAction(ISD::MLOAD, VT, Custom);
      setOperationAction(ISD::MSTORE, VT, Custom);
      setOperationAction(ISD::MGATHER, VT, Custom);
      setOperationAction(ISD::MSCATTER, VT, Custom);

      setOperationAction(ISD::SELECT, VT, Expand);
      setOperationAction(ISD::SELECT_CC, VT, Expand);

      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

      setOperationAction(ISD::VECTOR_REVERSE, VT, Custom);
    };

    // Sets common extload/truncstore actions on RVV floating-point vector
    // types.
    const auto SetCommonVFPExtLoadTruncStoreActions =
        [&](MVT VT, ArrayRef<MVT::SimpleValueType> SmallerVTs) {
          for (auto SmallVT : SmallerVTs) {
            setTruncStoreAction(VT, SmallVT, Expand);
            setLoadExtAction(ISD::EXTLOAD, VT, SmallVT, Expand);
          }
        };

    if (Subtarget.hasStdExtZfh())
      for (MVT VT : F16VecVTs)
        SetCommonVFPActions(VT);

    for (MVT VT : F32VecVTs) {
      if (Subtarget.hasStdExtF())
        SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
    }

    for (MVT VT : F64VecVTs) {
      if (Subtarget.hasStdExtD())
        SetCommonVFPActions(VT);
      SetCommonVFPExtLoadTruncStoreActions(VT, F16VecVTs);
      SetCommonVFPExtLoadTruncStoreActions(VT, F32VecVTs);
    }

    if (Subtarget.useRVVForFixedLengthVectors()) {
      for (MVT VT : MVT::integer_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::integer_fixedlen_vector_valuetypes()) {
          setTruncStoreAction(VT, OtherVT, Expand);
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setLoadExtAction(ISD::SEXTLOAD, OtherVT, VT, Expand);
          setLoadExtAction(ISD::ZEXTLOAD, OtherVT, VT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);

        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);

        setOperationAction(ISD::SETCC, VT, Custom);

        setOperationAction(ISD::TRUNCATE, VT, Custom);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_AND, VT, Custom);
        setOperationAction(ISD::VECREDUCE_OR, VT, Custom);
        setOperationAction(ISD::VECREDUCE_XOR, VT, Custom);

        // The operations below differ between mask vectors and other vectors.
        if (VT.getVectorElementType() == MVT::i1) {
          setOperationAction(ISD::AND, VT, Custom);
          setOperationAction(ISD::OR, VT, Custom);
          setOperationAction(ISD::XOR, VT, Custom);
          continue;
        }

        // Use SPLAT_VECTOR to prevent type legalization from destroying the
        // splats when type legalizing i64 scalar on RV32.
        // FIXME: Use SPLAT_VECTOR for all types? DAGCombine probably needs
        // improvements first.
        if (!Subtarget.is64Bit() && VT.getVectorElementType() == MVT::i64) {
          setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
          setOperationAction(ISD::SPLAT_VECTOR_PARTS, VT, Custom);
        }

        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::ADD, VT, Custom);
        setOperationAction(ISD::MUL, VT, Custom);
        setOperationAction(ISD::SUB, VT, Custom);
        setOperationAction(ISD::AND, VT, Custom);
        setOperationAction(ISD::OR, VT, Custom);
        setOperationAction(ISD::XOR, VT, Custom);
        setOperationAction(ISD::SDIV, VT, Custom);
        setOperationAction(ISD::SREM, VT, Custom);
        setOperationAction(ISD::UDIV, VT, Custom);
        setOperationAction(ISD::UREM, VT, Custom);
        setOperationAction(ISD::SHL, VT, Custom);
        setOperationAction(ISD::SRA, VT, Custom);
        setOperationAction(ISD::SRL, VT, Custom);

        setOperationAction(ISD::SMIN, VT, Custom);
        setOperationAction(ISD::SMAX, VT, Custom);
        setOperationAction(ISD::UMIN, VT, Custom);
        setOperationAction(ISD::UMAX, VT, Custom);
        setOperationAction(ISD::ABS, VT, Custom);

        setOperationAction(ISD::MULHS, VT, Custom);
        setOperationAction(ISD::MULHU, VT, Custom);

        setOperationAction(ISD::SINT_TO_FP, VT, Custom);
        setOperationAction(ISD::UINT_TO_FP, VT, Custom);
        setOperationAction(ISD::FP_TO_SINT, VT, Custom);
        setOperationAction(ISD::FP_TO_UINT, VT, Custom);

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT, VT, Expand);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::ANY_EXTEND, VT, Custom);
        setOperationAction(ISD::SIGN_EXTEND, VT, Custom);
        setOperationAction(ISD::ZERO_EXTEND, VT, Custom);

        // Custom-lower reduction operations to set up the corresponding custom
        // nodes' operands.
        setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMAX, VT, Custom);
        setOperationAction(ISD::VECREDUCE_UMIN, VT, Custom);

        for (unsigned VPOpc : IntegerVPOps) {
          setOperationAction(VPOpc, VT, Custom);
          // RV64 must custom-legalize the i32 EVL parameter.
          if (Subtarget.is64Bit())
            setOperationAction(VPOpc, MVT::i32, Custom);
        }
      }

      for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
        if (!useRVVForFixedLengthVectorVT(VT))
          continue;

        // By default everything must be expanded.
        for (unsigned Op = 0; Op < ISD::BUILTIN_OP_END; ++Op)
          setOperationAction(Op, VT, Expand);
        for (MVT OtherVT : MVT::fp_fixedlen_vector_valuetypes()) {
          setLoadExtAction(ISD::EXTLOAD, OtherVT, VT, Expand);
          setTruncStoreAction(VT, OtherVT, Expand);
        }

        // We use EXTRACT_SUBVECTOR as a "cast" from scalable to fixed.
        setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
        setOperationAction(ISD::EXTRACT_SUBVECTOR, VT, Custom);

        setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
        setOperationAction(ISD::VECTOR_SHUFFLE, VT, Custom);
        setOperationAction(ISD::INSERT_VECTOR_ELT, VT, Custom);
        setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom);

        setOperationAction(ISD::LOAD, VT, Custom);
        setOperationAction(ISD::STORE, VT, Custom);
        setOperationAction(ISD::MLOAD, VT, Custom);
        setOperationAction(ISD::MSTORE, VT, Custom);
        setOperationAction(ISD::MGATHER, VT, Custom);
        setOperationAction(ISD::MSCATTER, VT, Custom);
        setOperationAction(ISD::FADD, VT, Custom);
        setOperationAction(ISD::FSUB, VT, Custom);
        setOperationAction(ISD::FMUL, VT, Custom);
        setOperationAction(ISD::FDIV, VT, Custom);
        setOperationAction(ISD::FNEG, VT, Custom);
        setOperationAction(ISD::FABS, VT, Custom);
        setOperationAction(ISD::FCOPYSIGN, VT, Custom);
        setOperationAction(ISD::FSQRT, VT, Custom);
        setOperationAction(ISD::FMA, VT, Custom);
        setOperationAction(ISD::FMINNUM, VT, Custom);
        setOperationAction(ISD::FMAXNUM, VT, Custom);

        setOperationAction(ISD::FP_ROUND, VT, Custom);
        setOperationAction(ISD::FP_EXTEND, VT, Custom);

        for (auto CC : VFPCCToExpand)
          setCondCodeAction(CC, VT, Expand);

        setOperationAction(ISD::VSELECT, VT, Custom);
        setOperationAction(ISD::SELECT, VT, Expand);
        setOperationAction(ISD::SELECT_CC, VT, Expand);

        setOperationAction(ISD::BITCAST, VT, Custom);

        setOperationAction(ISD::VECREDUCE_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_SEQ_FADD, VT, Custom);
        setOperationAction(ISD::VECREDUCE_FMIN, VT, Custom);
        setOperationAction(ISD::VECREDUCE_FMAX, VT, Custom);
      }

      // Custom-legalize bitcasts from fixed-length vectors to scalar types.
      setOperationAction(ISD::BITCAST, MVT::i8, Custom);
      setOperationAction(ISD::BITCAST, MVT::i16, Custom);
      setOperationAction(ISD::BITCAST, MVT::i32, Custom);
      setOperationAction(ISD::BITCAST, MVT::i64, Custom);
      setOperationAction(ISD::BITCAST, MVT::f16, Custom);
      setOperationAction(ISD::BITCAST, MVT::f32, Custom);
      setOperationAction(ISD::BITCAST, MVT::f64, Custom);
    }
  }

  // Function alignments.
  const Align FunctionAlignment(Subtarget.hasStdExtC() ? 2 : 4);
  setMinFunctionAlignment(FunctionAlignment);
  setPrefFunctionAlignment(FunctionAlignment);

  setMinimumJumpTableEntries(5);

  // Jumps are expensive, compared to logic
  setJumpIsExpensive();

  // We can use any register for comparisons
  setHasMultipleConditionRegisters();

  setTargetDAGCombine(ISD::AND);
  setTargetDAGCombine(ISD::OR);
  setTargetDAGCombine(ISD::XOR);
  if (Subtarget.hasStdExtV()) {
    setTargetDAGCombine(ISD::FCOPYSIGN);
    setTargetDAGCombine(ISD::MGATHER);
    setTargetDAGCombine(ISD::MSCATTER);
  }
}

EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
                                            LLVMContext &Context,
                                            EVT VT) const {
  if (!VT.isVector())
    return getPointerTy(DL);
  if (Subtarget.hasStdExtV() &&
      (VT.isScalableVector() || Subtarget.useRVVForFixedLengthVectors()))
    return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
  return VT.changeVectorElementTypeToInteger();
}

bool RISCVTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
                                             const CallInst &I,
                                             MachineFunction &MF,
                                             unsigned Intrinsic) const {
  switch (Intrinsic) {
  default:
    return false;
  case Intrinsic::riscv_masked_atomicrmw_xchg_i32:
  case Intrinsic::riscv_masked_atomicrmw_add_i32:
  case Intrinsic::riscv_masked_atomicrmw_sub_i32:
  case Intrinsic::riscv_masked_atomicrmw_nand_i32:
  case Intrinsic::riscv_masked_atomicrmw_max_i32:
  case Intrinsic::riscv_masked_atomicrmw_min_i32:
  case Intrinsic::riscv_masked_atomicrmw_umax_i32:
  case Intrinsic::riscv_masked_atomicrmw_umin_i32:
  case Intrinsic::riscv_masked_cmpxchg_i32:
    PointerType *PtrTy = cast<PointerType>(I.getArgOperand(0)->getType());
    Info.opc = ISD::INTRINSIC_W_CHAIN;
    Info.memVT = MVT::getVT(PtrTy->getElementType());
    Info.ptrVal = I.getArgOperand(0);
    Info.offset = 0;
    Info.align = Align(4);
    Info.flags = MachineMemOperand::MOLoad | MachineMemOperand::MOStore |
                 MachineMemOperand::MOVolatile;
    return true;
  }
}

bool RISCVTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                                const AddrMode &AM, Type *Ty,
                                                unsigned AS,
                                                Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  // Require a 12-bit signed offset.
  if (!isInt<12>(AM.BaseOffs))
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}

bool RISCVTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

bool RISCVTargetLowering::isLegalAddImmediate(int64_t Imm) const {
  return isInt<12>(Imm);
}

// On RV32, 64-bit integers are split into their high and low parts and held
// in two different registers, so the trunc is free since the low register can
// just be used.
bool RISCVTargetLowering::isTruncateFree(Type *SrcTy, Type *DstTy) const {
  if (Subtarget.is64Bit() || !SrcTy->isIntegerTy() || !DstTy->isIntegerTy())
    return false;
  unsigned SrcBits = SrcTy->getPrimitiveSizeInBits();
  unsigned DestBits = DstTy->getPrimitiveSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isTruncateFree(EVT SrcVT, EVT DstVT) const {
  if (Subtarget.is64Bit() || SrcVT.isVector() || DstVT.isVector() ||
      !SrcVT.isInteger() || !DstVT.isInteger())
    return false;
  unsigned SrcBits = SrcVT.getSizeInBits();
  unsigned DestBits = DstVT.getSizeInBits();
  return (SrcBits == 64 && DestBits == 32);
}

bool RISCVTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  // Zexts are free if they can be combined with a load.
  if (auto *LD = dyn_cast<LoadSDNode>(Val)) {
    EVT MemVT = LD->getMemoryVT();
    if ((MemVT == MVT::i8 || MemVT == MVT::i16 ||
         (Subtarget.is64Bit() && MemVT == MVT::i32)) &&
        (LD->getExtensionType() == ISD::NON_EXTLOAD ||
         LD->getExtensionType() == ISD::ZEXTLOAD))
      return true;
  }

  return TargetLowering::isZExtFree(Val, VT2);
}

bool RISCVTargetLowering::isSExtCheaperThanZExt(EVT SrcVT, EVT DstVT) const {
  return Subtarget.is64Bit() && SrcVT == MVT::i32 && DstVT == MVT::i64;
}

bool RISCVTargetLowering::isCheapToSpeculateCttz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isCheapToSpeculateCtlz() const {
  return Subtarget.hasStdExtZbb();
}

bool RISCVTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
                                       bool ForCodeSize) const {
  if (VT == MVT::f16 && !Subtarget.hasStdExtZfh())
    return false;
  if (VT == MVT::f32 && !Subtarget.hasStdExtF())
    return false;
  if (VT == MVT::f64 && !Subtarget.hasStdExtD())
    return false;
  if (Imm.isNegZero())
    return false;
  return Imm.isZero();
}

bool RISCVTargetLowering::hasBitPreservingFPLogic(EVT VT) const {
  return (VT == MVT::f16 && Subtarget.hasStdExtZfh()) ||
         (VT == MVT::f32 && Subtarget.hasStdExtF()) ||
         (VT == MVT::f64 && Subtarget.hasStdExtD());
}

MVT RISCVTargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
                                                       CallingConv::ID CC,
                                                       EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
  // end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
    return MVT::f32;

  return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
}

unsigned RISCVTargetLowering::getNumRegistersForCallingConv(
    LLVMContext &Context, CallingConv::ID CC, EVT VT) const {
  // Use f32 to pass f16 if it is legal and Zfh is not enabled. We might still
  // end up using a GPR but that will be decided based on ABI.
  if (VT == MVT::f16 && Subtarget.hasStdExtF() && !Subtarget.hasStdExtZfh())
    return 1;

  return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
}

// Changes the condition code and swaps operands if necessary, so the SetCC
// operation matches one of the comparisons supported directly by branches
// in the RISC-V ISA. May adjust compares to favor compare with 0 over compare
// with 1/-1.
static void translateSetCCForBranch(const SDLoc &DL, SDValue &LHS, SDValue &RHS,
                                    ISD::CondCode &CC, SelectionDAG &DAG) {
  // Convert X > -1 to X >= 0.
  if (CC == ISD::SETGT && isAllOnesConstant(RHS)) {
    RHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }
  // Convert X < 1 to 0 >= X.
  if (CC == ISD::SETLT && isOneConstant(RHS)) {
    RHS = LHS;
    LHS = DAG.getConstant(0, DL, RHS.getValueType());
    CC = ISD::SETGE;
    return;
  }

  switch (CC) {
  default:
    break;
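  // These four condition codes have no matching branch instruction; swap the
  // operands so they map onto BLT/BGE/BLTU/BGEU.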
  case ISD::SETGT:
  case ISD::SETLE:
  case ISD::SETUGT:
  case ISD::SETULE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

// Return the RISC-V branch opcode that matches the given DAG integer
// condition code. The CondCode must be one of those supported by the RISC-V
// ISA (see translateSetCCForBranch).
static unsigned getBranchOpcodeForIntCondCode(ISD::CondCode CC) {
  switch (CC) {
  default:
    llvm_unreachable("Unsupported CondCode");
  case ISD::SETEQ:
    return RISCV::BEQ;
  case ISD::SETNE:
    return RISCV::BNE;
  case ISD::SETLT:
    return RISCV::BLT;
  case ISD::SETGE:
    return RISCV::BGE;
  case ISD::SETULT:
    return RISCV::BLTU;
  case ISD::SETUGE:
    return RISCV::BGEU;
  }
}

RISCVII::VLMUL RISCVTargetLowering::getLMUL(MVT VT) {
  assert(VT.isScalableVector() && "Expecting a scalable vector type");
  unsigned KnownSize = VT.getSizeInBits().getKnownMinValue();
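  // Mask vectors occupy one bit per element; scale the size as if each
  // element were 8 bits so that nxvNi1 maps to the same LMUL as nxvNi8.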
  if (VT.getVectorElementType() == MVT::i1)
    KnownSize *= 8;

  switch (KnownSize) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case 8:
    return RISCVII::VLMUL::LMUL_F8;
  case 16:
    return RISCVII::VLMUL::LMUL_F4;
  case 32:
    return RISCVII::VLMUL::LMUL_F2;
  case 64:
    return RISCVII::VLMUL::LMUL_1;
  case 128:
    return RISCVII::VLMUL::LMUL_2;
  case 256:
    return RISCVII::VLMUL::LMUL_4;
  case 512:
    return RISCVII::VLMUL::LMUL_8;
  }
}

unsigned RISCVTargetLowering::getRegClassIDForLMUL(RISCVII::VLMUL LMul) {
  switch (LMul) {
  default:
    llvm_unreachable("Invalid LMUL.");
  case RISCVII::VLMUL::LMUL_F8:
  case RISCVII::VLMUL::LMUL_F4:
  case RISCVII::VLMUL::LMUL_F2:
  case RISCVII::VLMUL::LMUL_1:
    return RISCV::VRRegClassID;
  case RISCVII::VLMUL::LMUL_2:
    return RISCV::VRM2RegClassID;
  case RISCVII::VLMUL::LMUL_4:
    return RISCV::VRM4RegClassID;
  case RISCVII::VLMUL::LMUL_8:
    return RISCV::VRM8RegClassID;
  }
}

unsigned RISCVTargetLowering::getSubregIndexByMVT(MVT VT, unsigned Index) {
  RISCVII::VLMUL LMUL = getLMUL(VT);
  if (LMUL == RISCVII::VLMUL::LMUL_F8 ||
      LMUL == RISCVII::VLMUL::LMUL_F4 ||
      LMUL == RISCVII::VLMUL::LMUL_F2 ||
      LMUL == RISCVII::VLMUL::LMUL_1) {
    static_assert(RISCV::sub_vrm1_7 == RISCV::sub_vrm1_0 + 7,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm1_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_2) {
    static_assert(RISCV::sub_vrm2_3 == RISCV::sub_vrm2_0 + 3,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm2_0 + Index;
  }
  if (LMUL == RISCVII::VLMUL::LMUL_4) {
    static_assert(RISCV::sub_vrm4_1 == RISCV::sub_vrm4_0 + 1,
                  "Unexpected subreg numbering");
    return RISCV::sub_vrm4_0 + Index;
  }
  llvm_unreachable("Invalid vector type.");
}

unsigned RISCVTargetLowering::getRegClassIDForVecVT(MVT VT) {
  if (VT.getVectorElementType() == MVT::i1)
    return RISCV::VRRegClassID;
  return getRegClassIDForLMUL(getLMUL(VT));
}

// Attempt to decompose a subvector insert/extract between VecVT and
// SubVecVT via subregister indices. Returns the subregister index that
// can perform the subvector insert/extract with the given element index, as
// well as the index corresponding to any leftover subvectors that must be
// further inserted/extracted within the register class for SubVecVT.
std::pair<unsigned, unsigned>
RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
    MVT VecVT, MVT SubVecVT, unsigned InsertExtractIdx,
    const RISCVRegisterInfo *TRI) {
  static_assert((RISCV::VRM8RegClassID > RISCV::VRM4RegClassID &&
                 RISCV::VRM4RegClassID > RISCV::VRM2RegClassID &&
                 RISCV::VRM2RegClassID > RISCV::VRRegClassID),
                "Register classes not ordered");
  unsigned VecRegClassID = getRegClassIDForVecVT(VecVT);
  unsigned SubRegClassID = getRegClassIDForVecVT(SubVecVT);
  // Try to compose a subregister index that takes us from the incoming
  // LMUL>1 register class down to the outgoing one. At each step we halve
  // the LMUL:
  //   nxv16i32@12 -> nxv2i32: sub_vrm4_1_then_sub_vrm2_1_then_sub_vrm1_0
  // Note that this is not guaranteed to find a subregister index, such as
  // when we are extracting from one VR type to another.
  unsigned SubRegIdx = RISCV::NoSubRegister;
  for (const unsigned RCID :
       {RISCV::VRM4RegClassID, RISCV::VRM2RegClassID, RISCV::VRRegClassID})
    if (VecRegClassID > RCID && SubRegClassID <= RCID) {
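      // Halve the type, work out which half the element index falls in,
      // compose the matching subregister index, and rebase the index into
      // that half for the next iteration.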
      VecVT = VecVT.getHalfNumVectorElementsVT();
      bool IsHi =
          InsertExtractIdx >= VecVT.getVectorElementCount().getKnownMinValue();
      SubRegIdx = TRI->composeSubRegIndices(SubRegIdx,
                                            getSubregIndexByMVT(VecVT, IsHi));
      if (IsHi)
        InsertExtractIdx -= VecVT.getVectorElementCount().getKnownMinValue();
    }
  return {SubRegIdx, InsertExtractIdx};
}

static bool useRVVForFixedLengthVectorVT(MVT VT,
                                         const RISCVSubtarget &Subtarget) {
  assert(VT.isFixedLengthVector() && "Expected a fixed length vector type!");
  if (!Subtarget.useRVVForFixedLengthVectors())
    return false;

  // We only support a set of vector types with an equivalent number of
  // elements to avoid legalization issues. Therefore -- since we don't have
  // v512i8/v512i16/etc -- the longest fixed-length vector type we support has
  // 256 elements.
  if (VT.getVectorNumElements() > 256)
    return false;

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  // Don't use RVV for vectors we cannot scalarize if required.
  switch (VT.getVectorElementType().SimpleTy) {
  // i1 is supported but has different rules.
  default:
    return false;
  case MVT::i1:
    // Masks can only use a single register.
    if (VT.getVectorNumElements() > MinVLen)
      return false;
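    // Scale so the LMUL check below treats each mask element as one byte,
    // mirroring getLMUL's handling of i1 element types.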
    MinVLen /= 8;
    break;
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
    break;
  case MVT::f16:
    if (!Subtarget.hasStdExtZfh())
      return false;
    break;
  case MVT::f32:
    if (!Subtarget.hasStdExtF())
      return false;
    break;
  case MVT::f64:
    if (!Subtarget.hasStdExtD())
      return false;
    break;
  }

  unsigned LMul = divideCeil(VT.getSizeInBits(), MinVLen);
  // Don't use RVV for types that don't fit.
  if (LMul > Subtarget.getMaxLMULForFixedLengthVectors())
    return false;

  // TODO: Perhaps an artificial restriction, but worth having whilst getting
  // the base fixed length RVV support in place.
  if (!VT.isPow2VectorType())
    return false;

  return true;
}

bool RISCVTargetLowering::useRVVForFixedLengthVectorVT(MVT VT) const {
  return ::useRVVForFixedLengthVectorVT(VT, Subtarget);
}

// Return the largest legal scalable vector type that matches VT's element type.
static MVT getContainerForFixedLengthVector(const TargetLowering &TLI, MVT VT,
                                            const RISCVSubtarget &Subtarget) {
  // This may be called before legal types are set up.
  assert(((VT.isFixedLengthVector() && TLI.isTypeLegal(VT)) ||
          useRVVForFixedLengthVectorVT(VT, Subtarget)) &&
         "Expected legal fixed length vector!");

  unsigned MinVLen = Subtarget.getMinRVVVectorSizeInBits();

  MVT EltVT = VT.getVectorElementType();
  switch (EltVT.SimpleTy) {
  default:
    llvm_unreachable("unexpected element type for RVV container");
  case MVT::i1:
  case MVT::i8:
  case MVT::i16:
  case MVT::i32:
  case MVT::i64:
  case MVT::f16:
  case MVT::f32:
  case MVT::f64: {
   1231     // We prefer to use LMUL=1 for VLEN sized types. Use fractional lmuls for
   1232     // narrower types, but we can't have a fractional LMUL with a denominator
   1233     // greater than 64/SEW.
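            // E.g. assuming a 128-bit minimum VLEN: v8i16 maps to nxv4i16
            // (LMUL=1) and v2i16 maps to nxv1i16 (LMUL=1/4, the smallest legal
            // container for SEW=16).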
   1234     unsigned NumElts =
   1235         divideCeil(VT.getVectorNumElements(), MinVLen / RISCV::RVVBitsPerBlock);
   1236     return MVT::getScalableVectorVT(EltVT, NumElts);
   1237   }
   1238   }
   1239 }
   1240 
   1241 static MVT getContainerForFixedLengthVector(SelectionDAG &DAG, MVT VT,
   1242                                             const RISCVSubtarget &Subtarget) {
   1243   return getContainerForFixedLengthVector(DAG.getTargetLoweringInfo(), VT,
   1244                                           Subtarget);
   1245 }
   1246 
   1247 MVT RISCVTargetLowering::getContainerForFixedLengthVector(MVT VT) const {
   1248   return ::getContainerForFixedLengthVector(*this, VT, getSubtarget());
   1249 }
   1250 
   1251 // Grow V to consume an entire RVV register.
   1252 static SDValue convertToScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
   1253                                        const RISCVSubtarget &Subtarget) {
   1254   assert(VT.isScalableVector() &&
   1255          "Expected to convert into a scalable vector!");
   1256   assert(V.getValueType().isFixedLengthVector() &&
   1257          "Expected a fixed length vector operand!");
   1258   SDLoc DL(V);
   1259   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
   1260   return DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, DAG.getUNDEF(VT), V, Zero);
   1261 }
   1262 
   1263 // Shrink V so it's just big enough to maintain a VT's worth of data.
   1264 static SDValue convertFromScalableVector(EVT VT, SDValue V, SelectionDAG &DAG,
   1265                                          const RISCVSubtarget &Subtarget) {
   1266   assert(VT.isFixedLengthVector() &&
   1267          "Expected to convert into a fixed length vector!");
   1268   assert(V.getValueType().isScalableVector() &&
   1269          "Expected a scalable vector operand!");
   1270   SDLoc DL(V);
   1271   SDValue Zero = DAG.getConstant(0, DL, Subtarget.getXLenVT());
   1272   return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V, Zero);
   1273 }
   1274 
   1275 // Gets the two common "VL" operands: an all-ones mask and the vector length.
   1276 // VecVT is a vector type, either fixed-length or scalable, and ContainerVT is
   1277 // the vector type that it is contained in.
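        // E.g. for a v4i32 contained in nxv2i32 this returns an nxv2i1 all-ones
        // mask and VL=4; for a scalable type the VL is the X0 sentinel register,
        // which encodes "VLMAX".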
   1278 static std::pair<SDValue, SDValue>
   1279 getDefaultVLOps(MVT VecVT, MVT ContainerVT, SDLoc DL, SelectionDAG &DAG,
   1280                 const RISCVSubtarget &Subtarget) {
   1281   assert(ContainerVT.isScalableVector() && "Expecting scalable container type");
   1282   MVT XLenVT = Subtarget.getXLenVT();
   1283   SDValue VL = VecVT.isFixedLengthVector()
   1284                    ? DAG.getConstant(VecVT.getVectorNumElements(), DL, XLenVT)
   1285                    : DAG.getRegister(RISCV::X0, XLenVT);
   1286   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
   1287   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
   1288   return {Mask, VL};
   1289 }
   1290 
   1291 // As above but assuming the given type is a scalable vector type.
   1292 static std::pair<SDValue, SDValue>
   1293 getDefaultScalableVLOps(MVT VecVT, SDLoc DL, SelectionDAG &DAG,
   1294                         const RISCVSubtarget &Subtarget) {
   1295   assert(VecVT.isScalableVector() && "Expecting a scalable vector");
   1296   return getDefaultVLOps(VecVT, VecVT, DL, DAG, Subtarget);
   1297 }
   1298 
   1299 // The state of RVV BUILD_VECTOR and VECTOR_SHUFFLE lowering is that very few
   1300 // of either are (currently) supported. This can get us into an infinite loop
   1301 // where we try to lower a BUILD_VECTOR as a VECTOR_SHUFFLE as a BUILD_VECTOR
   1302 // as a ..., etc.
   1303 // Until either (or both) of these can reliably lower any node, reporting that
   1304 // we don't want to expand BUILD_VECTORs via VECTOR_SHUFFLEs at least breaks
   1305 // the infinite loop. Note that this lowers BUILD_VECTOR through the stack,
   1306 // which is not desirable.
   1307 bool RISCVTargetLowering::shouldExpandBuildVectorWithShuffles(
   1308     EVT VT, unsigned DefinedValues) const {
   1309   return false;
   1310 }
   1311 
   1312 bool RISCVTargetLowering::isShuffleMaskLegal(ArrayRef<int> M, EVT VT) const {
   1313   // Only splats are currently supported.
   1314   if (ShuffleVectorSDNode::isSplatMask(M.data(), VT))
   1315     return true;
   1316 
   1317   return false;
   1318 }
   1319 
   1320 static SDValue lowerSPLAT_VECTOR(SDValue Op, SelectionDAG &DAG,
   1321                                  const RISCVSubtarget &Subtarget) {
   1322   MVT VT = Op.getSimpleValueType();
   1323   assert(VT.isFixedLengthVector() && "Unexpected vector!");
   1324 
   1325   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
   1326 
   1327   SDLoc DL(Op);
   1328   SDValue Mask, VL;
   1329   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   1330 
   1331   unsigned Opc =
   1332       VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
   1333   SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, Op.getOperand(0), VL);
   1334   return convertFromScalableVector(VT, Splat, DAG, Subtarget);
   1335 }
   1336 
   1337 static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
   1338                                  const RISCVSubtarget &Subtarget) {
   1339   MVT VT = Op.getSimpleValueType();
   1340   assert(VT.isFixedLengthVector() && "Unexpected vector!");
   1341 
   1342   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
   1343 
   1344   SDLoc DL(Op);
   1345   SDValue Mask, VL;
   1346   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   1347 
   1348   MVT XLenVT = Subtarget.getXLenVT();
   1349   unsigned NumElts = Op.getNumOperands();
   1350 
   1351   if (VT.getVectorElementType() == MVT::i1) {
   1352     if (ISD::isBuildVectorAllZeros(Op.getNode())) {
   1353       SDValue VMClr = DAG.getNode(RISCVISD::VMCLR_VL, DL, ContainerVT, VL);
   1354       return convertFromScalableVector(VT, VMClr, DAG, Subtarget);
   1355     }
   1356 
   1357     if (ISD::isBuildVectorAllOnes(Op.getNode())) {
   1358       SDValue VMSet = DAG.getNode(RISCVISD::VMSET_VL, DL, ContainerVT, VL);
   1359       return convertFromScalableVector(VT, VMSet, DAG, Subtarget);
   1360     }
   1361 
   1362     // Lower constant mask BUILD_VECTORs via an integer vector type, in
   1363     // scalar integer chunks whose bit-width depends on the number of mask
   1364     // bits and XLEN.
   1365     // First, determine the most appropriate scalar integer type to use. This
   1366     // is at most XLenVT, but may be shrunk to a smaller vector element type
   1367     // according to the size of the final vector - use i8 chunks rather than
   1368     // XLenVT if we're producing a v8i1. This results in more consistent
   1369     // codegen across RV32 and RV64.
   1370     unsigned NumViaIntegerBits =
   1371         std::min(std::max(NumElts, 8u), Subtarget.getXLen());
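            // For illustration: a constant v4i1 <1,0,1,1> uses 8 integer bits,
            // packing into the single i8 value 0b1101; the resulting v1i8 is
            // bitcast to v8i1 below and the low v4i1 extracted from it.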
   1372     if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode())) {
   1373       // If we have to use more than one INSERT_VECTOR_ELT then this
   1374       // optimization is likely to increase code size; avoid performing it in
   1375       // such a case. We can go through the stack as long as we're at least
   1376       // byte-sized.
   1377       if (DAG.shouldOptForSize() && NumElts > NumViaIntegerBits)
   1378         return SDValue();
   1379       // Now we can create our integer vector type. Note that it may be larger
   1380       // than the resulting mask type: v4i1 would use v1i8 as its integer type.
   1381       MVT IntegerViaVecVT =
   1382           MVT::getVectorVT(MVT::getIntegerVT(NumViaIntegerBits),
   1383                            divideCeil(NumElts, NumViaIntegerBits));
   1384 
   1385       uint64_t Bits = 0;
   1386       unsigned BitPos = 0, IntegerEltIdx = 0;
   1387       SDValue Vec = DAG.getUNDEF(IntegerViaVecVT);
   1388 
   1389       for (unsigned I = 0; I < NumElts; I++, BitPos++) {
   1390         // Once we accumulate enough bits to fill our scalar type, insert into
   1391         // our vector and clear our accumulated data.
   1392         if (I != 0 && I % NumViaIntegerBits == 0) {
   1393           if (NumViaIntegerBits <= 32)
   1394             Bits = SignExtend64(Bits, 32);
   1395           SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
   1396           Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec,
   1397                             Elt, DAG.getConstant(IntegerEltIdx, DL, XLenVT));
   1398           Bits = 0;
   1399           BitPos = 0;
   1400           IntegerEltIdx++;
   1401         }
   1402         SDValue V = Op.getOperand(I);
   1403         bool BitValue = !V.isUndef() && cast<ConstantSDNode>(V)->getZExtValue();
   1404         Bits |= ((uint64_t)BitValue << BitPos);
   1405       }
   1406 
   1407       // Insert the (remaining) scalar value into position in our integer
   1408       // vector type.
   1409       if (NumViaIntegerBits <= 32)
   1410         Bits = SignExtend64(Bits, 32);
   1411       SDValue Elt = DAG.getConstant(Bits, DL, XLenVT);
   1412       Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, IntegerViaVecVT, Vec, Elt,
   1413                         DAG.getConstant(IntegerEltIdx, DL, XLenVT));
   1414 
   1415       if (NumElts < NumViaIntegerBits) {
   1416         // If we're producing a smaller vector than our minimum legal integer
   1417         // type, bitcast to the equivalent (known-legal) mask type, and extract
   1418         // our final mask.
   1419         assert(IntegerViaVecVT == MVT::v1i8 && "Unexpected mask vector type");
   1420         Vec = DAG.getBitcast(MVT::v8i1, Vec);
   1421         Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Vec,
   1422                           DAG.getConstant(0, DL, XLenVT));
   1423       } else {
   1424         // Else we must have produced an integer type with the same size as the
   1425         // mask type; bitcast for the final result.
   1426         assert(VT.getSizeInBits() == IntegerViaVecVT.getSizeInBits());
   1427         Vec = DAG.getBitcast(VT, Vec);
   1428       }
   1429 
   1430       return Vec;
   1431     }
   1432 
   1433     // A BUILD_VECTOR can be lowered as a SETCC. For each fixed-length mask
   1434     // vector type, we have a legal equivalently-sized i8 type, so we can use
   1435     // that.
   1436     MVT WideVecVT = VT.changeVectorElementType(MVT::i8);
   1437     SDValue VecZero = DAG.getConstant(0, DL, WideVecVT);
   1438 
   1439     SDValue WideVec;
   1440     if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
   1441       // For a splat, perform a scalar truncate before creating the wider
   1442       // vector.
   1443       assert(Splat.getValueType() == XLenVT &&
   1444              "Unexpected type for i1 splat value");
   1445       Splat = DAG.getNode(ISD::AND, DL, XLenVT, Splat,
   1446                           DAG.getConstant(1, DL, XLenVT));
   1447       WideVec = DAG.getSplatBuildVector(WideVecVT, DL, Splat);
   1448     } else {
   1449       SmallVector<SDValue, 8> Ops(Op->op_values());
   1450       WideVec = DAG.getBuildVector(WideVecVT, DL, Ops);
   1451       SDValue VecOne = DAG.getConstant(1, DL, WideVecVT);
   1452       WideVec = DAG.getNode(ISD::AND, DL, WideVecVT, WideVec, VecOne);
   1453     }
   1454 
   1455     return DAG.getSetCC(DL, VT, WideVec, VecZero, ISD::SETNE);
   1456   }
   1457 
   1458   if (SDValue Splat = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
   1459     unsigned Opc = VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL
   1460                                         : RISCVISD::VMV_V_X_VL;
   1461     Splat = DAG.getNode(Opc, DL, ContainerVT, Splat, VL);
   1462     return convertFromScalableVector(VT, Splat, DAG, Subtarget);
   1463   }
   1464 
   1465   // Try and match an index sequence, which we can lower directly to the vid
   1466   // instruction. An all-undef vector is matched by getSplatValue, above.
   1467   if (VT.isInteger()) {
   1468     bool IsVID = true;
   1469     for (unsigned I = 0; I < NumElts && IsVID; I++)
   1470       IsVID &= Op.getOperand(I).isUndef() ||
   1471                (isa<ConstantSDNode>(Op.getOperand(I)) &&
   1472                 Op.getConstantOperandVal(I) == I);
   1473 
   1474     if (IsVID) {
   1475       SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL);
   1476       return convertFromScalableVector(VT, VID, DAG, Subtarget);
   1477     }
   1478   }
   1479 
   1480   // Attempt to detect "hidden" splats, which only reveal themselves as splats
   1481   // when re-interpreted as a vector with a larger element type. For example,
   1482   //   v4i16 = build_vector i16 0, i16 1, i16 0, i16 1
   1483   // could instead be splatted as
   1484   //   v2i32 = build_vector i32 0x00010000, i32 0x00010000
   1485   // TODO: This optimization could also work on non-constant splats, but it
   1486   // would require bit-manipulation instructions to construct the splat value.
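          // In the example above, the repeated sequence is {i16 0, i16 1}, so
          // SeqLen = 2, ViaIntVT = i32 and the amalgamated splat value is
          // (1 << 16) = 0x00010000, as expected.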
   1487   SmallVector<SDValue> Sequence;
   1488   unsigned EltBitSize = VT.getScalarSizeInBits();
   1489   const auto *BV = cast<BuildVectorSDNode>(Op);
   1490   if (VT.isInteger() && EltBitSize < 64 &&
   1491       ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) &&
   1492       BV->getRepeatedSequence(Sequence) &&
   1493       (Sequence.size() * EltBitSize) <= 64) {
   1494     unsigned SeqLen = Sequence.size();
   1495     MVT ViaIntVT = MVT::getIntegerVT(EltBitSize * SeqLen);
   1496     MVT ViaVecVT = MVT::getVectorVT(ViaIntVT, NumElts / SeqLen);
   1497     assert((ViaIntVT == MVT::i16 || ViaIntVT == MVT::i32 ||
   1498             ViaIntVT == MVT::i64) &&
   1499            "Unexpected sequence type");
   1500 
   1501     unsigned EltIdx = 0;
   1502     uint64_t EltMask = maskTrailingOnes<uint64_t>(EltBitSize);
   1503     uint64_t SplatValue = 0;
   1504     // Construct the amalgamated value which can be splatted as this larger
   1505     // vector type.
   1506     for (const auto &SeqV : Sequence) {
   1507       if (!SeqV.isUndef())
   1508         SplatValue |= ((cast<ConstantSDNode>(SeqV)->getZExtValue() & EltMask)
   1509                        << (EltIdx * EltBitSize));
   1510       EltIdx++;
   1511     }
   1512 
   1513     // On RV64, sign-extend from 32 to 64 bits where possible in order to
   1514     // achieve better constant materialization.
   1515     if (Subtarget.is64Bit() && ViaIntVT == MVT::i32)
   1516       SplatValue = SignExtend64(SplatValue, 32);
   1517 
   1518     // Since we can't introduce illegal i64 types at this stage, we can only
   1519     // perform an i64 splat on RV32 if the value is the sign-extension of its
   1520     // low 32 bits (isInt<32>); that way we can use RVV instructions to splat.
   1521     assert((ViaIntVT.bitsLE(XLenVT) ||
   1522             (!Subtarget.is64Bit() && ViaIntVT == MVT::i64)) &&
   1523            "Unexpected bitcast sequence");
   1524     if (ViaIntVT.bitsLE(XLenVT) || isInt<32>(SplatValue)) {
   1525       SDValue ViaVL =
   1526           DAG.getConstant(ViaVecVT.getVectorNumElements(), DL, XLenVT);
   1527       MVT ViaContainerVT =
   1528           getContainerForFixedLengthVector(DAG, ViaVecVT, Subtarget);
   1529       SDValue Splat =
   1530           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ViaContainerVT,
   1531                       DAG.getConstant(SplatValue, DL, XLenVT), ViaVL);
   1532       Splat = convertFromScalableVector(ViaVecVT, Splat, DAG, Subtarget);
   1533       return DAG.getBitcast(VT, Splat);
   1534     }
   1535   }
   1536 
   1537   // Try and optimize BUILD_VECTORs with "dominant values" - these are values
   1538   // which constitute a large proportion of the elements. In such cases we can
   1539   // splat a vector with the dominant element and make up the shortfall with
   1540   // INSERT_VECTOR_ELTs.
   1541   // Note that this includes vectors of 2 elements by association. The
   1542   // upper-most element is the "dominant" one, allowing us to use a splat to
   1543   // "insert" the upper element, and an insert of the lower element at position
   1544   // 0, which improves codegen.
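          // For illustration: v4i32 <a,b,a,a> is lowered as a splat of a plus
          // an insert of b at index 1, while <a,b,b,a> splats a and then blends
          // b in with a single vselect on the mask <0,1,1,0>.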
   1545   SDValue DominantValue;
   1546   unsigned MostCommonCount = 0;
   1547   DenseMap<SDValue, unsigned> ValueCounts;
   1548   unsigned NumUndefElts =
   1549       count_if(Op->op_values(), [](const SDValue &V) { return V.isUndef(); });
   1550 
   1551   for (SDValue V : Op->op_values()) {
   1552     if (V.isUndef())
   1553       continue;
   1554 
   1555     ValueCounts.insert(std::make_pair(V, 0));
   1556     unsigned &Count = ValueCounts[V];
   1557 
   1558     // Is this value dominant? In case of a tie, prefer the highest element as
   1559     // it's cheaper to insert near the beginning of a vector than it is at the
   1560     // end.
   1561     if (++Count >= MostCommonCount) {
   1562       DominantValue = V;
   1563       MostCommonCount = Count;
   1564     }
   1565   }
   1566 
   1567   assert(DominantValue && "Not expecting an all-undef BUILD_VECTOR");
   1568   unsigned NumDefElts = NumElts - NumUndefElts;
   1569   unsigned DominantValueCountThreshold = NumDefElts <= 2 ? 0 : NumDefElts - 2;
   1570 
   1571   // Don't perform this optimization when optimizing for size, since
   1572   // materializing elements and inserting them tends to cause code bloat.
   1573   if (!DAG.shouldOptForSize() &&
   1574       ((MostCommonCount > DominantValueCountThreshold) ||
   1575        (ValueCounts.size() <= Log2_32(NumDefElts)))) {
   1576     // Start by splatting the most common element.
   1577     SDValue Vec = DAG.getSplatBuildVector(VT, DL, DominantValue);
   1578 
   1579     DenseSet<SDValue> Processed{DominantValue};
   1580     MVT SelMaskTy = VT.changeVectorElementType(MVT::i1);
   1581     for (const auto &OpIdx : enumerate(Op->ops())) {
   1582       const SDValue &V = OpIdx.value();
   1583       if (V.isUndef() || !Processed.insert(V).second)
   1584         continue;
   1585       if (ValueCounts[V] == 1) {
   1586         Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V,
   1587                           DAG.getConstant(OpIdx.index(), DL, XLenVT));
   1588       } else {
   1589         // Blend in all instances of this value using a VSELECT, using a
   1590         // mask where each bit signals whether that element is the one
   1591         // we're after.
   1592         SmallVector<SDValue> Ops;
   1593         transform(Op->op_values(), std::back_inserter(Ops), [&](SDValue V1) {
   1594           return DAG.getConstant(V == V1, DL, XLenVT);
   1595         });
   1596         Vec = DAG.getNode(ISD::VSELECT, DL, VT,
   1597                           DAG.getBuildVector(SelMaskTy, DL, Ops),
   1598                           DAG.getSplatBuildVector(VT, DL, V), Vec);
   1599       }
   1600     }
   1601 
   1602     return Vec;
   1603   }
   1604 
   1605   return SDValue();
   1606 }
   1607 
   1608 // Use a stack slot to splat the two i32 values in Lo/Hi to the desired
   1609 // vector nxvXi64 VT.
   1610 static SDValue splatPartsI64ThroughStack(const SDLoc &DL, MVT VT, SDValue Lo,
   1611                                          SDValue Hi, SDValue VL,
   1612                                          SelectionDAG &DAG) {
   1613   assert(VT.getVectorElementType() == MVT::i64 && VT.isScalableVector() &&
   1614          Lo.getValueType() == MVT::i32 && Hi.getValueType() == MVT::i32 &&
   1615          "Unexpected VTs!");
   1616   MachineFunction &MF = DAG.getMachineFunction();
   1617   RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();
   1618 
   1619   // We use the same frame index we use for moving two i32s into 64-bit FPR.
   1620   // This is an analogous operation.
   1621   int FI = FuncInfo->getMoveF64FrameIndex(MF);
   1622   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
   1623   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   1624   SDValue StackSlot =
   1625       DAG.getFrameIndex(FI, TLI.getPointerTy(DAG.getDataLayout()));
   1626 
   1627   SDValue Chain = DAG.getEntryNode();
   1628   Lo = DAG.getStore(Chain, DL, Lo, StackSlot, MPI, Align(8));
   1629 
   1630   SDValue OffsetSlot =
   1631       DAG.getMemBasePlusOffset(StackSlot, TypeSize::Fixed(4), DL);
   1632   Hi = DAG.getStore(Chain, DL, Hi, OffsetSlot, MPI.getWithOffset(4), Align(8));
   1633 
   1634   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
   1635 
   1636   SDVTList VTs = DAG.getVTList({VT, MVT::Other});
   1637   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, MVT::i64);
   1638   SDValue Ops[] = {Chain, IntID, StackSlot,
   1639                    DAG.getRegister(RISCV::X0, MVT::i64), VL};
   1640 
   1641   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, MVT::i64,
   1642                                  MPI, Align(8), MachineMemOperand::MOLoad);
   1643 }
   1644 
   1645 static SDValue splatPartsI64WithVL(const SDLoc &DL, MVT VT, SDValue Lo,
   1646                                    SDValue Hi, SDValue VL, SelectionDAG &DAG) {
   1647   if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
   1648     int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
   1649     int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
   1650     // If Hi is just the sign-extension of Lo (all copies of Lo's sign bit),
   1651     // lower this as a custom node to try and match RVV vector/scalar instructions.
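            // E.g. the i64 -5 splits into Lo = 0xFFFFFFFB and Hi = 0xFFFFFFFF;
            // since LoC >> 31 == HiC, a single VMV_V_X_VL of Lo suffices, as
            // vmv.v.x sign-extends its scalar when SEW > XLEN.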
   1652     if ((LoC >> 31) == HiC)
   1653       return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Lo, VL);
   1654   }
   1655 
   1656   // Fall back to a stack store and stride x0 vector load.
   1657   return splatPartsI64ThroughStack(DL, VT, Lo, Hi, VL, DAG);
   1658 }
   1659 
   1660 // Called by type legalization to handle splat of i64 on RV32.
   1661 // FIXME: We can optimize this when the type has sign or zero bits in one
   1662 // of the halves.
   1663 static SDValue splatSplitI64WithVL(const SDLoc &DL, MVT VT, SDValue Scalar,
   1664                                    SDValue VL, SelectionDAG &DAG) {
   1665   assert(Scalar.getValueType() == MVT::i64 && "Unexpected VT!");
   1666   SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
   1667                            DAG.getConstant(0, DL, MVT::i32));
   1668   SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
   1669                            DAG.getConstant(1, DL, MVT::i32));
   1670   return splatPartsI64WithVL(DL, VT, Lo, Hi, VL, DAG);
   1671 }
   1672 
   1673 // This function lowers a splat of the scalar operand Scalar with the vector
   1674 // length VL. It ensures the final sequence is type legal, which is useful when
   1675 // lowering a splat after type legalization.
   1676 static SDValue lowerScalarSplat(SDValue Scalar, SDValue VL, MVT VT, SDLoc DL,
   1677                                 SelectionDAG &DAG,
   1678                                 const RISCVSubtarget &Subtarget) {
   1679   if (VT.isFloatingPoint())
   1680     return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, VT, Scalar, VL);
   1681 
   1682   MVT XLenVT = Subtarget.getXLenVT();
   1683 
   1684   // Simplest case is that the operand needs to be promoted to XLenVT.
   1685   if (Scalar.getValueType().bitsLE(XLenVT)) {
   1686     // If the operand is a constant, sign extend to increase our chances
   1687     // of being able to use a .vi instruction. ANY_EXTEND would become a
   1688     // zero extend and the simm5 check in isel would fail.
   1689     // FIXME: Should we ignore the upper bits in isel instead?
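            // E.g. splatting i8 -3: sign-extending yields -3, which fits simm5
            // and can select vmv.v.i, whereas a zero extend would yield 253 and
            // force materializing the scalar for vmv.v.x.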
   1690     unsigned ExtOpc =
   1691         isa<ConstantSDNode>(Scalar) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
   1692     Scalar = DAG.getNode(ExtOpc, DL, XLenVT, Scalar);
   1693     return DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Scalar, VL);
   1694   }
   1695 
   1696   assert(XLenVT == MVT::i32 && Scalar.getValueType() == MVT::i64 &&
   1697          "Unexpected scalar for splat lowering!");
   1698 
   1699   // Otherwise use the more complicated splatting algorithm.
   1700   return splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
   1701 }
   1702 
   1703 static SDValue lowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
   1704                                    const RISCVSubtarget &Subtarget) {
   1705   SDValue V1 = Op.getOperand(0);
   1706   SDValue V2 = Op.getOperand(1);
   1707   SDLoc DL(Op);
   1708   MVT XLenVT = Subtarget.getXLenVT();
   1709   MVT VT = Op.getSimpleValueType();
   1710   unsigned NumElts = VT.getVectorNumElements();
   1711   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(Op.getNode());
   1712 
   1713   MVT ContainerVT = getContainerForFixedLengthVector(DAG, VT, Subtarget);
   1714 
   1715   SDValue TrueMask, VL;
   1716   std::tie(TrueMask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   1717 
   1718   if (SVN->isSplat()) {
   1719     const int Lane = SVN->getSplatIndex();
   1720     if (Lane >= 0) {
   1721       MVT SVT = VT.getVectorElementType();
   1722 
   1723       // Turn splatted vector load into a strided load with an X0 stride.
   1724       SDValue V = V1;
   1725       // Peek through CONCAT_VECTORS as VectorCombine can concat a vector
   1726       // with undef.
   1727       // FIXME: Peek through INSERT_SUBVECTOR, EXTRACT_SUBVECTOR, bitcasts?
   1728       int Offset = Lane;
   1729       if (V.getOpcode() == ISD::CONCAT_VECTORS) {
   1730         int OpElements =
   1731             V.getOperand(0).getSimpleValueType().getVectorNumElements();
   1732         V = V.getOperand(Offset / OpElements);
   1733         Offset %= OpElements;
   1734       }
   1735 
   1736       // We need to ensure the load isn't atomic or volatile.
   1737       if (ISD::isNormalLoad(V.getNode()) && cast<LoadSDNode>(V)->isSimple()) {
   1738         auto *Ld = cast<LoadSDNode>(V);
   1739         Offset *= SVT.getStoreSize();
   1740         SDValue NewAddr = DAG.getMemBasePlusOffset(Ld->getBasePtr(),
   1741                                                    TypeSize::Fixed(Offset), DL);
   1742 
   1743         // If this is SEW=64 on RV32, use a strided load with a stride of x0.
   1744         if (SVT.isInteger() && SVT.bitsGT(XLenVT)) {
   1745           SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
   1746           SDValue IntID =
   1747               DAG.getTargetConstant(Intrinsic::riscv_vlse, DL, XLenVT);
   1748           SDValue Ops[] = {Ld->getChain(), IntID, NewAddr,
   1749                            DAG.getRegister(RISCV::X0, XLenVT), VL};
   1750           SDValue NewLoad = DAG.getMemIntrinsicNode(
   1751               ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops, SVT,
   1752               DAG.getMachineFunction().getMachineMemOperand(
   1753                   Ld->getMemOperand(), Offset, SVT.getStoreSize()));
   1754           DAG.makeEquivalentMemoryOrdering(Ld, NewLoad);
   1755           return convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
   1756         }
   1757 
   1758         // Otherwise use a scalar load and splat. This will give the best
   1759         // opportunity to fold a splat into the operation. ISel can turn it into
   1760         // the x0 strided load if we aren't able to fold away the select.
   1761         if (SVT.isFloatingPoint())
   1762           V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr,
   1763                           Ld->getPointerInfo().getWithOffset(Offset),
   1764                           Ld->getOriginalAlign(),
   1765                           Ld->getMemOperand()->getFlags());
   1766         else
   1767           V = DAG.getExtLoad(ISD::SEXTLOAD, DL, XLenVT, Ld->getChain(), NewAddr,
   1768                              Ld->getPointerInfo().getWithOffset(Offset), SVT,
   1769                              Ld->getOriginalAlign(),
   1770                              Ld->getMemOperand()->getFlags());
   1771         DAG.makeEquivalentMemoryOrdering(Ld, V);
   1772 
   1773         unsigned Opc =
   1774             VT.isFloatingPoint() ? RISCVISD::VFMV_V_F_VL : RISCVISD::VMV_V_X_VL;
   1775         SDValue Splat = DAG.getNode(Opc, DL, ContainerVT, V, VL);
   1776         return convertFromScalableVector(VT, Splat, DAG, Subtarget);
   1777       }
   1778 
   1779       V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
   1780       assert(Lane < (int)NumElts && "Unexpected lane!");
   1781       SDValue Gather =
   1782           DAG.getNode(RISCVISD::VRGATHER_VX_VL, DL, ContainerVT, V1,
   1783                       DAG.getConstant(Lane, DL, XLenVT), TrueMask, VL);
   1784       return convertFromScalableVector(VT, Gather, DAG, Subtarget);
   1785     }
   1786   }
   1787 
   1788   // Detect shuffles which can be re-expressed as vector selects; these are
   1789   // shuffles in which each element in the destination is taken from an element
   1790   // at the corresponding index in either source vector.
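          // E.g. with NumElts = 4, the mask <0,5,2,7> is a select: lanes 0 and 2
          // come from V1, and lanes 1 and 3 (indices 5 and 7, i.e. 1 and 3
          // modulo NumElts) come from V2.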
   1791   bool IsSelect = all_of(enumerate(SVN->getMask()), [&](const auto &MaskIdx) {
   1792     int MaskIndex = MaskIdx.value();
   1793     return MaskIndex < 0 || MaskIdx.index() == (unsigned)MaskIndex % NumElts;
   1794   });
   1795 
   1796   assert(!V1.isUndef() && "Unexpected shuffle canonicalization");
   1797 
   1798   SmallVector<SDValue> MaskVals;
   1799   // As a backup, shuffles can be lowered via a vrgather instruction, possibly
   1800   // merged with a second vrgather.
   1801   SmallVector<SDValue> GatherIndicesLHS, GatherIndicesRHS;
   1802 
   1803   // By default we preserve the original operand order, and use a mask to
   1804   // select LHS as true and RHS as false. However, since RVV vector selects may
   1805   // feature splats but only on the LHS, we may choose to invert our mask and
   1806   // instead select between RHS and LHS.
   1807   bool SwapOps = DAG.isSplatValue(V2) && !DAG.isSplatValue(V1);
   1808   bool InvertMask = IsSelect == SwapOps;
   1809 
   1810   // Now construct the mask that will be used by the vselect or blended
   1811   // vrgather operation. For vrgathers, construct the appropriate indices into
   1812   // each vector.
   1813   for (int MaskIndex : SVN->getMask()) {
   1814     bool SelectMaskVal = (MaskIndex < (int)NumElts) ^ InvertMask;
   1815     MaskVals.push_back(DAG.getConstant(SelectMaskVal, DL, XLenVT));
   1816     if (!IsSelect) {
   1817       bool IsLHS = MaskIndex < (int)NumElts;
   1818       // For "undef" elements of -1, shuffle in element 0 instead.
   1819       GatherIndicesLHS.push_back(
   1820           DAG.getConstant(IsLHS ? std::max(MaskIndex, 0) : 0, DL, XLenVT));
   1821       // TODO: If we're masking out unused elements anyway, it might produce
   1822       // better code if we use the most-common element index instead of 0.
   1823       GatherIndicesRHS.push_back(
   1824           DAG.getConstant(IsLHS ? 0 : MaskIndex - NumElts, DL, XLenVT));
   1825     }
   1826   }
   1827 
   1828   if (SwapOps) {
   1829     std::swap(V1, V2);
   1830     std::swap(GatherIndicesLHS, GatherIndicesRHS);
   1831   }
   1832 
   1833   assert(MaskVals.size() == NumElts && "Unexpected select-like shuffle");
   1834   MVT MaskVT = MVT::getVectorVT(MVT::i1, NumElts);
   1835   SDValue SelectMask = DAG.getBuildVector(MaskVT, DL, MaskVals);
   1836 
   1837   if (IsSelect)
   1838     return DAG.getNode(ISD::VSELECT, DL, VT, SelectMask, V1, V2);
   1839 
   1840   if (VT.getScalarSizeInBits() == 8 && VT.getVectorNumElements() > 256) {
   1841     // On such a large vector we're unable to use i8 as the index type.
   1842     // FIXME: We could promote the index to i16 and use vrgatherei16, but that
   1843     // may involve vector splitting if we're already at LMUL=8, or our
   1844     // user-supplied maximum fixed-length LMUL.
   1845     return SDValue();
   1846   }
   1847 
   1848   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
   1849   MVT IndexVT = VT.changeTypeToInteger();
   1850   // Since we can't introduce illegal index types at this stage, use i16 and
   1851   // vrgatherei16 if the corresponding index type for plain vrgather is greater
   1852   // than XLenVT.
   1853   if (IndexVT.getScalarType().bitsGT(XLenVT)) {
   1854     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
   1855     IndexVT = IndexVT.changeVectorElementType(MVT::i16);
   1856   }
   1857 
   1858   MVT IndexContainerVT =
   1859       ContainerVT.changeVectorElementType(IndexVT.getScalarType());
   1860 
   1861   SDValue Gather;
   1862   // TODO: This doesn't trigger for i64 vectors on RV32, since there we
   1863   // encounter a bitcasted BUILD_VECTOR with low/high i32 values.
   1864   if (SDValue SplatValue = DAG.getSplatValue(V1, /*LegalTypes*/ true)) {
   1865     Gather = lowerScalarSplat(SplatValue, VL, ContainerVT, DL, DAG, Subtarget);
   1866   } else {
   1867     SDValue LHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesLHS);
   1868     LHSIndices =
   1869         convertToScalableVector(IndexContainerVT, LHSIndices, DAG, Subtarget);
   1870 
   1871     V1 = convertToScalableVector(ContainerVT, V1, DAG, Subtarget);
   1872     Gather =
   1873         DAG.getNode(GatherOpc, DL, ContainerVT, V1, LHSIndices, TrueMask, VL);
   1874   }
   1875 
   1876   // If a second vector operand is used by this shuffle, blend it in with an
   1877   // additional vrgather.
   1878   if (!V2.isUndef()) {
   1879     MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
   1880     SelectMask =
   1881         convertToScalableVector(MaskContainerVT, SelectMask, DAG, Subtarget);
   1882 
   1883     SDValue RHSIndices = DAG.getBuildVector(IndexVT, DL, GatherIndicesRHS);
   1884     RHSIndices =
   1885         convertToScalableVector(IndexContainerVT, RHSIndices, DAG, Subtarget);
   1886 
   1887     V2 = convertToScalableVector(ContainerVT, V2, DAG, Subtarget);
   1888     V2 = DAG.getNode(GatherOpc, DL, ContainerVT, V2, RHSIndices, TrueMask, VL);
   1889     Gather = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, SelectMask, V2,
   1890                          Gather, VL);
   1891   }
   1892 
   1893   return convertFromScalableVector(VT, Gather, DAG, Subtarget);
   1894 }
   1895 
   1896 static SDValue getRVVFPExtendOrRound(SDValue Op, MVT VT, MVT ContainerVT,
   1897                                      SDLoc DL, SelectionDAG &DAG,
   1898                                      const RISCVSubtarget &Subtarget) {
   1899   if (VT.isScalableVector())
   1900     return DAG.getFPExtendOrRound(Op, DL, VT);
   1901   assert(VT.isFixedLengthVector() &&
   1902          "Unexpected value type for RVV FP extend/round lowering");
   1903   SDValue Mask, VL;
   1904   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   1905   unsigned RVVOpc = ContainerVT.bitsGT(Op.getSimpleValueType())
   1906                         ? RISCVISD::FP_EXTEND_VL
   1907                         : RISCVISD::FP_ROUND_VL;
   1908   return DAG.getNode(RVVOpc, DL, ContainerVT, Op, Mask, VL);
   1909 }
   1910 
   1911 SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   1912                                             SelectionDAG &DAG) const {
   1913   switch (Op.getOpcode()) {
   1914   default:
   1915     report_fatal_error("unimplemented operand");
   1916   case ISD::GlobalAddress:
   1917     return lowerGlobalAddress(Op, DAG);
   1918   case ISD::BlockAddress:
   1919     return lowerBlockAddress(Op, DAG);
   1920   case ISD::ConstantPool:
   1921     return lowerConstantPool(Op, DAG);
   1922   case ISD::JumpTable:
   1923     return lowerJumpTable(Op, DAG);
   1924   case ISD::GlobalTLSAddress:
   1925     return lowerGlobalTLSAddress(Op, DAG);
   1926   case ISD::SELECT:
   1927     return lowerSELECT(Op, DAG);
   1928   case ISD::BRCOND:
   1929     return lowerBRCOND(Op, DAG);
   1930   case ISD::VASTART:
   1931     return lowerVASTART(Op, DAG);
   1932   case ISD::FRAMEADDR:
   1933     return lowerFRAMEADDR(Op, DAG);
   1934   case ISD::RETURNADDR:
   1935     return lowerRETURNADDR(Op, DAG);
   1936   case ISD::SHL_PARTS:
   1937     return lowerShiftLeftParts(Op, DAG);
   1938   case ISD::SRA_PARTS:
   1939     return lowerShiftRightParts(Op, DAG, true);
   1940   case ISD::SRL_PARTS:
   1941     return lowerShiftRightParts(Op, DAG, false);
   1942   case ISD::BITCAST: {
   1943     SDLoc DL(Op);
   1944     EVT VT = Op.getValueType();
   1945     SDValue Op0 = Op.getOperand(0);
   1946     EVT Op0VT = Op0.getValueType();
   1947     MVT XLenVT = Subtarget.getXLenVT();
   1948     if (VT.isFixedLengthVector()) {
   1949       // We can handle fixed length vector bitcasts with a simple replacement
   1950       // in isel.
   1951       if (Op0VT.isFixedLengthVector())
   1952         return Op;
   1953       // When bitcasting from scalar to fixed-length vector, insert the scalar
   1954       // into a one-element vector of the result type, and perform a vector
   1955       // bitcast.
   1956       if (!Op0VT.isVector()) {
   1957         auto BVT = EVT::getVectorVT(*DAG.getContext(), Op0VT, 1);
   1958         return DAG.getBitcast(VT, DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, BVT,
   1959                                               DAG.getUNDEF(BVT), Op0,
   1960                                               DAG.getConstant(0, DL, XLenVT)));
   1961       }
   1962       return SDValue();
   1963     }
   1964     // Custom-legalize bitcasts from fixed-length vector types to scalar types
   1965     // thus: bitcast the vector to a one-element vector type whose element type
   1966     // is the same as the result type, and extract the first element.
   1967     if (!VT.isVector() && Op0VT.isFixedLengthVector()) {
   1968       LLVMContext &Context = *DAG.getContext();
   1969       SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
   1970       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
   1971                          DAG.getConstant(0, DL, XLenVT));
   1972     }
   1973     if (VT == MVT::f16 && Op0VT == MVT::i16 && Subtarget.hasStdExtZfh()) {
   1974       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Op0);
   1975       SDValue FPConv = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, NewOp0);
   1976       return FPConv;
   1977     }
   1978     if (VT == MVT::f32 && Op0VT == MVT::i32 && Subtarget.is64Bit() &&
   1979         Subtarget.hasStdExtF()) {
   1980       SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, Op0);
   1981       SDValue FPConv =
   1982           DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, NewOp0);
   1983       return FPConv;
   1984     }
   1985     return SDValue();
   1986   }
   1987   case ISD::INTRINSIC_WO_CHAIN:
   1988     return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   1989   case ISD::INTRINSIC_W_CHAIN:
   1990     return LowerINTRINSIC_W_CHAIN(Op, DAG);
   1991   case ISD::BSWAP:
   1992   case ISD::BITREVERSE: {
   1993     // Convert BSWAP/BITREVERSE to GREVI to enable GREVI combining.
   1994     assert(Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
   1995     MVT VT = Op.getSimpleValueType();
   1996     SDLoc DL(Op);
   1997     // Start with the maximum immediate value which is the bitwidth - 1.
   1998     unsigned Imm = VT.getSizeInBits() - 1;
   1999     // If this is BSWAP rather than BITREVERSE, clear the lower 3 bits.
   2000     if (Op.getOpcode() == ISD::BSWAP)
   2001       Imm &= ~0x7U;
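            // E.g. on RV64: BITREVERSE uses immediate 63 (all swap stages) and
            // BSWAP uses 56 (0b111000), i.e. only the byte-level stages.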
   2002     return DAG.getNode(RISCVISD::GREV, DL, VT, Op.getOperand(0),
   2003                        DAG.getConstant(Imm, DL, VT));
   2004   }
   2005   case ISD::FSHL:
   2006   case ISD::FSHR: {
   2007     MVT VT = Op.getSimpleValueType();
   2008     assert(VT == Subtarget.getXLenVT() && "Unexpected custom legalization");
   2009     SDLoc DL(Op);
   2010     if (Op.getOperand(2).getOpcode() == ISD::Constant)
   2011       return Op;
   2012     // FSL/FSR take a log2(XLen)+1 bit shift amount but XLenVT FSHL/FSHR only
   2013     // use log2(XLen) bits. Mask the shift amount accordingly.
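            // E.g. on RV64 the mask is 63, so the FSL/FSR shift amount never
            // has bit 6 set, matching FSHL/FSHR's modulo-XLen semantics.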
   2014     unsigned ShAmtWidth = Subtarget.getXLen() - 1;
   2015     SDValue ShAmt = DAG.getNode(ISD::AND, DL, VT, Op.getOperand(2),
   2016                                 DAG.getConstant(ShAmtWidth, DL, VT));
   2017     unsigned Opc = Op.getOpcode() == ISD::FSHL ? RISCVISD::FSL : RISCVISD::FSR;
   2018     return DAG.getNode(Opc, DL, VT, Op.getOperand(0), Op.getOperand(1), ShAmt);
   2019   }
   2020   case ISD::TRUNCATE: {
   2021     SDLoc DL(Op);
   2022     MVT VT = Op.getSimpleValueType();
   2023     // Only custom-lower vector truncates.
   2024     if (!VT.isVector())
   2025       return Op;
   2026 
   2027     // Truncates to mask types are handled differently.
   2028     if (VT.getVectorElementType() == MVT::i1)
   2029       return lowerVectorMaskTrunc(Op, DAG);
   2030 
   2031     // RVV only has truncates which operate from SEW*2->SEW, so lower arbitrary
   2032     // truncates as a series of "RISCVISD::TRUNCATE_VECTOR_VL" nodes which
   2033     // truncate by one power of two at a time.
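            // E.g. an nxv2i64 -> nxv2i8 truncate emits three such nodes:
            // i64->i32, i32->i16 and finally i16->i8.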
   2034     MVT DstEltVT = VT.getVectorElementType();
   2035 
   2036     SDValue Src = Op.getOperand(0);
   2037     MVT SrcVT = Src.getSimpleValueType();
   2038     MVT SrcEltVT = SrcVT.getVectorElementType();
   2039 
   2040     assert(DstEltVT.bitsLT(SrcEltVT) &&
   2041            isPowerOf2_64(DstEltVT.getSizeInBits()) &&
   2042            isPowerOf2_64(SrcEltVT.getSizeInBits()) &&
   2043            "Unexpected vector truncate lowering");
   2044 
   2045     MVT ContainerVT = SrcVT;
   2046     if (SrcVT.isFixedLengthVector()) {
   2047       ContainerVT = getContainerForFixedLengthVector(SrcVT);
   2048       Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
   2049     }
   2050 
   2051     SDValue Result = Src;
   2052     SDValue Mask, VL;
   2053     std::tie(Mask, VL) =
   2054         getDefaultVLOps(SrcVT, ContainerVT, DL, DAG, Subtarget);
   2055     LLVMContext &Context = *DAG.getContext();
   2056     const ElementCount Count = ContainerVT.getVectorElementCount();
   2057     do {
   2058       SrcEltVT = MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2);
   2059       EVT ResultVT = EVT::getVectorVT(Context, SrcEltVT, Count);
   2060       Result = DAG.getNode(RISCVISD::TRUNCATE_VECTOR_VL, DL, ResultVT, Result,
   2061                            Mask, VL);
   2062     } while (SrcEltVT != DstEltVT);
   2063 
   2064     if (SrcVT.isFixedLengthVector())
   2065       Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
   2066 
   2067     return Result;
   2068   }
   2069   case ISD::ANY_EXTEND:
   2070   case ISD::ZERO_EXTEND:
   2071     if (Op.getOperand(0).getValueType().isVector() &&
   2072         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
   2073       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ 1);
   2074     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VZEXT_VL);
   2075   case ISD::SIGN_EXTEND:
   2076     if (Op.getOperand(0).getValueType().isVector() &&
   2077         Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
   2078       return lowerVectorMaskExt(Op, DAG, /*ExtVal*/ -1);
   2079     return lowerFixedLengthVectorExtendToRVV(Op, DAG, RISCVISD::VSEXT_VL);
   2080   case ISD::SPLAT_VECTOR_PARTS:
   2081     return lowerSPLAT_VECTOR_PARTS(Op, DAG);
   2082   case ISD::INSERT_VECTOR_ELT:
   2083     return lowerINSERT_VECTOR_ELT(Op, DAG);
   2084   case ISD::EXTRACT_VECTOR_ELT:
   2085     return lowerEXTRACT_VECTOR_ELT(Op, DAG);
   2086   case ISD::VSCALE: {
   2087     MVT VT = Op.getSimpleValueType();
   2088     SDLoc DL(Op);
   2089     SDValue VLENB = DAG.getNode(RISCVISD::READ_VLENB, DL, VT);
   2090     // We define our scalable vector types for lmul=1 to use a 64-bit known
   2091     // minimum size, e.g. <vscale x 2 x i32>. VLENB is in bytes, so we calculate
   2092     // vscale as VLENB / 8.
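            // E.g. with VLEN = 256: VLENB = 32, so vscale = 32 >> 3 = 4 and
            // <vscale x 2 x i32> holds 8 elements.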
   2093     assert(RISCV::RVVBitsPerBlock == 64 && "Unexpected bits per block!");
   2094     SDValue VScale = DAG.getNode(ISD::SRL, DL, VT, VLENB,
   2095                                  DAG.getConstant(3, DL, VT));
   2096     return DAG.getNode(ISD::MUL, DL, VT, VScale, Op.getOperand(0));
   2097   }
   2098   case ISD::FP_EXTEND: {
   2099     // RVV can only do fp_extend to types twice the size of the source. We
   2100     // custom-lower f16->f64 extensions to two hops of ISD::FP_EXTEND, going
   2101     // via f32.
   2102     SDLoc DL(Op);
   2103     MVT VT = Op.getSimpleValueType();
   2104     SDValue Src = Op.getOperand(0);
   2105     MVT SrcVT = Src.getSimpleValueType();
   2106 
   2107     // Prepare any fixed-length vector operands.
   2108     MVT ContainerVT = VT;
   2109     if (SrcVT.isFixedLengthVector()) {
   2110       ContainerVT = getContainerForFixedLengthVector(VT);
   2111       MVT SrcContainerVT =
   2112           ContainerVT.changeVectorElementType(SrcVT.getVectorElementType());
   2113       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
   2114     }
   2115 
   2116     if (!VT.isVector() || VT.getVectorElementType() != MVT::f64 ||
   2117         SrcVT.getVectorElementType() != MVT::f16) {
   2118       // For scalable vectors, we only need to close the gap between
   2119       // vXf16->vXf64.
   2120       if (!VT.isFixedLengthVector())
   2121         return Op;
   2122       // For fixed-length vectors, lower the FP_EXTEND to a custom "VL" version.
   2123       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
   2124       return convertFromScalableVector(VT, Src, DAG, Subtarget);
   2125     }
   2126 
   2127     MVT InterVT = VT.changeVectorElementType(MVT::f32);
   2128     MVT InterContainerVT = ContainerVT.changeVectorElementType(MVT::f32);
   2129     SDValue IntermediateExtend = getRVVFPExtendOrRound(
   2130         Src, InterVT, InterContainerVT, DL, DAG, Subtarget);
   2131 
   2132     SDValue Extend = getRVVFPExtendOrRound(IntermediateExtend, VT, ContainerVT,
   2133                                            DL, DAG, Subtarget);
   2134     if (VT.isFixedLengthVector())
   2135       return convertFromScalableVector(VT, Extend, DAG, Subtarget);
   2136     return Extend;
   2137   }
   2138   case ISD::FP_ROUND: {
   2139     // RVV can only do fp_round to types half the size of the source. We
   2140     // custom-lower f64->f16 rounds via RVV's round-to-odd float
   2141     // conversion instruction.
   2142     SDLoc DL(Op);
   2143     MVT VT = Op.getSimpleValueType();
   2144     SDValue Src = Op.getOperand(0);
   2145     MVT SrcVT = Src.getSimpleValueType();
   2146 
   2147     // Prepare any fixed-length vector operands.
   2148     MVT ContainerVT = VT;
   2149     if (VT.isFixedLengthVector()) {
   2150       MVT SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
   2151       ContainerVT =
   2152           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
   2153       Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
   2154     }
   2155 
   2156     if (!VT.isVector() || VT.getVectorElementType() != MVT::f16 ||
   2157         SrcVT.getVectorElementType() != MVT::f64) {
   2158       // For scalable vectors, we only need to close the gap between
   2159       // vXf64->vXf16.
   2160       if (!VT.isFixedLengthVector())
   2161         return Op;
   2162       // For fixed-length vectors, lower the FP_ROUND to a custom "VL" version.
   2163       Src = getRVVFPExtendOrRound(Src, VT, ContainerVT, DL, DAG, Subtarget);
   2164       return convertFromScalableVector(VT, Src, DAG, Subtarget);
   2165     }
   2166 
   2167     SDValue Mask, VL;
   2168     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   2169 
   2170     MVT InterVT = ContainerVT.changeVectorElementType(MVT::f32);
   2171     SDValue IntermediateRound =
   2172         DAG.getNode(RISCVISD::VFNCVT_ROD_VL, DL, InterVT, Src, Mask, VL);
   2173     SDValue Round = getRVVFPExtendOrRound(IntermediateRound, VT, ContainerVT,
   2174                                           DL, DAG, Subtarget);
   2175 
   2176     if (VT.isFixedLengthVector())
   2177       return convertFromScalableVector(VT, Round, DAG, Subtarget);
   2178     return Round;
   2179   }
   2180   case ISD::FP_TO_SINT:
   2181   case ISD::FP_TO_UINT:
   2182   case ISD::SINT_TO_FP:
   2183   case ISD::UINT_TO_FP: {
   2184     // RVV can only do fp<->int conversions to types half or double the size
   2185     // of the source. We custom-lower any conversions that do two hops into
   2186     // sequences.
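            // E.g. i8 -> f32 is lowered as an i8 -> i32 extend (sign or zero,
            // matching the conversion) followed by i32 -> f32, and f64 -> i8 as
            // f64 -> i32 (narrowing convert) followed by an i32 -> i8 truncate.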
   2187     MVT VT = Op.getSimpleValueType();
   2188     if (!VT.isVector())
   2189       return Op;
   2190     SDLoc DL(Op);
   2191     SDValue Src = Op.getOperand(0);
   2192     MVT EltVT = VT.getVectorElementType();
   2193     MVT SrcVT = Src.getSimpleValueType();
   2194     MVT SrcEltVT = SrcVT.getVectorElementType();
   2195     unsigned EltSize = EltVT.getSizeInBits();
   2196     unsigned SrcEltSize = SrcEltVT.getSizeInBits();
   2197     assert(isPowerOf2_32(EltSize) && isPowerOf2_32(SrcEltSize) &&
   2198            "Unexpected vector element types");
   2199 
   2200     bool IsInt2FP = SrcEltVT.isInteger();
   2201     // Widening conversions
   2202     if (EltSize > SrcEltSize && (EltSize / SrcEltSize >= 4)) {
   2203       if (IsInt2FP) {
   2204         // Do a regular integer sign/zero extension then convert to float.
   2205         MVT IVecVT = MVT::getVectorVT(MVT::getIntegerVT(EltVT.getSizeInBits()),
   2206                                       VT.getVectorElementCount());
   2207         unsigned ExtOpcode = Op.getOpcode() == ISD::UINT_TO_FP
   2208                                  ? ISD::ZERO_EXTEND
   2209                                  : ISD::SIGN_EXTEND;
   2210         SDValue Ext = DAG.getNode(ExtOpcode, DL, IVecVT, Src);
   2211         return DAG.getNode(Op.getOpcode(), DL, VT, Ext);
   2212       }
   2213       // FP2Int
   2214       assert(SrcEltVT == MVT::f16 && "Unexpected FP_TO_[US]INT lowering");
   2215       // Do one doubling fp_extend then complete the operation by converting
   2216       // to int.
   2217       MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
   2218       SDValue FExt = DAG.getFPExtendOrRound(Src, DL, InterimFVT);
   2219       return DAG.getNode(Op.getOpcode(), DL, VT, FExt);
   2220     }
   2221 
   2222     // Narrowing conversions
   2223     if (SrcEltSize > EltSize && (SrcEltSize / EltSize >= 4)) {
   2224       if (IsInt2FP) {
   2225         // One narrowing int_to_fp, then an fp_round.
   2226         assert(EltVT == MVT::f16 && "Unexpected [US]_TO_FP lowering");
   2227         MVT InterimFVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
   2228         SDValue Int2FP = DAG.getNode(Op.getOpcode(), DL, InterimFVT, Src);
   2229         return DAG.getFPExtendOrRound(Int2FP, DL, VT);
   2230       }
   2231       // FP2Int
   2232       // One narrowing fp_to_int, then truncate the integer. If the float isn't
   2233       // representable by the integer, the result is poison.
   2234       MVT IVecVT =
   2235           MVT::getVectorVT(MVT::getIntegerVT(SrcEltVT.getSizeInBits() / 2),
   2236                            VT.getVectorElementCount());
   2237       SDValue FP2Int = DAG.getNode(Op.getOpcode(), DL, IVecVT, Src);
   2238       return DAG.getNode(ISD::TRUNCATE, DL, VT, FP2Int);
   2239     }
   2240 
   2241     // Scalable vectors can exit here. Patterns will handle equally-sized
   2242     // conversions and the halving/doubling ones.
   2243     if (!VT.isFixedLengthVector())
   2244       return Op;
   2245 
   2246     // For fixed-length vectors we lower to a custom "VL" node.
   2247     unsigned RVVOpc = 0;
   2248     switch (Op.getOpcode()) {
   2249     default:
   2250       llvm_unreachable("Impossible opcode");
   2251     case ISD::FP_TO_SINT:
   2252       RVVOpc = RISCVISD::FP_TO_SINT_VL;
   2253       break;
   2254     case ISD::FP_TO_UINT:
   2255       RVVOpc = RISCVISD::FP_TO_UINT_VL;
   2256       break;
   2257     case ISD::SINT_TO_FP:
   2258       RVVOpc = RISCVISD::SINT_TO_FP_VL;
   2259       break;
   2260     case ISD::UINT_TO_FP:
   2261       RVVOpc = RISCVISD::UINT_TO_FP_VL;
   2262       break;
   2263     }
   2264 
   2265     MVT ContainerVT, SrcContainerVT;
   2266     // Derive the reference container type from the larger vector type.
   2267     if (SrcEltSize > EltSize) {
   2268       SrcContainerVT = getContainerForFixedLengthVector(SrcVT);
   2269       ContainerVT =
   2270           SrcContainerVT.changeVectorElementType(VT.getVectorElementType());
   2271     } else {
   2272       ContainerVT = getContainerForFixedLengthVector(VT);
   2273       SrcContainerVT = ContainerVT.changeVectorElementType(SrcEltVT);
   2274     }
   2275 
   2276     SDValue Mask, VL;
   2277     std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   2278 
   2279     Src = convertToScalableVector(SrcContainerVT, Src, DAG, Subtarget);
   2280     Src = DAG.getNode(RVVOpc, DL, ContainerVT, Src, Mask, VL);
   2281     return convertFromScalableVector(VT, Src, DAG, Subtarget);
   2282   }
   2283   case ISD::VECREDUCE_ADD:
   2284   case ISD::VECREDUCE_UMAX:
   2285   case ISD::VECREDUCE_SMAX:
   2286   case ISD::VECREDUCE_UMIN:
   2287   case ISD::VECREDUCE_SMIN:
   2288     return lowerVECREDUCE(Op, DAG);
   2289   case ISD::VECREDUCE_AND:
   2290   case ISD::VECREDUCE_OR:
   2291   case ISD::VECREDUCE_XOR:
   2292     if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i1)
   2293       return lowerVectorMaskVECREDUCE(Op, DAG);
   2294     return lowerVECREDUCE(Op, DAG);
   2295   case ISD::VECREDUCE_FADD:
   2296   case ISD::VECREDUCE_SEQ_FADD:
   2297   case ISD::VECREDUCE_FMIN:
   2298   case ISD::VECREDUCE_FMAX:
   2299     return lowerFPVECREDUCE(Op, DAG);
   2300   case ISD::INSERT_SUBVECTOR:
   2301     return lowerINSERT_SUBVECTOR(Op, DAG);
   2302   case ISD::EXTRACT_SUBVECTOR:
   2303     return lowerEXTRACT_SUBVECTOR(Op, DAG);
   2304   case ISD::STEP_VECTOR:
   2305     return lowerSTEP_VECTOR(Op, DAG);
   2306   case ISD::VECTOR_REVERSE:
   2307     return lowerVECTOR_REVERSE(Op, DAG);
   2308   case ISD::BUILD_VECTOR:
   2309     return lowerBUILD_VECTOR(Op, DAG, Subtarget);
   2310   case ISD::SPLAT_VECTOR:
   2311     if (Op.getValueType().getVectorElementType() == MVT::i1)
   2312       return lowerVectorMaskSplat(Op, DAG);
   2313     return lowerSPLAT_VECTOR(Op, DAG, Subtarget);
   2314   case ISD::VECTOR_SHUFFLE:
   2315     return lowerVECTOR_SHUFFLE(Op, DAG, Subtarget);
   2316   case ISD::CONCAT_VECTORS: {
   2317     // Split CONCAT_VECTORS into a series of INSERT_SUBVECTOR nodes. This is
   2318     // better than going through the stack, as the default expansion does.
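            // E.g. v8i32 = concat_vectors(v4i32 A, v4i32 B) becomes two
            // INSERT_SUBVECTORs of A and B into an undef v8i32 at element
            // indices 0 and 4.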
    SDLoc DL(Op);
    MVT VT = Op.getSimpleValueType();
    unsigned NumOpElts =
        Op.getOperand(0).getSimpleValueType().getVectorMinNumElements();
    SDValue Vec = DAG.getUNDEF(VT);
    for (const auto &OpIdx : enumerate(Op->ops()))
      Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT, Vec, OpIdx.value(),
                        DAG.getIntPtrConstant(OpIdx.index() * NumOpElts, DL));
    return Vec;
  }
  case ISD::LOAD:
    return lowerFixedLengthVectorLoadToRVV(Op, DAG);
  case ISD::STORE:
    return lowerFixedLengthVectorStoreToRVV(Op, DAG);
  case ISD::MLOAD:
    return lowerMLOAD(Op, DAG);
  case ISD::MSTORE:
    return lowerMSTORE(Op, DAG);
  case ISD::SETCC:
    return lowerFixedLengthVectorSetccToRVV(Op, DAG);
  case ISD::ADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::SUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::MUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::MULHS:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHS_VL);
  case ISD::MULHU:
    return lowerToScalableOp(Op, DAG, RISCVISD::MULHU_VL);
  case ISD::AND:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMAND_VL,
                                              RISCVISD::AND_VL);
  case ISD::OR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMOR_VL,
                                              RISCVISD::OR_VL);
  case ISD::XOR:
    return lowerFixedLengthVectorLogicOpToRVV(Op, DAG, RISCVISD::VMXOR_VL,
                                              RISCVISD::XOR_VL);
  case ISD::SDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::SREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::UDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::UREM:
    return lowerToScalableOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::SHL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SHL_VL);
  case ISD::SRA:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::SRL:
    return lowerToScalableOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::FADD:
    return lowerToScalableOp(Op, DAG, RISCVISD::FADD_VL);
  case ISD::FSUB:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSUB_VL);
  case ISD::FMUL:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMUL_VL);
  case ISD::FDIV:
    return lowerToScalableOp(Op, DAG, RISCVISD::FDIV_VL);
  case ISD::FNEG:
    return lowerToScalableOp(Op, DAG, RISCVISD::FNEG_VL);
  case ISD::FABS:
    return lowerToScalableOp(Op, DAG, RISCVISD::FABS_VL);
  case ISD::FSQRT:
    return lowerToScalableOp(Op, DAG, RISCVISD::FSQRT_VL);
  case ISD::FMA:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMA_VL);
  case ISD::SMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMIN_VL);
  case ISD::SMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::SMAX_VL);
  case ISD::UMIN:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMIN_VL);
  case ISD::UMAX:
    return lowerToScalableOp(Op, DAG, RISCVISD::UMAX_VL);
  case ISD::FMINNUM:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMINNUM_VL);
  case ISD::FMAXNUM:
    return lowerToScalableOp(Op, DAG, RISCVISD::FMAXNUM_VL);
  case ISD::ABS:
    return lowerABS(Op, DAG);
  case ISD::VSELECT:
    return lowerFixedLengthVectorSelectToRVV(Op, DAG);
  case ISD::FCOPYSIGN:
    return lowerFixedLengthVectorFCOPYSIGNToRVV(Op, DAG);
  case ISD::MGATHER:
    return lowerMGATHER(Op, DAG);
  case ISD::MSCATTER:
    return lowerMSCATTER(Op, DAG);
  case ISD::FLT_ROUNDS_:
    return lowerGET_ROUNDING(Op, DAG);
  case ISD::SET_ROUNDING:
    return lowerSET_ROUNDING(Op, DAG);
  case ISD::VP_ADD:
    return lowerVPOp(Op, DAG, RISCVISD::ADD_VL);
  case ISD::VP_SUB:
    return lowerVPOp(Op, DAG, RISCVISD::SUB_VL);
  case ISD::VP_MUL:
    return lowerVPOp(Op, DAG, RISCVISD::MUL_VL);
  case ISD::VP_SDIV:
    return lowerVPOp(Op, DAG, RISCVISD::SDIV_VL);
  case ISD::VP_UDIV:
    return lowerVPOp(Op, DAG, RISCVISD::UDIV_VL);
  case ISD::VP_SREM:
    return lowerVPOp(Op, DAG, RISCVISD::SREM_VL);
  case ISD::VP_UREM:
    return lowerVPOp(Op, DAG, RISCVISD::UREM_VL);
  case ISD::VP_AND:
    return lowerVPOp(Op, DAG, RISCVISD::AND_VL);
  case ISD::VP_OR:
    return lowerVPOp(Op, DAG, RISCVISD::OR_VL);
  case ISD::VP_XOR:
    return lowerVPOp(Op, DAG, RISCVISD::XOR_VL);
  case ISD::VP_ASHR:
    return lowerVPOp(Op, DAG, RISCVISD::SRA_VL);
  case ISD::VP_LSHR:
    return lowerVPOp(Op, DAG, RISCVISD::SRL_VL);
  case ISD::VP_SHL:
    return lowerVPOp(Op, DAG, RISCVISD::SHL_VL);
  }
}

static SDValue getTargetNode(GlobalAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetGlobalAddress(N->getGlobal(), DL, Ty, 0, Flags);
}

static SDValue getTargetNode(BlockAddressSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetBlockAddress(N->getBlockAddress(), Ty, N->getOffset(),
                                   Flags);
}

static SDValue getTargetNode(ConstantPoolSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, SDLoc DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue RISCVTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                     bool IsLocal) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());

  if (isPositionIndependent()) {
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    if (IsLocal)
      // Use PC-relative addressing to access the symbol. This generates the
      // pattern (PseudoLLA sym), which expands to (addi (auipc %pcrel_hi(sym))
      // %pcrel_lo(auipc)).
      return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);

    // Use PC-relative addressing to access the GOT for this symbol, then load
    // the address from the GOT. This generates the pattern (PseudoLA sym),
    // which expands to (ld (addi (auipc %got_pcrel_hi(sym)) %pcrel_lo(auipc))).
    return SDValue(DAG.getMachineNode(RISCV::PseudoLA, DL, Ty, Addr), 0);
  }

  switch (getTargetMachine().getCodeModel()) {
  default:
    report_fatal_error("Unsupported code model for lowering");
  case CodeModel::Small: {
    // Generate a sequence for accessing addresses within the first 2 GiB of
    // address space. This generates the pattern (addi (lui %hi(sym)) %lo(sym)).
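    // For example (a sketch; register allocation may differ):
    //   lui  a0, %hi(sym)
    //   addi a0, a0, %lo(sym)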
    SDValue AddrHi = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_HI);
    SDValue AddrLo = getTargetNode(N, DL, Ty, DAG, RISCVII::MO_LO);
    SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
    return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNHi, AddrLo), 0);
  }
  case CodeModel::Medium: {
    // Generate a sequence for accessing addresses within any 2 GiB range
    // within the address space. This generates the pattern (PseudoLLA sym),
    // which expands to (addi (auipc %pcrel_hi(sym)) %pcrel_lo(auipc)).
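    // For example (a sketch; register allocation may differ):
    // .Lpcrel_hi0:
    //   auipc a0, %pcrel_hi(sym)
    //   addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)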
    SDValue Addr = getTargetNode(N, DL, Ty, DAG, 0);
    return SDValue(DAG.getMachineNode(RISCV::PseudoLLA, DL, Ty, Addr), 0);
  }
  }
}

SDValue RISCVTargetLowering::lowerGlobalAddress(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  const GlobalValue *GV = N->getGlobal();
  bool IsLocal = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
  SDValue Addr = getAddr(N, DAG, IsLocal);

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
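  // For example, accesses to sym+4 and sym+8 can then share a single
  // materialised address of sym.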
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerBlockAddress(SDValue Op,
                                               SelectionDAG &DAG) const {
  BlockAddressSDNode *N = cast<BlockAddressSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerConstantPool(SDValue Op,
                                               SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::lowerJumpTable(SDValue Op,
                                            SelectionDAG &DAG) const {
  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue RISCVTargetLowering::getStaticTLSAddr(GlobalAddressSDNode *N,
                                              SelectionDAG &DAG,
                                              bool UseGOT) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  const GlobalValue *GV = N->getGlobal();
  MVT XLenVT = Subtarget.getXLenVT();

  if (UseGOT) {
    // Use PC-relative addressing to access the GOT for this TLS symbol, then
    // load the address from the GOT and add the thread pointer. This generates
    // the pattern (PseudoLA_TLS_IE sym), which expands to
    // (ld (auipc %tls_ie_pcrel_hi(sym)) %pcrel_lo(auipc)).
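    // On RV64 a sketch of the resulting code is (register allocation may
    // differ; RV32 uses lw instead of ld):
    // .Lpcrel_hi0:
    //   auipc a0, %tls_ie_pcrel_hi(sym)
    //   ld    a0, %pcrel_lo(.Lpcrel_hi0)(a0)
    //   add   a0, a0, tp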
    SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
    SDValue Load =
        SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_IE, DL, Ty, Addr), 0);

    // Add the thread pointer.
    SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
    return DAG.getNode(ISD::ADD, DL, Ty, Load, TPReg);
  }

  // Generate a sequence for accessing the address relative to the thread
  // pointer, with the appropriate adjustment for the thread pointer offset.
  // This generates the pattern
  // (add (add_tprel (lui %tprel_hi(sym)) tp %tprel_add(sym)) %tprel_lo(sym))
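  // A sketch of the resulting code (register allocation may differ):
  //   lui  a0, %tprel_hi(sym)
  //   add  a0, a0, tp, %tprel_add(sym)
  //   addi a0, a0, %tprel_lo(sym)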
  SDValue AddrHi =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_HI);
  SDValue AddrAdd =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_ADD);
  SDValue AddrLo =
      DAG.getTargetGlobalAddress(GV, DL, Ty, 0, RISCVII::MO_TPREL_LO);

  SDValue MNHi = SDValue(DAG.getMachineNode(RISCV::LUI, DL, Ty, AddrHi), 0);
  SDValue TPReg = DAG.getRegister(RISCV::X4, XLenVT);
  SDValue MNAdd = SDValue(
      DAG.getMachineNode(RISCV::PseudoAddTPRel, DL, Ty, MNHi, TPReg, AddrAdd),
      0);
  return SDValue(DAG.getMachineNode(RISCV::ADDI, DL, Ty, MNAdd, AddrLo), 0);
}

SDValue RISCVTargetLowering::getDynamicTLSAddr(GlobalAddressSDNode *N,
                                               SelectionDAG &DAG) const {
  SDLoc DL(N);
  EVT Ty = getPointerTy(DAG.getDataLayout());
  IntegerType *CallTy = Type::getIntNTy(*DAG.getContext(), Ty.getSizeInBits());
  const GlobalValue *GV = N->getGlobal();

  // Use a PC-relative addressing mode to access the global dynamic GOT address.
  // This generates the pattern (PseudoLA_TLS_GD sym), which expands to
  // (addi (auipc %tls_gd_pcrel_hi(sym)) %pcrel_lo(auipc)).
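  // Together with the call emitted below, a sketch of the resulting code is
  // (register allocation may differ):
  // .Lpcrel_hi0:
  //   auipc a0, %tls_gd_pcrel_hi(sym)
  //   addi  a0, a0, %pcrel_lo(.Lpcrel_hi0)
  //   call  __tls_get_addr@plt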
  SDValue Addr = DAG.getTargetGlobalAddress(GV, DL, Ty, 0, 0);
  SDValue Load =
      SDValue(DAG.getMachineNode(RISCV::PseudoLA_TLS_GD, DL, Ty, Addr), 0);

  // Prepare argument list to generate call.
  ArgListTy Args;
  ArgListEntry Entry;
  Entry.Node = Load;
  Entry.Ty = CallTy;
  Args.push_back(Entry);

  // Setup call to __tls_get_addr.
  TargetLowering::CallLoweringInfo CLI(DAG);
  CLI.setDebugLoc(DL)
      .setChain(DAG.getEntryNode())
      .setLibCallee(CallingConv::C, CallTy,
                    DAG.getExternalSymbol("__tls_get_addr", Ty),
                    std::move(Args));

  return LowerCallTo(CLI).first;
}

SDValue RISCVTargetLowering::lowerGlobalTLSAddress(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT Ty = Op.getValueType();
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  int64_t Offset = N->getOffset();
  MVT XLenVT = Subtarget.getXLenVT();

  TLSModel::Model Model = getTargetMachine().getTLSModel(N->getGlobal());

  if (DAG.getMachineFunction().getFunction().getCallingConv() ==
      CallingConv::GHC)
    report_fatal_error("In GHC calling convention TLS is not supported");

  SDValue Addr;
  switch (Model) {
  case TLSModel::LocalExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/false);
    break;
  case TLSModel::InitialExec:
    Addr = getStaticTLSAddr(N, DAG, /*UseGOT=*/true);
    break;
  case TLSModel::LocalDynamic:
  case TLSModel::GeneralDynamic:
    Addr = getDynamicTLSAddr(N, DAG);
    break;
  }

  // In order to maximise the opportunity for common subexpression elimination,
  // emit a separate ADD node for the global address offset instead of folding
  // it in the global address node. Later peephole optimisations may choose to
  // fold it back in when profitable.
  if (Offset != 0)
    return DAG.getNode(ISD::ADD, DL, Ty, Addr,
                       DAG.getConstant(Offset, DL, XLenVT));
  return Addr;
}

SDValue RISCVTargetLowering::lowerSELECT(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(0);
  SDValue TrueV = Op.getOperand(1);
  SDValue FalseV = Op.getOperand(2);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  // If the result type is XLenVT and CondV is the output of a SETCC node
  // which also operated on XLenVT inputs, then merge the SETCC node into the
  // lowered RISCVISD::SELECT_CC to take advantage of the integer
  // compare+branch instructions. i.e.:
  // (select (setcc lhs, rhs, cc), truev, falsev)
  // -> (riscvisd::select_cc lhs, rhs, cc, truev, falsev)
  if (Op.getSimpleValueType() == XLenVT && CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getSimpleValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    auto CC = cast<CondCodeSDNode>(CondV.getOperand(2));
    ISD::CondCode CCVal = CC->get();

    // Special case for a select of 2 constants that have a difference of 1.
    // Normally this is done by DAGCombine, but if the select is introduced by
    // type legalization or op legalization, we miss it. Restricting to the
    // SETLT case for now because that is what signed saturating add/sub need.
    // FIXME: We don't need the condition to be SETLT or even a SETCC,
    // but we would probably want to swap the true/false values if the
    // condition is SETGE/SETLE to avoid an XORI.
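    // For example, since the setcc produces 0 or 1:
    //   (select (setlt a, b), 5, 4) -> (add (setlt a, b), 4)
    //   (select (setlt a, b), 4, 5) -> (sub 5, (setlt a, b))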
    if (isa<ConstantSDNode>(TrueV) && isa<ConstantSDNode>(FalseV) &&
        CCVal == ISD::SETLT) {
      const APInt &TrueVal = cast<ConstantSDNode>(TrueV)->getAPIntValue();
      const APInt &FalseVal = cast<ConstantSDNode>(FalseV)->getAPIntValue();
      if (TrueVal - 1 == FalseVal)
        return DAG.getNode(ISD::ADD, DL, Op.getValueType(), CondV, FalseV);
      if (TrueVal + 1 == FalseVal)
        return DAG.getNode(ISD::SUB, DL, Op.getValueType(), FalseV, CondV);
    }

    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getTargetConstant(CCVal, DL, XLenVT);
    SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
    return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
  }

  // Otherwise:
  // (select condv, truev, falsev)
  // -> (riscvisd::select_cc condv, zero, setne, truev, falsev)
  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  SDValue SetNE = DAG.getTargetConstant(ISD::SETNE, DL, XLenVT);

  SDValue Ops[] = {CondV, Zero, SetNE, TrueV, FalseV};

  return DAG.getNode(RISCVISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

SDValue RISCVTargetLowering::lowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue CondV = Op.getOperand(1);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  if (CondV.getOpcode() == ISD::SETCC &&
      CondV.getOperand(0).getValueType() == XLenVT) {
    SDValue LHS = CondV.getOperand(0);
    SDValue RHS = CondV.getOperand(1);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(CondV.getOperand(2))->get();

    translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

    SDValue TargetCC = DAG.getCondCode(CCVal);
    return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                       LHS, RHS, TargetCC, Op.getOperand(2));
  }

  return DAG.getNode(RISCVISD::BR_CC, DL, Op.getValueType(), Op.getOperand(0),
                     CondV, DAG.getConstant(0, DL, XLenVT),
                     DAG.getCondCode(ISD::SETNE), Op.getOperand(2));
}

SDValue RISCVTargetLowering::lowerVASTART(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  RISCVMachineFunctionInfo *FuncInfo = MF.getInfo<RISCVMachineFunctionInfo>();

  SDLoc DL(Op);
  SDValue FI = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(),
                                 getPointerTy(MF.getDataLayout()));

  // vastart just stores the address of the VarArgsFrameIndex slot into the
  // memory location argument.
  const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
  return DAG.getStore(Op.getOperand(0), DL, FI, Op.getOperand(1),
                      MachinePointerInfo(SV));
}

SDValue RISCVTargetLowering::lowerFRAMEADDR(SDValue Op,
                                            SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setFrameAddressIsTaken(true);
  Register FrameReg = RI.getFrameRegister(MF);
  int XLenInBytes = Subtarget.getXLen() / 8;

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, VT);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  while (Depth--) {
    int Offset = -(XLenInBytes * 2);
    SDValue Ptr = DAG.getNode(ISD::ADD, DL, VT, FrameAddr,
                              DAG.getIntPtrConstant(Offset, DL));
    FrameAddr =
        DAG.getLoad(VT, DL, DAG.getEntryNode(), Ptr, MachinePointerInfo());
  }
  return FrameAddr;
}

SDValue RISCVTargetLowering::lowerRETURNADDR(SDValue Op,
                                             SelectionDAG &DAG) const {
  const RISCVRegisterInfo &RI = *Subtarget.getRegisterInfo();
  MachineFunction &MF = DAG.getMachineFunction();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MFI.setReturnAddressIsTaken(true);
  MVT XLenVT = Subtarget.getXLenVT();
  int XLenInBytes = Subtarget.getXLen() / 8;

  if (verifyReturnAddressArgumentIsConstant(Op, DAG))
    return SDValue();

  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
  if (Depth) {
    int Off = -XLenInBytes;
    SDValue FrameAddr = lowerFRAMEADDR(Op, DAG);
    SDValue Offset = DAG.getConstant(Off, DL, VT);
    return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                       DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
                       MachinePointerInfo());
  }

  // Return the value of the return address register, marking it an implicit
  // live-in.
  Register Reg = MF.addLiveIn(RI.getRARegister(), getRegClassFor(XLenVT));
  return DAG.getCopyFromReg(DAG.getEntryNode(), DL, Reg, XLenVT);
}

SDValue RISCVTargetLowering::lowerShiftLeftParts(SDValue Op,
                                                 SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // if Shamt-XLEN < 0: // Shamt < XLEN
  //   Lo = Lo << Shamt
  //   Hi = (Hi << Shamt) | ((Lo >>u 1) >>u (XLEN-1 - Shamt))
  // else:
  //   Lo = 0
  //   Hi = Lo << (Shamt-XLEN)
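  //
  // For example, with XLEN=32 and Shamt=40: Shamt-XLEN is 8, so the result is
  // Lo = 0 and Hi = (original Lo) << 8.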

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue LoTrue = DAG.getNode(ISD::SHL, DL, VT, Lo, Shamt);
  SDValue ShiftRight1Lo = DAG.getNode(ISD::SRL, DL, VT, Lo, One);
  SDValue ShiftRightLo =
      DAG.getNode(ISD::SRL, DL, VT, ShiftRight1Lo, XLenMinus1Shamt);
  SDValue ShiftLeftHi = DAG.getNode(ISD::SHL, DL, VT, Hi, Shamt);
  SDValue HiTrue = DAG.getNode(ISD::OR, DL, VT, ShiftLeftHi, ShiftRightLo);
  SDValue HiFalse = DAG.getNode(ISD::SHL, DL, VT, Lo, ShamtMinusXLen);

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, Zero);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

SDValue RISCVTargetLowering::lowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
                                                  bool IsSRA) const {
  SDLoc DL(Op);
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shamt = Op.getOperand(2);
  EVT VT = Lo.getValueType();

  // SRA expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>s Shamt
  //   else:
  //     Lo = Hi >>s (Shamt-XLEN);
  //     Hi = Hi >>s (XLEN-1)
  //
  // SRL expansion:
  //   if Shamt-XLEN < 0: // Shamt < XLEN
  //     Lo = (Lo >>u Shamt) | ((Hi << 1) << (XLEN-1 - Shamt))
  //     Hi = Hi >>u Shamt
  //   else:
  //     Lo = Hi >>u (Shamt-XLEN);
  //     Hi = 0;
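  //
  // For example, with XLEN=32 and Shamt=40 in the SRL case: Lo = Hi >>u 8 and
  // Hi = 0.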

  unsigned ShiftRightOp = IsSRA ? ISD::SRA : ISD::SRL;

  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);
  SDValue MinusXLen = DAG.getConstant(-(int)Subtarget.getXLen(), DL, VT);
  SDValue XLenMinus1 = DAG.getConstant(Subtarget.getXLen() - 1, DL, VT);
  SDValue ShamtMinusXLen = DAG.getNode(ISD::ADD, DL, VT, Shamt, MinusXLen);
  SDValue XLenMinus1Shamt = DAG.getNode(ISD::SUB, DL, VT, XLenMinus1, Shamt);

  SDValue ShiftRightLo = DAG.getNode(ISD::SRL, DL, VT, Lo, Shamt);
  SDValue ShiftLeftHi1 = DAG.getNode(ISD::SHL, DL, VT, Hi, One);
  SDValue ShiftLeftHi =
      DAG.getNode(ISD::SHL, DL, VT, ShiftLeftHi1, XLenMinus1Shamt);
  SDValue LoTrue = DAG.getNode(ISD::OR, DL, VT, ShiftRightLo, ShiftLeftHi);
  SDValue HiTrue = DAG.getNode(ShiftRightOp, DL, VT, Hi, Shamt);
  SDValue LoFalse = DAG.getNode(ShiftRightOp, DL, VT, Hi, ShamtMinusXLen);
  SDValue HiFalse =
      IsSRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, XLenMinus1) : Zero;

  SDValue CC = DAG.getSetCC(DL, VT, ShamtMinusXLen, Zero, ISD::SETLT);

  Lo = DAG.getNode(ISD::SELECT, DL, VT, CC, LoTrue, LoFalse);
  Hi = DAG.getNode(ISD::SELECT, DL, VT, CC, HiTrue, HiFalse);

  SDValue Parts[2] = {Lo, Hi};
  return DAG.getMergeValues(Parts, DL);
}

// Lower splats of i1 types to SETCC. For each mask vector type, we have a
// legal equivalently-sized i8 type, so we can use that as a go-between.
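// For example, (v8i1 splat x) effectively becomes
// (setcc (v8i8 splat (x & 1)), (v8i8 splat 0), setne).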
SDValue RISCVTargetLowering::lowerVectorMaskSplat(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VT = Op.getSimpleValueType();
  SDValue SplatVal = Op.getOperand(0);
  // All-zeros or all-ones splats are handled specially.
  if (ISD::isConstantSplatVectorAllOnes(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMSET_VL, DL, VT, VL);
  }
  if (ISD::isConstantSplatVectorAllZeros(Op.getNode())) {
    SDValue VL = getDefaultScalableVLOps(VT, DL, DAG, Subtarget).second;
    return DAG.getNode(RISCVISD::VMCLR_VL, DL, VT, VL);
  }
  MVT XLenVT = Subtarget.getXLenVT();
  assert(SplatVal.getValueType() == XLenVT &&
         "Unexpected type for i1 splat value");
  MVT InterVT = VT.changeVectorElementType(MVT::i8);
  SplatVal = DAG.getNode(ISD::AND, DL, XLenVT, SplatVal,
                         DAG.getConstant(1, DL, XLenVT));
  SDValue LHS = DAG.getSplatVector(InterVT, DL, SplatVal);
  SDValue Zero = DAG.getConstant(0, DL, InterVT);
  return DAG.getSetCC(DL, VT, LHS, Zero, ISD::SETNE);
}

// Custom-lower a SPLAT_VECTOR_PARTS where XLEN<SEW, as the SEW element type is
// illegal (currently only vXi64 RV32).
// FIXME: We could also catch non-constant sign-extended i32 values and lower
// them to SPLAT_VECTOR_I64
SDValue RISCVTargetLowering::lowerSPLAT_VECTOR_PARTS(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  assert(!Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64 &&
         "Unexpected SPLAT_VECTOR_PARTS lowering");

  assert(Op.getNumOperands() == 2 && "Unexpected number of operands!");
  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  if (VecVT.isFixedLengthVector()) {
    MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
    SDLoc DL(Op);
    SDValue Mask, VL;
    std::tie(Mask, VL) =
        getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

    SDValue Res = splatPartsI64WithVL(DL, ContainerVT, Lo, Hi, VL, DAG);
    return convertFromScalableVector(VecVT, Res, DAG, Subtarget);
  }

  if (isa<ConstantSDNode>(Lo) && isa<ConstantSDNode>(Hi)) {
    int32_t LoC = cast<ConstantSDNode>(Lo)->getSExtValue();
    int32_t HiC = cast<ConstantSDNode>(Hi)->getSExtValue();
    // If Hi constant is all the same sign bit as Lo, lower this as a custom
    // node in order to try and match RVV vector/scalar instructions.
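    // For example, Lo = -2 (0xFFFFFFFE) and Hi = -1 satisfy
    // (LoC >> 31) == HiC, so splatting the sign-extended Lo reproduces the
    // full i64 value.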
    if ((LoC >> 31) == HiC)
      return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);
  }

  // Detect cases where Hi is (SRA Lo, 31) which means Hi is Lo sign extended.
  if (Hi.getOpcode() == ISD::SRA && Hi.getOperand(0) == Lo &&
      isa<ConstantSDNode>(Hi.getOperand(1)) &&
      Hi.getConstantOperandVal(1) == 31)
    return DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, Lo);

  // Fall back to using a stack store and a stride-x0 vector load. Use X0 as
  // VL.
  return splatPartsI64ThroughStack(DL, VecVT, Lo, Hi,
                                   DAG.getRegister(RISCV::X0, MVT::i64), DAG);
}

// Custom-lower extensions from mask vectors by using a vselect either with 1
// for zero/any-extension or -1 for sign-extension:
//   (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0)
// Note that any-extension is lowered identically to zero-extension.
SDValue RISCVTargetLowering::lowerVectorMaskExt(SDValue Op, SelectionDAG &DAG,
                                                int64_t ExtTrueVal) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Src = Op.getOperand(0);
  // Only custom-lower extensions from mask types
  assert(Src.getValueType().isVector() &&
         Src.getValueType().getVectorElementType() == MVT::i1);

  MVT XLenVT = Subtarget.getXLenVT();
  SDValue SplatZero = DAG.getConstant(0, DL, XLenVT);
  SDValue SplatTrueVal = DAG.getConstant(ExtTrueVal, DL, XLenVT);

  if (VecVT.isScalableVector()) {
    // Be careful not to introduce illegal scalar types at this stage, and be
    // careful also about splatting constants as on RV32, vXi64 SPLAT_VECTOR is
    // illegal and must be expanded. Since we know that the constants are
    // sign-extended 32-bit values, we use SPLAT_VECTOR_I64 directly.
    bool IsRV32E64 =
        !Subtarget.is64Bit() && VecVT.getVectorElementType() == MVT::i64;

    if (!IsRV32E64) {
      SplatZero = DAG.getSplatVector(VecVT, DL, SplatZero);
      SplatTrueVal = DAG.getSplatVector(VecVT, DL, SplatTrueVal);
    } else {
      SplatZero = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatZero);
      SplatTrueVal =
          DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, VecVT, SplatTrueVal);
    }

    return DAG.getNode(ISD::VSELECT, DL, VecVT, Src, SplatTrueVal, SplatZero);
  }

  MVT ContainerVT = getContainerForFixedLengthVector(VecVT);
  MVT I1ContainerVT =
      MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());

  SDValue CC = convertToScalableVector(I1ContainerVT, Src, DAG, Subtarget);

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero, VL);
  SplatTrueVal =
      DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatTrueVal, VL);
  SDValue Select = DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC,
                               SplatTrueVal, SplatZero, VL);

  return convertFromScalableVector(VecVT, Select, DAG, Subtarget);
}

SDValue RISCVTargetLowering::lowerFixedLengthVectorExtendToRVV(
    SDValue Op, SelectionDAG &DAG, unsigned ExtendOpc) const {
  MVT ExtVT = Op.getSimpleValueType();
  // Only custom-lower extensions from fixed-length vector types.
  if (!ExtVT.isFixedLengthVector())
    return Op;
  MVT VT = Op.getOperand(0).getSimpleValueType();
  // Grab the canonical container type for the extended type. Infer the smaller
  // type from that to ensure the same number of vector elements, as we know
  // the LMUL will be sufficient to hold the smaller type.
  MVT ContainerExtVT = getContainerForFixedLengthVector(ExtVT);
  // Get the extended container type manually to ensure the same number of
  // vector elements between source and dest.
  MVT ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
                                     ContainerExtVT.getVectorElementCount());

  SDValue Op1 =
      convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);

  SDLoc DL(Op);
  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);

  SDValue Ext = DAG.getNode(ExtendOpc, DL, ContainerExtVT, Op1, Mask, VL);

  return convertFromScalableVector(ExtVT, Ext, DAG, Subtarget);
}

// Custom-lower truncations from vectors to mask vectors by using a mask and a
// setcc operation:
//   (vXi1 = trunc vXiN vec) -> (vXi1 = setcc (and vec, 1), 0, ne)
SDValue RISCVTargetLowering::lowerVectorMaskTrunc(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT MaskVT = Op.getValueType();
  // Only expect to custom-lower truncations to mask types
  assert(MaskVT.isVector() && MaskVT.getVectorElementType() == MVT::i1 &&
         "Unexpected type for vector mask lowering");
  SDValue Src = Op.getOperand(0);
  MVT VecVT = Src.getSimpleValueType();

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Src = convertToScalableVector(ContainerVT, Src, DAG, Subtarget);
  }

  SDValue SplatOne = DAG.getConstant(1, DL, Subtarget.getXLenVT());
  SDValue SplatZero = DAG.getConstant(0, DL, Subtarget.getXLenVT());

  SplatOne = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatOne);
  SplatZero = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT, SplatZero);

  if (VecVT.isScalableVector()) {
    SDValue Trunc = DAG.getNode(ISD::AND, DL, VecVT, Src, SplatOne);
    return DAG.getSetCC(DL, MaskVT, Trunc, SplatZero, ISD::SETNE);
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  MVT MaskContainerVT = ContainerVT.changeVectorElementType(MVT::i1);
  SDValue Trunc =
      DAG.getNode(RISCVISD::AND_VL, DL, ContainerVT, Src, SplatOne, Mask, VL);
  Trunc = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskContainerVT, Trunc, SplatZero,
                      DAG.getCondCode(ISD::SETNE), Mask, VL);
  return convertFromScalableVector(MaskVT, Trunc, DAG, Subtarget);
}

// Custom-legalize INSERT_VECTOR_ELT so that the value is inserted into the
// first position of a vector, and that vector is slid up to the insert index.
// By limiting the active vector length to index+1 and merging with the
// original vector (with an undisturbed tail policy for elements >= VL), we
// achieve the desired result of leaving all elements untouched except the one
// at VL-1, which is replaced with the desired value.
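// For example, inserting x at index 2 is conceptually (a sketch, not
// necessarily the exact machine code):
//   vmv.s.x   v_tmp, x        ; x placed at element 0
//   vslideup  v_dest, v_tmp   ; slide amount 2, VL = 3, tail undisturbed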
SDValue RISCVTargetLowering::lowerINSERT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  MVT VecVT = Op.getSimpleValueType();
  SDValue Vec = Op.getOperand(0);
  SDValue Val = Op.getOperand(1);
  SDValue Idx = Op.getOperand(2);

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and insert into that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, WideVT, Vec, Val, Idx);
    return DAG.getNode(ISD::TRUNCATE, DL, VecVT, Vec);
  }

  MVT ContainerVT = VecVT;
  // If the operand is a fixed-length vector, convert to a scalable one.
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  MVT XLenVT = Subtarget.getXLenVT();

  SDValue Zero = DAG.getConstant(0, DL, XLenVT);
  bool IsLegalInsert = Subtarget.is64Bit() || Val.getValueType() != MVT::i64;
  // Even i64-element vectors on RV32 can be lowered without scalar
  // legalization if the most-significant 32 bits of the value are not affected
  // by the sign-extension of the lower 32 bits.
  // TODO: We could also catch sign extensions of a 32-bit value.
  if (!IsLegalInsert && isa<ConstantSDNode>(Val)) {
    const auto *CVal = cast<ConstantSDNode>(Val);
    if (isInt<32>(CVal->getSExtValue())) {
      IsLegalInsert = true;
      Val = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
    }
  }

  SDValue Mask, VL;
  std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);

  SDValue ValInVec;

  if (IsLegalInsert) {
    unsigned Opc =
        VecVT.isFloatingPoint() ? RISCVISD::VFMV_S_F_VL : RISCVISD::VMV_S_X_VL;
    if (isNullConstant(Idx)) {
      Vec = DAG.getNode(Opc, DL, ContainerVT, Vec, Val, VL);
      if (!VecVT.isFixedLengthVector())
        return Vec;
      return convertFromScalableVector(VecVT, Vec, DAG, Subtarget);
    }
    ValInVec =
        DAG.getNode(Opc, DL, ContainerVT, DAG.getUNDEF(ContainerVT), Val, VL);
  } else {
    // On RV32, i64-element vectors must be specially handled to place the
    // value at element 0, by using two vslide1up instructions in sequence on
    // the i32 split lo/hi value. Use an equivalently-sized i32 vector for
    // this.
    SDValue One = DAG.getConstant(1, DL, XLenVT);
    SDValue ValLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, Zero);
    SDValue ValHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Val, One);
    MVT I32ContainerVT =
        MVT::getVectorVT(MVT::i32, ContainerVT.getVectorElementCount() * 2);
    SDValue I32Mask =
        getDefaultScalableVLOps(I32ContainerVT, DL, DAG, Subtarget).first;
    // Limit the active VL to two.
    SDValue InsertI64VL = DAG.getConstant(2, DL, XLenVT);
    // Note: We can't pass an UNDEF to the first VSLIDE1UP_VL since an untied
    // undef doesn't obey the earlyclobber constraint. Just splat a zero value.
    ValInVec = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, I32ContainerVT, Zero,
                           InsertI64VL);
    // First slide in the hi value, then the lo in underneath it.
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValHi, I32Mask, InsertI64VL);
    ValInVec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32ContainerVT, ValInVec,
                           ValLo, I32Mask, InsertI64VL);
    // Bitcast back to the right container type.
    ValInVec = DAG.getBitcast(ContainerVT, ValInVec);
  }

  // Now that the value is in a vector, slide it into position.
  SDValue InsertVL =
      DAG.getNode(ISD::ADD, DL, XLenVT, Idx, DAG.getConstant(1, DL, XLenVT));
  SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
                                ValInVec, Idx, Mask, InsertVL);
  if (!VecVT.isFixedLengthVector())
    return Slideup;
  return convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
}

// Custom-lower EXTRACT_VECTOR_ELT operations to slide the vector down, then
// extract the first element: (extractelt (slidedown vec, idx), 0). For integer
// types this is done using VMV_X_S to allow us to glean information about the
// sign bits of the result.
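// For example, (extractelt vec, 3) is conceptually (a sketch, not necessarily
// the exact machine code):
//   vslidedown v_tmp, vec     ; slide amount 3, VL = 1
//   vmv.x.s    a0, v_tmp      ; integer element types only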
SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                     SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Idx = Op.getOperand(1);
  SDValue Vec = Op.getOperand(0);
  EVT EltVT = Op.getValueType();
  MVT VecVT = Vec.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  if (VecVT.getVectorElementType() == MVT::i1) {
    // FIXME: For now we just promote to an i8 vector and extract from that,
    // but this is probably not optimal.
    MVT WideVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorElementCount());
    Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, WideVT, Vec);
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec, Idx);
  }

  // If this is a fixed vector, we need to convert it to a scalable vector.
  MVT ContainerVT = VecVT;
  if (VecVT.isFixedLengthVector()) {
    ContainerVT = getContainerForFixedLengthVector(VecVT);
    Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
  }

  // If the index is 0, the vector is already in the right position.
  if (!isNullConstant(Idx)) {
    // Use a VL of 1 to avoid processing more elements than we need.
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
    Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                      DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
  }

  if (!EltVT.isInteger()) {
    // Floating-point extracts are handled in TableGen.
    return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                       DAG.getConstant(0, DL, XLenVT));
  }

  SDValue Elt0 = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);
  return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Elt0);
}

// Some RVV intrinsics may claim that they want an integer operand to be
// promoted or expanded.
static SDValue lowerVectorIntrinsicSplats(SDValue Op, SelectionDAG &DAG,
                                          const RISCVSubtarget &Subtarget) {
  assert((Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
          Op.getOpcode() == ISD::INTRINSIC_W_CHAIN) &&
         "Unexpected opcode");

  if (!Subtarget.hasStdExtV())
    return SDValue();

  bool HasChain = Op.getOpcode() == ISD::INTRINSIC_W_CHAIN;
  unsigned IntNo = Op.getConstantOperandVal(HasChain ? 1 : 0);
  SDLoc DL(Op);

  const RISCVVIntrinsicsTable::RISCVVIntrinsicInfo *II =
      RISCVVIntrinsicsTable::getRISCVVIntrinsicInfo(IntNo);
  if (!II || !II->SplatOperand)
    return SDValue();

  unsigned SplatOp = II->SplatOperand + HasChain;
  assert(SplatOp < Op.getNumOperands());

  SmallVector<SDValue, 8> Operands(Op->op_begin(), Op->op_end());
  SDValue &ScalarOp = Operands[SplatOp];
  MVT OpVT = ScalarOp.getSimpleValueType();
  MVT XLenVT = Subtarget.getXLenVT();

  // If this isn't a scalar, or if its type is XLenVT, we're done.
  if (!OpVT.isScalarInteger() || OpVT == XLenVT)
    return SDValue();

  // Simplest case is that the operand needs to be promoted to XLenVT.
  if (OpVT.bitsLT(XLenVT)) {
    // If the operand is a constant, sign extend to increase our chances
    // of being able to use a .vi instruction. ANY_EXTEND would become a
    // zero extend and the simm5 check in isel would fail.
    // FIXME: Should we ignore the upper bits in isel instead?
    unsigned ExtOpc =
        isa<ConstantSDNode>(ScalarOp) ? ISD::SIGN_EXTEND : ISD::ANY_EXTEND;
    ScalarOp = DAG.getNode(ExtOpc, DL, XLenVT, ScalarOp);
    return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
  }

  // Use the previous operand to get the vXi64 VT. The result might be a mask
  // VT for compares. Using the previous operand assumes that the previous
  // operand will never have a smaller element size than a scalar operand and
  // that a widening operation never uses SEW=64.
  // NOTE: If this fails the below assert, we can probably just find the
  // element count from any operand or result and use it to construct the VT.
  assert(II->SplatOperand > 1 && "Unexpected splat operand!");
  MVT VT = Op.getOperand(SplatOp - 1).getSimpleValueType();

  // The more complex case is when the scalar is larger than XLenVT.
  assert(XLenVT == MVT::i32 && OpVT == MVT::i64 &&
         VT.getVectorElementType() == MVT::i64 && "Unexpected VTs!");

  // If this is a sign-extended 32-bit constant, we can truncate it and rely
  // on the instruction to sign-extend since SEW>XLEN.
  if (auto *CVal = dyn_cast<ConstantSDNode>(ScalarOp)) {
    if (isInt<32>(CVal->getSExtValue())) {
      ScalarOp = DAG.getConstant(CVal->getSExtValue(), DL, MVT::i32);
      return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
    }
  }

  // We need to convert the scalar to a splat vector.
  // FIXME: Can we implicitly truncate the scalar if it is known to
  // be sign extended?
  // VL should be the last operand.
  SDValue VL = Op.getOperand(Op.getNumOperands() - 1);
  assert(VL.getValueType() == XLenVT);
  ScalarOp = splatSplitI64WithVL(DL, VT, ScalarOp, VL, DAG);
  return DAG.getNode(Op->getOpcode(), DL, Op->getVTList(), Operands);
}

SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
                                                     SelectionDAG &DAG) const {
  unsigned IntNo = Op.getConstantOperandVal(0);
  SDLoc DL(Op);
  MVT XLenVT = Subtarget.getXLenVT();

  switch (IntNo) {
  default:
    break; // Don't custom lower most intrinsics.
  case Intrinsic::thread_pointer: {
    EVT PtrVT = getPointerTy(DAG.getDataLayout());
    return DAG.getRegister(RISCV::X4, PtrVT);
  }
  case Intrinsic::riscv_orc_b:
    // Lower to the GORCI encoding for orc.b.
    return DAG.getNode(RISCVISD::GORC, DL, XLenVT, Op.getOperand(1),
                       DAG.getConstant(7, DL, XLenVT));
  case Intrinsic::riscv_grev:
  case Intrinsic::riscv_gorc: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_grev ? RISCVISD::GREV : RISCVISD::GORC;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_shfl:
  case Intrinsic::riscv_unshfl: {
    unsigned Opc =
        IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_bcompress:
  case Intrinsic::riscv_bdecompress: {
    unsigned Opc = IntNo == Intrinsic::riscv_bcompress ? RISCVISD::BCOMPRESS
                                                       : RISCVISD::BDECOMPRESS;
    return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
  }
  case Intrinsic::riscv_vmv_x_s:
    assert(Op.getValueType() == XLenVT && "Unexpected VT!");
    return DAG.getNode(RISCVISD::VMV_X_S, DL, Op.getValueType(),
                       Op.getOperand(1));
  case Intrinsic::riscv_vmv_v_x:
    return lowerScalarSplat(Op.getOperand(1), Op.getOperand(2),
                            Op.getSimpleValueType(), DL, DAG, Subtarget);
  case Intrinsic::riscv_vfmv_v_f:
    return DAG.getNode(RISCVISD::VFMV_V_F_VL, DL, Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(2));
  case Intrinsic::riscv_vmv_s_x: {
    SDValue Scalar = Op.getOperand(2);

    if (Scalar.getValueType().bitsLE(XLenVT)) {
      Scalar = DAG.getNode(ISD::ANY_EXTEND, DL, XLenVT, Scalar);
      return DAG.getNode(RISCVISD::VMV_S_X_VL, DL, Op.getValueType(),
                         Op.getOperand(1), Scalar, Op.getOperand(3));
    }

    assert(Scalar.getValueType() == MVT::i64 && "Unexpected scalar VT!");

    // This is an i64 value that lives in two scalar registers. We have to
    // insert this in a convoluted way. First we build a vXi64 splat
    // containing the two values that we assemble using some bit math. Next
    // we'll use vid.v and vmseq to build a mask with bit 0 set. Then we'll
    // use that mask to merge element 0 from our splat into the source
    // vector.
    // FIXME: This is probably not the best way to do this, but it is
    // consistent with INSERT_VECTOR_ELT lowering so it is a good starting
    // point.
   3382     //   sw lo, (a0)
   3383     //   sw hi, 4(a0)
   3384     //   vlse vX, (a0)
   3385     //
   3386     //   vid.v      vVid
   3387     //   vmseq.vx   mMask, vVid, 0
   3388     //   vmerge.vvm vDest, vSrc, vVal, mMask
   3389     MVT VT = Op.getSimpleValueType();
   3390     SDValue Vec = Op.getOperand(1);
   3391     SDValue VL = Op.getOperand(3);
   3392 
   3393     SDValue SplattedVal = splatSplitI64WithVL(DL, VT, Scalar, VL, DAG);
   3394     SDValue SplattedIdx = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
   3395                                       DAG.getConstant(0, DL, MVT::i32), VL);
   3396 
   3397     MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorElementCount());
   3398     SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
   3399     SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
   3400     SDValue SelectCond =
   3401         DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, VID, SplattedIdx,
   3402                     DAG.getCondCode(ISD::SETEQ), Mask, VL);
   3403     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, SelectCond, SplattedVal,
   3404                        Vec, VL);
   3405   }
   3406   case Intrinsic::riscv_vslide1up:
   3407   case Intrinsic::riscv_vslide1down:
   3408   case Intrinsic::riscv_vslide1up_mask:
   3409   case Intrinsic::riscv_vslide1down_mask: {
   3410     // We need to special case these when the scalar is larger than XLen.
   3411     unsigned NumOps = Op.getNumOperands();
   3412     bool IsMasked = NumOps == 6;
   3413     unsigned OpOffset = IsMasked ? 1 : 0;
   3414     SDValue Scalar = Op.getOperand(2 + OpOffset);
   3415     if (Scalar.getValueType().bitsLE(XLenVT))
   3416       break;
   3417 
   3418     // Splatting a sign extended constant is fine.
   3419     if (auto *CVal = dyn_cast<ConstantSDNode>(Scalar))
   3420       if (isInt<32>(CVal->getSExtValue()))
   3421         break;
   3422 
   3423     MVT VT = Op.getSimpleValueType();
   3424     assert(VT.getVectorElementType() == MVT::i64 &&
   3425            Scalar.getValueType() == MVT::i64 && "Unexpected VTs");
   3426 
   3427     // Convert the vector source to the equivalent nxvXi32 vector.
   3428     MVT I32VT = MVT::getVectorVT(MVT::i32, VT.getVectorElementCount() * 2);
   3429     SDValue Vec = DAG.getBitcast(I32VT, Op.getOperand(1 + OpOffset));
   3430 
   3431     SDValue ScalarLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
   3432                                    DAG.getConstant(0, DL, XLenVT));
   3433     SDValue ScalarHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Scalar,
   3434                                    DAG.getConstant(1, DL, XLenVT));
   3435 
   3436     // Double the VL since we halved SEW.
   3437     SDValue VL = Op.getOperand(NumOps - 1);
   3438     SDValue I32VL =
   3439         DAG.getNode(ISD::SHL, DL, XLenVT, VL, DAG.getConstant(1, DL, XLenVT));
   3440 
   3441     MVT I32MaskVT = MVT::getVectorVT(MVT::i1, I32VT.getVectorElementCount());
   3442     SDValue I32Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, I32MaskVT, VL);
   3443 
   3444     // Shift the two scalar parts in using SEW=32 slide1up/slide1down
   3445     // instructions.
   3446     if (IntNo == Intrinsic::riscv_vslide1up ||
   3447         IntNo == Intrinsic::riscv_vslide1up_mask) {
   3448       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarHi,
   3449                         I32Mask, I32VL);
   3450       Vec = DAG.getNode(RISCVISD::VSLIDE1UP_VL, DL, I32VT, Vec, ScalarLo,
   3451                         I32Mask, I32VL);
   3452     } else {
   3453       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarLo,
   3454                         I32Mask, I32VL);
   3455       Vec = DAG.getNode(RISCVISD::VSLIDE1DOWN_VL, DL, I32VT, Vec, ScalarHi,
   3456                         I32Mask, I32VL);
   3457     }
   3458 
   3459     // Convert back to nxvXi64.
   3460     Vec = DAG.getBitcast(VT, Vec);
   3461 
   3462     if (!IsMasked)
   3463       return Vec;
   3464 
   3465     // Apply mask after the operation.
   3466     SDValue Mask = Op.getOperand(NumOps - 2);
   3467     SDValue MaskedOff = Op.getOperand(1);
   3468     return DAG.getNode(RISCVISD::VSELECT_VL, DL, VT, Mask, Vec, MaskedOff, VL);
   3469   }
   3470   }
   3471 
   3472   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
   3473 }
   3474 
   3475 SDValue RISCVTargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
   3476                                                     SelectionDAG &DAG) const {
   3477   return lowerVectorIntrinsicSplats(Op, DAG, Subtarget);
   3478 }
   3479 
   3480 static MVT getLMUL1VT(MVT VT) {
   3481   assert(VT.getVectorElementType().getSizeInBits() <= 64 &&
   3482          "Unexpected vector MVT");
   3483   return MVT::getScalableVectorVT(
   3484       VT.getVectorElementType(),
   3485       RISCV::RVVBitsPerBlock / VT.getVectorElementType().getSizeInBits());
   3486 }
   3487 
   3488 static unsigned getRVVReductionOp(unsigned ISDOpcode) {
   3489   switch (ISDOpcode) {
   3490   default:
   3491     llvm_unreachable("Unhandled reduction");
   3492   case ISD::VECREDUCE_ADD:
   3493     return RISCVISD::VECREDUCE_ADD_VL;
   3494   case ISD::VECREDUCE_UMAX:
   3495     return RISCVISD::VECREDUCE_UMAX_VL;
   3496   case ISD::VECREDUCE_SMAX:
   3497     return RISCVISD::VECREDUCE_SMAX_VL;
   3498   case ISD::VECREDUCE_UMIN:
   3499     return RISCVISD::VECREDUCE_UMIN_VL;
   3500   case ISD::VECREDUCE_SMIN:
   3501     return RISCVISD::VECREDUCE_SMIN_VL;
   3502   case ISD::VECREDUCE_AND:
   3503     return RISCVISD::VECREDUCE_AND_VL;
   3504   case ISD::VECREDUCE_OR:
   3505     return RISCVISD::VECREDUCE_OR_VL;
   3506   case ISD::VECREDUCE_XOR:
   3507     return RISCVISD::VECREDUCE_XOR_VL;
   3508   }
   3509 }
   3510 
   3511 SDValue RISCVTargetLowering::lowerVectorMaskVECREDUCE(SDValue Op,
   3512                                                       SelectionDAG &DAG) const {
   3513   SDLoc DL(Op);
   3514   SDValue Vec = Op.getOperand(0);
   3515   MVT VecVT = Vec.getSimpleValueType();
   3516   assert((Op.getOpcode() == ISD::VECREDUCE_AND ||
   3517           Op.getOpcode() == ISD::VECREDUCE_OR ||
   3518           Op.getOpcode() == ISD::VECREDUCE_XOR) &&
   3519          "Unexpected reduction lowering");
   3520 
   3521   MVT XLenVT = Subtarget.getXLenVT();
   3522   assert(Op.getValueType() == XLenVT &&
   3523          "Expected reduction output to be legalized to XLenVT");
   3524 
   3525   MVT ContainerVT = VecVT;
   3526   if (VecVT.isFixedLengthVector()) {
   3527     ContainerVT = getContainerForFixedLengthVector(VecVT);
   3528     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   3529   }
   3530 
   3531   SDValue Mask, VL;
   3532   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
   3533   SDValue Zero = DAG.getConstant(0, DL, XLenVT);
   3534 
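          // vpopc counts how many mask elements are set: an AND reduction is
          // true iff no element of ~x is set, an OR reduction is true iff any
          // element of x is set, and an XOR reduction is the parity of the
          // popcount.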
   3535   switch (Op.getOpcode()) {
   3536   default:
   3537     llvm_unreachable("Unhandled reduction");
   3538   case ISD::VECREDUCE_AND:
   3539     // vpopc ~x == 0
   3540     Vec = DAG.getNode(RISCVISD::VMXOR_VL, DL, ContainerVT, Vec, Mask, VL);
   3541     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
   3542     return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETEQ);
   3543   case ISD::VECREDUCE_OR:
   3544     // vpopc x != 0
   3545     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
   3546     return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
   3547   case ISD::VECREDUCE_XOR: {
   3548     // ((vpopc x) & 1) != 0
   3549     SDValue One = DAG.getConstant(1, DL, XLenVT);
   3550     Vec = DAG.getNode(RISCVISD::VPOPC_VL, DL, XLenVT, Vec, Mask, VL);
   3551     Vec = DAG.getNode(ISD::AND, DL, XLenVT, Vec, One);
   3552     return DAG.getSetCC(DL, XLenVT, Vec, Zero, ISD::SETNE);
   3553   }
   3554   }
   3555 }
   3556 
   3557 SDValue RISCVTargetLowering::lowerVECREDUCE(SDValue Op,
   3558                                             SelectionDAG &DAG) const {
   3559   SDLoc DL(Op);
   3560   SDValue Vec = Op.getOperand(0);
   3561   EVT VecEVT = Vec.getValueType();
   3562 
   3563   unsigned BaseOpc = ISD::getVecReduceBaseOpcode(Op.getOpcode());
   3564 
   3565   // Due to the ordering in type legalization we may have a vector type that
   3566   // needs to be split. Do that manually so we can get down to a legal type.
   3567   while (getTypeAction(*DAG.getContext(), VecEVT) ==
   3568          TargetLowering::TypeSplitVector) {
   3569     SDValue Lo, Hi;
   3570     std::tie(Lo, Hi) = DAG.SplitVector(Vec, DL);
   3571     VecEVT = Lo.getValueType();
   3572     Vec = DAG.getNode(BaseOpc, DL, VecEVT, Lo, Hi);
   3573   }
   3574 
   3575   // TODO: The type may need to be widened rather than split. Or widened before
   3576   // it can be split.
   3577   if (!isTypeLegal(VecEVT))
   3578     return SDValue();
   3579 
   3580   MVT VecVT = VecEVT.getSimpleVT();
   3581   MVT VecEltVT = VecVT.getVectorElementType();
   3582   unsigned RVVOpcode = getRVVReductionOp(Op.getOpcode());
   3583 
   3584   MVT ContainerVT = VecVT;
   3585   if (VecVT.isFixedLengthVector()) {
   3586     ContainerVT = getContainerForFixedLengthVector(VecVT);
   3587     Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   3588   }
   3589 
   3590   MVT M1VT = getLMUL1VT(ContainerVT);
   3591 
   3592   SDValue Mask, VL;
   3593   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
   3594 
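          // The RVV reduction instructions produce op(vs1[0], vs2[0..vl-1]) in
          // element 0 of the destination, so we seed vs1[0] (here a whole
          // splat; see the FIXME below) with the neutral element of the base
          // operation, e.g. 0 for add and all-ones for and.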
   3595   // FIXME: This is a VLMAX splat which might be too large and can prevent
   3596   // vsetvli removal.
   3597   SDValue NeutralElem =
   3598       DAG.getNeutralElement(BaseOpc, DL, VecEltVT, SDNodeFlags());
   3599   SDValue IdentitySplat = DAG.getSplatVector(M1VT, DL, NeutralElem);
   3600   SDValue Reduction =
   3601       DAG.getNode(RVVOpcode, DL, M1VT, Vec, IdentitySplat, Mask, VL);
   3602   SDValue Elt0 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
   3603                              DAG.getConstant(0, DL, Subtarget.getXLenVT()));
   3604   return DAG.getSExtOrTrunc(Elt0, DL, Op.getValueType());
   3605 }
   3606 
   3607 // Given a reduction op, this function returns the matching reduction opcode,
   3608 // the vector SDValue and the scalar SDValue required to lower this to a
   3609 // RISCVISD node.
   3610 static std::tuple<unsigned, SDValue, SDValue>
   3611 getRVVFPReductionOpAndOperands(SDValue Op, SelectionDAG &DAG, EVT EltVT) {
   3612   SDLoc DL(Op);
   3613   auto Flags = Op->getFlags();
   3614   unsigned Opcode = Op.getOpcode();
   3615   unsigned BaseOpcode = ISD::getVecReduceBaseOpcode(Opcode);
   3616   switch (Opcode) {
   3617   default:
   3618     llvm_unreachable("Unhandled reduction");
   3619   case ISD::VECREDUCE_FADD:
   3620     return std::make_tuple(RISCVISD::VECREDUCE_FADD_VL, Op.getOperand(0),
   3621                            DAG.getConstantFP(0.0, DL, EltVT));
   3622   case ISD::VECREDUCE_SEQ_FADD:
   3623     return std::make_tuple(RISCVISD::VECREDUCE_SEQ_FADD_VL, Op.getOperand(1),
   3624                            Op.getOperand(0));
   3625   case ISD::VECREDUCE_FMIN:
   3626     return std::make_tuple(RISCVISD::VECREDUCE_FMIN_VL, Op.getOperand(0),
   3627                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
   3628   case ISD::VECREDUCE_FMAX:
   3629     return std::make_tuple(RISCVISD::VECREDUCE_FMAX_VL, Op.getOperand(0),
   3630                            DAG.getNeutralElement(BaseOpcode, DL, EltVT, Flags));
   3631   }
   3632 }
   3633 
   3634 SDValue RISCVTargetLowering::lowerFPVECREDUCE(SDValue Op,
   3635                                               SelectionDAG &DAG) const {
   3636   SDLoc DL(Op);
   3637   MVT VecEltVT = Op.getSimpleValueType();
   3638 
   3639   unsigned RVVOpcode;
   3640   SDValue VectorVal, ScalarVal;
   3641   std::tie(RVVOpcode, VectorVal, ScalarVal) =
   3642       getRVVFPReductionOpAndOperands(Op, DAG, VecEltVT);
   3643   MVT VecVT = VectorVal.getSimpleValueType();
   3644 
   3645   MVT ContainerVT = VecVT;
   3646   if (VecVT.isFixedLengthVector()) {
   3647     ContainerVT = getContainerForFixedLengthVector(VecVT);
   3648     VectorVal = convertToScalableVector(ContainerVT, VectorVal, DAG, Subtarget);
   3649   }
   3650 
   3651   MVT M1VT = getLMUL1VT(VectorVal.getSimpleValueType());
   3652 
   3653   SDValue Mask, VL;
   3654   std::tie(Mask, VL) = getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget);
   3655 
   3656   // FIXME: This is a VLMAX splat which might be too large and can prevent
   3657   // vsetvli removal.
   3658   SDValue ScalarSplat = DAG.getSplatVector(M1VT, DL, ScalarVal);
   3659   SDValue Reduction =
   3660       DAG.getNode(RVVOpcode, DL, M1VT, VectorVal, ScalarSplat, Mask, VL);
   3661   return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VecEltVT, Reduction,
   3662                      DAG.getConstant(0, DL, Subtarget.getXLenVT()));
   3663 }
   3664 
   3665 SDValue RISCVTargetLowering::lowerINSERT_SUBVECTOR(SDValue Op,
   3666                                                    SelectionDAG &DAG) const {
   3667   SDValue Vec = Op.getOperand(0);
   3668   SDValue SubVec = Op.getOperand(1);
   3669   MVT VecVT = Vec.getSimpleValueType();
   3670   MVT SubVecVT = SubVec.getSimpleValueType();
   3671 
   3672   SDLoc DL(Op);
   3673   MVT XLenVT = Subtarget.getXLenVT();
   3674   unsigned OrigIdx = Op.getConstantOperandVal(2);
   3675   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
   3676 
   3677   // We don't have the ability to slide mask vectors up indexed by their i1
   3678   // elements; the smallest we can do is i8. Often we are able to bitcast to
   3679   // equivalent i8 vectors. Note that when inserting a fixed-length vector
   3680   // into a scalable one, we might not necessarily have enough scalable
   3681   // elements to safely divide by 8: nxv1i1 = insert nxv1i1, v4i1 is valid.
   3682   if (SubVecVT.getVectorElementType() == MVT::i1 &&
   3683       (OrigIdx != 0 || !Vec.isUndef())) {
   3684     if (VecVT.getVectorMinNumElements() >= 8 &&
   3685         SubVecVT.getVectorMinNumElements() >= 8) {
   3686       assert(OrigIdx % 8 == 0 && "Invalid index");
   3687       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
   3688              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
   3689              "Unexpected mask vector lowering");
   3690       OrigIdx /= 8;
   3691       SubVecVT =
   3692           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
   3693                            SubVecVT.isScalableVector());
   3694       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
   3695                                VecVT.isScalableVector());
   3696       Vec = DAG.getBitcast(VecVT, Vec);
   3697       SubVec = DAG.getBitcast(SubVecVT, SubVec);
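              // For example, inserting nxv8i1 at index 8 into nxv16i1 is
              // instead performed as inserting nxv1i8 at index 1 into nxv2i8.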
   3698     } else {
   3699       // We can't slide this mask vector up indexed by its i1 elements.
   3700       // This poses a problem when we wish to insert a scalable vector which
   3701       // can't be re-expressed as a larger type. Just choose the slow path and
   3702       // extend to a larger type, then truncate back down.
   3703       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
   3704       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
   3705       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
   3706       SubVec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtSubVecVT, SubVec);
   3707       Vec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ExtVecVT, Vec, SubVec,
   3708                         Op.getOperand(2));
   3709       SDValue SplatZero = DAG.getConstant(0, DL, ExtVecVT);
   3710       return DAG.getSetCC(DL, VecVT, Vec, SplatZero, ISD::SETNE);
   3711     }
   3712   }
   3713 
   3714   // If the subvector is a fixed-length type, we cannot use subregister
   3715   // manipulation to simplify the codegen; we don't know which register of an
   3716   // LMUL group contains the specific subvector as we only know the minimum
   3717   // register size. Therefore we must slide the vector group up the full
   3718   // amount.
   3719   if (SubVecVT.isFixedLengthVector()) {
   3720     if (OrigIdx == 0 && Vec.isUndef())
   3721       return Op;
   3722     MVT ContainerVT = VecVT;
   3723     if (VecVT.isFixedLengthVector()) {
   3724       ContainerVT = getContainerForFixedLengthVector(VecVT);
   3725       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   3726     }
   3727     SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, ContainerVT,
   3728                          DAG.getUNDEF(ContainerVT), SubVec,
   3729                          DAG.getConstant(0, DL, XLenVT));
   3730     SDValue Mask =
   3731         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
   3732     // Set the vector length to only the number of elements we care about. Note
   3733     // that for slideup this includes the offset.
   3734     SDValue VL =
   3735         DAG.getConstant(OrigIdx + SubVecVT.getVectorNumElements(), DL, XLenVT);
   3736     SDValue SlideupAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
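            // For example, inserting v2i32 at index 4 into v8i32 slides the
            // subvector up by 4 with VL=6: lanes 0-3 are left undisturbed,
            // lanes 4-5 receive the subvector, and the tail stays untouched.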
   3737     SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, ContainerVT, Vec,
   3738                                   SubVec, SlideupAmt, Mask, VL);
   3739     if (VecVT.isFixedLengthVector())
   3740       Slideup = convertFromScalableVector(VecVT, Slideup, DAG, Subtarget);
   3741     return DAG.getBitcast(Op.getValueType(), Slideup);
   3742   }
   3743 
   3744   unsigned SubRegIdx, RemIdx;
   3745   std::tie(SubRegIdx, RemIdx) =
   3746       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
   3747           VecVT, SubVecVT, OrigIdx, TRI);
   3748 
   3749   RISCVII::VLMUL SubVecLMUL = RISCVTargetLowering::getLMUL(SubVecVT);
   3750   bool IsSubVecPartReg = SubVecLMUL == RISCVII::VLMUL::LMUL_F2 ||
   3751                          SubVecLMUL == RISCVII::VLMUL::LMUL_F4 ||
   3752                          SubVecLMUL == RISCVII::VLMUL::LMUL_F8;
   3753 
   3754   // 1. If the Idx has been completely eliminated and this subvector's size is
   3755   // a vector register or a multiple thereof, or the surrounding elements are
   3756   // undef, then this is a subvector insert which naturally aligns to a vector
   3757   // register. These can easily be handled using subregister manipulation.
   3758   // 2. If the subvector is smaller than a vector register, then the insertion
   3759   // must preserve the undisturbed elements of the register. We do this by
   3760   // lowering to an EXTRACT_SUBVECTOR grabbing the nearest LMUL=1 vector type
   3761   // (which resolves to a subregister copy), performing a VSLIDEUP to place the
   3762   // subvector within the vector register, and an INSERT_SUBVECTOR of that
   3763   // LMUL=1 type back into the larger vector (resolving to another subregister
   3764   // operation). See below for how our VSLIDEUP works. We go via a LMUL=1 type
   3765   // to avoid allocating a large register group to hold our subvector.
   3766   if (RemIdx == 0 && (!IsSubVecPartReg || Vec.isUndef()))
   3767     return Op;
   3768 
   3769   // VSLIDEUP works by leaving elements 0<=i<OFFSET undisturbed, elements
   3770   // OFFSET<=i<VL set to the "subvector" and VL<=i<VLMAX set to the tail policy
   3771   // (in our case undisturbed). This means we can set up a subvector insertion
   3772   // where OFFSET is the insertion offset, and the VL is the OFFSET plus the
   3773   // size of the subvector.
   3774   MVT InterSubVT = VecVT;
   3775   SDValue AlignedExtract = Vec;
   3776   unsigned AlignedIdx = OrigIdx - RemIdx;
   3777   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
   3778     InterSubVT = getLMUL1VT(VecVT);
   3779     // Extract a subvector equal to the nearest full vector register type. This
   3780     // should resolve to an EXTRACT_SUBREG instruction.
   3781     AlignedExtract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
   3782                                  DAG.getConstant(AlignedIdx, DL, XLenVT));
   3783   }
   3784 
   3785   SDValue SlideupAmt = DAG.getConstant(RemIdx, DL, XLenVT);
   3786   // For scalable vectors this must be further multiplied by vscale.
   3787   SlideupAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlideupAmt);
   3788 
   3789   SDValue Mask, VL;
   3790   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
   3791 
   3792   // Construct the vector length corresponding to RemIdx + length(SubVecVT).
   3793   VL = DAG.getConstant(SubVecVT.getVectorMinNumElements(), DL, XLenVT);
   3794   VL = DAG.getNode(ISD::VSCALE, DL, XLenVT, VL);
   3795   VL = DAG.getNode(ISD::ADD, DL, XLenVT, SlideupAmt, VL);
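          // Both terms above scale with vscale, so the slide covers exactly
          // the lanes up to the end of the inserted subvector:
          // VL = (RemIdx + SubVecVT's min element count) * vscale.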
   3796 
   3797   SubVec = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, InterSubVT,
   3798                        DAG.getUNDEF(InterSubVT), SubVec,
   3799                        DAG.getConstant(0, DL, XLenVT));
   3800 
   3801   SDValue Slideup = DAG.getNode(RISCVISD::VSLIDEUP_VL, DL, InterSubVT,
   3802                                 AlignedExtract, SubVec, SlideupAmt, Mask, VL);
   3803 
   3804   // If required, insert this subvector back into the correct vector register.
   3805   // This should resolve to an INSERT_SUBREG instruction.
   3806   if (VecVT.bitsGT(InterSubVT))
   3807     Slideup = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, Vec, Slideup,
   3808                           DAG.getConstant(AlignedIdx, DL, XLenVT));
   3809 
   3810   // We might have bitcast from a mask type: cast back to the original type if
   3811   // required.
   3812   return DAG.getBitcast(Op.getSimpleValueType(), Slideup);
   3813 }
   3814 
   3815 SDValue RISCVTargetLowering::lowerEXTRACT_SUBVECTOR(SDValue Op,
   3816                                                     SelectionDAG &DAG) const {
   3817   SDValue Vec = Op.getOperand(0);
   3818   MVT SubVecVT = Op.getSimpleValueType();
   3819   MVT VecVT = Vec.getSimpleValueType();
   3820 
   3821   SDLoc DL(Op);
   3822   MVT XLenVT = Subtarget.getXLenVT();
   3823   unsigned OrigIdx = Op.getConstantOperandVal(1);
   3824   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
   3825 
   3826   // We don't have the ability to slide mask vectors down indexed by their i1
   3827   // elements; the smallest we can do is i8. Often we are able to bitcast to
   3828   // equivalent i8 vectors. Note that when extracting a fixed-length vector
   3829   // from a scalable one, we might not necessarily have enough scalable
   3830   // elements to safely divide by 8: v8i1 = extract nxv1i1 is valid.
   3831   if (SubVecVT.getVectorElementType() == MVT::i1 && OrigIdx != 0) {
   3832     if (VecVT.getVectorMinNumElements() >= 8 &&
   3833         SubVecVT.getVectorMinNumElements() >= 8) {
   3834       assert(OrigIdx % 8 == 0 && "Invalid index");
   3835       assert(VecVT.getVectorMinNumElements() % 8 == 0 &&
   3836              SubVecVT.getVectorMinNumElements() % 8 == 0 &&
   3837              "Unexpected mask vector lowering");
   3838       OrigIdx /= 8;
   3839       SubVecVT =
   3840           MVT::getVectorVT(MVT::i8, SubVecVT.getVectorMinNumElements() / 8,
   3841                            SubVecVT.isScalableVector());
   3842       VecVT = MVT::getVectorVT(MVT::i8, VecVT.getVectorMinNumElements() / 8,
   3843                                VecVT.isScalableVector());
   3844       Vec = DAG.getBitcast(VecVT, Vec);
   3845     } else {
   3846       // We can't slide this mask vector down indexed by its i1 elements.
   3847       // This poses a problem when we wish to extract a scalable vector which
   3848       // can't be re-expressed as a larger type. Just choose the slow path and
   3849       // extend to a larger type, then truncate back down.
   3850       // TODO: We could probably improve this when extracting a fixed-length
   3851       // vector from a fixed-length vector, where we can extract as i8 and
   3852       // shift the correct element right to reach the desired subvector.
   3853       MVT ExtVecVT = VecVT.changeVectorElementType(MVT::i8);
   3854       MVT ExtSubVecVT = SubVecVT.changeVectorElementType(MVT::i8);
   3855       Vec = DAG.getNode(ISD::ZERO_EXTEND, DL, ExtVecVT, Vec);
   3856       Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ExtSubVecVT, Vec,
   3857                         Op.getOperand(1));
   3858       SDValue SplatZero = DAG.getConstant(0, DL, ExtSubVecVT);
   3859       return DAG.getSetCC(DL, SubVecVT, Vec, SplatZero, ISD::SETNE);
   3860     }
   3861   }
   3862 
   3863   // If the subvector is a fixed-length type, we cannot use subregister
   3864   // manipulation to simplify the codegen; we don't know which register of an
   3865   // LMUL group contains the specific subvector as we only know the minimum
   3866   // register size. Therefore we must slide the vector group down the full
   3867   // amount.
   3868   if (SubVecVT.isFixedLengthVector()) {
   3869     // With an index of 0 this is a cast-like subvector, which can be performed
   3870     // with subregister operations.
   3871     if (OrigIdx == 0)
   3872       return Op;
   3873     MVT ContainerVT = VecVT;
   3874     if (VecVT.isFixedLengthVector()) {
   3875       ContainerVT = getContainerForFixedLengthVector(VecVT);
   3876       Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
   3877     }
   3878     SDValue Mask =
   3879         getDefaultVLOps(VecVT, ContainerVT, DL, DAG, Subtarget).first;
   3880     // Set the vector length to only the number of elements we care about. This
   3881     // avoids sliding down elements we're going to discard straight away.
   3882     SDValue VL = DAG.getConstant(SubVecVT.getVectorNumElements(), DL, XLenVT);
   3883     SDValue SlidedownAmt = DAG.getConstant(OrigIdx, DL, XLenVT);
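            // For example, extracting v2i32 at index 4 from v8i32 slides the
            // vector down by 4 with VL=2, moving only the two lanes we keep.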
   3884     SDValue Slidedown =
   3885         DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
   3886                     DAG.getUNDEF(ContainerVT), Vec, SlidedownAmt, Mask, VL);
   3887     // Now we can use a cast-like subvector extract to get the result.
   3888     Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
   3889                             DAG.getConstant(0, DL, XLenVT));
   3890     return DAG.getBitcast(Op.getValueType(), Slidedown);
   3891   }
   3892 
   3893   unsigned SubRegIdx, RemIdx;
   3894   std::tie(SubRegIdx, RemIdx) =
   3895       RISCVTargetLowering::decomposeSubvectorInsertExtractToSubRegs(
   3896           VecVT, SubVecVT, OrigIdx, TRI);
   3897 
   3898   // If the Idx has been completely eliminated then this is a subvector extract
   3899   // which naturally aligns to a vector register. These can easily be handled
   3900   // using subregister manipulation.
   3901   if (RemIdx == 0)
   3902     return Op;
   3903 
   3904   // Else we must shift our vector register directly to extract the subvector.
   3905   // Do this using VSLIDEDOWN.
   3906 
   3907   // If the vector type is an LMUL-group type, extract a subvector equal to the
   3908   // nearest full vector register type. This should resolve to a EXTRACT_SUBREG
   3909   // nearest full vector register type. This should resolve to an EXTRACT_SUBREG
   3910   MVT InterSubVT = VecVT;
   3911   if (VecVT.bitsGT(getLMUL1VT(VecVT))) {
   3912     InterSubVT = getLMUL1VT(VecVT);
   3913     Vec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InterSubVT, Vec,
   3914                       DAG.getConstant(OrigIdx - RemIdx, DL, XLenVT));
   3915   }
   3916 
   3917   // Slide this vector register down by the desired number of elements in order
   3918   // to place the desired subvector starting at element 0.
   3919   SDValue SlidedownAmt = DAG.getConstant(RemIdx, DL, XLenVT);
   3920   // For scalable vectors this must be further multiplied by vscale.
   3921   SlidedownAmt = DAG.getNode(ISD::VSCALE, DL, XLenVT, SlidedownAmt);
   3922 
   3923   SDValue Mask, VL;
   3924   std::tie(Mask, VL) = getDefaultScalableVLOps(InterSubVT, DL, DAG, Subtarget);
   3925   SDValue Slidedown =
   3926       DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, InterSubVT,
   3927                   DAG.getUNDEF(InterSubVT), Vec, SlidedownAmt, Mask, VL);
   3928 
   3929   // Now the vector is in the right position, extract our final subvector. This
   3930   // should resolve to a COPY.
   3931   Slidedown = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SubVecVT, Slidedown,
   3932                           DAG.getConstant(0, DL, XLenVT));
   3933 
   3934   // We might have bitcast from a mask type: cast back to the original type if
   3935   // required.
   3936   return DAG.getBitcast(Op.getSimpleValueType(), Slidedown);
   3937 }
   3938 
   3939 // Lower step_vector to the vid instruction. Any non-identity step value must
   3940 // be accounted for by manual expansion.
   3941 SDValue RISCVTargetLowering::lowerSTEP_VECTOR(SDValue Op,
   3942                                               SelectionDAG &DAG) const {
   3943   SDLoc DL(Op);
   3944   MVT VT = Op.getSimpleValueType();
   3945   MVT XLenVT = Subtarget.getXLenVT();
   3946   SDValue Mask, VL;
   3947   std::tie(Mask, VL) = getDefaultScalableVLOps(VT, DL, DAG, Subtarget);
   3948   SDValue StepVec = DAG.getNode(RISCVISD::VID_VL, DL, VT, Mask, VL);
   3949   uint64_t StepValImm = Op.getConstantOperandVal(0);
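          // For example, a step of 8 lowers to vid.v followed by a shift left
          // by 3, while a non-power-of-two step such as 6 lowers to vid.v
          // followed by a multiply by the step value.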
   3950   if (StepValImm != 1) {
   3951     assert(Op.getOperand(0).getValueType() == XLenVT &&
   3952            "Unexpected step value type");
   3953     if (isPowerOf2_64(StepValImm)) {
   3954       SDValue StepVal =
   3955           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT,
   3956                       DAG.getConstant(Log2_64(StepValImm), DL, XLenVT));
   3957       StepVec = DAG.getNode(ISD::SHL, DL, VT, StepVec, StepVal);
   3958     } else {
   3959       SDValue StepVal =
   3960           DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VT, Op.getOperand(0));
   3961       StepVec = DAG.getNode(ISD::MUL, DL, VT, StepVec, StepVal);
   3962     }
   3963   }
   3964   return StepVec;
   3965 }
   3966 
   3967 // Implement vector_reverse using vrgather.vv with indices determined by
   3968 // subtracting the id of each element from (VLMAX-1). This will convert
   3969 // the indices like so:
   3970 // (0, 1,..., VLMAX-2, VLMAX-1) -> (VLMAX-1, VLMAX-2,..., 1, 0).
   3971 // TODO: This code assumes VLMAX <= 65536 for LMUL=8 SEW=16.
   3972 SDValue RISCVTargetLowering::lowerVECTOR_REVERSE(SDValue Op,
   3973                                                  SelectionDAG &DAG) const {
   3974   SDLoc DL(Op);
   3975   MVT VecVT = Op.getSimpleValueType();
   3976   unsigned EltSize = VecVT.getScalarSizeInBits();
   3977   unsigned MinSize = VecVT.getSizeInBits().getKnownMinValue();
   3978 
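          // An upper bound on VLMAX is LMUL * VLEN / SEW: MinSize /
          // RVVBitsPerBlock is this type's LMUL, and VectorBitsMax bounds VLEN.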
   3979   unsigned MaxVLMAX = 0;
   3980   unsigned VectorBitsMax = Subtarget.getMaxRVVVectorSizeInBits();
   3981   if (VectorBitsMax != 0)
   3982     MaxVLMAX = ((VectorBitsMax / EltSize) * MinSize) / RISCV::RVVBitsPerBlock;
   3983 
   3984   unsigned GatherOpc = RISCVISD::VRGATHER_VV_VL;
   3985   MVT IntVT = VecVT.changeVectorElementTypeToInteger();
   3986 
   3987   // If this is SEW=8 and VLMAX is unknown or more than 256, we need
   3988   // to use vrgatherei16.vv.
   3989   // TODO: It's also possible to use vrgatherei16.vv for other types to
   3990   // decrease register width for the index calculation.
   3991   if ((MaxVLMAX == 0 || MaxVLMAX > 256) && EltSize == 8) {
   3992     // If this is LMUL=8, we have to split before we can use vrgatherei16.vv.
   3993     // Reverse each half, then reassemble them in reverse order.
   3994     // NOTE: It's also possible that after splitting, VLMAX no longer
   3995     // requires vrgatherei16.vv.
   3996     if (MinSize == (8 * RISCV::RVVBitsPerBlock)) {
   3997       SDValue Lo, Hi;
   3998       std::tie(Lo, Hi) = DAG.SplitVectorOperand(Op.getNode(), 0);
   3999       EVT LoVT, HiVT;
   4000       std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VecVT);
   4001       Lo = DAG.getNode(ISD::VECTOR_REVERSE, DL, LoVT, Lo);
   4002       Hi = DAG.getNode(ISD::VECTOR_REVERSE, DL, HiVT, Hi);
   4003       // Reassemble the low and high pieces reversed.
   4004       // FIXME: This is a CONCAT_VECTORS.
   4005       SDValue Res =
   4006           DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VecVT, DAG.getUNDEF(VecVT), Hi,
   4007                       DAG.getIntPtrConstant(0, DL));
   4008       return DAG.getNode(
   4009           ISD::INSERT_SUBVECTOR, DL, VecVT, Res, Lo,
   4010           DAG.getIntPtrConstant(LoVT.getVectorMinNumElements(), DL));
   4011     }
   4012 
   4013     // Just promote the int type to i16 which will double the LMUL.
   4014     IntVT = MVT::getVectorVT(MVT::i16, VecVT.getVectorElementCount());
   4015     GatherOpc = RISCVISD::VRGATHEREI16_VV_VL;
   4016   }
   4017 
   4018   MVT XLenVT = Subtarget.getXLenVT();
   4019   SDValue Mask, VL;
   4020   std::tie(Mask, VL) = getDefaultScalableVLOps(VecVT, DL, DAG, Subtarget);
   4021 
   4022   // Calculate VLMAX-1 for the desired SEW.
   4023   unsigned MinElts = VecVT.getVectorMinNumElements();
   4024   SDValue VLMax = DAG.getNode(ISD::VSCALE, DL, XLenVT,
   4025                               DAG.getConstant(MinElts, DL, XLenVT));
   4026   SDValue VLMinus1 =
   4027       DAG.getNode(ISD::SUB, DL, XLenVT, VLMax, DAG.getConstant(1, DL, XLenVT));
   4028 
   4029   // Splat VLMAX-1 taking care to handle SEW==64 on RV32.
   4030   bool IsRV32E64 =
   4031       !Subtarget.is64Bit() && IntVT.getVectorElementType() == MVT::i64;
   4032   SDValue SplatVL;
   4033   if (!IsRV32E64)
   4034     SplatVL = DAG.getSplatVector(IntVT, DL, VLMinus1);
   4035   else
   4036     SplatVL = DAG.getNode(RISCVISD::SPLAT_VECTOR_I64, DL, IntVT, VLMinus1);
   4037 
   4038   SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, IntVT, Mask, VL);
   4039   SDValue Indices =
   4040       DAG.getNode(RISCVISD::SUB_VL, DL, IntVT, SplatVL, VID, Mask, VL);
   4041 
   4042   return DAG.getNode(GatherOpc, DL, VecVT, Op.getOperand(0), Indices, Mask, VL);
   4043 }
   4044 
   4045 SDValue
   4046 RISCVTargetLowering::lowerFixedLengthVectorLoadToRVV(SDValue Op,
   4047                                                      SelectionDAG &DAG) const {
   4048   auto *Load = cast<LoadSDNode>(Op);
   4049 
   4050   SDLoc DL(Op);
   4051   MVT VT = Op.getSimpleValueType();
   4052   MVT ContainerVT = getContainerForFixedLengthVector(VT);
   4053 
   4054   SDValue VL =
   4055       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
   4056 
   4057   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
   4058   SDValue NewLoad = DAG.getMemIntrinsicNode(
   4059       RISCVISD::VLE_VL, DL, VTs, {Load->getChain(), Load->getBasePtr(), VL},
   4060       Load->getMemoryVT(), Load->getMemOperand());
   4061 
   4062   SDValue Result = convertFromScalableVector(VT, NewLoad, DAG, Subtarget);
   4063   return DAG.getMergeValues({Result, Load->getChain()}, DL);
   4064 }
   4065 
   4066 SDValue
   4067 RISCVTargetLowering::lowerFixedLengthVectorStoreToRVV(SDValue Op,
   4068                                                       SelectionDAG &DAG) const {
   4069   auto *Store = cast<StoreSDNode>(Op);
   4070 
   4071   SDLoc DL(Op);
   4072   SDValue StoreVal = Store->getValue();
   4073   MVT VT = StoreVal.getSimpleValueType();
   4074 
   4075   // If the size is less than a byte, we need to pad with zeros to make a byte.
   4076   if (VT.getVectorElementType() == MVT::i1 && VT.getVectorNumElements() < 8) {
   4077     VT = MVT::v8i1;
   4078     StoreVal = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
   4079                            DAG.getConstant(0, DL, VT), StoreVal,
   4080                            DAG.getIntPtrConstant(0, DL));
   4081   }
   4082 
   4083   MVT ContainerVT = getContainerForFixedLengthVector(VT);
   4084 
   4085   SDValue VL =
   4086       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
   4087 
   4088   SDValue NewValue =
   4089       convertToScalableVector(ContainerVT, StoreVal, DAG, Subtarget);
   4090   return DAG.getMemIntrinsicNode(
   4091       RISCVISD::VSE_VL, DL, DAG.getVTList(MVT::Other),
   4092       {Store->getChain(), NewValue, Store->getBasePtr(), VL},
   4093       Store->getMemoryVT(), Store->getMemOperand());
   4094 }
   4095 
   4096 SDValue RISCVTargetLowering::lowerMLOAD(SDValue Op, SelectionDAG &DAG) const {
   4097   auto *Load = cast<MaskedLoadSDNode>(Op);
   4098 
   4099   SDLoc DL(Op);
   4100   MVT VT = Op.getSimpleValueType();
   4101   MVT XLenVT = Subtarget.getXLenVT();
   4102 
   4103   SDValue Mask = Load->getMask();
   4104   SDValue PassThru = Load->getPassThru();
   4105   SDValue VL;
   4106 
   4107   MVT ContainerVT = VT;
   4108   if (VT.isFixedLengthVector()) {
   4109     ContainerVT = getContainerForFixedLengthVector(VT);
   4110     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
   4111 
   4112     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
   4113     PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
   4114     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
   4115   } else
   4116     VL = DAG.getRegister(RISCV::X0, XLenVT);
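          // Using X0 as the VL operand selects VLMAX, i.e. the whole scalable
          // vector.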
   4117 
   4118   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
   4119   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vle_mask, DL, XLenVT);
   4120   SDValue Ops[] = {Load->getChain(),   IntID, PassThru,
   4121                    Load->getBasePtr(), Mask,  VL};
   4122   SDValue Result =
   4123       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
   4124                               Load->getMemoryVT(), Load->getMemOperand());
   4125   SDValue Chain = Result.getValue(1);
   4126 
   4127   if (VT.isFixedLengthVector())
   4128     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
   4129 
   4130   return DAG.getMergeValues({Result, Chain}, DL);
   4131 }
   4132 
   4133 SDValue RISCVTargetLowering::lowerMSTORE(SDValue Op, SelectionDAG &DAG) const {
   4134   auto *Store = cast<MaskedStoreSDNode>(Op);
   4135 
   4136   SDLoc DL(Op);
   4137   SDValue Val = Store->getValue();
   4138   SDValue Mask = Store->getMask();
   4139   MVT VT = Val.getSimpleValueType();
   4140   MVT XLenVT = Subtarget.getXLenVT();
   4141   SDValue VL;
   4142 
   4143   MVT ContainerVT = VT;
   4144   if (VT.isFixedLengthVector()) {
   4145     ContainerVT = getContainerForFixedLengthVector(VT);
   4146     MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
   4147 
   4148     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
   4149     Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
   4150     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
   4151   } else
   4152     VL = DAG.getRegister(RISCV::X0, XLenVT);
   4153 
   4154   SDValue IntID = DAG.getTargetConstant(Intrinsic::riscv_vse_mask, DL, XLenVT);
   4155   return DAG.getMemIntrinsicNode(
   4156       ISD::INTRINSIC_VOID, DL, DAG.getVTList(MVT::Other),
   4157       {Store->getChain(), IntID, Val, Store->getBasePtr(), Mask, VL},
   4158       Store->getMemoryVT(), Store->getMemOperand());
   4159 }
   4160 
   4161 SDValue
   4162 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
   4163                                                       SelectionDAG &DAG) const {
   4164   MVT InVT = Op.getOperand(0).getSimpleValueType();
   4165   MVT ContainerVT = getContainerForFixedLengthVector(InVT);
   4166 
   4167   MVT VT = Op.getSimpleValueType();
   4168 
   4169   SDValue Op1 =
   4170       convertToScalableVector(ContainerVT, Op.getOperand(0), DAG, Subtarget);
   4171   SDValue Op2 =
   4172       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
   4173 
   4174   SDLoc DL(Op);
   4175   SDValue VL =
   4176       DAG.getConstant(VT.getVectorNumElements(), DL, Subtarget.getXLenVT());
   4177 
   4178   MVT MaskVT = MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
   4179   SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
   4180 
   4181   SDValue Cmp = DAG.getNode(RISCVISD::SETCC_VL, DL, MaskVT, Op1, Op2,
   4182                             Op.getOperand(2), Mask, VL);
   4183 
   4184   return convertFromScalableVector(VT, Cmp, DAG, Subtarget);
   4185 }
   4186 
   4187 SDValue RISCVTargetLowering::lowerFixedLengthVectorLogicOpToRVV(
   4188     SDValue Op, SelectionDAG &DAG, unsigned MaskOpc, unsigned VecOpc) const {
   4189   MVT VT = Op.getSimpleValueType();
   4190 
   4191   if (VT.getVectorElementType() == MVT::i1)
   4192     return lowerToScalableOp(Op, DAG, MaskOpc, /*HasMask*/ false);
   4193 
   4194   return lowerToScalableOp(Op, DAG, VecOpc, /*HasMask*/ true);
   4195 }
   4196 
   4197 // Lower vector ABS to smax(X, sub(0, X)).
   4198 SDValue RISCVTargetLowering::lowerABS(SDValue Op, SelectionDAG &DAG) const {
   4199   SDLoc DL(Op);
   4200   MVT VT = Op.getSimpleValueType();
   4201   SDValue X = Op.getOperand(0);
   4202 
   4203   assert(VT.isFixedLengthVector() && "Unexpected type");
   4204 
   4205   MVT ContainerVT = getContainerForFixedLengthVector(VT);
   4206   X = convertToScalableVector(ContainerVT, X, DAG, Subtarget);
   4207 
   4208   SDValue Mask, VL;
   4209   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   4210 
   4211   SDValue SplatZero =
   4212       DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
   4213                   DAG.getConstant(0, DL, Subtarget.getXLenVT()));
   4214   SDValue NegX =
   4215       DAG.getNode(RISCVISD::SUB_VL, DL, ContainerVT, SplatZero, X, Mask, VL);
   4216   SDValue Max =
   4217       DAG.getNode(RISCVISD::SMAX_VL, DL, ContainerVT, X, NegX, Mask, VL);
   4218 
   4219   return convertFromScalableVector(VT, Max, DAG, Subtarget);
   4220 }
   4221 
   4222 SDValue RISCVTargetLowering::lowerFixedLengthVectorFCOPYSIGNToRVV(
   4223     SDValue Op, SelectionDAG &DAG) const {
   4224   SDLoc DL(Op);
   4225   MVT VT = Op.getSimpleValueType();
   4226   SDValue Mag = Op.getOperand(0);
   4227   SDValue Sign = Op.getOperand(1);
   4228   assert(Mag.getValueType() == Sign.getValueType() &&
   4229          "Can only handle COPYSIGN with matching types.");
   4230 
   4231   MVT ContainerVT = getContainerForFixedLengthVector(VT);
   4232   Mag = convertToScalableVector(ContainerVT, Mag, DAG, Subtarget);
   4233   Sign = convertToScalableVector(ContainerVT, Sign, DAG, Subtarget);
   4234 
   4235   SDValue Mask, VL;
   4236   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   4237 
   4238   SDValue CopySign =
   4239       DAG.getNode(RISCVISD::FCOPYSIGN_VL, DL, ContainerVT, Mag, Sign, Mask, VL);
   4240 
   4241   return convertFromScalableVector(VT, CopySign, DAG, Subtarget);
   4242 }
   4243 
   4244 SDValue RISCVTargetLowering::lowerFixedLengthVectorSelectToRVV(
   4245     SDValue Op, SelectionDAG &DAG) const {
   4246   MVT VT = Op.getSimpleValueType();
   4247   MVT ContainerVT = getContainerForFixedLengthVector(VT);
   4248 
   4249   MVT I1ContainerVT =
   4250       MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
   4251 
   4252   SDValue CC =
   4253       convertToScalableVector(I1ContainerVT, Op.getOperand(0), DAG, Subtarget);
   4254   SDValue Op1 =
   4255       convertToScalableVector(ContainerVT, Op.getOperand(1), DAG, Subtarget);
   4256   SDValue Op2 =
   4257       convertToScalableVector(ContainerVT, Op.getOperand(2), DAG, Subtarget);
   4258 
   4259   SDLoc DL(Op);
   4260   SDValue Mask, VL;
   4261   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   4262 
   4263   SDValue Select =
   4264       DAG.getNode(RISCVISD::VSELECT_VL, DL, ContainerVT, CC, Op1, Op2, VL);
   4265 
   4266   return convertFromScalableVector(VT, Select, DAG, Subtarget);
   4267 }
   4268 
   4269 SDValue RISCVTargetLowering::lowerToScalableOp(SDValue Op, SelectionDAG &DAG,
   4270                                                unsigned NewOpc,
   4271                                                bool HasMask) const {
   4272   MVT VT = Op.getSimpleValueType();
   4273   MVT ContainerVT = getContainerForFixedLengthVector(VT);
   4274 
   4275   // Create list of operands by converting existing ones to scalable types.
   4276   SmallVector<SDValue, 6> Ops;
   4277   for (const SDValue &V : Op->op_values()) {
   4278     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
   4279 
   4280     // Pass through non-vector operands.
   4281     if (!V.getValueType().isVector()) {
   4282       Ops.push_back(V);
   4283       continue;
   4284     }
   4285 
   4286     // "cast" fixed length vector to a scalable vector.
   4287     assert(useRVVForFixedLengthVectorVT(V.getSimpleValueType()) &&
   4288            "Only fixed length vectors are supported!");
   4289     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
   4290   }
   4291 
   4292   SDLoc DL(Op);
   4293   SDValue Mask, VL;
   4294   std::tie(Mask, VL) = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget);
   4295   if (HasMask)
   4296     Ops.push_back(Mask);
   4297   Ops.push_back(VL);
   4298 
   4299   SDValue ScalableRes = DAG.getNode(NewOpc, DL, ContainerVT, Ops);
   4300   return convertFromScalableVector(VT, ScalableRes, DAG, Subtarget);
   4301 }
   4302 
   4303 // Lower a VP_* ISD node to the corresponding RISCVISD::*_VL node:
   4304 // * Operands of each node are assumed to be in the same order.
   4305 // * The EVL operand is promoted from i32 to i64 on RV64.
   4306 // * Fixed-length vectors are converted to their scalable-vector container
   4307 //   types.
   4308 SDValue RISCVTargetLowering::lowerVPOp(SDValue Op, SelectionDAG &DAG,
   4309                                        unsigned RISCVISDOpc) const {
   4310   SDLoc DL(Op);
   4311   MVT VT = Op.getSimpleValueType();
   4312   Optional<unsigned> EVLIdx = ISD::getVPExplicitVectorLengthIdx(Op.getOpcode());
   4313 
   4314   SmallVector<SDValue, 4> Ops;
   4315   MVT XLenVT = Subtarget.getXLenVT();
   4316 
   4317   for (const auto &OpIdx : enumerate(Op->ops())) {
   4318     SDValue V = OpIdx.value();
   4319     if ((unsigned)OpIdx.index() == EVLIdx) {
   4320       Ops.push_back(DAG.getZExtOrTrunc(V, DL, XLenVT));
   4321       continue;
   4322     }
   4323     assert(!isa<VTSDNode>(V) && "Unexpected VTSDNode node!");
   4324     // Pass through operands which aren't fixed-length vectors.
   4325     if (!V.getValueType().isFixedLengthVector()) {
   4326       Ops.push_back(V);
   4327       continue;
   4328     }
   4329     // "cast" fixed length vector to a scalable vector.
   4330     MVT OpVT = V.getSimpleValueType();
   4331     MVT ContainerVT = getContainerForFixedLengthVector(OpVT);
   4332     assert(useRVVForFixedLengthVectorVT(OpVT) &&
   4333            "Only fixed length vectors are supported!");
   4334     Ops.push_back(convertToScalableVector(ContainerVT, V, DAG, Subtarget));
   4335   }
   4336 
   4337   if (!VT.isFixedLengthVector())
   4338     return DAG.getNode(RISCVISDOpc, DL, VT, Ops);
   4339 
   4340   MVT ContainerVT = getContainerForFixedLengthVector(VT);
   4341 
   4342   SDValue VPOp = DAG.getNode(RISCVISDOpc, DL, ContainerVT, Ops);
   4343 
   4344   return convertFromScalableVector(VT, VPOp, DAG, Subtarget);
   4345 }
   4346 
   4347 // Custom lower MGATHER to a legalized form for RVV. It will then be matched to
   4348 // an RVV indexed load. The RVV indexed load instructions only support the
   4349 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
   4350 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
   4351 // indexing is extended to the XLEN value type and scaled accordingly.
   4352 SDValue RISCVTargetLowering::lowerMGATHER(SDValue Op, SelectionDAG &DAG) const {
   4353   auto *MGN = cast<MaskedGatherSDNode>(Op.getNode());
   4354   SDLoc DL(Op);
   4355 
   4356   SDValue Index = MGN->getIndex();
   4357   SDValue Mask = MGN->getMask();
   4358   SDValue PassThru = MGN->getPassThru();
   4359 
   4360   MVT VT = Op.getSimpleValueType();
   4361   MVT IndexVT = Index.getSimpleValueType();
   4362   MVT XLenVT = Subtarget.getXLenVT();
   4363 
   4364   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
   4365          "Unexpected VTs!");
   4366   assert(MGN->getBasePtr().getSimpleValueType() == XLenVT &&
   4367          "Unexpected pointer type");
   4368   // Targets have to explicitly opt in to extending vector loads.
   4369   assert(MGN->getExtensionType() == ISD::NON_EXTLOAD &&
   4370          "Unexpected extending MGATHER");
   4371 
   4372   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
   4373   // the selection of the masked intrinsics doesn't do this for us.
   4374   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
   4375 
   4376   SDValue VL;
   4377   MVT ContainerVT = VT;
   4378   if (VT.isFixedLengthVector()) {
   4379     // We need to use the larger of the result and index type to determine the
   4380     // scalable type to use so we don't increase LMUL for any operand/result.
   4381     if (VT.bitsGE(IndexVT)) {
   4382       ContainerVT = getContainerForFixedLengthVector(VT);
   4383       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
   4384                                  ContainerVT.getVectorElementCount());
   4385     } else {
   4386       IndexVT = getContainerForFixedLengthVector(IndexVT);
   4387       ContainerVT = MVT::getVectorVT(ContainerVT.getVectorElementType(),
   4388                                      IndexVT.getVectorElementCount());
   4389     }
   4390 
   4391     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
   4392 
   4393     if (!IsUnmasked) {
   4394       MVT MaskVT =
   4395           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
   4396       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
   4397       PassThru = convertToScalableVector(ContainerVT, PassThru, DAG, Subtarget);
   4398     }
   4399 
   4400     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
   4401   } else
   4402     VL = DAG.getRegister(RISCV::X0, XLenVT);
   4403 
   4404   unsigned IntID =
   4405       IsUnmasked ? Intrinsic::riscv_vloxei : Intrinsic::riscv_vloxei_mask;
   4406   SmallVector<SDValue, 8> Ops{MGN->getChain(),
   4407                               DAG.getTargetConstant(IntID, DL, XLenVT)};
   4408   if (!IsUnmasked)
   4409     Ops.push_back(PassThru);
   4410   Ops.push_back(MGN->getBasePtr());
   4411   Ops.push_back(Index);
   4412   if (!IsUnmasked)
   4413     Ops.push_back(Mask);
   4414   Ops.push_back(VL);
   4415 
   4416   SDVTList VTs = DAG.getVTList({ContainerVT, MVT::Other});
   4417   SDValue Result =
   4418       DAG.getMemIntrinsicNode(ISD::INTRINSIC_W_CHAIN, DL, VTs, Ops,
   4419                               MGN->getMemoryVT(), MGN->getMemOperand());
   4420   SDValue Chain = Result.getValue(1);
   4421 
   4422   if (VT.isFixedLengthVector())
   4423     Result = convertFromScalableVector(VT, Result, DAG, Subtarget);
   4424 
   4425   return DAG.getMergeValues({Result, Chain}, DL);
   4426 }
   4427 
   4428 // Custom lower MSCATTER to a legalized form for RVV. It will then be matched to
   4429 // an RVV indexed store. The RVV indexed store instructions only support the
   4430 // "unsigned unscaled" addressing mode; indices are implicitly zero-extended or
   4431 // truncated to XLEN and are treated as byte offsets. Any signed or scaled
   4432 // indexing is extended to the XLEN value type and scaled accordingly.
   4433 SDValue RISCVTargetLowering::lowerMSCATTER(SDValue Op,
   4434                                            SelectionDAG &DAG) const {
   4435   auto *MSN = cast<MaskedScatterSDNode>(Op.getNode());
   4436   SDLoc DL(Op);
   4437   SDValue Index = MSN->getIndex();
   4438   SDValue Mask = MSN->getMask();
   4439   SDValue Val = MSN->getValue();
   4440 
   4441   MVT VT = Val.getSimpleValueType();
   4442   MVT IndexVT = Index.getSimpleValueType();
   4443   MVT XLenVT = Subtarget.getXLenVT();
   4444 
   4445   assert(VT.getVectorElementCount() == IndexVT.getVectorElementCount() &&
   4446          "Unexpected VTs!");
   4447   assert(MSN->getBasePtr().getSimpleValueType() == XLenVT &&
   4448          "Unexpected pointer type");
   4449   // Targets have to explicitly opt in to extending vector loads and
   4450   // truncating vector stores.
   4451   assert(!MSN->isTruncatingStore() && "Unexpected extending MSCATTER");
   4452 
   4453   // If the mask is known to be all ones, optimize to an unmasked intrinsic;
   4454   // the selection of the masked intrinsics doesn't do this for us.
   4455   bool IsUnmasked = ISD::isConstantSplatVectorAllOnes(Mask.getNode());
   4456 
   4457   SDValue VL;
   4458   if (VT.isFixedLengthVector()) {
   4459     // We need to use the larger of the value and index type to determine the
   4460     // scalable type to use so we don't increase LMUL for any operand/result.
   4461     MVT ContainerVT;
   4462     if (VT.bitsGE(IndexVT)) {
   4463       ContainerVT = getContainerForFixedLengthVector(VT);
   4464       IndexVT = MVT::getVectorVT(IndexVT.getVectorElementType(),
   4465                                  ContainerVT.getVectorElementCount());
   4466     } else {
   4467       IndexVT = getContainerForFixedLengthVector(IndexVT);
   4468       ContainerVT = MVT::getVectorVT(VT.getVectorElementType(),
   4469                                      IndexVT.getVectorElementCount());
   4470     }
   4471 
   4472     Index = convertToScalableVector(IndexVT, Index, DAG, Subtarget);
   4473     Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
   4474 
   4475     if (!IsUnmasked) {
   4476       MVT MaskVT =
   4477           MVT::getVectorVT(MVT::i1, ContainerVT.getVectorElementCount());
   4478       Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
   4479     }
   4480 
   4481     VL = DAG.getConstant(VT.getVectorNumElements(), DL, XLenVT);
   4482   } else
   4483     VL = DAG.getRegister(RISCV::X0, XLenVT);
   4484 
   4485   unsigned IntID =
   4486       IsUnmasked ? Intrinsic::riscv_vsoxei : Intrinsic::riscv_vsoxei_mask;
   4487   SmallVector<SDValue, 8> Ops{MSN->getChain(),
   4488                               DAG.getTargetConstant(IntID, DL, XLenVT)};
   4489   Ops.push_back(Val);
   4490   Ops.push_back(MSN->getBasePtr());
   4491   Ops.push_back(Index);
   4492   if (!IsUnmasked)
   4493     Ops.push_back(Mask);
   4494   Ops.push_back(VL);
   4495 
   4496   return DAG.getMemIntrinsicNode(ISD::INTRINSIC_VOID, DL, MSN->getVTList(), Ops,
   4497                                  MSN->getMemoryVT(), MSN->getMemOperand());
   4498 }
   4499 
   4500 SDValue RISCVTargetLowering::lowerGET_ROUNDING(SDValue Op,
   4501                                                SelectionDAG &DAG) const {
   4502   const MVT XLenVT = Subtarget.getXLenVT();
   4503   SDLoc DL(Op);
   4504   SDValue Chain = Op->getOperand(0);
   4505   SDValue SysRegNo = DAG.getConstant(
   4506       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
   4507   SDVTList VTs = DAG.getVTList(XLenVT, MVT::Other);
   4508   SDValue RM = DAG.getNode(RISCVISD::READ_CSR, DL, VTs, Chain, SysRegNo);
   4509 
   4510   // The encoding used for the rounding mode in RISCV differs from that used
   4511   // in FLT_ROUNDS. To convert it, the RISCV rounding mode is used as an index
   4512   // into a table, which consists of a sequence of 4-bit fields, each
   4513   // representing the corresponding FLT_ROUNDS mode.
   4514   static const int Table =
   4515       (int(RoundingMode::NearestTiesToEven) << 4 * RISCVFPRndMode::RNE) |
   4516       (int(RoundingMode::TowardZero) << 4 * RISCVFPRndMode::RTZ) |
   4517       (int(RoundingMode::TowardNegative) << 4 * RISCVFPRndMode::RDN) |
   4518       (int(RoundingMode::TowardPositive) << 4 * RISCVFPRndMode::RUP) |
   4519       (int(RoundingMode::NearestTiesToAway) << 4 * RISCVFPRndMode::RMM);
   4520 
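          // For example, reading FRM == 1 (RTZ) shifts the table right by 4
          // and masks with 7, yielding int(RoundingMode::TowardZero) == 0, the
          // FLT_ROUNDS value for round-toward-zero.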
   4521   SDValue Shift =
   4522       DAG.getNode(ISD::SHL, DL, XLenVT, RM, DAG.getConstant(2, DL, XLenVT));
   4523   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
   4524                                 DAG.getConstant(Table, DL, XLenVT), Shift);
   4525   SDValue Masked = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
   4526                                DAG.getConstant(7, DL, XLenVT));
   4527 
   4528   return DAG.getMergeValues({Masked, Chain}, DL);
   4529 }
   4530 
   4531 SDValue RISCVTargetLowering::lowerSET_ROUNDING(SDValue Op,
   4532                                                SelectionDAG &DAG) const {
   4533   const MVT XLenVT = Subtarget.getXLenVT();
   4534   SDLoc DL(Op);
   4535   SDValue Chain = Op->getOperand(0);
   4536   SDValue RMValue = Op->getOperand(1);
   4537   SDValue SysRegNo = DAG.getConstant(
   4538       RISCVSysReg::lookupSysRegByName("FRM")->Encoding, DL, XLenVT);
   4539 
   4540   // The encoding used for the rounding mode in RISCV differs from that used
   4541   // in FLT_ROUNDS. To convert it, the C rounding mode is used as an index
   4542   // into a table, which consists of a sequence of 4-bit fields, each
   4543   // representing the corresponding RISCV mode.
   4544   static const unsigned Table =
   4545       (RISCVFPRndMode::RNE << 4 * int(RoundingMode::NearestTiesToEven)) |
   4546       (RISCVFPRndMode::RTZ << 4 * int(RoundingMode::TowardZero)) |
   4547       (RISCVFPRndMode::RDN << 4 * int(RoundingMode::TowardNegative)) |
   4548       (RISCVFPRndMode::RUP << 4 * int(RoundingMode::TowardPositive)) |
   4549       (RISCVFPRndMode::RMM << 4 * int(RoundingMode::NearestTiesToAway));
   4550 
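          // For example, setting the FLT_ROUNDS mode 2 (TowardPositive)
          // selects the 4-bit field at bit 8, yielding RISCVFPRndMode::RUP (3)
          // to be written to FRM.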
   4551   SDValue Shift = DAG.getNode(ISD::SHL, DL, XLenVT, RMValue,
   4552                               DAG.getConstant(2, DL, XLenVT));
   4553   SDValue Shifted = DAG.getNode(ISD::SRL, DL, XLenVT,
   4554                                 DAG.getConstant(Table, DL, XLenVT), Shift);
   4555   RMValue = DAG.getNode(ISD::AND, DL, XLenVT, Shifted,
   4556                         DAG.getConstant(0x7, DL, XLenVT));
   4557   return DAG.getNode(RISCVISD::WRITE_CSR, DL, MVT::Other, Chain, SysRegNo,
   4558                      RMValue);
   4559 }
   4560 
   4561 // Returns the opcode of the target-specific SDNode that implements the 32-bit
   4562 // form of the given Opcode.
   4563 static RISCVISD::NodeType getRISCVWOpcode(unsigned Opcode) {
   4564   switch (Opcode) {
   4565   default:
   4566     llvm_unreachable("Unexpected opcode");
   4567   case ISD::SHL:
   4568     return RISCVISD::SLLW;
   4569   case ISD::SRA:
   4570     return RISCVISD::SRAW;
   4571   case ISD::SRL:
   4572     return RISCVISD::SRLW;
   4573   case ISD::SDIV:
   4574     return RISCVISD::DIVW;
   4575   case ISD::UDIV:
   4576     return RISCVISD::DIVUW;
   4577   case ISD::UREM:
   4578     return RISCVISD::REMUW;
   4579   case ISD::ROTL:
   4580     return RISCVISD::ROLW;
   4581   case ISD::ROTR:
   4582     return RISCVISD::RORW;
   4583   case RISCVISD::GREV:
   4584     return RISCVISD::GREVW;
   4585   case RISCVISD::GORC:
   4586     return RISCVISD::GORCW;
   4587   }
   4588 }
   4589 
   4590 // Converts the given 32-bit operation to a target-specific SelectionDAG node.
   4591 // Because i32 isn't a legal type for RV64, these operations would otherwise
   4592 // be promoted to i64, making it difficult to select the SLLW/DIVUW/.../*W
   4593 // later on, because the fact that the operation was originally of type i32 is
   4594 // lost.
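        // For example, on RV64 an i32 SRA becomes an i64 RISCVISD::SRAW of the
        // any-extended operands, truncated back to i32.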
   4595 static SDValue customLegalizeToWOp(SDNode *N, SelectionDAG &DAG,
   4596                                    unsigned ExtOpc = ISD::ANY_EXTEND) {
   4597   SDLoc DL(N);
   4598   RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
   4599   SDValue NewOp0 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(0));
   4600   SDValue NewOp1 = DAG.getNode(ExtOpc, DL, MVT::i64, N->getOperand(1));
   4601   SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
   4602   // ReplaceNodeResults requires we maintain the same type for the return value.
   4603   return DAG.getNode(ISD::TRUNCATE, DL, N->getValueType(0), NewRes);
   4604 }
   4605 
   4606 // Converts the given 32-bit operation to an i64 operation with sign-extension
   4607 // semantics to reduce the number of sign-extension instructions needed.
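        // For example, an i32 ADD on RV64 is performed as an i64 ADD followed
        // by SIGN_EXTEND_INREG from i32, so the result is known to be properly
        // sign-extended and redundant sign-extension instructions can later be
        // removed.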
   4608 static SDValue customLegalizeToWOpWithSExt(SDNode *N, SelectionDAG &DAG) {
   4609   SDLoc DL(N);
   4610   SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
   4611   SDValue NewOp1 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
   4612   SDValue NewWOp = DAG.getNode(N->getOpcode(), DL, MVT::i64, NewOp0, NewOp1);
   4613   SDValue NewRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, NewWOp,
   4614                                DAG.getValueType(MVT::i32));
   4615   return DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes);
   4616 }
   4617 
void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
                                             SmallVectorImpl<SDValue> &Results,
                                             SelectionDAG &DAG) const {
  SDLoc DL(N);
  switch (N->getOpcode()) {
  default:
    llvm_unreachable("Don't know how to custom type legalize this operation!");
  case ISD::STRICT_FP_TO_SINT:
  case ISD::STRICT_FP_TO_UINT:
  case ISD::FP_TO_SINT:
  case ISD::FP_TO_UINT: {
    bool IsStrict = N->isStrictFPOpcode();
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    SDValue Op0 = IsStrict ? N->getOperand(1) : N->getOperand(0);
    // If the FP type needs to be softened, emit a library call using the 'si'
    // version. If we left it to default legalization we'd end up with 'di'. If
    // the FP type doesn't need to be softened just let generic type
    // legalization promote the result type.
    if (getTypeAction(*DAG.getContext(), Op0.getValueType()) !=
        TargetLowering::TypeSoftenFloat)
      return;
    RTLIB::Libcall LC;
    if (N->getOpcode() == ISD::FP_TO_SINT ||
        N->getOpcode() == ISD::STRICT_FP_TO_SINT)
      LC = RTLIB::getFPTOSINT(Op0.getValueType(), N->getValueType(0));
    else
      LC = RTLIB::getFPTOUINT(Op0.getValueType(), N->getValueType(0));
    MakeLibCallOptions CallOptions;
    EVT OpVT = Op0.getValueType();
    CallOptions.setTypeListBeforeSoften(OpVT, N->getValueType(0), true);
    SDValue Chain = IsStrict ? N->getOperand(0) : SDValue();
    SDValue Result;
    std::tie(Result, Chain) =
        makeLibCall(DAG, LC, N->getValueType(0), Op0, CallOptions, DL, Chain);
    Results.push_back(Result);
    if (IsStrict)
      Results.push_back(Chain);
    break;
  }
  case ISD::READCYCLECOUNTER: {
    assert(!Subtarget.is64Bit() &&
           "READCYCLECOUNTER only has custom type legalization on riscv32");

    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other);
    SDValue RCW =
        DAG.getNode(RISCVISD::READ_CYCLE_WIDE, DL, VTs, N->getOperand(0));

    Results.push_back(
        DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, RCW, RCW.getValue(1)));
    Results.push_back(RCW.getValue(2));
    break;
  }
  case ISD::MUL: {
    unsigned Size = N->getSimpleValueType(0).getSizeInBits();
    unsigned XLen = Subtarget.getXLen();
    // This multiply needs to be expanded; try to use MULHSU+MUL if possible.
    if (Size > XLen) {
      assert(Size == (XLen * 2) && "Unexpected custom legalisation");
      SDValue LHS = N->getOperand(0);
      SDValue RHS = N->getOperand(1);
      APInt HighMask = APInt::getHighBitsSet(Size, XLen);

      bool LHSIsU = DAG.MaskedValueIsZero(LHS, HighMask);
      bool RHSIsU = DAG.MaskedValueIsZero(RHS, HighMask);
      // We need exactly one side to be unsigned.
      if (LHSIsU == RHSIsU)
        return;

      auto MakeMULPair = [&](SDValue S, SDValue U) {
        MVT XLenVT = Subtarget.getXLenVT();
        S = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, S);
        U = DAG.getNode(ISD::TRUNCATE, DL, XLenVT, U);
        SDValue Lo = DAG.getNode(ISD::MUL, DL, XLenVT, S, U);
        SDValue Hi = DAG.getNode(RISCVISD::MULHSU, DL, XLenVT, S, U);
        return DAG.getNode(ISD::BUILD_PAIR, DL, N->getValueType(0), Lo, Hi);
      };
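
      // As a worked example, for an i64 multiply on RV32 where RHS has its
      // upper 32 bits known zero and LHS is sign-extended from 32 bits, the
      // product is Lo = MUL(S, U) and Hi = MULHSU(S, U): MULHSU treats its
      // first operand as signed and its second as unsigned.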

      bool LHSIsS = DAG.ComputeNumSignBits(LHS) > XLen;
      bool RHSIsS = DAG.ComputeNumSignBits(RHS) > XLen;

      // The other operand must be provably signed. If the unsigned operand is
      // also provably signed, skip: the generic expansion can then use MULH,
      // which is preferable.
      if (RHSIsU && LHSIsS && !RHSIsS)
        Results.push_back(MakeMULPair(LHS, RHS));
      else if (LHSIsU && RHSIsS && !LHSIsS)
        Results.push_back(MakeMULPair(RHS, LHS));

      return;
    }
    LLVM_FALLTHROUGH;
  }
  case ISD::ADD:
  case ISD::SUB:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOpWithSExt(N, DAG));
    break;
  case ISD::SHL:
  case ISD::SRA:
  case ISD::SRL:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (N->getOperand(1).getOpcode() == ISD::Constant)
      return;
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::ROTL:
  case ISD::ROTR:
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    Results.push_back(customLegalizeToWOp(N, DAG));
    break;
  case ISD::CTTZ:
  case ISD::CTTZ_ZERO_UNDEF:
  case ISD::CTLZ:
  case ISD::CTLZ_ZERO_UNDEF: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");

    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    bool IsCTZ =
        N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::CTTZ_ZERO_UNDEF;
    unsigned Opc = IsCTZ ? RISCVISD::CTZW : RISCVISD::CLZW;
    SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp0);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    return;
  }
  case ISD::SDIV:
  case ISD::UDIV:
  case ISD::UREM: {
    MVT VT = N->getSimpleValueType(0);
    assert((VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32) &&
           Subtarget.is64Bit() && Subtarget.hasStdExtM() &&
           "Unexpected custom legalisation");
    if (N->getOperand(0).getOpcode() == ISD::Constant ||
        N->getOperand(1).getOpcode() == ISD::Constant)
      return;

    // If the input is i32, use ANY_EXTEND since the W instructions don't read
    // the upper 32 bits. For other types we need to sign or zero extend
    // based on the opcode.
    unsigned ExtOpc = ISD::ANY_EXTEND;
    if (VT != MVT::i32)
      ExtOpc = N->getOpcode() == ISD::SDIV ? ISD::SIGN_EXTEND
                                           : ISD::ZERO_EXTEND;

    Results.push_back(customLegalizeToWOp(N, DAG, ExtOpc));
    break;
  }
  case ISD::UADDO:
  case ISD::USUBO: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    bool IsAdd = N->getOpcode() == ISD::UADDO;
    // Create an ADDW or SUBW.
    SDValue LHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue RHS = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue Res =
        DAG.getNode(IsAdd ? ISD::ADD : ISD::SUB, DL, MVT::i64, LHS, RHS);
    Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i64, Res,
                      DAG.getValueType(MVT::i32));

    // Sign extend the LHS and perform an unsigned compare with the ADDW result.
    // Since the inputs are sign extended from i32, this is equivalent to
    // comparing the lower 32 bits.
    LHS = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue Overflow = DAG.getSetCC(DL, N->getValueType(1), Res, LHS,
                                    IsAdd ? ISD::SETULT : ISD::SETUGT);

    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
    Results.push_back(Overflow);
    return;
  }
  case ISD::UADDSAT:
  case ISD::USUBSAT: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    if (Subtarget.hasStdExtZbb()) {
      // With Zbb we can sign extend and let LegalizeDAG use minu/maxu. Using
      // sign extend allows overflow of the lower 32 bits to be detected on
      // the promoted size.
      SDValue LHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(0));
      SDValue RHS =
          DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue Res = DAG.getNode(N->getOpcode(), DL, MVT::i64, LHS, RHS);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }

    // Without Zbb, expand to UADDO/USUBO+select which will trigger our custom
    // promotion for UADDO/USUBO.
    Results.push_back(expandAddSubSat(N, DAG));
    return;
  }
  case ISD::BITCAST: {
    EVT VT = N->getValueType(0);
    assert(VT.isInteger() && !VT.isVector() && "Unexpected VT!");
    SDValue Op0 = N->getOperand(0);
    EVT Op0VT = Op0.getValueType();
    MVT XLenVT = Subtarget.getXLenVT();
    if (VT == MVT::i16 && Op0VT == MVT::f16 && Subtarget.hasStdExtZfh()) {
      SDValue FPConv = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, XLenVT, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, FPConv));
    } else if (VT == MVT::i32 && Op0VT == MVT::f32 && Subtarget.is64Bit() &&
               Subtarget.hasStdExtF()) {
      SDValue FPConv =
          DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Op0);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, FPConv));
    } else if (!VT.isVector() && Op0VT.isFixedLengthVector() &&
               isTypeLegal(Op0VT)) {
      // Custom-legalize bitcasts from fixed-length vector types to illegal
      // scalar types in order to improve codegen. Bitcast the vector to a
      // one-element vector type whose element type is the same as the result
      // type, and extract the first element.
      LLVMContext &Context = *DAG.getContext();
      SDValue BVec = DAG.getBitcast(EVT::getVectorVT(Context, VT, 1), Op0);
      Results.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VT, BVec,
                                    DAG.getConstant(0, DL, XLenVT)));
    }
    break;
  }
  case RISCVISD::GREV:
  case RISCVISD::GORC: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
    // This is similar to customLegalizeToWOp, except that the second operand
    // is a constant shift amount: any-extending it to i64 leaves its value
    // unchanged.
    RISCVISD::NodeType WOpcode = getRISCVWOpcode(N->getOpcode());
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue NewRes = DAG.getNode(WOpcode, DL, MVT::i64, NewOp0, NewOp1);
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  case RISCVISD::SHFL: {
    // There is no SHFLIW instruction, but we can just promote the operation.
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           "Unexpected custom legalisation");
    assert(isa<ConstantSDNode>(N->getOperand(1)) && "Expected constant");
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue NewRes = DAG.getNode(RISCVISD::SHFL, DL, MVT::i64, NewOp0, NewOp1);
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewRes));
    break;
  }
  case ISD::BSWAP:
  case ISD::BITREVERSE: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbp() && "Unexpected custom legalisation");
    SDValue NewOp0 = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64,
                                 N->getOperand(0));
    unsigned Imm = N->getOpcode() == ISD::BITREVERSE ? 31 : 24;
    SDValue GREVIW = DAG.getNode(RISCVISD::GREVW, DL, MVT::i64, NewOp0,
                                 DAG.getConstant(Imm, DL, MVT::i64));
    // ReplaceNodeResults requires we maintain the same type for the return
    // value.
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, GREVIW));
    break;
  }
  case ISD::FSHL:
  case ISD::FSHR: {
    assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
           Subtarget.hasStdExtZbt() && "Unexpected custom legalisation");
    SDValue NewOp0 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(0));
    SDValue NewOp1 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
    SDValue NewOp2 =
        DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
    // FSLW/FSRW take a 6 bit shift amount but i32 FSHL/FSHR only use 5 bits.
    // Mask the shift amount to 5 bits.
    NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
                         DAG.getConstant(0x1f, DL, MVT::i64));
    unsigned Opc =
        N->getOpcode() == ISD::FSHL ? RISCVISD::FSLW : RISCVISD::FSRW;
    SDValue NewOp = DAG.getNode(Opc, DL, MVT::i64, NewOp0, NewOp1, NewOp2);
    Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, NewOp));
    break;
  }
  case ISD::EXTRACT_VECTOR_ELT: {
    // Custom-legalize an EXTRACT_VECTOR_ELT where XLEN < SEW, as the SEW
    // element type is illegal (currently only vXi64 on RV32).
    // With vmv.x.s, when SEW > XLEN, only the least-significant XLEN bits are
    // transferred to the destination register. We issue two of these, from the
    // upper and lower halves of the SEW-bit vector element, slid down to the
    // first element.
    SDValue Vec = N->getOperand(0);
    SDValue Idx = N->getOperand(1);

    // The vector type hasn't been legalized yet so we can't issue target
    // specific nodes if it needs legalization.
    // FIXME: We would manually legalize if it's important.
    if (!isTypeLegal(Vec.getValueType()))
      return;

    MVT VecVT = Vec.getSimpleValueType();

    assert(!Subtarget.is64Bit() && N->getValueType(0) == MVT::i64 &&
           VecVT.getVectorElementType() == MVT::i64 &&
           "Unexpected EXTRACT_VECTOR_ELT legalization");

    // If this is a fixed vector, we need to convert it to a scalable vector.
    MVT ContainerVT = VecVT;
    if (VecVT.isFixedLengthVector()) {
      ContainerVT = getContainerForFixedLengthVector(VecVT);
      Vec = convertToScalableVector(ContainerVT, Vec, DAG, Subtarget);
    }

    MVT XLenVT = Subtarget.getXLenVT();

    // Use a VL of 1 to avoid processing more elements than we need.
    MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
    SDValue VL = DAG.getConstant(1, DL, XLenVT);
    SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);

    // Unless the index is known to be 0, we must slide the vector down to get
    // the desired element into index 0.
    if (!isNullConstant(Idx)) {
      Vec = DAG.getNode(RISCVISD::VSLIDEDOWN_VL, DL, ContainerVT,
                        DAG.getUNDEF(ContainerVT), Vec, Idx, Mask, VL);
    }

    // Extract the lower XLEN bits of the correct vector element.
    SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

    // To extract the upper XLEN bits of the vector element, shift the first
    // element right by 32 bits and re-extract the lower XLEN bits.
    SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, ContainerVT,
                                     DAG.getConstant(32, DL, XLenVT), VL);
    SDValue LShr32 = DAG.getNode(RISCVISD::SRL_VL, DL, ContainerVT, Vec,
                                 ThirtyTwoV, Mask, VL);

    SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

    Results.push_back(DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntNo = cast<ConstantSDNode>(N->getOperand(0))->getZExtValue();
    switch (IntNo) {
    default:
      llvm_unreachable(
          "Don't know how to custom type legalize this intrinsic!");
    case Intrinsic::riscv_orc_b: {
      // Lower to the GORCI encoding for orc.b with the operand extended.
      SDValue NewOp =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      // If Zbp is enabled, use GORCIW which will sign extend the result.
      unsigned Opc =
          Subtarget.hasStdExtZbp() ? RISCVISD::GORCW : RISCVISD::GORC;
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp,
                                DAG.getConstant(7, DL, MVT::i64));
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      return;
    }
    case Intrinsic::riscv_grev:
    case Intrinsic::riscv_gorc: {
      assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
             "Unexpected custom legalisation");
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp2 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      unsigned Opc =
          IntNo == Intrinsic::riscv_grev ? RISCVISD::GREVW : RISCVISD::GORCW;
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      break;
    }
    case Intrinsic::riscv_shfl:
    case Intrinsic::riscv_unshfl: {
      assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
             "Unexpected custom legalisation");
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp2 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      unsigned Opc =
          IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFLW : RISCVISD::UNSHFLW;
      if (isa<ConstantSDNode>(N->getOperand(2))) {
        NewOp2 = DAG.getNode(ISD::AND, DL, MVT::i64, NewOp2,
                             DAG.getConstant(0xf, DL, MVT::i64));
        Opc =
            IntNo == Intrinsic::riscv_shfl ? RISCVISD::SHFL : RISCVISD::UNSHFL;
      }
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      break;
    }
    case Intrinsic::riscv_bcompress:
    case Intrinsic::riscv_bdecompress: {
      assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
             "Unexpected custom legalisation");
      SDValue NewOp1 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(1));
      SDValue NewOp2 =
          DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i64, N->getOperand(2));
      unsigned Opc = IntNo == Intrinsic::riscv_bcompress
                         ? RISCVISD::BCOMPRESSW
                         : RISCVISD::BDECOMPRESSW;
      SDValue Res = DAG.getNode(Opc, DL, MVT::i64, NewOp1, NewOp2);
      Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
      break;
    }
    case Intrinsic::riscv_vmv_x_s: {
      EVT VT = N->getValueType(0);
      MVT XLenVT = Subtarget.getXLenVT();
      if (VT.bitsLT(XLenVT)) {
        // Simple case: just extract using vmv.x.s and truncate.
        SDValue Extract = DAG.getNode(RISCVISD::VMV_X_S, DL,
                                      Subtarget.getXLenVT(), N->getOperand(1));
        Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, Extract));
        return;
      }

      assert(VT == MVT::i64 && !Subtarget.is64Bit() &&
             "Unexpected custom legalization");

      // We need to do the move in two steps.
      SDValue Vec = N->getOperand(1);
      MVT VecVT = Vec.getSimpleValueType();

      // First extract the lower XLEN bits of the element.
      SDValue EltLo = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, Vec);

      // To extract the upper XLEN bits of the vector element, shift the first
      // element right by 32 bits and re-extract the lower XLEN bits.
      SDValue VL = DAG.getConstant(1, DL, XLenVT);
      MVT MaskVT = MVT::getVectorVT(MVT::i1, VecVT.getVectorElementCount());
      SDValue Mask = DAG.getNode(RISCVISD::VMSET_VL, DL, MaskVT, VL);
      SDValue ThirtyTwoV = DAG.getNode(RISCVISD::VMV_V_X_VL, DL, VecVT,
                                       DAG.getConstant(32, DL, XLenVT), VL);
      SDValue LShr32 =
          DAG.getNode(RISCVISD::SRL_VL, DL, VecVT, Vec, ThirtyTwoV, Mask, VL);
      SDValue EltHi = DAG.getNode(RISCVISD::VMV_X_S, DL, XLenVT, LShr32);

      Results.push_back(
          DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, EltLo, EltHi));
      break;
    }
    }
    break;
  }
  case ISD::VECREDUCE_ADD:
  case ISD::VECREDUCE_AND:
  case ISD::VECREDUCE_OR:
  case ISD::VECREDUCE_XOR:
  case ISD::VECREDUCE_SMAX:
  case ISD::VECREDUCE_UMAX:
  case ISD::VECREDUCE_SMIN:
  case ISD::VECREDUCE_UMIN:
    if (SDValue V = lowerVECREDUCE(SDValue(N, 0), DAG))
      Results.push_back(V);
    break;
  case ISD::FLT_ROUNDS_: {
    SDVTList VTs = DAG.getVTList(Subtarget.getXLenVT(), MVT::Other);
    SDValue Res = DAG.getNode(ISD::FLT_ROUNDS_, DL, VTs, N->getOperand(0));
    Results.push_back(Res.getValue(0));
    Results.push_back(Res.getValue(1));
    break;
  }
  }
}

// A structure to hold one of the bit-manipulation patterns below. Together, a
// SHL and non-SHL pattern may form a bit-manipulation pair on a single source:
//   (or (and (shl x, 1), 0xAAAAAAAA),
//       (and (srl x, 1), 0x55555555))
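// For example, with a shift amount of 1 the two halves above are recorded as
// {Op=x, ShAmt=1, IsSHL=true} and {Op=x, ShAmt=1, IsSHL=false}; formsPairWith
// accepts them, and OR-ing the halves swaps every adjacent pair of bits in x.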
struct RISCVBitmanipPat {
  SDValue Op;
  unsigned ShAmt;
  bool IsSHL;

  bool formsPairWith(const RISCVBitmanipPat &Other) const {
    return Op == Other.Op && ShAmt == Other.ShAmt && IsSHL != Other.IsSHL;
  }
};

// Matches patterns of the form
//   (and (shl x, C2), (C1 << C2))
//   (and (srl x, C2), C1)
//   (shl (and x, C1), C2)
//   (srl (and x, (C1 << C2)), C2)
// where C2 is a power of 2 and C1 has at least that many leading zeroes.
// The expected masks for each shift amount are specified in BitmanipMasks,
// where BitmanipMasks[log2(C2)] specifies the expected C1 value.
// The maximum allowed shift amount is either XLen/2 or XLen/4, determined by
// whether BitmanipMasks contains 6 or 5 entries, assuming that the maximum
// possible XLen is 64.
static Optional<RISCVBitmanipPat>
matchRISCVBitmanipPat(SDValue Op, ArrayRef<uint64_t> BitmanipMasks) {
  assert((BitmanipMasks.size() == 5 || BitmanipMasks.size() == 6) &&
         "Unexpected number of masks");
  Optional<uint64_t> Mask;
  // Optionally consume a mask around the shift operation.
  if (Op.getOpcode() == ISD::AND && isa<ConstantSDNode>(Op.getOperand(1))) {
    Mask = Op.getConstantOperandVal(1);
    Op = Op.getOperand(0);
  }
  if (Op.getOpcode() != ISD::SHL && Op.getOpcode() != ISD::SRL)
    return None;
  bool IsSHL = Op.getOpcode() == ISD::SHL;

  if (!isa<ConstantSDNode>(Op.getOperand(1)))
    return None;
  uint64_t ShAmt = Op.getConstantOperandVal(1);

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  if (ShAmt >= Width || !isPowerOf2_64(ShAmt))
    return None;
  // If we don't have enough masks for 64 bit, then we must be trying to
  // match SHFL so we're only allowed to shift 1/4 of the width.
  if (BitmanipMasks.size() == 5 && ShAmt >= (Width / 2))
    return None;

  SDValue Src = Op.getOperand(0);

  // The expected mask is shifted left when the AND is found around SHL
  // patterns.
  //   ((x >> 1) & 0x55555555)
  //   ((x << 1) & 0xAAAAAAAA)
  bool SHLExpMask = IsSHL;

  if (!Mask) {
    // Sometimes LLVM keeps the mask as an operand of the shift, typically when
    // the mask is all ones: consume that now.
    if (Src.getOpcode() == ISD::AND && isa<ConstantSDNode>(Src.getOperand(1))) {
      Mask = Src.getConstantOperandVal(1);
      Src = Src.getOperand(0);
      // The expected mask is now in fact shifted left for SRL, so reverse the
      // decision.
      //   ((x & 0xAAAAAAAA) >> 1)
      //   ((x & 0x55555555) << 1)
      SHLExpMask = !SHLExpMask;
    } else {
      // Use a default shifted mask of all-ones if there's no AND, truncated
      // down to the expected width. This simplifies the logic later on.
      Mask = maskTrailingOnes<uint64_t>(Width);
      *Mask &= (IsSHL ? *Mask << ShAmt : *Mask >> ShAmt);
    }
  }

  unsigned MaskIdx = Log2_32(ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (SHLExpMask)
    ExpMask <<= ShAmt;

  if (Mask != ExpMask)
    return None;

  return RISCVBitmanipPat{Src, (unsigned)ShAmt, IsSHL};
}

// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x55555555 << 1))
//   (and (srl x, 1), 0x55555555)
//   (shl (and x, 0x55555555), 1)
//   (srl (and x, (0x55555555 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x55555555 / 0xAAAAAAAA
//   [2]  = 0x33333333 / 0xCCCCCCCC
//   [4]  = 0x0F0F0F0F / 0xF0F0F0F0
//   [8]  = 0x00FF00FF / 0xFF00FF00
//   [16] = 0x0000FFFF / 0xFFFF0000
//   [32] = 0x00000000FFFFFFFF / 0xFFFFFFFF00000000 (for RV64)
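// As a concrete reference, a shift amount of 24 (= 8 | 16) byte-swaps a
// 32-bit value and 31 (all five stages) bit-reverses it; the BSWAP/BITREVERSE
// legalization above emits GREVW with exactly these immediates.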
static Optional<RISCVBitmanipPat> matchGREVIPat(SDValue Op) {
  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x5555555555555555ULL, 0x3333333333333333ULL, 0x0F0F0F0F0F0F0F0FULL,
      0x00FF00FF00FF00FFULL, 0x0000FFFF0000FFFFULL, 0x00000000FFFFFFFFULL};

  return matchRISCVBitmanipPat(Op, BitmanipMasks);
}

// Match the following pattern as a GREVI(W) operation
//   (or (BITMANIP_SHL x), (BITMANIP_SRL x))
static SDValue combineORToGREV(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    auto LHS = matchGREVIPat(Op.getOperand(0));
    auto RHS = matchGREVIPat(Op.getOperand(1));
    if (LHS && RHS && LHS->formsPairWith(*RHS)) {
      SDLoc DL(Op);
      return DAG.getNode(RISCVISD::GREV, DL, VT, LHS->Op,
                         DAG.getConstant(LHS->ShAmt, DL, VT));
    }
  }
  return SDValue();
}

// Matches any of the following patterns as a GORCI(W) operation
// 1.  (or (GREVI x, shamt), x) if shamt is a power of 2
// 2.  (or x, (GREVI x, shamt)) if shamt is a power of 2
// 3.  (or (or (BITMANIP_SHL x), x), (BITMANIP_SRL x))
// Note that with the variant of 3.,
//     (or (or (BITMANIP_SHL x), (BITMANIP_SRL x)), x)
// the inner pattern will first be matched as GREVI and then the outer
// pattern will be matched to GORC via the first rule above.
// 4.  (or (rotl/rotr x, bitwidth/2), x)
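// For example, on RV32 (or (rotl x, 16), x) matches rule 4. and becomes
// (GORC x, 16), which ORs each halfword of x into the other.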
static SDValue combineORToGORC(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
  EVT VT = Op.getValueType();

  if (VT == Subtarget.getXLenVT() || (Subtarget.is64Bit() && VT == MVT::i32)) {
    SDLoc DL(Op);
    SDValue Op0 = Op.getOperand(0);
    SDValue Op1 = Op.getOperand(1);

    auto MatchOROfReverse = [&](SDValue Reverse, SDValue X) {
      if (Reverse.getOpcode() == RISCVISD::GREV && Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1)) &&
          isPowerOf2_32(Reverse.getConstantOperandVal(1)))
        return DAG.getNode(RISCVISD::GORC, DL, VT, X, Reverse.getOperand(1));
      // We can also form GORCI from ROTL/ROTR by half the bitwidth.
      if ((Reverse.getOpcode() == ISD::ROTL ||
           Reverse.getOpcode() == ISD::ROTR) &&
          Reverse.getOperand(0) == X &&
          isa<ConstantSDNode>(Reverse.getOperand(1))) {
        uint64_t RotAmt = Reverse.getConstantOperandVal(1);
        if (RotAmt == (VT.getSizeInBits() / 2))
          return DAG.getNode(RISCVISD::GORC, DL, VT, X,
                             DAG.getConstant(RotAmt, DL, VT));
      }
      return SDValue();
    };

    // Check for either commutable permutation of (or (GREVI x, shamt), x)
    if (SDValue V = MatchOROfReverse(Op0, Op1))
      return V;
    if (SDValue V = MatchOROfReverse(Op1, Op0))
      return V;

    // OR is commutable so canonicalize its OR operand to the left
    if (Op0.getOpcode() != ISD::OR && Op1.getOpcode() == ISD::OR)
      std::swap(Op0, Op1);
    if (Op0.getOpcode() != ISD::OR)
      return SDValue();
    SDValue OrOp0 = Op0.getOperand(0);
    SDValue OrOp1 = Op0.getOperand(1);
    auto LHS = matchGREVIPat(OrOp0);
    // OR is commutable so swap the operands and try again: x might have been
    // on the left
    if (!LHS) {
      std::swap(OrOp0, OrOp1);
      LHS = matchGREVIPat(OrOp0);
    }
    auto RHS = matchGREVIPat(Op1);
    if (LHS && RHS && LHS->formsPairWith(*RHS) && LHS->Op == OrOp1) {
      return DAG.getNode(RISCVISD::GORC, DL, VT, LHS->Op,
                         DAG.getConstant(LHS->ShAmt, DL, VT));
    }
  }
  return SDValue();
}

// Matches any of the following bit-manipulation patterns:
//   (and (shl x, 1), (0x22222222 << 1))
//   (and (srl x, 1), 0x22222222)
//   (shl (and x, 0x22222222), 1)
//   (srl (and x, (0x22222222 << 1)), 1)
// where the shift amount and mask may vary thus:
//   [1]  = 0x22222222 / 0x44444444
//   [2]  = 0x0C0C0C0C / 0x30303030
//   [4]  = 0x00F000F0 / 0x0F000F00
//   [8]  = 0x0000FF00 / 0x00FF0000
//   [16] = 0x00000000FFFF0000 / 0x0000FFFF00000000 (for RV64)
static Optional<RISCVBitmanipPat> matchSHFLPat(SDValue Op) {
  // These are the unshifted masks which we use to match bit-manipulation
  // patterns. They may be shifted left in certain circumstances.
  static const uint64_t BitmanipMasks[] = {
      0x2222222222222222ULL, 0x0C0C0C0C0C0C0C0CULL, 0x00F000F000F000F0ULL,
      0x0000FF000000FF00ULL, 0x00000000FFFF0000ULL};

  return matchRISCVBitmanipPat(Op, BitmanipMasks);
}

// Match (or (or (SHFL_SHL x), (SHFL_SHR x)), (SHFL_AND x))
static SDValue combineORToSHFL(SDValue Op, SelectionDAG &DAG,
                               const RISCVSubtarget &Subtarget) {
  assert(Subtarget.hasStdExtZbp() && "Expected Zbp extension");
  EVT VT = Op.getValueType();

  if (VT != MVT::i32 && VT != Subtarget.getXLenVT())
    return SDValue();

  SDValue Op0 = Op.getOperand(0);
  SDValue Op1 = Op.getOperand(1);

  // OR is commutable, so canonicalize the inner OR to the LHS.
  if (Op0.getOpcode() != ISD::OR)
    std::swap(Op0, Op1);
  if (Op0.getOpcode() != ISD::OR)
    return SDValue();

  // We found an inner OR, so our operands are the operands of the inner OR
  // and the other operand of the outer OR.
  SDValue A = Op0.getOperand(0);
  SDValue B = Op0.getOperand(1);
  SDValue C = Op1;

  auto Match1 = matchSHFLPat(A);
  auto Match2 = matchSHFLPat(B);

  // If neither matched, we failed.
  if (!Match1 && !Match2)
    return SDValue();

  // We had at least one match; if one failed, try the remaining C operand.
  if (!Match1) {
    std::swap(A, C);
    Match1 = matchSHFLPat(A);
    if (!Match1)
      return SDValue();
  } else if (!Match2) {
    std::swap(B, C);
    Match2 = matchSHFLPat(B);
    if (!Match2)
      return SDValue();
  }
  assert(Match1 && Match2);

  // Make sure our matches pair up.
  if (!Match1->formsPairWith(*Match2))
    return SDValue();

  // All that remains is to make sure C is an AND with the same input, one that
  // masks out the bits that are being shuffled.
  if (C.getOpcode() != ISD::AND || !isa<ConstantSDNode>(C.getOperand(1)) ||
      C.getOperand(0) != Match1->Op)
    return SDValue();

  uint64_t Mask = C.getConstantOperandVal(1);

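  // Each entry below is the complement of the pair of moved-bit masks for the
  // same shift amount, e.g. 0x99999999... == ~(0x22222222... | 0x44444444...):
  // it selects the bits a SHFL stage leaves in place, which is exactly what C
  // must preserve.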
  static const uint64_t BitmanipMasks[] = {
      0x9999999999999999ULL, 0xC3C3C3C3C3C3C3C3ULL, 0xF00FF00FF00FF00FULL,
      0xFF0000FFFF0000FFULL, 0xFFFF00000000FFFFULL,
  };

  unsigned Width = Op.getValueType() == MVT::i64 ? 64 : 32;
  unsigned MaskIdx = Log2_32(Match1->ShAmt);
  uint64_t ExpMask = BitmanipMasks[MaskIdx] & maskTrailingOnes<uint64_t>(Width);

  if (Mask != ExpMask)
    return SDValue();

  SDLoc DL(Op);
  return DAG.getNode(RISCVISD::SHFL, DL, VT, Match1->Op,
                     DAG.getConstant(Match1->ShAmt, DL, VT));
}

// Combine (GREVI (GREVI x, C2), C1) -> (GREVI x, C1^C2) when C1^C2 is
// non-zero, and to x when it is zero. Any repeated GREVI stage undoes itself.
// Combine (GORCI (GORCI x, C2), C1) -> (GORCI x, C1|C2). A repeated GORCI
// stage does not undo itself, but it is redundant.
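// For example, (GREVI (GREVI x, 24), 8) folds to (GREVI x, 16): the byte swap
// within each halfword cancels, leaving only the halfword swap.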
static SDValue combineGREVI_GORCI(SDNode *N, SelectionDAG &DAG) {
  SDValue Src = N->getOperand(0);

  if (Src.getOpcode() != N->getOpcode())
    return SDValue();

  if (!isa<ConstantSDNode>(N->getOperand(1)) ||
      !isa<ConstantSDNode>(Src.getOperand(1)))
    return SDValue();

  unsigned ShAmt1 = N->getConstantOperandVal(1);
  unsigned ShAmt2 = Src.getConstantOperandVal(1);
  Src = Src.getOperand(0);

  unsigned CombinedShAmt;
  if (N->getOpcode() == RISCVISD::GORC || N->getOpcode() == RISCVISD::GORCW)
    CombinedShAmt = ShAmt1 | ShAmt2;
  else
    CombinedShAmt = ShAmt1 ^ ShAmt2;

  if (CombinedShAmt == 0)
    return Src;

  SDLoc DL(N);
  return DAG.getNode(
      N->getOpcode(), DL, N->getValueType(0), Src,
      DAG.getConstant(CombinedShAmt, DL, N->getOperand(1).getValueType()));
}

// Combine a constant select operand into its use:
//
// (and (select_cc lhs, rhs, cc, -1, c), x)
//   -> (select_cc lhs, rhs, cc, x, (and x, c))  [AllOnes=1]
// (or  (select_cc lhs, rhs, cc, 0, c), x)
//   -> (select_cc lhs, rhs, cc, x, (or x, c))  [AllOnes=0]
// (xor (select_cc lhs, rhs, cc, 0, c), x)
//   -> (select_cc lhs, rhs, cc, x, (xor x, c))  [AllOnes=0]
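// For example, with AllOnes=1, (and (select_cc a, b, cc, -1, c), x) becomes
// (select_cc a, b, cc, x, (and x, c)): AND with all-ones is the identity, so
// when the condition selects -1 the result is simply x.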
static SDValue combineSelectCCAndUse(SDNode *N, SDValue Slct, SDValue OtherOp,
                                     SelectionDAG &DAG, bool AllOnes) {
  EVT VT = N->getValueType(0);

  if (Slct.getOpcode() != RISCVISD::SELECT_CC || !Slct.hasOneUse())
    return SDValue();

  auto isZeroOrAllOnes = [](SDValue N, bool AllOnes) {
    return AllOnes ? isAllOnesConstant(N) : isNullConstant(N);
  };

  bool SwapSelectOps;
  SDValue TrueVal = Slct.getOperand(3);
  SDValue FalseVal = Slct.getOperand(4);
  SDValue NonConstantVal;
  if (isZeroOrAllOnes(TrueVal, AllOnes)) {
    SwapSelectOps = false;
    NonConstantVal = FalseVal;
  } else if (isZeroOrAllOnes(FalseVal, AllOnes)) {
    SwapSelectOps = true;
    NonConstantVal = TrueVal;
  } else
    return SDValue();

  // Slct is now known to be the desired identity constant when CC is true.
  TrueVal = OtherOp;
  FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal);
  // Unless SwapSelectOps says CC should be false.
  if (SwapSelectOps)
    std::swap(TrueVal, FalseVal);

  return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), VT,
                     {Slct.getOperand(0), Slct.getOperand(1),
                      Slct.getOperand(2), TrueVal, FalseVal});
}

// Attempt combineSelectCCAndUse on each operand of a commutative operator N.
static SDValue combineSelectCCAndUseCommutative(SDNode *N, SelectionDAG &DAG,
                                                bool AllOnes) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  if (SDValue Result = combineSelectCCAndUse(N, N0, N1, DAG, AllOnes))
    return Result;
  if (SDValue Result = combineSelectCCAndUse(N, N1, N0, DAG, AllOnes))
    return Result;
  return SDValue();
}

static SDValue performANDCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  // fold (and (select_cc lhs, rhs, cc, -1, y), x) ->
  //      (select lhs, rhs, cc, x, (and x, y))
  return combineSelectCCAndUseCommutative(N, DAG, true);
}

static SDValue performORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI,
                                const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;
  if (Subtarget.hasStdExtZbp()) {
    if (auto GREV = combineORToGREV(SDValue(N, 0), DAG, Subtarget))
      return GREV;
    if (auto GORC = combineORToGORC(SDValue(N, 0), DAG, Subtarget))
      return GORC;
    if (auto SHFL = combineORToSHFL(SDValue(N, 0), DAG, Subtarget))
      return SHFL;
  }

  // fold (or (select_cc lhs, rhs, cc, 0, y), x) ->
  //      (select lhs, rhs, cc, x, (or x, y))
  return combineSelectCCAndUseCommutative(N, DAG, false);
}

static SDValue performXORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
                                 const RISCVSubtarget &Subtarget) {
  SelectionDAG &DAG = DCI.DAG;

  // fold (xor (select_cc lhs, rhs, cc, 0, y), x) ->
  //      (select lhs, rhs, cc, x, (xor x, y))
  return combineSelectCCAndUseCommutative(N, DAG, false);
}

SDValue RISCVTargetLowering::PerformDAGCombine(SDNode *N,
                                               DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;

  switch (N->getOpcode()) {
  default:
    break;
  case RISCVISD::SplitF64: {
    SDValue Op0 = N->getOperand(0);
    // If the input to SplitF64 is just BuildPairF64 then the operation is
    // redundant. Instead, use BuildPairF64's operands directly.
    if (Op0->getOpcode() == RISCVISD::BuildPairF64)
      return DCI.CombineTo(N, Op0.getOperand(0), Op0.getOperand(1));

    SDLoc DL(N);

    // It's cheaper to materialise two 32-bit integers than to load a double
    // from the constant pool and transfer it to integer registers through the
    // stack.
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op0)) {
      APInt V = C->getValueAPF().bitcastToAPInt();
      SDValue Lo = DAG.getConstant(V.trunc(32), DL, MVT::i32);
      SDValue Hi = DAG.getConstant(V.lshr(32).trunc(32), DL, MVT::i32);
      return DCI.CombineTo(N, Lo, Hi);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewSplitF64 =
        DAG.getNode(RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32),
                    Op0.getOperand(0));
    SDValue Lo = NewSplitF64.getValue(0);
    SDValue Hi = NewSplitF64.getValue(1);
    APInt SignBit = APInt::getSignMask(32);
    if (Op0.getOpcode() == ISD::FNEG) {
      SDValue NewHi = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
                                  DAG.getConstant(SignBit, DL, MVT::i32));
      return DCI.CombineTo(N, Lo, NewHi);
    }
    assert(Op0.getOpcode() == ISD::FABS);
    SDValue NewHi = DAG.getNode(ISD::AND, DL, MVT::i32, Hi,
                                DAG.getConstant(~SignBit, DL, MVT::i32));
    return DCI.CombineTo(N, Lo, NewHi);
  }
  case RISCVISD::SLLW:
  case RISCVISD::SRAW:
  case RISCVISD::SRLW:
  case RISCVISD::ROLW:
  case RISCVISD::RORW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(N->getOperand(0), LHSMask, DCI) ||
        SimplifyDemandedBits(N->getOperand(1), RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::CLZW:
  case RISCVISD::CTZW: {
    // Only the lower 32 bits of the first operand are read.
    SDValue Op0 = N->getOperand(0);
    APInt Mask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(Op0, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSL:
  case RISCVISD::FSR: {
    // Only the lower log2(Bitwidth)+1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(2);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
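    // For example, an XLen=32 FSL funnels a 64-bit pair of values, so shift
    // amounts are meaningful modulo 64 and the low 6 bits
    // ((BitWidth * 2) - 1 as a mask) must be preserved.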
    APInt ShAmtMask(BitWidth, (BitWidth * 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::FSLW:
  case RISCVISD::FSRW: {
    // Only the lower 32 bits of the values and the lower 6 bits of the shift
    // amount are read.
    SDValue Op0 = N->getOperand(0);
    SDValue Op1 = N->getOperand(1);
    SDValue ShAmt = N->getOperand(2);
    APInt OpMask = APInt::getLowBitsSet(Op0.getValueSizeInBits(), 32);
    APInt ShAmtMask = APInt::getLowBitsSet(ShAmt.getValueSizeInBits(), 6);
    if (SimplifyDemandedBits(Op0, OpMask, DCI) ||
        SimplifyDemandedBits(Op1, OpMask, DCI) ||
        SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }
    break;
  }
  case RISCVISD::GREV:
  case RISCVISD::GORC: {
    // Only the lower log2(Bitwidth) bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(1);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    APInt ShAmtMask(BitWidth, BitWidth - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::GREVW:
  case RISCVISD::GORCW: {
    // Only the lower 32 bits of LHS and lower 5 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 5);
    if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
        SimplifyDemandedBits(RHS, RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    return combineGREVI_GORCI(N, DCI.DAG);
  }
  case RISCVISD::SHFL:
  case RISCVISD::UNSHFL: {
    // Only the lower log2(Bitwidth)-1 bits of the shift amount are read.
    SDValue ShAmt = N->getOperand(1);
    unsigned BitWidth = ShAmt.getValueSizeInBits();
    assert(isPowerOf2_32(BitWidth) && "Unexpected bit width");
    APInt ShAmtMask(BitWidth, (BitWidth / 2) - 1);
    if (SimplifyDemandedBits(ShAmt, ShAmtMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    break;
  }
  case RISCVISD::SHFLW:
  case RISCVISD::UNSHFLW: {
    // Only the lower 32 bits of LHS and the lower 4 bits of RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt LHSMask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    APInt RHSMask = APInt::getLowBitsSet(RHS.getValueSizeInBits(), 4);
    if (SimplifyDemandedBits(LHS, LHSMask, DCI) ||
        SimplifyDemandedBits(RHS, RHSMask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    break;
  }
  case RISCVISD::BCOMPRESSW:
  case RISCVISD::BDECOMPRESSW: {
    // Only the lower 32 bits of LHS and RHS are read.
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    APInt Mask = APInt::getLowBitsSet(LHS.getValueSizeInBits(), 32);
    if (SimplifyDemandedBits(LHS, Mask, DCI) ||
        SimplifyDemandedBits(RHS, Mask, DCI)) {
      if (N->getOpcode() != ISD::DELETED_NODE)
        DCI.AddToWorklist(N);
      return SDValue(N, 0);
    }

    break;
  }
  case RISCVISD::FMV_X_ANYEXTW_RV64: {
    SDLoc DL(N);
    SDValue Op0 = N->getOperand(0);
    // If the input to FMV_X_ANYEXTW_RV64 is just FMV_W_X_RV64 then the
    // conversion is unnecessary and can be replaced with an ANY_EXTEND
    // of the FMV_W_X_RV64 operand.
    if (Op0->getOpcode() == RISCVISD::FMV_W_X_RV64) {
      assert(Op0.getOperand(0).getValueType() == MVT::i64 &&
             "Unexpected value type!");
      return Op0.getOperand(0);
    }

    // This is a target-specific version of a DAGCombine performed in
    // DAGCombiner::visitBITCAST. It performs the equivalent of:
    // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
    // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
    if (!(Op0.getOpcode() == ISD::FNEG || Op0.getOpcode() == ISD::FABS) ||
        !Op0.getNode()->hasOneUse())
      break;
    SDValue NewFMV = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64,
                                 Op0.getOperand(0));
    APInt SignBit = APInt::getSignMask(32).sext(64);
    if (Op0.getOpcode() == ISD::FNEG)
      return DAG.getNode(ISD::XOR, DL, MVT::i64, NewFMV,
                         DAG.getConstant(SignBit, DL, MVT::i64));

    assert(Op0.getOpcode() == ISD::FABS);
    return DAG.getNode(ISD::AND, DL, MVT::i64, NewFMV,
                       DAG.getConstant(~SignBit, DL, MVT::i64));
  }
  case ISD::AND:
    return performANDCombine(N, DCI, Subtarget);
  case ISD::OR:
    return performORCombine(N, DCI, Subtarget);
  case ISD::XOR:
    return performXORCombine(N, DCI, Subtarget);
  case RISCVISD::SELECT_CC: {
    SDValue LHS = N->getOperand(0);
    SDValue RHS = N->getOperand(1);
    auto CCVal = static_cast<ISD::CondCode>(N->getConstantOperandVal(2));
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (select_cc (setlt X, Y), 0, ne, trueV, falseV) ->
    //      (select_cc X, Y, lt, trueV, falseV)
    // Sometimes the setcc is introduced after select_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      SDValue TargetCC =
          DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    // Fold (select_cc (xor X, Y), 0, eq/ne, trueV, falseV) ->
    //      (select_cc X, Y, eq/ne, trueV, falseV)
    if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
      return DAG.getNode(RISCVISD::SELECT_CC, SDLoc(N), N->getValueType(0),
                         {LHS.getOperand(0), LHS.getOperand(1),
                          N->getOperand(2), N->getOperand(3),
                          N->getOperand(4)});
    // (select_cc X, 1, setne, trueV, falseV) ->
    // (select_cc X, 0, seteq, trueV, falseV) if we can prove X is 0/1.
    // This can occur when legalizing some floating point comparisons.
    APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
    if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
      SDLoc DL(N);
      CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
      SDValue TargetCC =
          DAG.getTargetConstant(CCVal, DL, Subtarget.getXLenVT());
      RHS = DAG.getConstant(0, DL, LHS.getValueType());
      return DAG.getNode(
          RISCVISD::SELECT_CC, DL, N->getValueType(0),
          {LHS, RHS, TargetCC, N->getOperand(3), N->getOperand(4)});
    }

    break;
  }
  case RISCVISD::BR_CC: {
    SDValue LHS = N->getOperand(1);
    SDValue RHS = N->getOperand(2);
    ISD::CondCode CCVal = cast<CondCodeSDNode>(N->getOperand(3))->get();
    if (!ISD::isIntEqualitySetCC(CCVal))
      break;

    // Fold (br_cc (setlt X, Y), 0, ne, dest) ->
    //      (br_cc X, Y, lt, dest)
    // Sometimes the setcc is introduced after br_cc has been formed.
    if (LHS.getOpcode() == ISD::SETCC && isNullConstant(RHS) &&
        LHS.getOperand(0).getValueType() == Subtarget.getXLenVT()) {
      // If we're looking for eq 0 instead of ne 0, we need to invert the
      // condition.
      bool Invert = CCVal == ISD::SETEQ;
      CCVal = cast<CondCodeSDNode>(LHS.getOperand(2))->get();
      if (Invert)
        CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());

      SDLoc DL(N);
      RHS = LHS.getOperand(1);
      LHS = LHS.getOperand(0);
      translateSetCCForBranch(DL, LHS, RHS, CCVal, DAG);

      return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
                         N->getOperand(0), LHS, RHS, DAG.getCondCode(CCVal),
                         N->getOperand(4));
    }

    // Fold (br_cc (xor X, Y), 0, eq/ne, dest) ->
    //      (br_cc X, Y, eq/ne, dest)
   5826     if (LHS.getOpcode() == ISD::XOR && isNullConstant(RHS))
   5827       return DAG.getNode(RISCVISD::BR_CC, SDLoc(N), N->getValueType(0),
   5828                          N->getOperand(0), LHS.getOperand(0), LHS.getOperand(1),
   5829                          N->getOperand(3), N->getOperand(4));
   5830 
    // (br_cc X, 1, setne, dest) ->
    // (br_cc X, 0, seteq, dest) if we can prove X is 0/1.
   5833     // This can occur when legalizing some floating point comparisons.
   5834     APInt Mask = APInt::getBitsSetFrom(LHS.getValueSizeInBits(), 1);
   5835     if (isOneConstant(RHS) && DAG.MaskedValueIsZero(LHS, Mask)) {
   5836       SDLoc DL(N);
   5837       CCVal = ISD::getSetCCInverse(CCVal, LHS.getValueType());
   5838       SDValue TargetCC = DAG.getCondCode(CCVal);
   5839       RHS = DAG.getConstant(0, DL, LHS.getValueType());
   5840       return DAG.getNode(RISCVISD::BR_CC, DL, N->getValueType(0),
   5841                          N->getOperand(0), LHS, RHS, TargetCC,
   5842                          N->getOperand(4));
   5843     }
   5844     break;
   5845   }
   5846   case ISD::FCOPYSIGN: {
   5847     EVT VT = N->getValueType(0);
   5848     if (!VT.isVector())
   5849       break;
   5850     // There is a form of VFSGNJ which injects the negated sign of its second
   5851     // operand. Try and bubble any FNEG up after the extend/round to produce
    // this optimized pattern. Avoid modifying cases where the FP_ROUND has
    // its TRUNC operand set to 1.
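    // e.g. (fcopysign X, (fp_extend (fneg Y)))
    //      -> (fcopysign X, (fneg (fp_extend Y)))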
   5854     SDValue In2 = N->getOperand(1);
   5855     // Avoid cases where the extend/round has multiple uses, as duplicating
   5856     // those is typically more expensive than removing a fneg.
   5857     if (!In2.hasOneUse())
   5858       break;
   5859     if (In2.getOpcode() != ISD::FP_EXTEND &&
   5860         (In2.getOpcode() != ISD::FP_ROUND || In2.getConstantOperandVal(1) != 0))
   5861       break;
   5862     In2 = In2.getOperand(0);
   5863     if (In2.getOpcode() != ISD::FNEG)
   5864       break;
   5865     SDLoc DL(N);
   5866     SDValue NewFPExtRound = DAG.getFPExtendOrRound(In2.getOperand(0), DL, VT);
   5867     return DAG.getNode(ISD::FCOPYSIGN, DL, VT, N->getOperand(0),
   5868                        DAG.getNode(ISD::FNEG, DL, VT, NewFPExtRound));
   5869   }
   5870   case ISD::MGATHER:
   5871   case ISD::MSCATTER: {
   5872     if (!DCI.isBeforeLegalize())
   5873       break;
   5874     MaskedGatherScatterSDNode *MGSN = cast<MaskedGatherScatterSDNode>(N);
   5875     SDValue Index = MGSN->getIndex();
   5876     EVT IndexVT = Index.getValueType();
   5877     MVT XLenVT = Subtarget.getXLenVT();
   5878     // RISCV indexed loads only support the "unsigned unscaled" addressing
   5879     // mode, so anything else must be manually legalized.
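    // e.g. a gather with sign-extended i16 indices and a scale of 4 becomes:
    // sign-extend the indices to XLenVT, shift them left by 2, and use
    // unsigned-unscaled addressing.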
   5880     bool NeedsIdxLegalization = MGSN->isIndexScaled() ||
   5881                                 (MGSN->isIndexSigned() &&
   5882                                  IndexVT.getVectorElementType().bitsLT(XLenVT));
   5883     if (!NeedsIdxLegalization)
   5884       break;
   5885 
   5886     SDLoc DL(N);
   5887 
   5888     // Any index legalization should first promote to XLenVT, so we don't lose
   5889     // bits when scaling. This may create an illegal index type so we let
   5890     // LLVM's legalization take care of the splitting.
   5891     if (IndexVT.getVectorElementType().bitsLT(XLenVT)) {
   5892       IndexVT = IndexVT.changeVectorElementType(XLenVT);
   5893       Index = DAG.getNode(MGSN->isIndexSigned() ? ISD::SIGN_EXTEND
   5894                                                 : ISD::ZERO_EXTEND,
   5895                           DL, IndexVT, Index);
   5896     }
   5897 
   5898     unsigned Scale = N->getConstantOperandVal(5);
   5899     if (MGSN->isIndexScaled() && Scale != 1) {
   5900       // Manually scale the indices by the element size.
   5901       // TODO: Sanitize the scale operand here?
      assert(isPowerOf2_32(Scale) && "Expecting a power-of-two scale");
   5903       SDValue SplatScale = DAG.getConstant(Log2_32(Scale), DL, IndexVT);
   5904       Index = DAG.getNode(ISD::SHL, DL, IndexVT, Index, SplatScale);
   5905     }
   5906 
   5907     ISD::MemIndexType NewIndexTy = ISD::UNSIGNED_UNSCALED;
   5908     if (const auto *MGN = dyn_cast<MaskedGatherSDNode>(N)) {
   5909       return DAG.getMaskedGather(
   5910           N->getVTList(), MGSN->getMemoryVT(), DL,
   5911           {MGSN->getChain(), MGN->getPassThru(), MGSN->getMask(),
   5912            MGSN->getBasePtr(), Index, MGN->getScale()},
   5913           MGN->getMemOperand(), NewIndexTy, MGN->getExtensionType());
   5914     }
   5915     const auto *MSN = cast<MaskedScatterSDNode>(N);
   5916     return DAG.getMaskedScatter(
   5917         N->getVTList(), MGSN->getMemoryVT(), DL,
   5918         {MGSN->getChain(), MSN->getValue(), MGSN->getMask(), MGSN->getBasePtr(),
   5919          Index, MGSN->getScale()},
   5920         MGSN->getMemOperand(), NewIndexTy, MSN->isTruncatingStore());
   5921   }
   5922   }
   5923 
   5924   return SDValue();
   5925 }
   5926 
   5927 bool RISCVTargetLowering::isDesirableToCommuteWithShift(
   5928     const SDNode *N, CombineLevel Level) const {
   5929   // The following folds are only desirable if `(OP _, c1 << c2)` can be
   5930   // materialised in fewer instructions than `(OP _, c1)`:
   5931   //
   5932   //   (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
   5933   //   (shl (or x, c1), c2) -> (or (shl x, c2), c1 << c2)
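  //
  // For example, on RV64 (shl (add x, 2047), 2) is not combined, since 2047
  // fits in an ADDI immediate but 2047 << 2 = 8188 does not; conversely,
  // (shl (add x, 1), 2) is combined, since 1 << 2 = 4 is still a legal add
  // immediate.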
   5934   SDValue N0 = N->getOperand(0);
   5935   EVT Ty = N0.getValueType();
   5936   if (Ty.isScalarInteger() &&
   5937       (N0.getOpcode() == ISD::ADD || N0.getOpcode() == ISD::OR)) {
   5938     auto *C1 = dyn_cast<ConstantSDNode>(N0->getOperand(1));
   5939     auto *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1));
   5940     if (C1 && C2) {
   5941       const APInt &C1Int = C1->getAPIntValue();
   5942       APInt ShiftedC1Int = C1Int << C2->getAPIntValue();
   5943 
   5944       // We can materialise `c1 << c2` into an add immediate, so it's "free",
   5945       // and the combine should happen, to potentially allow further combines
   5946       // later.
   5947       if (ShiftedC1Int.getMinSignedBits() <= 64 &&
   5948           isLegalAddImmediate(ShiftedC1Int.getSExtValue()))
   5949         return true;
   5950 
   5951       // We can materialise `c1` in an add immediate, so it's "free", and the
   5952       // combine should be prevented.
   5953       if (C1Int.getMinSignedBits() <= 64 &&
   5954           isLegalAddImmediate(C1Int.getSExtValue()))
   5955         return false;
   5956 
   5957       // Neither constant will fit into an immediate, so find materialisation
   5958       // costs.
   5959       int C1Cost = RISCVMatInt::getIntMatCost(C1Int, Ty.getSizeInBits(),
   5960                                               Subtarget.is64Bit());
   5961       int ShiftedC1Cost = RISCVMatInt::getIntMatCost(
   5962           ShiftedC1Int, Ty.getSizeInBits(), Subtarget.is64Bit());
   5963 
   5964       // Materialising `c1` is cheaper than materialising `c1 << c2`, so the
   5965       // combine should be prevented.
   5966       if (C1Cost < ShiftedC1Cost)
   5967         return false;
   5968     }
   5969   }
   5970   return true;
   5971 }
   5972 
   5973 bool RISCVTargetLowering::targetShrinkDemandedConstant(
   5974     SDValue Op, const APInt &DemandedBits, const APInt &DemandedElts,
   5975     TargetLoweringOpt &TLO) const {
   5976   // Delay this optimization as late as possible.
   5977   if (!TLO.LegalOps)
   5978     return false;
   5979 
   5980   EVT VT = Op.getValueType();
   5981   if (VT.isVector())
   5982     return false;
   5983 
   5984   // Only handle AND for now.
   5985   if (Op.getOpcode() != ISD::AND)
   5986     return false;
   5987 
   5988   ConstantSDNode *C = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   5989   if (!C)
   5990     return false;
   5991 
   5992   const APInt &Mask = C->getAPIntValue();
   5993 
   5994   // Clear all non-demanded bits initially.
   5995   APInt ShrunkMask = Mask & DemandedBits;
   5996 
   5997   // Try to make a smaller immediate by setting undemanded bits.
   5998 
   5999   APInt ExpandedMask = Mask | ~DemandedBits;
   6000 
   6001   auto IsLegalMask = [ShrunkMask, ExpandedMask](const APInt &Mask) -> bool {
   6002     return ShrunkMask.isSubsetOf(Mask) && Mask.isSubsetOf(ExpandedMask);
   6003   };
   6004   auto UseMask = [Mask, Op, VT, &TLO](const APInt &NewMask) -> bool {
   6005     if (NewMask == Mask)
   6006       return true;
   6007     SDLoc DL(Op);
   6008     SDValue NewC = TLO.DAG.getConstant(NewMask, DL, VT);
   6009     SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC);
   6010     return TLO.CombineTo(Op, NewOp);
   6011   };
   6012 
  // If the shrunk mask fits in a sign-extended 12-bit immediate, let the
  // target-independent code apply it.
   6015   if (ShrunkMask.isSignedIntN(12))
   6016     return false;
   6017 
   6018   // Preserve (and X, 0xffff) when zext.h is supported.
   6019   if (Subtarget.hasStdExtZbb() || Subtarget.hasStdExtZbp()) {
   6020     APInt NewMask = APInt(Mask.getBitWidth(), 0xffff);
   6021     if (IsLegalMask(NewMask))
   6022       return UseMask(NewMask);
   6023   }
   6024 
   6025   // Try to preserve (and X, 0xffffffff), the (zext_inreg X, i32) pattern.
   6026   if (VT == MVT::i64) {
   6027     APInt NewMask = APInt(64, 0xffffffff);
   6028     if (IsLegalMask(NewMask))
   6029       return UseMask(NewMask);
   6030   }
   6031 
   6032   // For the remaining optimizations, we need to be able to make a negative
   6033   // number through a combination of mask and undemanded bits.
   6034   if (!ExpandedMask.isNegative())
   6035     return false;
   6036 
  // Compute the fewest bits needed to represent the negative number.
   6038   unsigned MinSignedBits = ExpandedMask.getMinSignedBits();
   6039 
   6040   // Try to make a 12 bit negative immediate. If that fails try to make a 32
   6041   // bit negative immediate unless the shrunk immediate already fits in 32 bits.
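  // For example, on RV64 with Mask = 0xfffff000 and only the low 32 bits
  // demanded: ShrunkMask = 0xfffff000 is too large for a 12-bit or 32-bit
  // signed immediate, but setting the undemanded upper bits yields
  // 0xfffffffffffff000 (-4096), which a single LUI can materialise.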
   6042   APInt NewMask = ShrunkMask;
   6043   if (MinSignedBits <= 12)
   6044     NewMask.setBitsFrom(11);
   6045   else if (MinSignedBits <= 32 && !ShrunkMask.isSignedIntN(32))
   6046     NewMask.setBitsFrom(31);
   6047   else
   6048     return false;
   6049 
   6050   // Sanity check that our new mask is a subset of the demanded mask.
   6051   assert(IsLegalMask(NewMask));
   6052   return UseMask(NewMask);
   6053 }
   6054 
   6055 void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
   6056                                                         KnownBits &Known,
   6057                                                         const APInt &DemandedElts,
   6058                                                         const SelectionDAG &DAG,
   6059                                                         unsigned Depth) const {
   6060   unsigned BitWidth = Known.getBitWidth();
   6061   unsigned Opc = Op.getOpcode();
   6062   assert((Opc >= ISD::BUILTIN_OP_END ||
   6063           Opc == ISD::INTRINSIC_WO_CHAIN ||
   6064           Opc == ISD::INTRINSIC_W_CHAIN ||
   6065           Opc == ISD::INTRINSIC_VOID) &&
   6066          "Should use MaskedValueIsZero if you don't know whether Op"
   6067          " is a target node!");
   6068 
   6069   Known.resetAll();
   6070   switch (Opc) {
   6071   default: break;
   6072   case RISCVISD::SELECT_CC: {
   6073     Known = DAG.computeKnownBits(Op.getOperand(4), Depth + 1);
   6074     // If we don't know any bits, early out.
   6075     if (Known.isUnknown())
   6076       break;
   6077     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(3), Depth + 1);
   6078 
   6079     // Only known if known in both the LHS and RHS.
   6080     Known = KnownBits::commonBits(Known, Known2);
   6081     break;
   6082   }
   6083   case RISCVISD::REMUW: {
   6084     KnownBits Known2;
   6085     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
   6086     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
   6087     // We only care about the lower 32 bits.
   6088     Known = KnownBits::urem(Known.trunc(32), Known2.trunc(32));
   6089     // Restore the original width by sign extending.
   6090     Known = Known.sext(BitWidth);
   6091     break;
   6092   }
   6093   case RISCVISD::DIVUW: {
   6094     KnownBits Known2;
   6095     Known = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
   6096     Known2 = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
   6097     // We only care about the lower 32 bits.
   6098     Known = KnownBits::udiv(Known.trunc(32), Known2.trunc(32));
   6099     // Restore the original width by sign extending.
   6100     Known = Known.sext(BitWidth);
   6101     break;
   6102   }
   6103   case RISCVISD::CTZW: {
   6104     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
   6105     unsigned PossibleTZ = Known2.trunc(32).countMaxTrailingZeros();
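    // The result is at most PossibleTZ (CTZW of zero is 32), so only the low
    // Log2_32(PossibleTZ) + 1 bits can be set.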
   6106     unsigned LowBits = Log2_32(PossibleTZ) + 1;
   6107     Known.Zero.setBitsFrom(LowBits);
   6108     break;
   6109   }
   6110   case RISCVISD::CLZW: {
   6111     KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
   6112     unsigned PossibleLZ = Known2.trunc(32).countMaxLeadingZeros();
   6113     unsigned LowBits = Log2_32(PossibleLZ) + 1;
   6114     Known.Zero.setBitsFrom(LowBits);
   6115     break;
   6116   }
   6117   case RISCVISD::READ_VLENB:
    // VLENB is a power of two and we assume it is at least 16 bytes, so the
    // low 4 bits of the result are known zero.
   6119     Known.Zero.setLowBits(4);
   6120     break;
   6121   case ISD::INTRINSIC_W_CHAIN: {
   6122     unsigned IntNo = Op.getConstantOperandVal(1);
   6123     switch (IntNo) {
   6124     default:
   6125       // We can't do anything for most intrinsics.
   6126       break;
   6127     case Intrinsic::riscv_vsetvli:
   6128     case Intrinsic::riscv_vsetvlimax:
   6129       // Assume that VL output is positive and would fit in an int32_t.
   6130       // TODO: VLEN might be capped at 16 bits in a future V spec update.
   6131       if (BitWidth >= 32)
   6132         Known.Zero.setBitsFrom(31);
   6133       break;
   6134     }
   6135     break;
   6136   }
   6137   }
   6138 }
   6139 
   6140 unsigned RISCVTargetLowering::ComputeNumSignBitsForTargetNode(
   6141     SDValue Op, const APInt &DemandedElts, const SelectionDAG &DAG,
   6142     unsigned Depth) const {
   6143   switch (Op.getOpcode()) {
   6144   default:
   6145     break;
   6146   case RISCVISD::SLLW:
   6147   case RISCVISD::SRAW:
   6148   case RISCVISD::SRLW:
   6149   case RISCVISD::DIVW:
   6150   case RISCVISD::DIVUW:
   6151   case RISCVISD::REMUW:
   6152   case RISCVISD::ROLW:
   6153   case RISCVISD::RORW:
   6154   case RISCVISD::GREVW:
   6155   case RISCVISD::GORCW:
   6156   case RISCVISD::FSLW:
   6157   case RISCVISD::FSRW:
   6158   case RISCVISD::SHFLW:
   6159   case RISCVISD::UNSHFLW:
   6160   case RISCVISD::BCOMPRESSW:
   6161   case RISCVISD::BDECOMPRESSW:
   6162     // TODO: As the result is sign-extended, this is conservatively correct. A
   6163     // more precise answer could be calculated for SRAW depending on known
   6164     // bits in the shift amount.
   6165     return 33;
   6166   case RISCVISD::SHFL:
   6167   case RISCVISD::UNSHFL: {
    // There is no SHFLIW, but an i64 SHFLI with bit 4 of the control word
   6169     // cleared doesn't affect bit 31. The upper 32 bits will be shuffled, but
   6170     // will stay within the upper 32 bits. If there were more than 32 sign bits
   6171     // before there will be at least 33 sign bits after.
   6172     if (Op.getValueType() == MVT::i64 &&
   6173         isa<ConstantSDNode>(Op.getOperand(1)) &&
   6174         (Op.getConstantOperandVal(1) & 0x10) == 0) {
   6175       unsigned Tmp = DAG.ComputeNumSignBits(Op.getOperand(0), Depth + 1);
   6176       if (Tmp > 32)
   6177         return 33;
   6178     }
   6179     break;
   6180   }
   6181   case RISCVISD::VMV_X_S:
   6182     // The number of sign bits of the scalar result is computed by obtaining the
   6183     // element type of the input vector operand, subtracting its width from the
   6184     // XLEN, and then adding one (sign bit within the element type). If the
   6185     // element type is wider than XLen, the least-significant XLEN bits are
   6186     // taken.
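    // e.g. on RV64, extracting from a vector of i8 elements gives
    // 64 - 8 + 1 = 57 sign bits.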
   6187     if (Op.getOperand(0).getScalarValueSizeInBits() > Subtarget.getXLen())
   6188       return 1;
   6189     return Subtarget.getXLen() - Op.getOperand(0).getScalarValueSizeInBits() + 1;
   6190   }
   6191 
   6192   return 1;
   6193 }
   6194 
   6195 static MachineBasicBlock *emitReadCycleWidePseudo(MachineInstr &MI,
   6196                                                   MachineBasicBlock *BB) {
   6197   assert(MI.getOpcode() == RISCV::ReadCycleWide && "Unexpected instruction");
   6198 
   6199   // To read the 64-bit cycle CSR on a 32-bit target, we read the two halves.
   6200   // Should the count have wrapped while it was being read, we need to try
   6201   // again.
   6202   // ...
   6203   // read:
   6204   // rdcycleh x3 # load high word of cycle
   6205   // rdcycle  x2 # load low word of cycle
   6206   // rdcycleh x4 # load high word of cycle
   6207   // bne x3, x4, read # check if high word reads match, otherwise try again
   6208   // ...
   6209 
   6210   MachineFunction &MF = *BB->getParent();
   6211   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   6212   MachineFunction::iterator It = ++BB->getIterator();
   6213 
   6214   MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(LLVM_BB);
   6215   MF.insert(It, LoopMBB);
   6216 
   6217   MachineBasicBlock *DoneMBB = MF.CreateMachineBasicBlock(LLVM_BB);
   6218   MF.insert(It, DoneMBB);
   6219 
   6220   // Transfer the remainder of BB and its successor edges to DoneMBB.
   6221   DoneMBB->splice(DoneMBB->begin(), BB,
   6222                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
   6223   DoneMBB->transferSuccessorsAndUpdatePHIs(BB);
   6224 
   6225   BB->addSuccessor(LoopMBB);
   6226 
   6227   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   6228   Register ReadAgainReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
   6229   Register LoReg = MI.getOperand(0).getReg();
   6230   Register HiReg = MI.getOperand(1).getReg();
   6231   DebugLoc DL = MI.getDebugLoc();
   6232 
   6233   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
   6234   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), HiReg)
   6235       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
   6236       .addReg(RISCV::X0);
   6237   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), LoReg)
   6238       .addImm(RISCVSysReg::lookupSysRegByName("CYCLE")->Encoding)
   6239       .addReg(RISCV::X0);
   6240   BuildMI(LoopMBB, DL, TII->get(RISCV::CSRRS), ReadAgainReg)
   6241       .addImm(RISCVSysReg::lookupSysRegByName("CYCLEH")->Encoding)
   6242       .addReg(RISCV::X0);
   6243 
   6244   BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
   6245       .addReg(HiReg)
   6246       .addReg(ReadAgainReg)
   6247       .addMBB(LoopMBB);
   6248 
   6249   LoopMBB->addSuccessor(LoopMBB);
   6250   LoopMBB->addSuccessor(DoneMBB);
   6251 
   6252   MI.eraseFromParent();
   6253 
   6254   return DoneMBB;
   6255 }
   6256 
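// Emit SplitF64Pseudo by storing the FPR64 source to a stack slot and
// reloading the two 32-bit halves into GPRs with a pair of LWs.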
   6257 static MachineBasicBlock *emitSplitF64Pseudo(MachineInstr &MI,
   6258                                              MachineBasicBlock *BB) {
   6259   assert(MI.getOpcode() == RISCV::SplitF64Pseudo && "Unexpected instruction");
   6260 
   6261   MachineFunction &MF = *BB->getParent();
   6262   DebugLoc DL = MI.getDebugLoc();
   6263   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   6264   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
   6265   Register LoReg = MI.getOperand(0).getReg();
   6266   Register HiReg = MI.getOperand(1).getReg();
   6267   Register SrcReg = MI.getOperand(2).getReg();
   6268   const TargetRegisterClass *SrcRC = &RISCV::FPR64RegClass;
   6269   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
   6270 
   6271   TII.storeRegToStackSlot(*BB, MI, SrcReg, MI.getOperand(2).isKill(), FI, SrcRC,
   6272                           RI);
   6273   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
   6274   MachineMemOperand *MMOLo =
   6275       MF.getMachineMemOperand(MPI, MachineMemOperand::MOLoad, 4, Align(8));
   6276   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
   6277       MPI.getWithOffset(4), MachineMemOperand::MOLoad, 4, Align(8));
   6278   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), LoReg)
   6279       .addFrameIndex(FI)
   6280       .addImm(0)
   6281       .addMemOperand(MMOLo);
   6282   BuildMI(*BB, MI, DL, TII.get(RISCV::LW), HiReg)
   6283       .addFrameIndex(FI)
   6284       .addImm(4)
   6285       .addMemOperand(MMOHi);
   6286   MI.eraseFromParent(); // The pseudo instruction is gone now.
   6287   return BB;
   6288 }
   6289 
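// Emit BuildPairF64Pseudo by storing the two 32-bit GPR halves to a stack
// slot with a pair of SWs and reloading the combined value into an FPR64.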
   6290 static MachineBasicBlock *emitBuildPairF64Pseudo(MachineInstr &MI,
   6291                                                  MachineBasicBlock *BB) {
   6292   assert(MI.getOpcode() == RISCV::BuildPairF64Pseudo &&
   6293          "Unexpected instruction");
   6294 
   6295   MachineFunction &MF = *BB->getParent();
   6296   DebugLoc DL = MI.getDebugLoc();
   6297   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   6298   const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo();
   6299   Register DstReg = MI.getOperand(0).getReg();
   6300   Register LoReg = MI.getOperand(1).getReg();
   6301   Register HiReg = MI.getOperand(2).getReg();
   6302   const TargetRegisterClass *DstRC = &RISCV::FPR64RegClass;
   6303   int FI = MF.getInfo<RISCVMachineFunctionInfo>()->getMoveF64FrameIndex(MF);
   6304 
   6305   MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
   6306   MachineMemOperand *MMOLo =
   6307       MF.getMachineMemOperand(MPI, MachineMemOperand::MOStore, 4, Align(8));
   6308   MachineMemOperand *MMOHi = MF.getMachineMemOperand(
   6309       MPI.getWithOffset(4), MachineMemOperand::MOStore, 4, Align(8));
   6310   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
   6311       .addReg(LoReg, getKillRegState(MI.getOperand(1).isKill()))
   6312       .addFrameIndex(FI)
   6313       .addImm(0)
   6314       .addMemOperand(MMOLo);
   6315   BuildMI(*BB, MI, DL, TII.get(RISCV::SW))
   6316       .addReg(HiReg, getKillRegState(MI.getOperand(2).isKill()))
   6317       .addFrameIndex(FI)
   6318       .addImm(4)
   6319       .addMemOperand(MMOHi);
   6320   TII.loadRegFromStackSlot(*BB, MI, DstReg, FI, DstRC, RI);
   6321   MI.eraseFromParent(); // The pseudo instruction is gone now.
   6322   return BB;
   6323 }
   6324 
   6325 static bool isSelectPseudo(MachineInstr &MI) {
   6326   switch (MI.getOpcode()) {
   6327   default:
   6328     return false;
   6329   case RISCV::Select_GPR_Using_CC_GPR:
   6330   case RISCV::Select_FPR16_Using_CC_GPR:
   6331   case RISCV::Select_FPR32_Using_CC_GPR:
   6332   case RISCV::Select_FPR64_Using_CC_GPR:
   6333     return true;
   6334   }
   6335 }
   6336 
   6337 static MachineBasicBlock *emitSelectPseudo(MachineInstr &MI,
   6338                                            MachineBasicBlock *BB) {
   6339   // To "insert" Select_* instructions, we actually have to insert the triangle
   6340   // control-flow pattern.  The incoming instructions know the destination vreg
   6341   // to set, the condition code register to branch on, the true/false values to
   6342   // select between, and the condcode to use to select the appropriate branch.
   6343   //
   6344   // We produce the following control flow:
   6345   //     HeadMBB
   6346   //     |  \
   6347   //     |  IfFalseMBB
   6348   //     | /
   6349   //    TailMBB
   6350   //
   6351   // When we find a sequence of selects we attempt to optimize their emission
   6352   // by sharing the control flow. Currently we only handle cases where we have
   6353   // multiple selects with the exact same condition (same LHS, RHS and CC).
   6354   // The selects may be interleaved with other instructions if the other
   6355   // instructions meet some requirements we deem safe:
   6356   // - They are debug instructions. Otherwise,
   6357   // - They do not have side-effects, do not access memory and their inputs do
   6358   //   not depend on the results of the select pseudo-instructions.
   6359   // The TrueV/FalseV operands of the selects cannot depend on the result of
   6360   // previous selects in the sequence.
   6361   // These conditions could be further relaxed. See the X86 target for a
   6362   // related approach and more information.
   6363   Register LHS = MI.getOperand(1).getReg();
   6364   Register RHS = MI.getOperand(2).getReg();
   6365   auto CC = static_cast<ISD::CondCode>(MI.getOperand(3).getImm());
   6366 
   6367   SmallVector<MachineInstr *, 4> SelectDebugValues;
   6368   SmallSet<Register, 4> SelectDests;
   6369   SelectDests.insert(MI.getOperand(0).getReg());
   6370 
   6371   MachineInstr *LastSelectPseudo = &MI;
   6372 
   6373   for (auto E = BB->end(), SequenceMBBI = MachineBasicBlock::iterator(MI);
   6374        SequenceMBBI != E; ++SequenceMBBI) {
   6375     if (SequenceMBBI->isDebugInstr())
   6376       continue;
   6377     else if (isSelectPseudo(*SequenceMBBI)) {
   6378       if (SequenceMBBI->getOperand(1).getReg() != LHS ||
   6379           SequenceMBBI->getOperand(2).getReg() != RHS ||
   6380           SequenceMBBI->getOperand(3).getImm() != CC ||
   6381           SelectDests.count(SequenceMBBI->getOperand(4).getReg()) ||
   6382           SelectDests.count(SequenceMBBI->getOperand(5).getReg()))
   6383         break;
   6384       LastSelectPseudo = &*SequenceMBBI;
   6385       SequenceMBBI->collectDebugValues(SelectDebugValues);
   6386       SelectDests.insert(SequenceMBBI->getOperand(0).getReg());
   6387     } else {
   6388       if (SequenceMBBI->hasUnmodeledSideEffects() ||
   6389           SequenceMBBI->mayLoadOrStore())
   6390         break;
   6391       if (llvm::any_of(SequenceMBBI->operands(), [&](MachineOperand &MO) {
   6392             return MO.isReg() && MO.isUse() && SelectDests.count(MO.getReg());
   6393           }))
   6394         break;
   6395     }
   6396   }
   6397 
   6398   const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
   6399   const BasicBlock *LLVM_BB = BB->getBasicBlock();
   6400   DebugLoc DL = MI.getDebugLoc();
   6401   MachineFunction::iterator I = ++BB->getIterator();
   6402 
   6403   MachineBasicBlock *HeadMBB = BB;
   6404   MachineFunction *F = BB->getParent();
   6405   MachineBasicBlock *TailMBB = F->CreateMachineBasicBlock(LLVM_BB);
   6406   MachineBasicBlock *IfFalseMBB = F->CreateMachineBasicBlock(LLVM_BB);
   6407 
   6408   F->insert(I, IfFalseMBB);
   6409   F->insert(I, TailMBB);
   6410 
   6411   // Transfer debug instructions associated with the selects to TailMBB.
   6412   for (MachineInstr *DebugInstr : SelectDebugValues) {
   6413     TailMBB->push_back(DebugInstr->removeFromParent());
   6414   }
   6415 
   6416   // Move all instructions after the sequence to TailMBB.
   6417   TailMBB->splice(TailMBB->end(), HeadMBB,
   6418                   std::next(LastSelectPseudo->getIterator()), HeadMBB->end());
   6419   // Update machine-CFG edges by transferring all successors of the current
   6420   // block to the new block which will contain the Phi nodes for the selects.
   6421   TailMBB->transferSuccessorsAndUpdatePHIs(HeadMBB);
   6422   // Set the successors for HeadMBB.
   6423   HeadMBB->addSuccessor(IfFalseMBB);
   6424   HeadMBB->addSuccessor(TailMBB);
   6425 
   6426   // Insert appropriate branch.
   6427   unsigned Opcode = getBranchOpcodeForIntCondCode(CC);
   6428 
   6429   BuildMI(HeadMBB, DL, TII.get(Opcode))
   6430     .addReg(LHS)
   6431     .addReg(RHS)
   6432     .addMBB(TailMBB);
   6433 
   6434   // IfFalseMBB just falls through to TailMBB.
   6435   IfFalseMBB->addSuccessor(TailMBB);
   6436 
   6437   // Create PHIs for all of the select pseudo-instructions.
   6438   auto SelectMBBI = MI.getIterator();
   6439   auto SelectEnd = std::next(LastSelectPseudo->getIterator());
   6440   auto InsertionPoint = TailMBB->begin();
   6441   while (SelectMBBI != SelectEnd) {
   6442     auto Next = std::next(SelectMBBI);
   6443     if (isSelectPseudo(*SelectMBBI)) {
   6444       // %Result = phi [ %TrueValue, HeadMBB ], [ %FalseValue, IfFalseMBB ]
   6445       BuildMI(*TailMBB, InsertionPoint, SelectMBBI->getDebugLoc(),
   6446               TII.get(RISCV::PHI), SelectMBBI->getOperand(0).getReg())
   6447           .addReg(SelectMBBI->getOperand(4).getReg())
   6448           .addMBB(HeadMBB)
   6449           .addReg(SelectMBBI->getOperand(5).getReg())
   6450           .addMBB(IfFalseMBB);
   6451       SelectMBBI->eraseFromParent();
   6452     }
   6453     SelectMBBI = Next;
   6454   }
   6455 
   6456   F->getProperties().reset(MachineFunctionProperties::Property::NoPHIs);
   6457   return TailMBB;
   6458 }
   6459 
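// Look through a chain of full COPY instructions to the defining instruction,
// returning nullptr if the chain cannot be traced through virtual registers.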
   6460 static MachineInstr *elideCopies(MachineInstr *MI,
   6461                                  const MachineRegisterInfo &MRI) {
   6462   while (true) {
   6463     if (!MI->isFullCopy())
   6464       return MI;
   6465     if (!Register::isVirtualRegister(MI->getOperand(1).getReg()))
   6466       return nullptr;
   6467     MI = MRI.getVRegDef(MI->getOperand(1).getReg());
   6468     if (!MI)
   6469       return nullptr;
   6470   }
   6471 }
   6472 
   6473 static MachineBasicBlock *addVSetVL(MachineInstr &MI, MachineBasicBlock *BB,
   6474                                     int VLIndex, unsigned SEWIndex,
   6475                                     RISCVII::VLMUL VLMul,
   6476                                     bool ForceTailAgnostic) {
   6477   MachineFunction &MF = *BB->getParent();
   6478   DebugLoc DL = MI.getDebugLoc();
   6479   const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
   6480 
   6481   unsigned Log2SEW = MI.getOperand(SEWIndex).getImm();
   6482   unsigned SEW = 1 << Log2SEW;
   6483   assert(RISCVVType::isValidSEW(SEW) && "Unexpected SEW");
   6484 
   6485   MachineRegisterInfo &MRI = MF.getRegInfo();
   6486 
   6487   auto BuildVSETVLI = [&]() {
   6488     if (VLIndex >= 0) {
   6489       Register DestReg = MRI.createVirtualRegister(&RISCV::GPRRegClass);
   6490       const MachineOperand &VLOp = MI.getOperand(VLIndex);
   6491 
   6492       // VL can be a register or an immediate.
   6493       if (VLOp.isImm())
   6494         return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETIVLI))
   6495             .addReg(DestReg, RegState::Define | RegState::Dead)
   6496             .addImm(VLOp.getImm());
   6497 
   6498       Register VLReg = MI.getOperand(VLIndex).getReg();
   6499       return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
   6500           .addReg(DestReg, RegState::Define | RegState::Dead)
   6501           .addReg(VLReg);
   6502     }
   6503 
    // With no VL operand in the pseudo, do not modify VL (rd = X0, rs1 = X0).
   6505     return BuildMI(*BB, MI, DL, TII.get(RISCV::PseudoVSETVLI))
   6506         .addReg(RISCV::X0, RegState::Define | RegState::Dead)
   6507         .addReg(RISCV::X0, RegState::Kill);
   6508   };
   6509 
   6510   MachineInstrBuilder MIB = BuildVSETVLI();
   6511 
   6512   // Default to tail agnostic unless the destination is tied to a source. In
   6513   // that case the user would have some control over the tail values. The tail
   6514   // policy is also ignored on instructions that only update element 0 like
   6515   // vmv.s.x or reductions so use agnostic there to match the common case.
   6516   // FIXME: This is conservatively correct, but we might want to detect that
   6517   // the input is undefined.
   6518   bool TailAgnostic = true;
   6519   unsigned UseOpIdx;
   6520   if (!ForceTailAgnostic && MI.isRegTiedToUseOperand(0, &UseOpIdx)) {
   6521     TailAgnostic = false;
   6522     // If the tied operand is an IMPLICIT_DEF we can keep TailAgnostic.
   6523     const MachineOperand &UseMO = MI.getOperand(UseOpIdx);
   6524     MachineInstr *UseMI = MRI.getVRegDef(UseMO.getReg());
   6525     if (UseMI) {
   6526       UseMI = elideCopies(UseMI, MRI);
   6527       if (UseMI && UseMI->isImplicitDef())
   6528         TailAgnostic = true;
   6529     }
   6530   }
   6531 
   6532   // For simplicity we reuse the vtype representation here.
   6533   MIB.addImm(RISCVVType::encodeVTYPE(VLMul, SEW,
   6534                                      /*TailAgnostic*/ TailAgnostic,
   6535                                      /*MaskAgnostic*/ false));
   6536 
   6537   // Remove (now) redundant operands from pseudo
   6538   if (VLIndex >= 0 && MI.getOperand(VLIndex).isReg()) {
   6539     MI.getOperand(VLIndex).setReg(RISCV::NoRegister);
   6540     MI.getOperand(VLIndex).setIsKill(false);
   6541   }
   6542 
   6543   return BB;
   6544 }
   6545 
   6546 MachineBasicBlock *
   6547 RISCVTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
   6548                                                  MachineBasicBlock *BB) const {
   6549   uint64_t TSFlags = MI.getDesc().TSFlags;
   6550 
   6551   if (RISCVII::hasSEWOp(TSFlags)) {
   6552     unsigned NumOperands = MI.getNumExplicitOperands();
   6553     int VLIndex = RISCVII::hasVLOp(TSFlags) ? NumOperands - 2 : -1;
   6554     unsigned SEWIndex = NumOperands - 1;
   6555     bool ForceTailAgnostic = RISCVII::doesForceTailAgnostic(TSFlags);
   6556 
   6557     RISCVII::VLMUL VLMul = RISCVII::getLMul(TSFlags);
   6558     return addVSetVL(MI, BB, VLIndex, SEWIndex, VLMul, ForceTailAgnostic);
   6559   }
   6560 
   6561   switch (MI.getOpcode()) {
   6562   default:
   6563     llvm_unreachable("Unexpected instr type to insert");
   6564   case RISCV::ReadCycleWide:
    assert(!Subtarget.is64Bit() &&
           "ReadCycleWide is only to be used on riscv32");
   6567     return emitReadCycleWidePseudo(MI, BB);
   6568   case RISCV::Select_GPR_Using_CC_GPR:
   6569   case RISCV::Select_FPR16_Using_CC_GPR:
   6570   case RISCV::Select_FPR32_Using_CC_GPR:
   6571   case RISCV::Select_FPR64_Using_CC_GPR:
   6572     return emitSelectPseudo(MI, BB);
   6573   case RISCV::BuildPairF64Pseudo:
   6574     return emitBuildPairF64Pseudo(MI, BB);
   6575   case RISCV::SplitF64Pseudo:
   6576     return emitSplitF64Pseudo(MI, BB);
   6577   }
   6578 }
   6579 
   6580 // Calling Convention Implementation.
   6581 // The expectations for frontend ABI lowering vary from target to target.
   6582 // Ideally, an LLVM frontend would be able to avoid worrying about many ABI
   6583 // details, but this is a longer term goal. For now, we simply try to keep the
   6584 // role of the frontend as simple and well-defined as possible. The rules can
   6585 // be summarised as:
   6586 // * Never split up large scalar arguments. We handle them here.
   6587 // * If a hardfloat calling convention is being used, and the struct may be
   6588 // passed in a pair of registers (fp+fp, int+fp), and both registers are
   6589 // available, then pass as two separate arguments. If either the GPRs or FPRs
   6590 // are exhausted, then pass according to the rule below.
   6591 // * If a struct could never be passed in registers or directly in a stack
   6592 // slot (as it is larger than 2*XLEN and the floating point rules don't
   6593 // apply), then pass it using a pointer with the byval attribute.
   6594 // * If a struct is less than 2*XLEN, then coerce to either a two-element
   6595 // word-sized array or a 2*XLEN scalar (depending on alignment).
   6596 // * The frontend can determine whether a struct is returned by reference or
   6597 // not based on its size and fields. If it will be returned by reference, the
   6598 // frontend must modify the prototype so a pointer with the sret annotation is
   6599 // passed as the first argument. This is not necessary for large scalar
   6600 // returns.
   6601 // * Struct return values and varargs should be coerced to structs containing
   6602 // register-size fields in the same situations they would be for fixed
   6603 // arguments.
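//
// For example, under these rules a struct { double d; int i; } argument on
// ILP32D is passed as a separate double (in an FPR) and int (in a GPR) while
// both register classes still have registers available.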
   6604 
   6605 static const MCPhysReg ArgGPRs[] = {
   6606   RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13,
   6607   RISCV::X14, RISCV::X15, RISCV::X16, RISCV::X17
   6608 };
   6609 static const MCPhysReg ArgFPR16s[] = {
   6610   RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H,
   6611   RISCV::F14_H, RISCV::F15_H, RISCV::F16_H, RISCV::F17_H
   6612 };
   6613 static const MCPhysReg ArgFPR32s[] = {
   6614   RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F,
   6615   RISCV::F14_F, RISCV::F15_F, RISCV::F16_F, RISCV::F17_F
   6616 };
   6617 static const MCPhysReg ArgFPR64s[] = {
   6618   RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D,
   6619   RISCV::F14_D, RISCV::F15_D, RISCV::F16_D, RISCV::F17_D
   6620 };
   6621 // This is an interim calling convention and it may be changed in the future.
   6622 static const MCPhysReg ArgVRs[] = {
   6623     RISCV::V8,  RISCV::V9,  RISCV::V10, RISCV::V11, RISCV::V12, RISCV::V13,
   6624     RISCV::V14, RISCV::V15, RISCV::V16, RISCV::V17, RISCV::V18, RISCV::V19,
   6625     RISCV::V20, RISCV::V21, RISCV::V22, RISCV::V23};
   6626 static const MCPhysReg ArgVRM2s[] = {RISCV::V8M2,  RISCV::V10M2, RISCV::V12M2,
   6627                                      RISCV::V14M2, RISCV::V16M2, RISCV::V18M2,
   6628                                      RISCV::V20M2, RISCV::V22M2};
   6629 static const MCPhysReg ArgVRM4s[] = {RISCV::V8M4, RISCV::V12M4, RISCV::V16M4,
   6630                                      RISCV::V20M4};
   6631 static const MCPhysReg ArgVRM8s[] = {RISCV::V8M8, RISCV::V16M8};
   6632 
   6633 // Pass a 2*XLEN argument that has been split into two XLEN values through
   6634 // registers or the stack as necessary.
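// For example, an i64 on RV32 split into two i32 halves: with one GPR left,
// the low half goes in that register and the high half goes on the stack;
// with no GPRs left, both halves go on the stack with the required alignment.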
   6635 static bool CC_RISCVAssign2XLen(unsigned XLen, CCState &State, CCValAssign VA1,
   6636                                 ISD::ArgFlagsTy ArgFlags1, unsigned ValNo2,
   6637                                 MVT ValVT2, MVT LocVT2,
   6638                                 ISD::ArgFlagsTy ArgFlags2) {
   6639   unsigned XLenInBytes = XLen / 8;
   6640   if (Register Reg = State.AllocateReg(ArgGPRs)) {
   6641     // At least one half can be passed via register.
   6642     State.addLoc(CCValAssign::getReg(VA1.getValNo(), VA1.getValVT(), Reg,
   6643                                      VA1.getLocVT(), CCValAssign::Full));
   6644   } else {
   6645     // Both halves must be passed on the stack, with proper alignment.
   6646     Align StackAlign =
   6647         std::max(Align(XLenInBytes), ArgFlags1.getNonZeroOrigAlign());
   6648     State.addLoc(
   6649         CCValAssign::getMem(VA1.getValNo(), VA1.getValVT(),
   6650                             State.AllocateStack(XLenInBytes, StackAlign),
   6651                             VA1.getLocVT(), CCValAssign::Full));
   6652     State.addLoc(CCValAssign::getMem(
   6653         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
   6654         LocVT2, CCValAssign::Full));
   6655     return false;
   6656   }
   6657 
   6658   if (Register Reg = State.AllocateReg(ArgGPRs)) {
   6659     // The second half can also be passed via register.
   6660     State.addLoc(
   6661         CCValAssign::getReg(ValNo2, ValVT2, Reg, LocVT2, CCValAssign::Full));
   6662   } else {
   6663     // The second half is passed via the stack, without additional alignment.
   6664     State.addLoc(CCValAssign::getMem(
   6665         ValNo2, ValVT2, State.AllocateStack(XLenInBytes, Align(XLenInBytes)),
   6666         LocVT2, CCValAssign::Full));
   6667   }
   6668 
   6669   return false;
   6670 }
   6671 
   6672 // Implements the RISC-V calling convention. Returns true upon failure.
   6673 static bool CC_RISCV(const DataLayout &DL, RISCVABI::ABI ABI, unsigned ValNo,
   6674                      MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo,
   6675                      ISD::ArgFlagsTy ArgFlags, CCState &State, bool IsFixed,
   6676                      bool IsRet, Type *OrigTy, const RISCVTargetLowering &TLI,
   6677                      Optional<unsigned> FirstMaskArgument) {
   6678   unsigned XLen = DL.getLargestLegalIntTypeSizeInBits();
   6679   assert(XLen == 32 || XLen == 64);
   6680   MVT XLenVT = XLen == 32 ? MVT::i32 : MVT::i64;
   6681 
  // Any return value split into more than two values can't be returned
   6683   // directly. Vectors are returned via the available vector registers.
   6684   if (!LocVT.isVector() && IsRet && ValNo > 1)
   6685     return true;
   6686 
  // Set UseGPRForF16_F32 if targeting one of the soft-float ABIs, if passing
  // a variadic argument, or if no F16/F32 argument registers are available.
   6689   bool UseGPRForF16_F32 = true;
  // Set UseGPRForF64 if targeting soft-float ABIs or an FLEN=32 ABI, if
  // passing a variadic argument, or if no F64 argument registers are available.
   6692   bool UseGPRForF64 = true;
   6693 
   6694   switch (ABI) {
   6695   default:
   6696     llvm_unreachable("Unexpected ABI");
   6697   case RISCVABI::ABI_ILP32:
   6698   case RISCVABI::ABI_LP64:
   6699     break;
   6700   case RISCVABI::ABI_ILP32F:
   6701   case RISCVABI::ABI_LP64F:
   6702     UseGPRForF16_F32 = !IsFixed;
   6703     break;
   6704   case RISCVABI::ABI_ILP32D:
   6705   case RISCVABI::ABI_LP64D:
   6706     UseGPRForF16_F32 = !IsFixed;
   6707     UseGPRForF64 = !IsFixed;
   6708     break;
   6709   }
   6710 
   6711   // FPR16, FPR32, and FPR64 alias each other.
   6712   if (State.getFirstUnallocated(ArgFPR32s) == array_lengthof(ArgFPR32s)) {
   6713     UseGPRForF16_F32 = true;
   6714     UseGPRForF64 = true;
   6715   }
   6716 
   6717   // From this point on, rely on UseGPRForF16_F32, UseGPRForF64 and
   6718   // similar local variables rather than directly checking against the target
   6719   // ABI.
   6720 
   6721   if (UseGPRForF16_F32 && (ValVT == MVT::f16 || ValVT == MVT::f32)) {
   6722     LocVT = XLenVT;
   6723     LocInfo = CCValAssign::BCvt;
   6724   } else if (UseGPRForF64 && XLen == 64 && ValVT == MVT::f64) {
   6725     LocVT = MVT::i64;
   6726     LocInfo = CCValAssign::BCvt;
   6727   }
   6728 
   6729   // If this is a variadic argument, the RISC-V calling convention requires
   6730   // that it is assigned an 'even' or 'aligned' register if it has 8-byte
   6731   // alignment (RV32) or 16-byte alignment (RV64). An aligned register should
   6732   // be used regardless of whether the original argument was split during
   6733   // legalisation or not. The argument will not be passed by registers if the
   6734   // original type is larger than 2*XLEN, so the register alignment rule does
   6735   // not apply.
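  // For example, a variadic double on RV32: if a0 is taken but a1 is free,
  // a1 (an 'odd' register) is skipped and the double is passed in a2 and a3.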
   6736   unsigned TwoXLenInBytes = (2 * XLen) / 8;
   6737   if (!IsFixed && ArgFlags.getNonZeroOrigAlign() == TwoXLenInBytes &&
   6738       DL.getTypeAllocSize(OrigTy) == TwoXLenInBytes) {
   6739     unsigned RegIdx = State.getFirstUnallocated(ArgGPRs);
   6740     // Skip 'odd' register if necessary.
   6741     if (RegIdx != array_lengthof(ArgGPRs) && RegIdx % 2 == 1)
   6742       State.AllocateReg(ArgGPRs);
   6743   }
   6744 
   6745   SmallVectorImpl<CCValAssign> &PendingLocs = State.getPendingLocs();
   6746   SmallVectorImpl<ISD::ArgFlagsTy> &PendingArgFlags =
   6747       State.getPendingArgFlags();
   6748 
   6749   assert(PendingLocs.size() == PendingArgFlags.size() &&
   6750          "PendingLocs and PendingArgFlags out of sync");
   6751 
   6752   // Handle passing f64 on RV32D with a soft float ABI or when floating point
   6753   // registers are exhausted.
   6754   if (UseGPRForF64 && XLen == 32 && ValVT == MVT::f64) {
   6755     assert(!ArgFlags.isSplit() && PendingLocs.empty() &&
   6756            "Can't lower f64 if it is split");
   6757     // Depending on available argument GPRS, f64 may be passed in a pair of
   6758     // GPRs, split between a GPR and the stack, or passed completely on the
   6759     // stack. LowerCall/LowerFormalArguments/LowerReturn must recognise these
   6760     // cases.
   6761     Register Reg = State.AllocateReg(ArgGPRs);
   6762     LocVT = MVT::i32;
   6763     if (!Reg) {
   6764       unsigned StackOffset = State.AllocateStack(8, Align(8));
   6765       State.addLoc(
   6766           CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
   6767       return false;
   6768     }
   6769     if (!State.AllocateReg(ArgGPRs))
   6770       State.AllocateStack(4, Align(4));
   6771     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   6772     return false;
   6773   }
   6774 
   6775   // Fixed-length vectors are located in the corresponding scalable-vector
   6776   // container types.
   6777   if (ValVT.isFixedLengthVector())
   6778     LocVT = TLI.getContainerForFixedLengthVector(LocVT);
   6779 
   6780   // Split arguments might be passed indirectly, so keep track of the pending
   6781   // values. Split vectors are passed via a mix of registers and indirectly, so
   6782   // treat them as we would any other argument.
   6783   if (!LocVT.isVector() && (ArgFlags.isSplit() || !PendingLocs.empty())) {
   6784     LocVT = XLenVT;
   6785     LocInfo = CCValAssign::Indirect;
   6786     PendingLocs.push_back(
   6787         CCValAssign::getPending(ValNo, ValVT, LocVT, LocInfo));
   6788     PendingArgFlags.push_back(ArgFlags);
   6789     if (!ArgFlags.isSplitEnd()) {
   6790       return false;
   6791     }
   6792   }
   6793 
   6794   // If the split argument only had two elements, it should be passed directly
   6795   // in registers or on the stack.
   6796   if (!LocVT.isVector() && ArgFlags.isSplitEnd() && PendingLocs.size() <= 2) {
   6797     assert(PendingLocs.size() == 2 && "Unexpected PendingLocs.size()");
   6798     // Apply the normal calling convention rules to the first half of the
   6799     // split argument.
   6800     CCValAssign VA = PendingLocs[0];
   6801     ISD::ArgFlagsTy AF = PendingArgFlags[0];
   6802     PendingLocs.clear();
   6803     PendingArgFlags.clear();
   6804     return CC_RISCVAssign2XLen(XLen, State, VA, AF, ValNo, ValVT, LocVT,
   6805                                ArgFlags);
   6806   }
   6807 
   6808   // Allocate to a register if possible, or else a stack slot.
   6809   Register Reg;
   6810   if (ValVT == MVT::f16 && !UseGPRForF16_F32)
   6811     Reg = State.AllocateReg(ArgFPR16s);
   6812   else if (ValVT == MVT::f32 && !UseGPRForF16_F32)
   6813     Reg = State.AllocateReg(ArgFPR32s);
   6814   else if (ValVT == MVT::f64 && !UseGPRForF64)
   6815     Reg = State.AllocateReg(ArgFPR64s);
   6816   else if (ValVT.isVector()) {
   6817     const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
   6818     if (RC == &RISCV::VRRegClass) {
   6819       // Assign the first mask argument to V0.
   6820       // This is an interim calling convention and it may be changed in the
   6821       // future.
   6822       if (FirstMaskArgument.hasValue() &&
   6823           ValNo == FirstMaskArgument.getValue()) {
   6824         Reg = State.AllocateReg(RISCV::V0);
   6825       } else {
   6826         Reg = State.AllocateReg(ArgVRs);
   6827       }
   6828     } else if (RC == &RISCV::VRM2RegClass) {
   6829       Reg = State.AllocateReg(ArgVRM2s);
   6830     } else if (RC == &RISCV::VRM4RegClass) {
   6831       Reg = State.AllocateReg(ArgVRM4s);
   6832     } else if (RC == &RISCV::VRM8RegClass) {
   6833       Reg = State.AllocateReg(ArgVRM8s);
   6834     } else {
   6835       llvm_unreachable("Unhandled class register for ValueType");
   6836     }
   6837     if (!Reg) {
   6838       // For return values, the vector must be passed fully via registers or
   6839       // via the stack.
   6840       // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
   6841       // but we're using all of them.
   6842       if (IsRet)
   6843         return true;
   6844       LocInfo = CCValAssign::Indirect;
   6845       // Try using a GPR to pass the address
   6846       Reg = State.AllocateReg(ArgGPRs);
   6847       LocVT = XLenVT;
   6848     }
   6849   } else
   6850     Reg = State.AllocateReg(ArgGPRs);
   6851   unsigned StackOffset =
   6852       Reg ? 0 : State.AllocateStack(XLen / 8, Align(XLen / 8));
   6853 
   6854   // If we reach this point and PendingLocs is non-empty, we must be at the
   6855   // end of a split argument that must be passed indirectly.
   6856   if (!PendingLocs.empty()) {
   6857     assert(ArgFlags.isSplitEnd() && "Expected ArgFlags.isSplitEnd()");
   6858     assert(PendingLocs.size() > 2 && "Unexpected PendingLocs.size()");
   6859 
   6860     for (auto &It : PendingLocs) {
   6861       if (Reg)
   6862         It.convertToReg(Reg);
   6863       else
   6864         It.convertToMem(StackOffset);
   6865       State.addLoc(It);
   6866     }
   6867     PendingLocs.clear();
   6868     PendingArgFlags.clear();
   6869     return false;
   6870   }
   6871 
   6872   assert((!UseGPRForF16_F32 || !UseGPRForF64 || LocVT == XLenVT ||
   6873           (TLI.getSubtarget().hasStdExtV() && ValVT.isVector())) &&
   6874          "Expected an XLenVT or vector types at this stage");
   6875 
   6876   if (Reg) {
   6877     State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   6878     return false;
   6879   }
   6880 
   6881   // When a floating-point value is passed on the stack, no bit-conversion is
   6882   // needed.
   6883   if (ValVT.isFloatingPoint()) {
   6884     LocVT = ValVT;
   6885     LocInfo = CCValAssign::Full;
   6886   }
   6887   State.addLoc(CCValAssign::getMem(ValNo, ValVT, StackOffset, LocVT, LocInfo));
   6888   return false;
   6889 }
   6890 
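// Return the index of the first vector mask argument (one with an i1 element
// type), if any. CC_RISCV assigns that argument to V0.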
   6891 template <typename ArgTy>
   6892 static Optional<unsigned> preAssignMask(const ArgTy &Args) {
   6893   for (const auto &ArgIdx : enumerate(Args)) {
   6894     MVT ArgVT = ArgIdx.value().VT;
   6895     if (ArgVT.isVector() && ArgVT.getVectorElementType() == MVT::i1)
   6896       return ArgIdx.index();
   6897   }
   6898   return None;
   6899 }
   6900 
   6901 void RISCVTargetLowering::analyzeInputArgs(
   6902     MachineFunction &MF, CCState &CCInfo,
   6903     const SmallVectorImpl<ISD::InputArg> &Ins, bool IsRet) const {
   6904   unsigned NumArgs = Ins.size();
   6905   FunctionType *FType = MF.getFunction().getFunctionType();
   6906 
   6907   Optional<unsigned> FirstMaskArgument;
   6908   if (Subtarget.hasStdExtV())
   6909     FirstMaskArgument = preAssignMask(Ins);
   6910 
   6911   for (unsigned i = 0; i != NumArgs; ++i) {
   6912     MVT ArgVT = Ins[i].VT;
   6913     ISD::ArgFlagsTy ArgFlags = Ins[i].Flags;
   6914 
   6915     Type *ArgTy = nullptr;
   6916     if (IsRet)
   6917       ArgTy = FType->getReturnType();
   6918     else if (Ins[i].isOrigArg())
   6919       ArgTy = FType->getParamType(Ins[i].getOrigArgIndex());
   6920 
   6921     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
   6922     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
   6923                  ArgFlags, CCInfo, /*IsFixed=*/true, IsRet, ArgTy, *this,
   6924                  FirstMaskArgument)) {
   6925       LLVM_DEBUG(dbgs() << "InputArg #" << i << " has unhandled type "
   6926                         << EVT(ArgVT).getEVTString() << '\n');
   6927       llvm_unreachable(nullptr);
   6928     }
   6929   }
   6930 }
   6931 
   6932 void RISCVTargetLowering::analyzeOutputArgs(
   6933     MachineFunction &MF, CCState &CCInfo,
   6934     const SmallVectorImpl<ISD::OutputArg> &Outs, bool IsRet,
   6935     CallLoweringInfo *CLI) const {
   6936   unsigned NumArgs = Outs.size();
   6937 
   6938   Optional<unsigned> FirstMaskArgument;
   6939   if (Subtarget.hasStdExtV())
   6940     FirstMaskArgument = preAssignMask(Outs);
   6941 
   6942   for (unsigned i = 0; i != NumArgs; i++) {
   6943     MVT ArgVT = Outs[i].VT;
   6944     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
   6945     Type *OrigTy = CLI ? CLI->getArgs()[Outs[i].OrigArgIndex].Ty : nullptr;
   6946 
   6947     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
   6948     if (CC_RISCV(MF.getDataLayout(), ABI, i, ArgVT, ArgVT, CCValAssign::Full,
   6949                  ArgFlags, CCInfo, Outs[i].IsFixed, IsRet, OrigTy, *this,
   6950                  FirstMaskArgument)) {
   6951       LLVM_DEBUG(dbgs() << "OutputArg #" << i << " has unhandled type "
   6952                         << EVT(ArgVT).getEVTString() << "\n");
   6953       llvm_unreachable(nullptr);
   6954     }
   6955   }
   6956 }
   6957 
   6958 // Convert Val to a ValVT. Should not be called for CCValAssign::Indirect
   6959 // values.
   6960 static SDValue convertLocVTToValVT(SelectionDAG &DAG, SDValue Val,
   6961                                    const CCValAssign &VA, const SDLoc &DL,
   6962                                    const RISCVSubtarget &Subtarget) {
   6963   switch (VA.getLocInfo()) {
   6964   default:
   6965     llvm_unreachable("Unexpected CCValAssign::LocInfo");
   6966   case CCValAssign::Full:
   6967     if (VA.getValVT().isFixedLengthVector() && VA.getLocVT().isScalableVector())
   6968       Val = convertFromScalableVector(VA.getValVT(), Val, DAG, Subtarget);
   6969     break;
   6970   case CCValAssign::BCvt:
   6971     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
   6972       Val = DAG.getNode(RISCVISD::FMV_H_X, DL, MVT::f16, Val);
   6973     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
   6974       Val = DAG.getNode(RISCVISD::FMV_W_X_RV64, DL, MVT::f32, Val);
   6975     else
   6976       Val = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), Val);
   6977     break;
   6978   }
   6979   return Val;
   6980 }
   6981 
   6982 // The caller is responsible for loading the full value if the argument is
   6983 // passed with CCValAssign::Indirect.
   6984 static SDValue unpackFromRegLoc(SelectionDAG &DAG, SDValue Chain,
   6985                                 const CCValAssign &VA, const SDLoc &DL,
   6986                                 const RISCVTargetLowering &TLI) {
   6987   MachineFunction &MF = DAG.getMachineFunction();
   6988   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   6989   EVT LocVT = VA.getLocVT();
   6990   SDValue Val;
   6991   const TargetRegisterClass *RC = TLI.getRegClassFor(LocVT.getSimpleVT());
   6992   Register VReg = RegInfo.createVirtualRegister(RC);
   6993   RegInfo.addLiveIn(VA.getLocReg(), VReg);
   6994   Val = DAG.getCopyFromReg(Chain, DL, VReg, LocVT);
   6995 
   6996   if (VA.getLocInfo() == CCValAssign::Indirect)
   6997     return Val;
   6998 
   6999   return convertLocVTToValVT(DAG, Val, VA, DL, TLI.getSubtarget());
   7000 }
   7001 
   7002 static SDValue convertValVTToLocVT(SelectionDAG &DAG, SDValue Val,
   7003                                    const CCValAssign &VA, const SDLoc &DL,
   7004                                    const RISCVSubtarget &Subtarget) {
   7005   EVT LocVT = VA.getLocVT();
   7006 
   7007   switch (VA.getLocInfo()) {
   7008   default:
   7009     llvm_unreachable("Unexpected CCValAssign::LocInfo");
   7010   case CCValAssign::Full:
   7011     if (VA.getValVT().isFixedLengthVector() && LocVT.isScalableVector())
   7012       Val = convertToScalableVector(LocVT, Val, DAG, Subtarget);
   7013     break;
   7014   case CCValAssign::BCvt:
   7015     if (VA.getLocVT().isInteger() && VA.getValVT() == MVT::f16)
   7016       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTH, DL, VA.getLocVT(), Val);
   7017     else if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
   7018       Val = DAG.getNode(RISCVISD::FMV_X_ANYEXTW_RV64, DL, MVT::i64, Val);
   7019     else
   7020       Val = DAG.getNode(ISD::BITCAST, DL, LocVT, Val);
   7021     break;
   7022   }
   7023   return Val;
   7024 }
   7025 
   7026 // The caller is responsible for loading the full value if the argument is
   7027 // passed with CCValAssign::Indirect.
   7028 static SDValue unpackFromMemLoc(SelectionDAG &DAG, SDValue Chain,
   7029                                 const CCValAssign &VA, const SDLoc &DL) {
   7030   MachineFunction &MF = DAG.getMachineFunction();
   7031   MachineFrameInfo &MFI = MF.getFrameInfo();
   7032   EVT LocVT = VA.getLocVT();
   7033   EVT ValVT = VA.getValVT();
   7034   EVT PtrVT = MVT::getIntegerVT(DAG.getDataLayout().getPointerSizeInBits(0));
   7035   int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
   7036                                  VA.getLocMemOffset(), /*Immutable=*/true);
   7037   SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
   7038   SDValue Val;
   7039 
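          // Every LocInfo kind handled below currently uses a plain
          // (non-extending) load; the switch leaves room for extending loads.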
   7040   ISD::LoadExtType ExtType;
   7041   switch (VA.getLocInfo()) {
   7042   default:
   7043     llvm_unreachable("Unexpected CCValAssign::LocInfo");
   7044   case CCValAssign::Full:
   7045   case CCValAssign::Indirect:
   7046   case CCValAssign::BCvt:
   7047     ExtType = ISD::NON_EXTLOAD;
   7048     break;
   7049   }
   7050   Val = DAG.getExtLoad(
   7051       ExtType, DL, LocVT, Chain, FIN,
   7052       MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), ValVT);
   7053   return Val;
   7054 }
   7055 
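        // Unpack an f64 that the RV32 soft-float ABI splits into two i32 halves:
        // the low half lives in VA's GPR, and the high half either in the next
        // GPR or, when the low half landed in a7 (X17), in the first stack slot.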
   7056 static SDValue unpackF64OnRV32DSoftABI(SelectionDAG &DAG, SDValue Chain,
   7057                                        const CCValAssign &VA, const SDLoc &DL) {
   7058   assert(VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64 &&
   7059          "Unexpected VA");
   7060   MachineFunction &MF = DAG.getMachineFunction();
   7061   MachineFrameInfo &MFI = MF.getFrameInfo();
   7062   MachineRegisterInfo &RegInfo = MF.getRegInfo();
   7063 
   7064   if (VA.isMemLoc()) {
   7065     // f64 is passed on the stack.
   7066     int FI = MFI.CreateFixedObject(8, VA.getLocMemOffset(), /*Immutable=*/true);
   7067     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
   7068     return DAG.getLoad(MVT::f64, DL, Chain, FIN,
   7069                        MachinePointerInfo::getFixedStack(MF, FI));
   7070   }
   7071 
   7072   assert(VA.isRegLoc() && "Expected register VA assignment");
   7073 
   7074   Register LoVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
   7075   RegInfo.addLiveIn(VA.getLocReg(), LoVReg);
   7076   SDValue Lo = DAG.getCopyFromReg(Chain, DL, LoVReg, MVT::i32);
   7077   SDValue Hi;
   7078   if (VA.getLocReg() == RISCV::X17) {
   7079     // Second half of f64 is passed on the stack.
   7080     int FI = MFI.CreateFixedObject(4, 0, /*Immutable=*/true);
   7081     SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
   7082     Hi = DAG.getLoad(MVT::i32, DL, Chain, FIN,
   7083                      MachinePointerInfo::getFixedStack(MF, FI));
   7084   } else {
   7085     // Second half of f64 is passed in another GPR.
   7086     Register HiVReg = RegInfo.createVirtualRegister(&RISCV::GPRRegClass);
   7087     RegInfo.addLiveIn(VA.getLocReg() + 1, HiVReg);
   7088     Hi = DAG.getCopyFromReg(Chain, DL, HiVReg, MVT::i32);
   7089   }
   7090   return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
   7091 }
   7092 
   7093 // FastCC provides less than a 1% performance improvement on particular
   7094 // benchmarks, but in theory it may benefit other cases.
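        // FastCC extends the usual a0-a7 argument registers with the caller-saved
        // temporaries t2-t6 (X7, X28-X31) before falling back to the stack.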
   7095 static bool CC_RISCV_FastCC(unsigned ValNo, MVT ValVT, MVT LocVT,
   7096                             CCValAssign::LocInfo LocInfo,
   7097                             ISD::ArgFlagsTy ArgFlags, CCState &State) {
   7098 
   7099   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
   7100     // X5 and X6 might be used for save-restore libcall.
   7101     static const MCPhysReg GPRList[] = {
   7102         RISCV::X10, RISCV::X11, RISCV::X12, RISCV::X13, RISCV::X14,
   7103         RISCV::X15, RISCV::X16, RISCV::X17, RISCV::X7,  RISCV::X28,
   7104         RISCV::X29, RISCV::X30, RISCV::X31};
   7105     if (unsigned Reg = State.AllocateReg(GPRList)) {
   7106       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   7107       return false;
   7108     }
   7109   }
   7110 
   7111   if (LocVT == MVT::f16) {
   7112     static const MCPhysReg FPR16List[] = {
   7113         RISCV::F10_H, RISCV::F11_H, RISCV::F12_H, RISCV::F13_H, RISCV::F14_H,
   7114         RISCV::F15_H, RISCV::F16_H, RISCV::F17_H, RISCV::F0_H,  RISCV::F1_H,
   7115         RISCV::F2_H,  RISCV::F3_H,  RISCV::F4_H,  RISCV::F5_H,  RISCV::F6_H,
   7116         RISCV::F7_H,  RISCV::F28_H, RISCV::F29_H, RISCV::F30_H, RISCV::F31_H};
   7117     if (unsigned Reg = State.AllocateReg(FPR16List)) {
   7118       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   7119       return false;
   7120     }
   7121   }
   7122 
   7123   if (LocVT == MVT::f32) {
   7124     static const MCPhysReg FPR32List[] = {
   7125         RISCV::F10_F, RISCV::F11_F, RISCV::F12_F, RISCV::F13_F, RISCV::F14_F,
   7126         RISCV::F15_F, RISCV::F16_F, RISCV::F17_F, RISCV::F0_F,  RISCV::F1_F,
   7127         RISCV::F2_F,  RISCV::F3_F,  RISCV::F4_F,  RISCV::F5_F,  RISCV::F6_F,
   7128         RISCV::F7_F,  RISCV::F28_F, RISCV::F29_F, RISCV::F30_F, RISCV::F31_F};
   7129     if (unsigned Reg = State.AllocateReg(FPR32List)) {
   7130       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   7131       return false;
   7132     }
   7133   }
   7134 
   7135   if (LocVT == MVT::f64) {
   7136     static const MCPhysReg FPR64List[] = {
   7137         RISCV::F10_D, RISCV::F11_D, RISCV::F12_D, RISCV::F13_D, RISCV::F14_D,
   7138         RISCV::F15_D, RISCV::F16_D, RISCV::F17_D, RISCV::F0_D,  RISCV::F1_D,
   7139         RISCV::F2_D,  RISCV::F3_D,  RISCV::F4_D,  RISCV::F5_D,  RISCV::F6_D,
   7140         RISCV::F7_D,  RISCV::F28_D, RISCV::F29_D, RISCV::F30_D, RISCV::F31_D};
   7141     if (unsigned Reg = State.AllocateReg(FPR64List)) {
   7142       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   7143       return false;
   7144     }
   7145   }
   7146 
   7147   if (LocVT == MVT::i32 || LocVT == MVT::f32) {
   7148     unsigned Offset4 = State.AllocateStack(4, Align(4));
   7149     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset4, LocVT, LocInfo));
   7150     return false;
   7151   }
   7152 
   7153   if (LocVT == MVT::i64 || LocVT == MVT::f64) {
   7154     unsigned Offset5 = State.AllocateStack(8, Align(8));
   7155     State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset5, LocVT, LocInfo));
   7156     return false;
   7157   }
   7158 
   7159   return true; // CC didn't match.
   7160 }
   7161 
   7162 static bool CC_RISCV_GHC(unsigned ValNo, MVT ValVT, MVT LocVT,
   7163                          CCValAssign::LocInfo LocInfo,
   7164                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
   7165 
   7166   if (LocVT == MVT::i32 || LocVT == MVT::i64) {
   7167     // Pass in STG registers: Base, Sp, Hp, R1, R2, R3, R4, R5, R6, R7, SpLim
   7168     //                        s1    s2  s3  s4  s5  s6  s7  s8  s9  s10 s11
   7169     static const MCPhysReg GPRList[] = {
   7170         RISCV::X9, RISCV::X18, RISCV::X19, RISCV::X20, RISCV::X21, RISCV::X22,
   7171         RISCV::X23, RISCV::X24, RISCV::X25, RISCV::X26, RISCV::X27};
   7172     if (unsigned Reg = State.AllocateReg(GPRList)) {
   7173       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   7174       return false;
   7175     }
   7176   }
   7177 
   7178   if (LocVT == MVT::f32) {
   7179     // Pass in STG registers: F1, ..., F6
   7180     //                        fs0 ... fs5
   7181     static const MCPhysReg FPR32List[] = {RISCV::F8_F, RISCV::F9_F,
   7182                                           RISCV::F18_F, RISCV::F19_F,
   7183                                           RISCV::F20_F, RISCV::F21_F};
   7184     if (unsigned Reg = State.AllocateReg(FPR32List)) {
   7185       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   7186       return false;
   7187     }
   7188   }
   7189 
   7190   if (LocVT == MVT::f64) {
   7191     // Pass in STG registers: D1, ..., D6
   7192     //                        fs6 ... fs11
   7193     static const MCPhysReg FPR64List[] = {RISCV::F22_D, RISCV::F23_D,
   7194                                           RISCV::F24_D, RISCV::F25_D,
   7195                                           RISCV::F26_D, RISCV::F27_D};
   7196     if (unsigned Reg = State.AllocateReg(FPR64List)) {
   7197       State.addLoc(CCValAssign::getReg(ValNo, ValVT, Reg, LocVT, LocInfo));
   7198       return false;
   7199     }
   7200   }
   7201 
   7202   report_fatal_error("No registers left in GHC calling convention");
   7203   return true;
   7204 }
   7205 
   7206 // Transform physical registers into virtual registers.
   7207 SDValue RISCVTargetLowering::LowerFormalArguments(
   7208     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
   7209     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
   7210     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   7211 
   7212   MachineFunction &MF = DAG.getMachineFunction();
   7213 
   7214   switch (CallConv) {
   7215   default:
   7216     report_fatal_error("Unsupported calling convention");
   7217   case CallingConv::C:
   7218   case CallingConv::Fast:
   7219     break;
   7220   case CallingConv::GHC:
   7221     if (!MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtF] ||
   7222         !MF.getSubtarget().getFeatureBits()[RISCV::FeatureStdExtD])
   7223       report_fatal_error(
   7224         "GHC calling convention requires the F and D instruction set extensions");
   7225   }
   7226 
   7227   const Function &Func = MF.getFunction();
   7228   if (Func.hasFnAttribute("interrupt")) {
   7229     if (!Func.arg_empty())
   7230       report_fatal_error(
   7231         "Functions with the interrupt attribute cannot have arguments!");
   7232 
   7233     StringRef Kind =
   7234       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
   7235 
   7236     if (!(Kind == "user" || Kind == "supervisor" || Kind == "machine"))
   7237       report_fatal_error(
   7238         "Function interrupt attribute argument not supported!");
   7239   }
   7240 
   7241   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   7242   MVT XLenVT = Subtarget.getXLenVT();
   7243   unsigned XLenInBytes = Subtarget.getXLen() / 8;
   7244   // Used with varargs to accumulate store chains.
   7245   std::vector<SDValue> OutChains;
   7246 
   7247   // Assign locations to all of the incoming arguments.
   7248   SmallVector<CCValAssign, 16> ArgLocs;
   7249   CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
   7250 
   7251   if (CallConv == CallingConv::Fast)
   7252     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_FastCC);
   7253   else if (CallConv == CallingConv::GHC)
   7254     CCInfo.AnalyzeFormalArguments(Ins, CC_RISCV_GHC);
   7255   else
   7256     analyzeInputArgs(MF, CCInfo, Ins, /*IsRet=*/false);
   7257 
   7258   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
   7259     CCValAssign &VA = ArgLocs[i];
   7260     SDValue ArgValue;
   7261     // Passing f64 on RV32D with a soft float ABI must be handled as a special
   7262     // case.
   7263     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64)
   7264       ArgValue = unpackF64OnRV32DSoftABI(DAG, Chain, VA, DL);
   7265     else if (VA.isRegLoc())
   7266       ArgValue = unpackFromRegLoc(DAG, Chain, VA, DL, *this);
   7267     else
   7268       ArgValue = unpackFromMemLoc(DAG, Chain, VA, DL);
   7269 
   7270     if (VA.getLocInfo() == CCValAssign::Indirect) {
   7271       // If the original argument was split and passed by reference (e.g. i128
   7272       // on RV32), we need to load all parts of it here (using the same
   7273       // address). Vectors may be partly split to registers and partly to the
   7274       // stack, in which case the base address is partly offset and subsequent
   7275       // stores are relative to that.
   7276       InVals.push_back(DAG.getLoad(VA.getValVT(), DL, Chain, ArgValue,
   7277                                    MachinePointerInfo()));
   7278       unsigned ArgIndex = Ins[i].OrigArgIndex;
   7279       unsigned ArgPartOffset = Ins[i].PartOffset;
   7280       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
   7281       while (i + 1 != e && Ins[i + 1].OrigArgIndex == ArgIndex) {
   7282         CCValAssign &PartVA = ArgLocs[i + 1];
   7283         unsigned PartOffset = Ins[i + 1].PartOffset - ArgPartOffset;
   7284         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, ArgValue,
   7285                                       DAG.getIntPtrConstant(PartOffset, DL));
   7286         InVals.push_back(DAG.getLoad(PartVA.getValVT(), DL, Chain, Address,
   7287                                      MachinePointerInfo()));
   7288         ++i;
   7289       }
   7290       continue;
   7291     }
   7292     InVals.push_back(ArgValue);
   7293   }
   7294 
   7295   if (IsVarArg) {
   7296     ArrayRef<MCPhysReg> ArgRegs = makeArrayRef(ArgGPRs);
   7297     unsigned Idx = CCInfo.getFirstUnallocated(ArgRegs);
   7298     const TargetRegisterClass *RC = &RISCV::GPRRegClass;
   7299     MachineFrameInfo &MFI = MF.getFrameInfo();
   7300     MachineRegisterInfo &RegInfo = MF.getRegInfo();
   7301     RISCVMachineFunctionInfo *RVFI = MF.getInfo<RISCVMachineFunctionInfo>();
   7302 
   7303     // Offset of the first variable argument from stack pointer, and size of
   7304     // the vararg save area. For now, the varargs save area is either zero or
   7305     // large enough to hold a0-a7.
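            // For example, on RV32 a varargs function with two fixed integer
            // arguments leaves Idx == 2, so a2-a7 are saved in a 24-byte area
            // at negative offsets (VaArgOffset == -24) below the incoming
            // stack arguments.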
   7306     int VaArgOffset, VarArgsSaveSize;
   7307 
   7308     // If all registers are allocated, then all varargs must be passed on the
   7309     // stack and we don't need to save any argregs.
   7310     if (ArgRegs.size() == Idx) {
   7311       VaArgOffset = CCInfo.getNextStackOffset();
   7312       VarArgsSaveSize = 0;
   7313     } else {
   7314       VarArgsSaveSize = XLenInBytes * (ArgRegs.size() - Idx);
   7315       VaArgOffset = -VarArgsSaveSize;
   7316     }
   7317 
   7318     // Record the frame index of the first variable argument
   7319     // which is the value needed by VASTART.
   7320     int FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
   7321     RVFI->setVarArgsFrameIndex(FI);
   7322 
   7323     // If saving an odd number of registers then create an extra stack slot to
   7324     // ensure that the frame pointer is 2*XLEN-aligned, which in turn ensures
   7325     // offsets to even-numbered registers remain 2*XLEN-aligned.
   7326     if (Idx % 2) {
   7327       MFI.CreateFixedObject(XLenInBytes, VaArgOffset - (int)XLenInBytes, true);
   7328       VarArgsSaveSize += XLenInBytes;
   7329     }
   7330 
   7331     // Copy the integer registers that may have been used for passing varargs
   7332     // to the vararg save area.
   7333     for (unsigned I = Idx; I < ArgRegs.size();
   7334          ++I, VaArgOffset += XLenInBytes) {
   7335       const Register Reg = RegInfo.createVirtualRegister(RC);
   7336       RegInfo.addLiveIn(ArgRegs[I], Reg);
   7337       SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, Reg, XLenVT);
   7338       FI = MFI.CreateFixedObject(XLenInBytes, VaArgOffset, true);
   7339       SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
   7340       SDValue Store = DAG.getStore(Chain, DL, ArgValue, PtrOff,
   7341                                    MachinePointerInfo::getFixedStack(MF, FI));
   7342       cast<StoreSDNode>(Store.getNode())
   7343           ->getMemOperand()
   7344           ->setValue((Value *)nullptr);
   7345       OutChains.push_back(Store);
   7346     }
   7347     RVFI->setVarArgsSaveSize(VarArgsSaveSize);
   7348   }
   7349 
   7350   // All stores are grouped in one node to allow the matching between
   7351   // the size of Ins and InVals. This only happens for vararg functions.
   7352   if (!OutChains.empty()) {
   7353     OutChains.push_back(Chain);
   7354     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
   7355   }
   7356 
   7357   return Chain;
   7358 }
   7359 
   7360 /// isEligibleForTailCallOptimization - Check whether the call is eligible
   7361 /// for tail call optimization.
   7362 /// Note: This is modelled after ARM's IsEligibleForTailCallOptimization.
   7363 bool RISCVTargetLowering::isEligibleForTailCallOptimization(
   7364     CCState &CCInfo, CallLoweringInfo &CLI, MachineFunction &MF,
   7365     const SmallVector<CCValAssign, 16> &ArgLocs) const {
   7366 
   7367   auto &Callee = CLI.Callee;
   7368   auto CalleeCC = CLI.CallConv;
   7369   auto &Outs = CLI.Outs;
   7370   auto &Caller = MF.getFunction();
   7371   auto CallerCC = Caller.getCallingConv();
   7372 
   7373   // Exception-handling functions need a special set of instructions to
   7374   // indicate a return to the hardware. Tail-calling another function would
   7375   // probably break this.
   7376   // TODO: The "interrupt" attribute isn't currently defined by RISC-V. This
   7377   // should be expanded as new function attributes are introduced.
   7378   if (Caller.hasFnAttribute("interrupt"))
   7379     return false;
   7380 
   7381   // Do not tail call opt if the stack is used to pass parameters.
   7382   if (CCInfo.getNextStackOffset() != 0)
   7383     return false;
   7384 
   7385   // Do not tail call opt if any parameters need to be passed indirectly.
   7386   // Since long doubles (fp128) and i128 are larger than 2*XLEN, they are
   7387   // passed indirectly. So the address of the value will be passed in a
   7388   // register, or if not available, the address is put on the stack. Passing
   7389   // indirectly often requires allocating stack space to hold the value, so
   7390   // the CCInfo.getNextStackOffset() != 0 check alone is not sufficient; we
   7391   // must also check whether any CCValAssign in ArgLocs is passed
   7392   // CCValAssign::Indirect.
   7393   for (auto &VA : ArgLocs)
   7394     if (VA.getLocInfo() == CCValAssign::Indirect)
   7395       return false;
   7396 
   7397   // Do not tail call opt if either caller or callee uses struct return
   7398   // semantics.
   7399   auto IsCallerStructRet = Caller.hasStructRetAttr();
   7400   auto IsCalleeStructRet = Outs.empty() ? false : Outs[0].Flags.isSRet();
   7401   if (IsCallerStructRet || IsCalleeStructRet)
   7402     return false;
   7403 
   7404   // Externally-defined functions with weak linkage should not be
   7405   // tail-called. The behaviour of branch instructions in this situation (as
   7406   // used for tail calls) is implementation-defined, so we cannot rely on the
   7407   // linker replacing the tail call with a return.
   7408   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
   7409     const GlobalValue *GV = G->getGlobal();
   7410     if (GV->hasExternalWeakLinkage())
   7411       return false;
   7412   }
   7413 
   7414   // The callee has to preserve all registers the caller needs to preserve.
   7415   const RISCVRegisterInfo *TRI = Subtarget.getRegisterInfo();
   7416   const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
   7417   if (CalleeCC != CallerCC) {
   7418     const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
   7419     if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
   7420       return false;
   7421   }
   7422 
   7423   // Byval parameters hand the function a pointer directly into the stack area
   7424   // we want to reuse during a tail call. Working around this *is* possible
   7425   // but less efficient and uglier in LowerCall.
   7426   for (auto &Arg : Outs)
   7427     if (Arg.Flags.isByVal())
   7428       return false;
   7429 
   7430   return true;
   7431 }
   7432 
   7433 static Align getPrefTypeAlign(EVT VT, SelectionDAG &DAG) {
   7434   return DAG.getDataLayout().getPrefTypeAlign(
   7435       VT.getTypeForEVT(*DAG.getContext()));
   7436 }
   7437 
   7438 // Lower a call to a callseq_start + CALL + callseq_end chain, and add input
   7439 // and output parameter nodes.
   7440 SDValue RISCVTargetLowering::LowerCall(CallLoweringInfo &CLI,
   7441                                        SmallVectorImpl<SDValue> &InVals) const {
   7442   SelectionDAG &DAG = CLI.DAG;
   7443   SDLoc &DL = CLI.DL;
   7444   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
   7445   SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
   7446   SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
   7447   SDValue Chain = CLI.Chain;
   7448   SDValue Callee = CLI.Callee;
   7449   bool &IsTailCall = CLI.IsTailCall;
   7450   CallingConv::ID CallConv = CLI.CallConv;
   7451   bool IsVarArg = CLI.IsVarArg;
   7452   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   7453   MVT XLenVT = Subtarget.getXLenVT();
   7454 
   7455   MachineFunction &MF = DAG.getMachineFunction();
   7456 
   7457   // Analyze the operands of the call, assigning locations to each operand.
   7458   SmallVector<CCValAssign, 16> ArgLocs;
   7459   CCState ArgCCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
   7460 
   7461   if (CallConv == CallingConv::Fast)
   7462     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_FastCC);
   7463   else if (CallConv == CallingConv::GHC)
   7464     ArgCCInfo.AnalyzeCallOperands(Outs, CC_RISCV_GHC);
   7465   else
   7466     analyzeOutputArgs(MF, ArgCCInfo, Outs, /*IsRet=*/false, &CLI);
   7467 
   7468   // Check if it's really possible to do a tail call.
   7469   if (IsTailCall)
   7470     IsTailCall = isEligibleForTailCallOptimization(ArgCCInfo, CLI, MF, ArgLocs);
   7471 
   7472   if (IsTailCall)
   7473     ++NumTailCalls;
   7474   else if (CLI.CB && CLI.CB->isMustTailCall())
   7475     report_fatal_error("failed to perform tail call elimination on a call "
   7476                        "site marked musttail");
   7477 
   7478   // Get a count of how many bytes are to be pushed on the stack.
   7479   unsigned NumBytes = ArgCCInfo.getNextStackOffset();
   7480 
   7481   // Create local copies for byval args
   7482   SmallVector<SDValue, 8> ByValArgs;
   7483   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
   7484     ISD::ArgFlagsTy Flags = Outs[i].Flags;
   7485     if (!Flags.isByVal())
   7486       continue;
   7487 
   7488     SDValue Arg = OutVals[i];
   7489     unsigned Size = Flags.getByValSize();
   7490     Align Alignment = Flags.getNonZeroByValAlign();
   7491 
   7492     int FI =
   7493         MF.getFrameInfo().CreateStackObject(Size, Alignment, /*isSS=*/false);
   7494     SDValue FIPtr = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
   7495     SDValue SizeNode = DAG.getConstant(Size, DL, XLenVT);
   7496 
   7497     Chain = DAG.getMemcpy(Chain, DL, FIPtr, Arg, SizeNode, Alignment,
   7498                           /*IsVolatile=*/false,
   7499                           /*AlwaysInline=*/false, IsTailCall,
   7500                           MachinePointerInfo(), MachinePointerInfo());
   7501     ByValArgs.push_back(FIPtr);
   7502   }
   7503 
   7504   if (!IsTailCall)
   7505     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);
   7506 
   7507   // Copy argument values to their designated locations.
   7508   SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
   7509   SmallVector<SDValue, 8> MemOpChains;
   7510   SDValue StackPtr;
   7511   for (unsigned i = 0, j = 0, e = ArgLocs.size(); i != e; ++i) {
   7512     CCValAssign &VA = ArgLocs[i];
   7513     SDValue ArgValue = OutVals[i];
   7514     ISD::ArgFlagsTy Flags = Outs[i].Flags;
   7515 
   7516     // Handle passing f64 on RV32D with a soft float ABI as a special case.
   7517     bool IsF64OnRV32DSoftABI =
   7518         VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64;
   7519     if (IsF64OnRV32DSoftABI && VA.isRegLoc()) {
   7520       SDValue SplitF64 = DAG.getNode(
   7521           RISCVISD::SplitF64, DL, DAG.getVTList(MVT::i32, MVT::i32), ArgValue);
   7522       SDValue Lo = SplitF64.getValue(0);
   7523       SDValue Hi = SplitF64.getValue(1);
   7524 
   7525       Register RegLo = VA.getLocReg();
   7526       RegsToPass.push_back(std::make_pair(RegLo, Lo));
   7527 
   7528       if (RegLo == RISCV::X17) {
   7529         // Second half of f64 is passed on the stack.
   7530         // Work out the address of the stack slot.
   7531         if (!StackPtr.getNode())
   7532           StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
   7533         // Emit the store.
   7534         MemOpChains.push_back(
   7535             DAG.getStore(Chain, DL, Hi, StackPtr, MachinePointerInfo()));
   7536       } else {
   7537         // Second half of f64 is passed in another GPR.
   7538         assert(RegLo < RISCV::X31 && "Invalid register pair");
   7539         Register RegHigh = RegLo + 1;
   7540         RegsToPass.push_back(std::make_pair(RegHigh, Hi));
   7541       }
   7542       continue;
   7543     }
   7544 
   7545     // IsF64OnRV32DSoftABI && VA.isMemLoc() is handled below in the same way
   7546     // as any other MemLoc.
   7547 
   7548     // Promote the value if needed.
   7549     // For now, only handle fully promoted and indirect arguments.
   7550     if (VA.getLocInfo() == CCValAssign::Indirect) {
   7551       // Store the argument in a stack slot and pass its address.
   7552       Align StackAlign =
   7553           std::max(getPrefTypeAlign(Outs[i].ArgVT, DAG),
   7554                    getPrefTypeAlign(ArgValue.getValueType(), DAG));
   7555       TypeSize StoredSize = ArgValue.getValueType().getStoreSize();
   7556       // If the original argument was split (e.g. i128), we need
   7557       // to store the required parts of it here (and pass just one address).
   7558       // Vectors may be partly split to registers and partly to the stack, in
   7559       // which case the base address is partly offset and subsequent stores are
   7560       // relative to that.
   7561       unsigned ArgIndex = Outs[i].OrigArgIndex;
   7562       unsigned ArgPartOffset = Outs[i].PartOffset;
   7563       assert(VA.getValVT().isVector() || ArgPartOffset == 0);
   7564       // Calculate the total size to store. The only way to know what will
   7565       // actually be stored is to walk the remaining parts and collect the
   7566       // info as we go.
   7567       SmallVector<std::pair<SDValue, unsigned>> Parts;
   7568       while (i + 1 != e && Outs[i + 1].OrigArgIndex == ArgIndex) {
   7569         SDValue PartValue = OutVals[i + 1];
   7570         unsigned PartOffset = Outs[i + 1].PartOffset - ArgPartOffset;
   7571         EVT PartVT = PartValue.getValueType();
   7572         StoredSize += PartVT.getStoreSize();
   7573         StackAlign = std::max(StackAlign, getPrefTypeAlign(PartVT, DAG));
   7574         Parts.push_back(std::make_pair(PartValue, PartOffset));
   7575         ++i;
   7576       }
   7577       SDValue SpillSlot = DAG.CreateStackTemporary(StoredSize, StackAlign);
   7578       int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
   7579       MemOpChains.push_back(
   7580           DAG.getStore(Chain, DL, ArgValue, SpillSlot,
   7581                        MachinePointerInfo::getFixedStack(MF, FI)));
   7582       for (const auto &Part : Parts) {
   7583         SDValue PartValue = Part.first;
   7584         unsigned PartOffset = Part.second;
   7585         SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, SpillSlot,
   7586                                       DAG.getIntPtrConstant(PartOffset, DL));
   7587         MemOpChains.push_back(
   7588             DAG.getStore(Chain, DL, PartValue, Address,
   7589                          MachinePointerInfo::getFixedStack(MF, FI)));
   7590       }
   7591       ArgValue = SpillSlot;
   7592     } else {
   7593       ArgValue = convertValVTToLocVT(DAG, ArgValue, VA, DL, Subtarget);
   7594     }
   7595 
   7596     // Use local copy if it is a byval arg.
   7597     if (Flags.isByVal())
   7598       ArgValue = ByValArgs[j++];
   7599 
   7600     if (VA.isRegLoc()) {
   7601       // Queue up the argument copies and emit them at the end.
   7602       RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
   7603     } else {
   7604       assert(VA.isMemLoc() && "Argument not register or memory");
   7605       assert(!IsTailCall && "Tail call not allowed if stack is used "
   7606                             "for passing parameters");
   7607 
   7608       // Work out the address of the stack slot.
   7609       if (!StackPtr.getNode())
   7610         StackPtr = DAG.getCopyFromReg(Chain, DL, RISCV::X2, PtrVT);
   7611       SDValue Address =
   7612           DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
   7613                       DAG.getIntPtrConstant(VA.getLocMemOffset(), DL));
   7614 
   7615       // Emit the store.
   7616       MemOpChains.push_back(
   7617           DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
   7618     }
   7619   }
   7620 
   7621   // Join the stores, which are independent of one another.
   7622   if (!MemOpChains.empty())
   7623     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
   7624 
   7625   SDValue Glue;
   7626 
   7627   // Build a sequence of copy-to-reg nodes, chained and glued together.
   7628   for (auto &Reg : RegsToPass) {
   7629     Chain = DAG.getCopyToReg(Chain, DL, Reg.first, Reg.second, Glue);
   7630     Glue = Chain.getValue(1);
   7631   }
   7632 
   7633   // Validate that none of the argument registers have been marked as
   7634   // reserved; if any have, report an error. Do the same for the return
   7635   // address if this is not a tail call.
   7636   validateCCReservedRegs(RegsToPass, MF);
   7637   if (!IsTailCall &&
   7638       MF.getSubtarget<RISCVSubtarget>().isRegisterReservedByUser(RISCV::X1))
   7639     MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
   7640         MF.getFunction(),
   7641         "Return address register required, but has been reserved."});
   7642 
   7643   // If the callee is a GlobalAddress/ExternalSymbol node, turn it into a
   7644   // TargetGlobalAddress/TargetExternalSymbol node so that legalize won't
   7645   // split it, and so that the direct call can be matched by PseudoCALL.
   7646   if (GlobalAddressSDNode *S = dyn_cast<GlobalAddressSDNode>(Callee)) {
   7647     const GlobalValue *GV = S->getGlobal();
   7648 
   7649     unsigned OpFlags = RISCVII::MO_CALL;
   7650     if (!getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV))
   7651       OpFlags = RISCVII::MO_PLT;
   7652 
   7653     Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, OpFlags);
   7654   } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
   7655     unsigned OpFlags = RISCVII::MO_CALL;
   7656 
   7657     if (!getTargetMachine().shouldAssumeDSOLocal(*MF.getFunction().getParent(),
   7658                                                  nullptr))
   7659       OpFlags = RISCVII::MO_PLT;
   7660 
   7661     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, OpFlags);
   7662   }
   7663 
   7664   // The first call operand is the chain and the second is the target address.
   7665   SmallVector<SDValue, 8> Ops;
   7666   Ops.push_back(Chain);
   7667   Ops.push_back(Callee);
   7668 
   7669   // Add argument registers to the end of the list so that they are
   7670   // known live into the call.
   7671   for (auto &Reg : RegsToPass)
   7672     Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));
   7673 
   7674   if (!IsTailCall) {
   7675     // Add a register mask operand representing the call-preserved registers.
   7676     const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
   7677     const uint32_t *Mask = TRI->getCallPreservedMask(MF, CallConv);
   7678     assert(Mask && "Missing call preserved mask for calling convention");
   7679     Ops.push_back(DAG.getRegisterMask(Mask));
   7680   }
   7681 
   7682   // Glue the call to the argument copies, if any.
   7683   if (Glue.getNode())
   7684     Ops.push_back(Glue);
   7685 
   7686   // Emit the call.
   7687   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   7688 
   7689   if (IsTailCall) {
   7690     MF.getFrameInfo().setHasTailCall();
   7691     return DAG.getNode(RISCVISD::TAIL, DL, NodeTys, Ops);
   7692   }
   7693 
   7694   Chain = DAG.getNode(RISCVISD::CALL, DL, NodeTys, Ops);
   7695   DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
   7696   Glue = Chain.getValue(1);
   7697 
   7698   // Mark the end of the call, which is glued to the call itself.
   7699   Chain = DAG.getCALLSEQ_END(Chain,
   7700                              DAG.getConstant(NumBytes, DL, PtrVT, true),
   7701                              DAG.getConstant(0, DL, PtrVT, true),
   7702                              Glue, DL);
   7703   Glue = Chain.getValue(1);
   7704 
   7705   // Assign locations to each value returned by this call.
   7706   SmallVector<CCValAssign, 16> RVLocs;
   7707   CCState RetCCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
   7708   analyzeInputArgs(MF, RetCCInfo, Ins, /*IsRet=*/true);
   7709 
   7710   // Copy all of the result registers out of their specified physreg.
   7711   for (auto &VA : RVLocs) {
   7712     // Copy the value out
   7713     SDValue RetValue =
   7714         DAG.getCopyFromReg(Chain, DL, VA.getLocReg(), VA.getLocVT(), Glue);
   7715     // Glue the RetValue to the end of the call sequence
   7716     Chain = RetValue.getValue(1);
   7717     Glue = RetValue.getValue(2);
   7718 
   7719     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
   7720       assert(VA.getLocReg() == ArgGPRs[0] && "Unexpected reg assignment");
   7721       SDValue RetValue2 =
   7722           DAG.getCopyFromReg(Chain, DL, ArgGPRs[1], MVT::i32, Glue);
   7723       Chain = RetValue2.getValue(1);
   7724       Glue = RetValue2.getValue(2);
   7725       RetValue = DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, RetValue,
   7726                              RetValue2);
   7727     }
   7728 
   7729     RetValue = convertLocVTToValVT(DAG, RetValue, VA, DL, Subtarget);
   7730 
   7731     InVals.push_back(RetValue);
   7732   }
   7733 
   7734   return Chain;
   7735 }
   7736 
   7737 bool RISCVTargetLowering::CanLowerReturn(
   7738     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
   7739     const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
   7740   SmallVector<CCValAssign, 16> RVLocs;
   7741   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
   7742 
   7743   Optional<unsigned> FirstMaskArgument;
   7744   if (Subtarget.hasStdExtV())
   7745     FirstMaskArgument = preAssignMask(Outs);
   7746 
   7747   for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
   7748     MVT VT = Outs[i].VT;
   7749     ISD::ArgFlagsTy ArgFlags = Outs[i].Flags;
   7750     RISCVABI::ABI ABI = MF.getSubtarget<RISCVSubtarget>().getTargetABI();
   7751     if (CC_RISCV(MF.getDataLayout(), ABI, i, VT, VT, CCValAssign::Full,
   7752                  ArgFlags, CCInfo, /*IsFixed=*/true, /*IsRet=*/true, nullptr,
   7753                  *this, FirstMaskArgument))
   7754       return false;
   7755   }
   7756   return true;
   7757 }
   7758 
   7759 SDValue
   7760 RISCVTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
   7761                                  bool IsVarArg,
   7762                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
   7763                                  const SmallVectorImpl<SDValue> &OutVals,
   7764                                  const SDLoc &DL, SelectionDAG &DAG) const {
   7765   const MachineFunction &MF = DAG.getMachineFunction();
   7766   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
   7767 
   7768   // Stores the assignment of the return value to a location.
   7769   SmallVector<CCValAssign, 16> RVLocs;
   7770 
   7771   // Info about the registers and stack slot.
   7772   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
   7773                  *DAG.getContext());
   7774 
   7775   analyzeOutputArgs(DAG.getMachineFunction(), CCInfo, Outs, /*IsRet=*/true,
   7776                     nullptr);
   7777 
   7778   if (CallConv == CallingConv::GHC && !RVLocs.empty())
   7779     report_fatal_error("GHC functions return void only");
   7780 
   7781   SDValue Glue;
   7782   SmallVector<SDValue, 4> RetOps(1, Chain);
   7783 
   7784   // Copy the result values into the output registers.
   7785   for (unsigned i = 0, e = RVLocs.size(); i < e; ++i) {
   7786     SDValue Val = OutVals[i];
   7787     CCValAssign &VA = RVLocs[i];
   7788     assert(VA.isRegLoc() && "Can only return in registers!");
   7789 
   7790     if (VA.getLocVT() == MVT::i32 && VA.getValVT() == MVT::f64) {
   7791       // Handle returning f64 on RV32D with a soft float ABI.
   7792       assert(VA.isRegLoc() && "Expected return via registers");
   7793       SDValue SplitF64 = DAG.getNode(RISCVISD::SplitF64, DL,
   7794                                      DAG.getVTList(MVT::i32, MVT::i32), Val);
   7795       SDValue Lo = SplitF64.getValue(0);
   7796       SDValue Hi = SplitF64.getValue(1);
   7797       Register RegLo = VA.getLocReg();
   7798       assert(RegLo < RISCV::X31 && "Invalid register pair");
   7799       Register RegHi = RegLo + 1;
   7800 
   7801       if (STI.isRegisterReservedByUser(RegLo) ||
   7802           STI.isRegisterReservedByUser(RegHi))
   7803         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
   7804             MF.getFunction(),
   7805             "Return value register required, but has been reserved."});
   7806 
   7807       Chain = DAG.getCopyToReg(Chain, DL, RegLo, Lo, Glue);
   7808       Glue = Chain.getValue(1);
   7809       RetOps.push_back(DAG.getRegister(RegLo, MVT::i32));
   7810       Chain = DAG.getCopyToReg(Chain, DL, RegHi, Hi, Glue);
   7811       Glue = Chain.getValue(1);
   7812       RetOps.push_back(DAG.getRegister(RegHi, MVT::i32));
   7813     } else {
   7814       // Handle a 'normal' return.
   7815       Val = convertValVTToLocVT(DAG, Val, VA, DL, Subtarget);
   7816       Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), Val, Glue);
   7817 
   7818       if (STI.isRegisterReservedByUser(VA.getLocReg()))
   7819         MF.getFunction().getContext().diagnose(DiagnosticInfoUnsupported{
   7820             MF.getFunction(),
   7821             "Return value register required, but has been reserved."});
   7822 
   7823       // Guarantee that all emitted copies are stuck together.
   7824       Glue = Chain.getValue(1);
   7825       RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
   7826     }
   7827   }
   7828 
   7829   RetOps[0] = Chain; // Update chain.
   7830 
   7831   // Add the glue node if we have it.
   7832   if (Glue.getNode()) {
   7833     RetOps.push_back(Glue);
   7834   }
   7835 
   7836   // Interrupt service routines use different return instructions.
   7837   const Function &Func = DAG.getMachineFunction().getFunction();
   7838   if (Func.hasFnAttribute("interrupt")) {
   7839     if (!Func.getReturnType()->isVoidTy())
   7840       report_fatal_error(
   7841           "Functions with the interrupt attribute must have void return type!");
   7842 
   7843     MachineFunction &MF = DAG.getMachineFunction();
   7844     StringRef Kind =
   7845       MF.getFunction().getFnAttribute("interrupt").getValueAsString();
   7846 
   7847     unsigned RetOpc;
   7848     if (Kind == "user")
   7849       RetOpc = RISCVISD::URET_FLAG;
   7850     else if (Kind == "supervisor")
   7851       RetOpc = RISCVISD::SRET_FLAG;
   7852     else
   7853       RetOpc = RISCVISD::MRET_FLAG;
   7854 
   7855     return DAG.getNode(RetOpc, DL, MVT::Other, RetOps);
   7856   }
   7857 
   7858   return DAG.getNode(RISCVISD::RET_FLAG, DL, MVT::Other, RetOps);
   7859 }
   7860 
   7861 void RISCVTargetLowering::validateCCReservedRegs(
   7862     const SmallVectorImpl<std::pair<llvm::Register, llvm::SDValue>> &Regs,
   7863     MachineFunction &MF) const {
   7864   const Function &F = MF.getFunction();
   7865   const RISCVSubtarget &STI = MF.getSubtarget<RISCVSubtarget>();
   7866 
   7867   if (llvm::any_of(Regs, [&STI](auto Reg) {
   7868         return STI.isRegisterReservedByUser(Reg.first);
   7869       }))
   7870     F.getContext().diagnose(DiagnosticInfoUnsupported{
   7871         F, "Argument register required, but has been reserved."});
   7872 }
   7873 
   7874 bool RISCVTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
   7875   return CI->isTailCall();
   7876 }
   7877 
   7878 const char *RISCVTargetLowering::getTargetNodeName(unsigned Opcode) const {
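        // Use an X-macro so each RISCVISD opcode needs only a single
        // NODE_NAME_CASE entry in the switch below.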
   7879 #define NODE_NAME_CASE(NODE)                                                   \
   7880   case RISCVISD::NODE:                                                         \
   7881     return "RISCVISD::" #NODE;
   7882   // clang-format off
   7883   switch ((RISCVISD::NodeType)Opcode) {
   7884   case RISCVISD::FIRST_NUMBER:
   7885     break;
   7886   NODE_NAME_CASE(RET_FLAG)
   7887   NODE_NAME_CASE(URET_FLAG)
   7888   NODE_NAME_CASE(SRET_FLAG)
   7889   NODE_NAME_CASE(MRET_FLAG)
   7890   NODE_NAME_CASE(CALL)
   7891   NODE_NAME_CASE(SELECT_CC)
   7892   NODE_NAME_CASE(BR_CC)
   7893   NODE_NAME_CASE(BuildPairF64)
   7894   NODE_NAME_CASE(SplitF64)
   7895   NODE_NAME_CASE(TAIL)
   7896   NODE_NAME_CASE(MULHSU)
   7897   NODE_NAME_CASE(SLLW)
   7898   NODE_NAME_CASE(SRAW)
   7899   NODE_NAME_CASE(SRLW)
   7900   NODE_NAME_CASE(DIVW)
   7901   NODE_NAME_CASE(DIVUW)
   7902   NODE_NAME_CASE(REMUW)
   7903   NODE_NAME_CASE(ROLW)
   7904   NODE_NAME_CASE(RORW)
   7905   NODE_NAME_CASE(CLZW)
   7906   NODE_NAME_CASE(CTZW)
   7907   NODE_NAME_CASE(FSLW)
   7908   NODE_NAME_CASE(FSRW)
   7909   NODE_NAME_CASE(FSL)
   7910   NODE_NAME_CASE(FSR)
   7911   NODE_NAME_CASE(FMV_H_X)
   7912   NODE_NAME_CASE(FMV_X_ANYEXTH)
   7913   NODE_NAME_CASE(FMV_W_X_RV64)
   7914   NODE_NAME_CASE(FMV_X_ANYEXTW_RV64)
   7915   NODE_NAME_CASE(READ_CYCLE_WIDE)
   7916   NODE_NAME_CASE(GREV)
   7917   NODE_NAME_CASE(GREVW)
   7918   NODE_NAME_CASE(GORC)
   7919   NODE_NAME_CASE(GORCW)
   7920   NODE_NAME_CASE(SHFL)
   7921   NODE_NAME_CASE(SHFLW)
   7922   NODE_NAME_CASE(UNSHFL)
   7923   NODE_NAME_CASE(UNSHFLW)
   7924   NODE_NAME_CASE(BCOMPRESS)
   7925   NODE_NAME_CASE(BCOMPRESSW)
   7926   NODE_NAME_CASE(BDECOMPRESS)
   7927   NODE_NAME_CASE(BDECOMPRESSW)
   7928   NODE_NAME_CASE(VMV_V_X_VL)
   7929   NODE_NAME_CASE(VFMV_V_F_VL)
   7930   NODE_NAME_CASE(VMV_X_S)
   7931   NODE_NAME_CASE(VMV_S_X_VL)
   7932   NODE_NAME_CASE(VFMV_S_F_VL)
   7933   NODE_NAME_CASE(SPLAT_VECTOR_I64)
   7934   NODE_NAME_CASE(READ_VLENB)
   7935   NODE_NAME_CASE(TRUNCATE_VECTOR_VL)
   7936   NODE_NAME_CASE(VSLIDEUP_VL)
   7937   NODE_NAME_CASE(VSLIDE1UP_VL)
   7938   NODE_NAME_CASE(VSLIDEDOWN_VL)
   7939   NODE_NAME_CASE(VSLIDE1DOWN_VL)
   7940   NODE_NAME_CASE(VID_VL)
   7941   NODE_NAME_CASE(VFNCVT_ROD_VL)
   7942   NODE_NAME_CASE(VECREDUCE_ADD_VL)
   7943   NODE_NAME_CASE(VECREDUCE_UMAX_VL)
   7944   NODE_NAME_CASE(VECREDUCE_SMAX_VL)
   7945   NODE_NAME_CASE(VECREDUCE_UMIN_VL)
   7946   NODE_NAME_CASE(VECREDUCE_SMIN_VL)
   7947   NODE_NAME_CASE(VECREDUCE_AND_VL)
   7948   NODE_NAME_CASE(VECREDUCE_OR_VL)
   7949   NODE_NAME_CASE(VECREDUCE_XOR_VL)
   7950   NODE_NAME_CASE(VECREDUCE_FADD_VL)
   7951   NODE_NAME_CASE(VECREDUCE_SEQ_FADD_VL)
   7952   NODE_NAME_CASE(VECREDUCE_FMIN_VL)
   7953   NODE_NAME_CASE(VECREDUCE_FMAX_VL)
   7954   NODE_NAME_CASE(ADD_VL)
   7955   NODE_NAME_CASE(AND_VL)
   7956   NODE_NAME_CASE(MUL_VL)
   7957   NODE_NAME_CASE(OR_VL)
   7958   NODE_NAME_CASE(SDIV_VL)
   7959   NODE_NAME_CASE(SHL_VL)
   7960   NODE_NAME_CASE(SREM_VL)
   7961   NODE_NAME_CASE(SRA_VL)
   7962   NODE_NAME_CASE(SRL_VL)
   7963   NODE_NAME_CASE(SUB_VL)
   7964   NODE_NAME_CASE(UDIV_VL)
   7965   NODE_NAME_CASE(UREM_VL)
   7966   NODE_NAME_CASE(XOR_VL)
   7967   NODE_NAME_CASE(FADD_VL)
   7968   NODE_NAME_CASE(FSUB_VL)
   7969   NODE_NAME_CASE(FMUL_VL)
   7970   NODE_NAME_CASE(FDIV_VL)
   7971   NODE_NAME_CASE(FNEG_VL)
   7972   NODE_NAME_CASE(FABS_VL)
   7973   NODE_NAME_CASE(FSQRT_VL)
   7974   NODE_NAME_CASE(FMA_VL)
   7975   NODE_NAME_CASE(FCOPYSIGN_VL)
   7976   NODE_NAME_CASE(SMIN_VL)
   7977   NODE_NAME_CASE(SMAX_VL)
   7978   NODE_NAME_CASE(UMIN_VL)
   7979   NODE_NAME_CASE(UMAX_VL)
   7980   NODE_NAME_CASE(FMINNUM_VL)
   7981   NODE_NAME_CASE(FMAXNUM_VL)
   7982   NODE_NAME_CASE(MULHS_VL)
   7983   NODE_NAME_CASE(MULHU_VL)
   7984   NODE_NAME_CASE(FP_TO_SINT_VL)
   7985   NODE_NAME_CASE(FP_TO_UINT_VL)
   7986   NODE_NAME_CASE(SINT_TO_FP_VL)
   7987   NODE_NAME_CASE(UINT_TO_FP_VL)
   7988   NODE_NAME_CASE(FP_EXTEND_VL)
   7989   NODE_NAME_CASE(FP_ROUND_VL)
   7990   NODE_NAME_CASE(SETCC_VL)
   7991   NODE_NAME_CASE(VSELECT_VL)
   7992   NODE_NAME_CASE(VMAND_VL)
   7993   NODE_NAME_CASE(VMOR_VL)
   7994   NODE_NAME_CASE(VMXOR_VL)
   7995   NODE_NAME_CASE(VMCLR_VL)
   7996   NODE_NAME_CASE(VMSET_VL)
   7997   NODE_NAME_CASE(VRGATHER_VX_VL)
   7998   NODE_NAME_CASE(VRGATHER_VV_VL)
   7999   NODE_NAME_CASE(VRGATHEREI16_VV_VL)
   8000   NODE_NAME_CASE(VSEXT_VL)
   8001   NODE_NAME_CASE(VZEXT_VL)
   8002   NODE_NAME_CASE(VPOPC_VL)
   8003   NODE_NAME_CASE(VLE_VL)
   8004   NODE_NAME_CASE(VSE_VL)
   8005   NODE_NAME_CASE(READ_CSR)
   8006   NODE_NAME_CASE(WRITE_CSR)
   8007   NODE_NAME_CASE(SWAP_CSR)
   8008   }
   8009   // clang-format on
   8010   return nullptr;
   8011 #undef NODE_NAME_CASE
   8012 }
   8013 
   8014 /// getConstraintType - Given a constraint letter, return the type of
   8015 /// constraint it is for this target.
   8016 RISCVTargetLowering::ConstraintType
   8017 RISCVTargetLowering::getConstraintType(StringRef Constraint) const {
   8018   if (Constraint.size() == 1) {
   8019     switch (Constraint[0]) {
   8020     default:
   8021       break;
   8022     case 'f':
   8023     case 'v':
   8024       return C_RegisterClass;
   8025     case 'I':
   8026     case 'J':
   8027     case 'K':
   8028       return C_Immediate;
   8029     case 'A':
   8030       return C_Memory;
   8031     }
   8032   }
   8033   return TargetLowering::getConstraintType(Constraint);
   8034 }
   8035 
   8036 std::pair<unsigned, const TargetRegisterClass *>
   8037 RISCVTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
   8038                                                   StringRef Constraint,
   8039                                                   MVT VT) const {
   8040   // First, see if this is a constraint that directly corresponds to a
   8041   // RISCV register class.
   8042   if (Constraint.size() == 1) {
   8043     switch (Constraint[0]) {
   8044     case 'r':
   8045       return std::make_pair(0U, &RISCV::GPRRegClass);
   8046     case 'f':
   8047       if (Subtarget.hasStdExtZfh() && VT == MVT::f16)
   8048         return std::make_pair(0U, &RISCV::FPR16RegClass);
   8049       if (Subtarget.hasStdExtF() && VT == MVT::f32)
   8050         return std::make_pair(0U, &RISCV::FPR32RegClass);
   8051       if (Subtarget.hasStdExtD() && VT == MVT::f64)
   8052         return std::make_pair(0U, &RISCV::FPR64RegClass);
   8053       break;
   8054     case 'v':
   8055       for (const auto *RC :
   8056            {&RISCV::VMRegClass, &RISCV::VRRegClass, &RISCV::VRM2RegClass,
   8057             &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
   8058         if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy))
   8059           return std::make_pair(0U, RC);
   8060       }
   8061       break;
   8062     default:
   8063       break;
   8064     }
   8065   }
   8066 
   8067   // Clang will correctly decode the usage of register name aliases into their
   8068   // official names. However, other frontends like `rustc` do not. This allows
   8069   // users of these frontends to use the ABI names for registers in LLVM-style
   8070   // register constraints.
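          // For example (illustrative IR), the constraint "={a0}" resolves to
          // X10 below:
          //   %ret = call i32 asm "", "={a0}"()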
   8071   unsigned XRegFromAlias = StringSwitch<unsigned>(Constraint.lower())
   8072                                .Case("{zero}", RISCV::X0)
   8073                                .Case("{ra}", RISCV::X1)
   8074                                .Case("{sp}", RISCV::X2)
   8075                                .Case("{gp}", RISCV::X3)
   8076                                .Case("{tp}", RISCV::X4)
   8077                                .Case("{t0}", RISCV::X5)
   8078                                .Case("{t1}", RISCV::X6)
   8079                                .Case("{t2}", RISCV::X7)
   8080                                .Cases("{s0}", "{fp}", RISCV::X8)
   8081                                .Case("{s1}", RISCV::X9)
   8082                                .Case("{a0}", RISCV::X10)
   8083                                .Case("{a1}", RISCV::X11)
   8084                                .Case("{a2}", RISCV::X12)
   8085                                .Case("{a3}", RISCV::X13)
   8086                                .Case("{a4}", RISCV::X14)
   8087                                .Case("{a5}", RISCV::X15)
   8088                                .Case("{a6}", RISCV::X16)
   8089                                .Case("{a7}", RISCV::X17)
   8090                                .Case("{s2}", RISCV::X18)
   8091                                .Case("{s3}", RISCV::X19)
   8092                                .Case("{s4}", RISCV::X20)
   8093                                .Case("{s5}", RISCV::X21)
   8094                                .Case("{s6}", RISCV::X22)
   8095                                .Case("{s7}", RISCV::X23)
   8096                                .Case("{s8}", RISCV::X24)
   8097                                .Case("{s9}", RISCV::X25)
   8098                                .Case("{s10}", RISCV::X26)
   8099                                .Case("{s11}", RISCV::X27)
   8100                                .Case("{t3}", RISCV::X28)
   8101                                .Case("{t4}", RISCV::X29)
   8102                                .Case("{t5}", RISCV::X30)
   8103                                .Case("{t6}", RISCV::X31)
   8104                                .Default(RISCV::NoRegister);
   8105   if (XRegFromAlias != RISCV::NoRegister)
   8106     return std::make_pair(XRegFromAlias, &RISCV::GPRRegClass);
   8107 
   8108   // Since TargetLowering::getRegForInlineAsmConstraint uses the name of the
   8109   // TableGen record rather than the AsmName to choose registers for InlineAsm
   8110   // constraints, and since we want to match those names to the widest floating
   8111   // point register type available, manually select floating point registers here.
   8112   //
   8113   // The second case is the ABI name of the register, so that frontends can also
   8114   // use the ABI names in register constraint lists.
   8115   if (Subtarget.hasStdExtF()) {
   8116     unsigned FReg = StringSwitch<unsigned>(Constraint.lower())
   8117                         .Cases("{f0}", "{ft0}", RISCV::F0_F)
   8118                         .Cases("{f1}", "{ft1}", RISCV::F1_F)
   8119                         .Cases("{f2}", "{ft2}", RISCV::F2_F)
   8120                         .Cases("{f3}", "{ft3}", RISCV::F3_F)
   8121                         .Cases("{f4}", "{ft4}", RISCV::F4_F)
   8122                         .Cases("{f5}", "{ft5}", RISCV::F5_F)
   8123                         .Cases("{f6}", "{ft6}", RISCV::F6_F)
   8124                         .Cases("{f7}", "{ft7}", RISCV::F7_F)
   8125                         .Cases("{f8}", "{fs0}", RISCV::F8_F)
   8126                         .Cases("{f9}", "{fs1}", RISCV::F9_F)
   8127                         .Cases("{f10}", "{fa0}", RISCV::F10_F)
   8128                         .Cases("{f11}", "{fa1}", RISCV::F11_F)
   8129                         .Cases("{f12}", "{fa2}", RISCV::F12_F)
   8130                         .Cases("{f13}", "{fa3}", RISCV::F13_F)
   8131                         .Cases("{f14}", "{fa4}", RISCV::F14_F)
   8132                         .Cases("{f15}", "{fa5}", RISCV::F15_F)
   8133                         .Cases("{f16}", "{fa6}", RISCV::F16_F)
   8134                         .Cases("{f17}", "{fa7}", RISCV::F17_F)
   8135                         .Cases("{f18}", "{fs2}", RISCV::F18_F)
   8136                         .Cases("{f19}", "{fs3}", RISCV::F19_F)
   8137                         .Cases("{f20}", "{fs4}", RISCV::F20_F)
   8138                         .Cases("{f21}", "{fs5}", RISCV::F21_F)
                        .Cases("{f22}", "{fs6}", RISCV::F22_F)
                        .Cases("{f23}", "{fs7}", RISCV::F23_F)
                        .Cases("{f24}", "{fs8}", RISCV::F24_F)
                        .Cases("{f25}", "{fs9}", RISCV::F25_F)
                        .Cases("{f26}", "{fs10}", RISCV::F26_F)
                        .Cases("{f27}", "{fs11}", RISCV::F27_F)
                        .Cases("{f28}", "{ft8}", RISCV::F28_F)
                        .Cases("{f29}", "{ft9}", RISCV::F29_F)
                        .Cases("{f30}", "{ft10}", RISCV::F30_F)
                        .Cases("{f31}", "{ft11}", RISCV::F31_F)
                        .Default(RISCV::NoRegister);
    if (FReg != RISCV::NoRegister) {
      assert(RISCV::F0_F <= FReg && FReg <= RISCV::F31_F && "Unknown fp-reg");
      if (Subtarget.hasStdExtD()) {
        unsigned RegNo = FReg - RISCV::F0_F;
        unsigned DReg = RISCV::F0_D + RegNo;
        return std::make_pair(DReg, &RISCV::FPR64RegClass);
      }
      return std::make_pair(FReg, &RISCV::FPR32RegClass);
    }
  }

  if (Subtarget.hasStdExtV()) {
    Register VReg = StringSwitch<Register>(Constraint.lower())
                        .Case("{v0}", RISCV::V0)
                        .Case("{v1}", RISCV::V1)
                        .Case("{v2}", RISCV::V2)
                        .Case("{v3}", RISCV::V3)
                        .Case("{v4}", RISCV::V4)
                        .Case("{v5}", RISCV::V5)
                        .Case("{v6}", RISCV::V6)
                        .Case("{v7}", RISCV::V7)
                        .Case("{v8}", RISCV::V8)
                        .Case("{v9}", RISCV::V9)
                        .Case("{v10}", RISCV::V10)
                        .Case("{v11}", RISCV::V11)
                        .Case("{v12}", RISCV::V12)
                        .Case("{v13}", RISCV::V13)
                        .Case("{v14}", RISCV::V14)
                        .Case("{v15}", RISCV::V15)
                        .Case("{v16}", RISCV::V16)
                        .Case("{v17}", RISCV::V17)
                        .Case("{v18}", RISCV::V18)
                        .Case("{v19}", RISCV::V19)
                        .Case("{v20}", RISCV::V20)
                        .Case("{v21}", RISCV::V21)
                        .Case("{v22}", RISCV::V22)
                        .Case("{v23}", RISCV::V23)
                        .Case("{v24}", RISCV::V24)
                        .Case("{v25}", RISCV::V25)
                        .Case("{v26}", RISCV::V26)
                        .Case("{v27}", RISCV::V27)
                        .Case("{v28}", RISCV::V28)
                        .Case("{v29}", RISCV::V29)
                        .Case("{v30}", RISCV::V30)
                        .Case("{v31}", RISCV::V31)
                        .Default(RISCV::NoRegister);
    if (VReg != RISCV::NoRegister) {
      if (TRI->isTypeLegalForClass(RISCV::VMRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VMRegClass);
      if (TRI->isTypeLegalForClass(RISCV::VRRegClass, VT.SimpleTy))
        return std::make_pair(VReg, &RISCV::VRRegClass);
      for (const auto *RC :
           {&RISCV::VRM2RegClass, &RISCV::VRM4RegClass, &RISCV::VRM8RegClass}) {
        if (TRI->isTypeLegalForClass(*RC, VT.SimpleTy)) {
          VReg = TRI->getMatchingSuperReg(VReg, RISCV::sub_vrm1_0, RC);
          return std::make_pair(VReg, RC);
        }
      }
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

unsigned
RISCVTargetLowering::getInlineAsmMemConstraint(StringRef ConstraintCode) const {
  // Currently only support length 1 constraints.
  if (ConstraintCode.size() == 1) {
    switch (ConstraintCode[0]) {
    case 'A':
      return InlineAsm::Constraint_A;
    default:
      break;
    }
  }

  return TargetLowering::getInlineAsmMemConstraint(ConstraintCode);
}
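
// For illustration (not part of this file): the 'A' memory constraint passes
// an address held in a single general-purpose register, which is how C code
// typically hand-codes AMO sequences. A sketch, assuming GCC/Clang RISC-V
// inline-asm syntax:
//
//   int old;
//   asm volatile("amoswap.w %0, %2, %1"
//                : "=r"(old), "+A"(*lock)
//                : "r"(1)
//                : "memory");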

void RISCVTargetLowering::LowerAsmOperandForConstraint(
    SDValue Op, std::string &Constraint, std::vector<SDValue> &Ops,
    SelectionDAG &DAG) const {
  // Currently only support length 1 constraints.
  if (Constraint.length() == 1) {
    switch (Constraint[0]) {
    case 'I':
      // Validate & create a 12-bit signed immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getSExtValue();
        if (isInt<12>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    case 'J':
      // Validate & create an integer zero operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op))
        if (C->getZExtValue() == 0)
          Ops.push_back(
              DAG.getTargetConstant(0, SDLoc(Op), Subtarget.getXLenVT()));
      return;
    case 'K':
      // Validate & create a 5-bit unsigned immediate operand.
      if (auto *C = dyn_cast<ConstantSDNode>(Op)) {
        uint64_t CVal = C->getZExtValue();
        if (isUInt<5>(CVal))
          Ops.push_back(
              DAG.getTargetConstant(CVal, SDLoc(Op), Subtarget.getXLenVT()));
      }
      return;
    default:
      break;
    }
  }
  TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG);
}
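
// For illustration (not part of this file): these constraints appear in C
// inline asm. A sketch, assuming GCC/Clang syntax:
//
//   asm("addi %0, %1, %2" : "=r"(res) : "r"(x), "I"(-2048)); // simm12
//   asm volatile("csrrwi %0, sscratch, %1" : "=r"(res) : "K"(31)); // uimm5
//
// A constant outside the valid range leaves Ops empty above, and the failure
// then surfaces as an invalid-operand diagnostic for the asm statement.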

Instruction *RISCVTargetLowering::emitLeadingFence(IRBuilder<> &Builder,
                                                   Instruction *Inst,
                                                   AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && Ord == AtomicOrdering::SequentiallyConsistent)
    return Builder.CreateFence(Ord);
  if (isa<StoreInst>(Inst) && isReleaseOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Release);
  return nullptr;
}

Instruction *RISCVTargetLowering::emitTrailingFence(IRBuilder<> &Builder,
                                                    Instruction *Inst,
                                                    AtomicOrdering Ord) const {
  if (isa<LoadInst>(Inst) && isAcquireOrStronger(Ord))
    return Builder.CreateFence(AtomicOrdering::Acquire);
  return nullptr;
}
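
// For illustration (not part of this file): with these hooks a seq_cst atomic
// load lowers to "fence rw,rw; l{b|h|w|d}; fence r,rw", while a release
// atomic store lowers to "fence rw,w; s{b|h|w|d}" with no trailing fence.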

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  // atomicrmw {fadd,fsub} must be expanded to use compare-exchange, as floating
  // point operations can't be used in an lr/sc sequence without breaking the
  // forward-progress guarantee.
  if (AI->isFloatingPointOperation())
    return AtomicExpansionKind::CmpXChg;

  unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}
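
// For illustration (not part of this file): given this hook, AtomicExpandPass
// rewrites a sub-word operation such as
//
//   %old = atomicrmw add i8* %p, i8 %v seq_cst
//
// into a call to the corresponding masked intrinsic (built below in
// emitMaskedAtomicRMWIntrinsic) on the naturally aligned 32-bit word that
// contains the byte.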

static Intrinsic::ID
getIntrinsicForMaskedAtomicRMWBinOp(unsigned XLen, AtomicRMWInst::BinOp BinOp) {
  if (XLen == 32) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i32;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i32;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i32;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i32;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i32;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i32;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i32;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i32;
    }
  }

  if (XLen == 64) {
    switch (BinOp) {
    default:
      llvm_unreachable("Unexpected AtomicRMW BinOp");
    case AtomicRMWInst::Xchg:
      return Intrinsic::riscv_masked_atomicrmw_xchg_i64;
    case AtomicRMWInst::Add:
      return Intrinsic::riscv_masked_atomicrmw_add_i64;
    case AtomicRMWInst::Sub:
      return Intrinsic::riscv_masked_atomicrmw_sub_i64;
    case AtomicRMWInst::Nand:
      return Intrinsic::riscv_masked_atomicrmw_nand_i64;
    case AtomicRMWInst::Max:
      return Intrinsic::riscv_masked_atomicrmw_max_i64;
    case AtomicRMWInst::Min:
      return Intrinsic::riscv_masked_atomicrmw_min_i64;
    case AtomicRMWInst::UMax:
      return Intrinsic::riscv_masked_atomicrmw_umax_i64;
    case AtomicRMWInst::UMin:
      return Intrinsic::riscv_masked_atomicrmw_umin_i64;
    }
  }

  llvm_unreachable("Unexpected XLen");
}

Value *RISCVTargetLowering::emitMaskedAtomicRMWIntrinsic(
    IRBuilder<> &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr,
    Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering =
      Builder.getIntN(XLen, static_cast<uint64_t>(AI->getOrdering()));
  Type *Tys[] = {AlignedAddr->getType()};
  Function *LrwOpScwLoop = Intrinsic::getDeclaration(
      AI->getModule(),
      getIntrinsicForMaskedAtomicRMWBinOp(XLen, AI->getOperation()), Tys);

  if (XLen == 64) {
    Incr = Builder.CreateSExt(Incr, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    ShiftAmt = Builder.CreateSExt(ShiftAmt, Builder.getInt64Ty());
  }

  Value *Result;

  // Must pass the shift amount needed to sign extend the loaded value prior
  // to performing a signed comparison for min/max. ShiftAmt is the number of
  // bits to shift the value into position. Pass XLen-ShiftAmt-ValWidth, which
  // is the number of bits to left+right shift the value in order to
  // sign-extend.
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max) {
    const DataLayout &DL = AI->getModule()->getDataLayout();
    unsigned ValWidth =
        DL.getTypeStoreSizeInBits(AI->getValOperand()->getType());
    Value *SextShamt =
        Builder.CreateSub(Builder.getIntN(XLen, XLen - ValWidth), ShiftAmt);
    Result = Builder.CreateCall(LrwOpScwLoop,
                                {AlignedAddr, Incr, Mask, SextShamt, Ordering});
  } else {
    Result =
        Builder.CreateCall(LrwOpScwLoop, {AlignedAddr, Incr, Mask, Ordering});
  }

  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
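
// Worked example (illustrative): for an i8 atomicrmw min at byte offset 1 on
// RV32, ValWidth = 8 and ShiftAmt = 8, so SextShamt = 32 - 8 - 8 = 16; the
// loaded byte sits in bits 8..15 of the word, and shifting it left and then
// arithmetically right by 16 sign-extends it in place before the signed
// comparison inside the LR/SC loop.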

TargetLowering::AtomicExpansionKind
RISCVTargetLowering::shouldExpandAtomicCmpXchgInIR(
    AtomicCmpXchgInst *CI) const {
  unsigned Size = CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
  if (Size == 8 || Size == 16)
    return AtomicExpansionKind::MaskedIntrinsic;
  return AtomicExpansionKind::None;
}

Value *RISCVTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
    IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
    Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
  unsigned XLen = Subtarget.getXLen();
  Value *Ordering = Builder.getIntN(XLen, static_cast<uint64_t>(Ord));
  Intrinsic::ID CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i32;
  if (XLen == 64) {
    CmpVal = Builder.CreateSExt(CmpVal, Builder.getInt64Ty());
    NewVal = Builder.CreateSExt(NewVal, Builder.getInt64Ty());
    Mask = Builder.CreateSExt(Mask, Builder.getInt64Ty());
    CmpXchgIntrID = Intrinsic::riscv_masked_cmpxchg_i64;
  }
  Type *Tys[] = {AlignedAddr->getType()};
  Function *MaskedCmpXchg =
      Intrinsic::getDeclaration(CI->getModule(), CmpXchgIntrID, Tys);
  Value *Result = Builder.CreateCall(
      MaskedCmpXchg, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  if (XLen == 64)
    Result = Builder.CreateTrunc(Result, Builder.getInt32Ty());
  return Result;
}
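
// For illustration (not part of this file): a cmpxchg on i8 or i16 is
// likewise expanded by AtomicExpandPass into a call to the masked cmpxchg
// intrinsic built here, which runs an LR/SC loop on the aligned word and
// compares only the bits selected by Mask.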

bool RISCVTargetLowering::shouldRemoveExtendFromGSIndex(EVT VT) const {
  return false;
}

bool RISCVTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                     EVT VT) const {
  VT = VT.getScalarType();

  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f16:
    return Subtarget.hasStdExtZfh();
  case MVT::f32:
    return Subtarget.hasStdExtF();
  case MVT::f64:
    return Subtarget.hasStdExtD();
  default:
    break;
  }

  return false;
}
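
// For illustration (not part of this file): answering true lets the DAG
// combiner contract an fmul feeding an fadd into ISD::FMA, so with the D
// extension "a * b + c" on doubles can select a single fmadd.d instead of
// fmul.d followed by fadd.d.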

Register RISCVTargetLowering::getExceptionPointerRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X10;
}

Register RISCVTargetLowering::getExceptionSelectorRegister(
    const Constant *PersonalityFn) const {
  return RISCV::X11;
}

bool RISCVTargetLowering::shouldExtendTypeInLibCall(EVT Type) const {
  // Return false to suppress an unnecessary extension when a libcall
  // argument or return value has type f32 under the LP64 ABI.
  RISCVABI::ABI ABI = Subtarget.getTargetABI();
  if (ABI == RISCVABI::ABI_LP64 && (Type == MVT::f32))
    return false;

  return true;
}

bool RISCVTargetLowering::shouldSignExtendTypeInLibCall(EVT Type,
                                                        bool IsSigned) const {
  if (Subtarget.is64Bit() && Type == MVT::i32)
    return true;

  return IsSigned;
}
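
// For illustration (not part of this file): the LP64 ABI passes 32-bit
// integer arguments sign-extended to 64 bits regardless of their signedness
// in IR, so on RV64 libcall operands of type i32 must always be
// sign-extended; that is what the early return above encodes.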

bool RISCVTargetLowering::decomposeMulByConstant(LLVMContext &Context, EVT VT,
                                                 SDValue C) const {
  // Check integral scalar types.
  if (VT.isScalarInteger()) {
    // Omit the optimization if the subtarget has the M extension and the data
    // size exceeds XLen.
    if (Subtarget.hasStdExtM() && VT.getSizeInBits() > Subtarget.getXLen())
      return false;
    if (auto *ConstNode = dyn_cast<ConstantSDNode>(C.getNode())) {
      // Break the MUL to a SLLI and an ADD/SUB.
      const APInt &Imm = ConstNode->getAPIntValue();
      if ((Imm + 1).isPowerOf2() || (Imm - 1).isPowerOf2() ||
          (1 - Imm).isPowerOf2() || (-1 - Imm).isPowerOf2())
        return true;
      // Omit the following optimization if the subtarget has the M extension
      // and the data size >= XLen.
      if (Subtarget.hasStdExtM() && VT.getSizeInBits() >= Subtarget.getXLen())
        return false;
      // Break the MUL to two SLLI instructions and an ADD/SUB, if Imm needs
      // a pair of LUI/ADDI.
      if (!Imm.isSignedIntN(12) && Imm.countTrailingZeros() < 12) {
        APInt ImmS = Imm.ashr(Imm.countTrailingZeros());
        if ((ImmS + 1).isPowerOf2() || (ImmS - 1).isPowerOf2() ||
            (1 - ImmS).isPowerOf2())
          return true;
      }
    }
  }

  return false;
}
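
// Worked examples (illustrative): with Imm = 5, (Imm - 1) is a power of two,
// so x * 5 decomposes to (x << 2) + x; with Imm = 7, (Imm + 1) is a power of
// two, giving (x << 3) - x. For Imm = 4098 = 2049 << 1, which does not fit in
// a signed 12-bit immediate, ImmS = 2049 and (ImmS - 1) is a power of two, so
// x * 4098 becomes ((x << 11) + x) << 1: two SLLIs and an ADD instead of a
// LUI/ADDI pair plus a MUL.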

bool RISCVTargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    bool *Fast) const {
  if (!VT.isScalableVector())
    return false;

  EVT ElemVT = VT.getVectorElementType();
  if (Alignment >= ElemVT.getStoreSize()) {
    if (Fast)
      *Fast = true;
    return true;
  }

  return false;
}

bool RISCVTargetLowering::splitValueIntoRegisterParts(
    SelectionDAG &DAG, const SDLoc &DL, SDValue Val, SDValue *Parts,
    unsigned NumParts, MVT PartVT, Optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.hasValue();
  EVT ValueVT = Val.getValueType();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    // Cast the f16 to i16, extend to i32, pad with ones to make a float NaN,
    // and cast to f32.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::ANY_EXTEND, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::OR, DL, MVT::i32, Val,
                      DAG.getConstant(0xFFFF0000, DL, MVT::i32));
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f32, Val);
    Parts[0] = Val;
    return true;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      // If the element types are different, bitcast to the same element type
      // of PartVT first.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        EVT SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
        Val = DAG.getNode(ISD::BITCAST, DL, SameEltTypeVT, Val);
      }
      Val = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, PartVT, DAG.getUNDEF(PartVT),
                        Val, DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      Parts[0] = Val;
      return true;
    }
  }
  return false;
}
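
// Worked example (illustrative): the f16-in-f32 path implements the
// NaN-boxing the RISC-V psABI requires for halves passed in FPRs. For an f16
// value of 1.0 (bit pattern 0x3C00), the f32 register ends up holding
// 0xFFFF3C00, a quiet f32 NaN whose low 16 bits carry the original half.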

SDValue RISCVTargetLowering::joinRegisterPartsIntoValue(
    SelectionDAG &DAG, const SDLoc &DL, const SDValue *Parts, unsigned NumParts,
    MVT PartVT, EVT ValueVT, Optional<CallingConv::ID> CC) const {
  bool IsABIRegCopy = CC.hasValue();
  if (IsABIRegCopy && ValueVT == MVT::f16 && PartVT == MVT::f32) {
    SDValue Val = Parts[0];

    // Cast the f32 to i32, truncate to i16, and cast back to f16.
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::i32, Val);
    Val = DAG.getNode(ISD::TRUNCATE, DL, MVT::i16, Val);
    Val = DAG.getNode(ISD::BITCAST, DL, MVT::f16, Val);
    return Val;
  }

  if (ValueVT.isScalableVector() && PartVT.isScalableVector()) {
    LLVMContext &Context = *DAG.getContext();
    SDValue Val = Parts[0];
    EVT ValueEltVT = ValueVT.getVectorElementType();
    EVT PartEltVT = PartVT.getVectorElementType();
    unsigned ValueVTBitSize = ValueVT.getSizeInBits().getKnownMinSize();
    unsigned PartVTBitSize = PartVT.getSizeInBits().getKnownMinSize();
    if (PartVTBitSize % ValueVTBitSize == 0) {
      EVT SameEltTypeVT = ValueVT;
      // If the element types are different, extract with PartVT's element
      // type first and bitcast back to ValueVT afterwards.
      if (ValueEltVT != PartEltVT) {
        unsigned Count = ValueVTBitSize / PartEltVT.getSizeInBits();
        assert(Count != 0 && "The number of elements should not be zero.");
        SameEltTypeVT =
            EVT::getVectorVT(Context, PartEltVT, Count, /*IsScalable=*/true);
      }
      Val = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SameEltTypeVT, Val,
                        DAG.getConstant(0, DL, Subtarget.getXLenVT()));
      if (ValueEltVT != PartEltVT)
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      return Val;
    }
  }
  return SDValue();
}

#define GET_REGISTER_MATCHER
#include "RISCVGenAsmMatcher.inc"

Register
RISCVTargetLowering::getRegisterByName(const char *RegName, LLT VT,
                                       const MachineFunction &MF) const {
  Register Reg = MatchRegisterAltName(RegName);
  if (Reg == RISCV::NoRegister)
    Reg = MatchRegisterName(RegName);
  if (Reg == RISCV::NoRegister)
    report_fatal_error(
        Twine("Invalid register name \"" + StringRef(RegName) + "\"."));
  BitVector ReservedRegs = Subtarget.getRegisterInfo()->getReservedRegs(MF);
  if (!ReservedRegs.test(Reg) && !Subtarget.isRegisterReservedByUser(Reg))
    report_fatal_error(Twine("Trying to obtain non-reserved register \"" +
                             StringRef(RegName) + "\"."));
  return Reg;
}
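
// For illustration (not part of this file): this hook serves the
// llvm.read_register / llvm.write_register intrinsics, e.g.
//
//   %sp = call i64 @llvm.read_register.i64(metadata !0)
//   ...
//   !0 = !{!"sp"}
//
// Requesting a register that is neither globally reserved nor reserved by
// the user (for example a plain "a0") triggers the fatal error above, since
// its contents are not stable across the function.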

namespace llvm {
namespace RISCVVIntrinsicsTable {

#define GET_RISCVVIntrinsicsTable_IMPL
#include "RISCVGenSearchableTables.inc"

} // namespace RISCVVIntrinsicsTable

} // namespace llvm