Home | History | Annotate | Line # | Download | only in GlobalISel
      1 //===-- llvm/CodeGen/GlobalISel/LegalizerHelper.cpp -----------------------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 /// \file This file implements the LegalizerHelper class to legalize
     10 /// individual instructions and the LegalizeMachineIR wrapper pass for the
     11 /// primary legalization.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h"
     16 #include "llvm/CodeGen/GlobalISel/CallLowering.h"
     17 #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h"
     18 #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h"
     19 #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h"
     20 #include "llvm/CodeGen/GlobalISel/Utils.h"
     21 #include "llvm/CodeGen/MachineRegisterInfo.h"
     22 #include "llvm/CodeGen/TargetFrameLowering.h"
     23 #include "llvm/CodeGen/TargetInstrInfo.h"
     24 #include "llvm/CodeGen/TargetLowering.h"
     25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     26 #include "llvm/Support/Debug.h"
     27 #include "llvm/Support/MathExtras.h"
     28 #include "llvm/Support/raw_ostream.h"
     29 
     30 #define DEBUG_TYPE "legalizer"
     31 
     32 using namespace llvm;
     33 using namespace LegalizeActions;
     34 using namespace MIPatternMatch;
     35 
     36 /// Try to break down \p OrigTy into \p NarrowTy sized pieces.
     37 ///
     38 /// Returns the number of \p NarrowTy elements needed to reconstruct \p OrigTy,
     39 /// with any leftover piece as type \p LeftoverTy
     40 ///
     41 /// Returns -1 in the first element of the pair if the breakdown is not
     42 /// satisfiable.
     43 static std::pair<int, int>
     44 getNarrowTypeBreakDown(LLT OrigTy, LLT NarrowTy, LLT &LeftoverTy) {
     45   assert(!LeftoverTy.isValid() && "this is an out argument");
     46 
     47   unsigned Size = OrigTy.getSizeInBits();
     48   unsigned NarrowSize = NarrowTy.getSizeInBits();
     49   unsigned NumParts = Size / NarrowSize;
     50   unsigned LeftoverSize = Size - NumParts * NarrowSize;
     51   assert(Size > NarrowSize);
     52 
     53   if (LeftoverSize == 0)
     54     return {NumParts, 0};
     55 
     56   if (NarrowTy.isVector()) {
     57     unsigned EltSize = OrigTy.getScalarSizeInBits();
     58     if (LeftoverSize % EltSize != 0)
     59       return {-1, -1};
     60     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
     61   } else {
     62     LeftoverTy = LLT::scalar(LeftoverSize);
     63   }
     64 
     65   int NumLeftover = LeftoverSize / LeftoverTy.getSizeInBits();
     66   return std::make_pair(NumParts, NumLeftover);
     67 }
     68 
     69 static Type *getFloatTypeForLLT(LLVMContext &Ctx, LLT Ty) {
     70 
     71   if (!Ty.isScalar())
     72     return nullptr;
     73 
     74   switch (Ty.getSizeInBits()) {
     75   case 16:
     76     return Type::getHalfTy(Ctx);
     77   case 32:
     78     return Type::getFloatTy(Ctx);
     79   case 64:
     80     return Type::getDoubleTy(Ctx);
     81   case 80:
     82     return Type::getX86_FP80Ty(Ctx);
     83   case 128:
     84     return Type::getFP128Ty(Ctx);
     85   default:
     86     return nullptr;
     87   }
     88 }
     89 
/// Construct a helper operating on \p MF that emits instructions through
/// \p Builder and is given \p Observer as its change observer.
///
/// The LegalizerInfo and TargetLowering are both taken from the subtarget of
/// \p MF; the register info comes from \p MF itself.
LegalizerHelper::LegalizerHelper(MachineFunction &MF,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &Builder)
    : MIRBuilder(Builder), Observer(Observer), MRI(MF.getRegInfo()),
      LI(*MF.getSubtarget().getLegalizerInfo()),
      TLI(*MF.getSubtarget().getTargetLowering()) { }
     96 
/// Construct a helper operating on \p MF that uses the caller-supplied
/// legalization rules \p LI instead of the ones owned by the subtarget.
///
/// Instructions are emitted through \p B and \p Observer is stored as the
/// change observer. The TargetLowering is still taken from the subtarget of
/// \p MF.
LegalizerHelper::LegalizerHelper(MachineFunction &MF, const LegalizerInfo &LI,
                                 GISelChangeObserver &Observer,
                                 MachineIRBuilder &B)
  : MIRBuilder(B), Observer(Observer), MRI(MF.getRegInfo()), LI(LI),
    TLI(*MF.getSubtarget().getTargetLowering()) { }
    102 
    103 LegalizerHelper::LegalizeResult
    104 LegalizerHelper::legalizeInstrStep(MachineInstr &MI) {
    105   LLVM_DEBUG(dbgs() << "Legalizing: " << MI);
    106 
    107   MIRBuilder.setInstrAndDebugLoc(MI);
    108 
    109   if (MI.getOpcode() == TargetOpcode::G_INTRINSIC ||
    110       MI.getOpcode() == TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS)
    111     return LI.legalizeIntrinsic(*this, MI) ? Legalized : UnableToLegalize;
    112   auto Step = LI.getAction(MI, MRI);
    113   switch (Step.Action) {
    114   case Legal:
    115     LLVM_DEBUG(dbgs() << ".. Already legal\n");
    116     return AlreadyLegal;
    117   case Libcall:
    118     LLVM_DEBUG(dbgs() << ".. Convert to libcall\n");
    119     return libcall(MI);
    120   case NarrowScalar:
    121     LLVM_DEBUG(dbgs() << ".. Narrow scalar\n");
    122     return narrowScalar(MI, Step.TypeIdx, Step.NewType);
    123   case WidenScalar:
    124     LLVM_DEBUG(dbgs() << ".. Widen scalar\n");
    125     return widenScalar(MI, Step.TypeIdx, Step.NewType);
    126   case Bitcast:
    127     LLVM_DEBUG(dbgs() << ".. Bitcast type\n");
    128     return bitcast(MI, Step.TypeIdx, Step.NewType);
    129   case Lower:
    130     LLVM_DEBUG(dbgs() << ".. Lower\n");
    131     return lower(MI, Step.TypeIdx, Step.NewType);
    132   case FewerElements:
    133     LLVM_DEBUG(dbgs() << ".. Reduce number of elements\n");
    134     return fewerElementsVector(MI, Step.TypeIdx, Step.NewType);
    135   case MoreElements:
    136     LLVM_DEBUG(dbgs() << ".. Increase number of elements\n");
    137     return moreElementsVector(MI, Step.TypeIdx, Step.NewType);
    138   case Custom:
    139     LLVM_DEBUG(dbgs() << ".. Custom legalization\n");
    140     return LI.legalizeCustom(*this, MI) ? Legalized : UnableToLegalize;
    141   default:
    142     LLVM_DEBUG(dbgs() << ".. Unable to legalize\n");
    143     return UnableToLegalize;
    144   }
    145 }
    146 
    147 void LegalizerHelper::extractParts(Register Reg, LLT Ty, int NumParts,
    148                                    SmallVectorImpl<Register> &VRegs) {
    149   for (int i = 0; i < NumParts; ++i)
    150     VRegs.push_back(MRI.createGenericVirtualRegister(Ty));
    151   MIRBuilder.buildUnmerge(VRegs, Reg);
    152 }
    153 
    154 bool LegalizerHelper::extractParts(Register Reg, LLT RegTy,
    155                                    LLT MainTy, LLT &LeftoverTy,
    156                                    SmallVectorImpl<Register> &VRegs,
    157                                    SmallVectorImpl<Register> &LeftoverRegs) {
    158   assert(!LeftoverTy.isValid() && "this is an out argument");
    159 
    160   unsigned RegSize = RegTy.getSizeInBits();
    161   unsigned MainSize = MainTy.getSizeInBits();
    162   unsigned NumParts = RegSize / MainSize;
    163   unsigned LeftoverSize = RegSize - NumParts * MainSize;
    164 
    165   // Use an unmerge when possible.
    166   if (LeftoverSize == 0) {
    167     for (unsigned I = 0; I < NumParts; ++I)
    168       VRegs.push_back(MRI.createGenericVirtualRegister(MainTy));
    169     MIRBuilder.buildUnmerge(VRegs, Reg);
    170     return true;
    171   }
    172 
    173   if (MainTy.isVector()) {
    174     unsigned EltSize = MainTy.getScalarSizeInBits();
    175     if (LeftoverSize % EltSize != 0)
    176       return false;
    177     LeftoverTy = LLT::scalarOrVector(LeftoverSize / EltSize, EltSize);
    178   } else {
    179     LeftoverTy = LLT::scalar(LeftoverSize);
    180   }
    181 
    182   // For irregular sizes, extract the individual parts.
    183   for (unsigned I = 0; I != NumParts; ++I) {
    184     Register NewReg = MRI.createGenericVirtualRegister(MainTy);
    185     VRegs.push_back(NewReg);
    186     MIRBuilder.buildExtract(NewReg, Reg, MainSize * I);
    187   }
    188 
    189   for (unsigned Offset = MainSize * NumParts; Offset < RegSize;
    190        Offset += LeftoverSize) {
    191     Register NewReg = MRI.createGenericVirtualRegister(LeftoverTy);
    192     LeftoverRegs.push_back(NewReg);
    193     MIRBuilder.buildExtract(NewReg, Reg, Offset);
    194   }
    195 
    196   return true;
    197 }
    198 
    199 void LegalizerHelper::insertParts(Register DstReg,
    200                                   LLT ResultTy, LLT PartTy,
    201                                   ArrayRef<Register> PartRegs,
    202                                   LLT LeftoverTy,
    203                                   ArrayRef<Register> LeftoverRegs) {
    204   if (!LeftoverTy.isValid()) {
    205     assert(LeftoverRegs.empty());
    206 
    207     if (!ResultTy.isVector()) {
    208       MIRBuilder.buildMerge(DstReg, PartRegs);
    209       return;
    210     }
    211 
    212     if (PartTy.isVector())
    213       MIRBuilder.buildConcatVectors(DstReg, PartRegs);
    214     else
    215       MIRBuilder.buildBuildVector(DstReg, PartRegs);
    216     return;
    217   }
    218 
    219   unsigned PartSize = PartTy.getSizeInBits();
    220   unsigned LeftoverPartSize = LeftoverTy.getSizeInBits();
    221 
    222   Register CurResultReg = MRI.createGenericVirtualRegister(ResultTy);
    223   MIRBuilder.buildUndef(CurResultReg);
    224 
    225   unsigned Offset = 0;
    226   for (Register PartReg : PartRegs) {
    227     Register NewResultReg = MRI.createGenericVirtualRegister(ResultTy);
    228     MIRBuilder.buildInsert(NewResultReg, CurResultReg, PartReg, Offset);
    229     CurResultReg = NewResultReg;
    230     Offset += PartSize;
    231   }
    232 
    233   for (unsigned I = 0, E = LeftoverRegs.size(); I != E; ++I) {
    234     // Use the original output register for the final insert to avoid a copy.
    235     Register NewResultReg = (I + 1 == E) ?
    236       DstReg : MRI.createGenericVirtualRegister(ResultTy);
    237 
    238     MIRBuilder.buildInsert(NewResultReg, CurResultReg, LeftoverRegs[I], Offset);
    239     CurResultReg = NewResultReg;
    240     Offset += LeftoverPartSize;
    241   }
    242 }
    243 
    244 /// Append the result registers of G_UNMERGE_VALUES \p MI to \p Regs.
    245 static void getUnmergeResults(SmallVectorImpl<Register> &Regs,
    246                               const MachineInstr &MI) {
    247   assert(MI.getOpcode() == TargetOpcode::G_UNMERGE_VALUES);
    248 
    249   const int StartIdx = Regs.size();
    250   const int NumResults = MI.getNumOperands() - 1;
    251   Regs.resize(Regs.size() + NumResults);
    252   for (int I = 0; I != NumResults; ++I)
    253     Regs[StartIdx + I] = MI.getOperand(I).getReg();
    254 }
    255 
    256 void LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts,
    257                                      LLT GCDTy, Register SrcReg) {
    258   LLT SrcTy = MRI.getType(SrcReg);
    259   if (SrcTy == GCDTy) {
    260     // If the source already evenly divides the result type, we don't need to do
    261     // anything.
    262     Parts.push_back(SrcReg);
    263   } else {
    264     // Need to split into common type sized pieces.
    265     auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
    266     getUnmergeResults(Parts, *Unmerge);
    267   }
    268 }
    269 
    270 LLT LegalizerHelper::extractGCDType(SmallVectorImpl<Register> &Parts, LLT DstTy,
    271                                     LLT NarrowTy, Register SrcReg) {
    272   LLT SrcTy = MRI.getType(SrcReg);
    273   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
    274   extractGCDType(Parts, GCDTy, SrcReg);
    275   return GCDTy;
    276 }
    277 
/// Regroup the pieces in \p VRegs into \p NarrowTy sized registers that
/// together cover the LCM type of \p DstTy and \p NarrowTy, padding when the
/// supplied pieces do not fill that type.
///
/// \param DstTy       Type whose LCM with \p NarrowTy determines the total
///                    width to cover.
/// \param NarrowTy    Type of each register produced.
/// \param GCDTy       Type used for padding pieces; presumably also the type
///                    of each incoming register in \p VRegs (they are merged
///                    \p NarrowTy-size / \p GCDTy-size at a time) — confirm
///                    against callers.
/// \param VRegs       In: the source pieces. Out: replaced by the
///                    LCM-size / NarrowTy-size registers built here.
/// \param PadStrategy One of G_ZEXT (pad with zero), G_ANYEXT (pad with
///                    undef) or G_SEXT (pad with copies of the sign bit of
///                    the last source piece).
/// \returns the LCM type of \p DstTy and \p NarrowTy.
LLT LegalizerHelper::buildLCMMergePieces(LLT DstTy, LLT NarrowTy, LLT GCDTy,
                                         SmallVectorImpl<Register> &VRegs,
                                         unsigned PadStrategy) {
  LLT LCMTy = getLCMType(DstTy, NarrowTy);

  // NumParts: NarrowTy registers needed to cover LCMTy.
  // NumSubParts: GCDTy pieces needed to form one NarrowTy register.
  int NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
  int NumSubParts = NarrowTy.getSizeInBits() / GCDTy.getSizeInBits();
  int NumOrigSrc = VRegs.size();

  Register PadReg;

  // Get a value we can use to pad the source value if the sources won't evenly
  // cover the result type.
  if (NumOrigSrc < NumParts * NumSubParts) {
    if (PadStrategy == TargetOpcode::G_ZEXT)
      PadReg = MIRBuilder.buildConstant(GCDTy, 0).getReg(0);
    else if (PadStrategy == TargetOpcode::G_ANYEXT)
      PadReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
    else {
      assert(PadStrategy == TargetOpcode::G_SEXT);

      // Shift the sign bit of the low register through the high register.
      auto ShiftAmt =
        MIRBuilder.buildConstant(LLT::scalar(64), GCDTy.getSizeInBits() - 1);
      PadReg = MIRBuilder.buildAShr(GCDTy, VRegs.back(), ShiftAmt).getReg(0);
    }
  }

  // Registers for the final merge to be produced.
  SmallVector<Register, 4> Remerge(NumParts);

  // Registers needed for intermediate merges, which will be merged into a
  // source for Remerge.
  SmallVector<Register, 4> SubMerge(NumSubParts);

  // Once we've fully read off the end of the original source bits, we can reuse
  // the same high bits for remaining padding elements.
  Register AllPadReg;

  // Build merges to the LCM type to cover the original result type.
  for (int I = 0; I != NumParts; ++I) {
    bool AllMergePartsArePadding = true;

    // Build the requested merges to the requested type.
    for (int J = 0; J != NumSubParts; ++J) {
      int Idx = I * NumSubParts + J;
      // Past the end of the original pieces: fill with the padding value.
      if (Idx >= NumOrigSrc) {
        SubMerge[J] = PadReg;
        continue;
      }

      SubMerge[J] = VRegs[Idx];

      // There are meaningful bits here we can't reuse later.
      AllMergePartsArePadding = false;
    }

    // If we've filled up a complete piece with padding bits, we can directly
    // emit the natural sized constant if applicable, rather than a merge of
    // smaller constants.
    if (AllMergePartsArePadding && !AllPadReg) {
      if (PadStrategy == TargetOpcode::G_ANYEXT)
        AllPadReg = MIRBuilder.buildUndef(NarrowTy).getReg(0);
      else if (PadStrategy == TargetOpcode::G_ZEXT)
        AllPadReg = MIRBuilder.buildConstant(NarrowTy, 0).getReg(0);

      // If this is a sign extension, we can't materialize a trivial constant
      // with the right type and have to produce a merge.
    }

    if (AllPadReg) {
      // Avoid creating additional instructions if we're just adding additional
      // copies of padding bits.
      Remerge[I] = AllPadReg;
      continue;
    }

    // A single sub-part already has the requested type, so no merge is needed.
    if (NumSubParts == 1)
      Remerge[I] = SubMerge[0];
    else
      Remerge[I] = MIRBuilder.buildMerge(NarrowTy, SubMerge).getReg(0);

    // In the sign extend padding case, re-use the first all-signbit merge.
    if (AllMergePartsArePadding && !AllPadReg)
      AllPadReg = Remerge[I];
  }

  // Hand the regrouped registers back to the caller in place of the inputs.
  VRegs = std::move(Remerge);
  return LCMTy;
}
    368 
    369 void LegalizerHelper::buildWidenedRemergeToDst(Register DstReg, LLT LCMTy,
    370                                                ArrayRef<Register> RemergeRegs) {
    371   LLT DstTy = MRI.getType(DstReg);
    372 
    373   // Create the merge to the widened source, and extract the relevant bits into
    374   // the result.
    375 
    376   if (DstTy == LCMTy) {
    377     MIRBuilder.buildMerge(DstReg, RemergeRegs);
    378     return;
    379   }
    380 
    381   auto Remerge = MIRBuilder.buildMerge(LCMTy, RemergeRegs);
    382   if (DstTy.isScalar() && LCMTy.isScalar()) {
    383     MIRBuilder.buildTrunc(DstReg, Remerge);
    384     return;
    385   }
    386 
    387   if (LCMTy.isVector()) {
    388     unsigned NumDefs = LCMTy.getSizeInBits() / DstTy.getSizeInBits();
    389     SmallVector<Register, 8> UnmergeDefs(NumDefs);
    390     UnmergeDefs[0] = DstReg;
    391     for (unsigned I = 1; I != NumDefs; ++I)
    392       UnmergeDefs[I] = MRI.createGenericVirtualRegister(DstTy);
    393 
    394     MIRBuilder.buildUnmerge(UnmergeDefs,
    395                             MIRBuilder.buildMerge(LCMTy, RemergeRegs));
    396     return;
    397   }
    398 
    399   llvm_unreachable("unhandled case");
    400 }
    401 
/// Map a generic opcode and an operand size in bits to the runtime library
/// call that implements it.
///
/// The RTLIB enumerator is formed by pasting \p Size onto a per-opcode prefix
/// (e.g. G_SDIV with size 32 yields RTLIB::SDIV_I32). Integer operations accept
/// sizes 32, 64 and 128; floating-point operations accept 32, 64, 80 and 128.
/// An unsupported size or an opcode not listed below hits llvm_unreachable, so
/// callers must filter their inputs first.
///
/// NOTE(review): the helper macros are not #undef'd inside this function, so
/// they remain defined for whatever follows it in the file.
static RTLIB::Libcall getRTLibDesc(unsigned Opcode, unsigned Size) {
// Return RTLIB::<LibcallPrefix><Size> for the integer sizes 32, 64 and 128.
#define RTLIBCASE_INT(LibcallPrefix)                                           \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

// Return RTLIB::<LibcallPrefix><Size> for the sizes 32, 64, 80 and 128.
#define RTLIBCASE(LibcallPrefix)                                               \
  do {                                                                         \
    switch (Size) {                                                            \
    case 32:                                                                   \
      return RTLIB::LibcallPrefix##32;                                         \
    case 64:                                                                   \
      return RTLIB::LibcallPrefix##64;                                         \
    case 80:                                                                   \
      return RTLIB::LibcallPrefix##80;                                         \
    case 128:                                                                  \
      return RTLIB::LibcallPrefix##128;                                        \
    default:                                                                   \
      llvm_unreachable("unexpected size");                                     \
    }                                                                          \
  } while (0)

  // Every case returns (or is unreachable) from inside its macro, so there is
  // no fallthrough between cases.
  switch (Opcode) {
  case TargetOpcode::G_SDIV:
    RTLIBCASE_INT(SDIV_I);
  case TargetOpcode::G_UDIV:
    RTLIBCASE_INT(UDIV_I);
  case TargetOpcode::G_SREM:
    RTLIBCASE_INT(SREM_I);
  case TargetOpcode::G_UREM:
    RTLIBCASE_INT(UREM_I);
  case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    RTLIBCASE_INT(CTLZ_I);
  case TargetOpcode::G_FADD:
    RTLIBCASE(ADD_F);
  case TargetOpcode::G_FSUB:
    RTLIBCASE(SUB_F);
  case TargetOpcode::G_FMUL:
    RTLIBCASE(MUL_F);
  case TargetOpcode::G_FDIV:
    RTLIBCASE(DIV_F);
  case TargetOpcode::G_FEXP:
    RTLIBCASE(EXP_F);
  case TargetOpcode::G_FEXP2:
    RTLIBCASE(EXP2_F);
  case TargetOpcode::G_FREM:
    RTLIBCASE(REM_F);
  case TargetOpcode::G_FPOW:
    RTLIBCASE(POW_F);
  case TargetOpcode::G_FMA:
    RTLIBCASE(FMA_F);
  case TargetOpcode::G_FSIN:
    RTLIBCASE(SIN_F);
  case TargetOpcode::G_FCOS:
    RTLIBCASE(COS_F);
  case TargetOpcode::G_FLOG10:
    RTLIBCASE(LOG10_F);
  case TargetOpcode::G_FLOG:
    RTLIBCASE(LOG_F);
  case TargetOpcode::G_FLOG2:
    RTLIBCASE(LOG2_F);
  case TargetOpcode::G_FCEIL:
    RTLIBCASE(CEIL_F);
  case TargetOpcode::G_FFLOOR:
    RTLIBCASE(FLOOR_F);
  case TargetOpcode::G_FMINNUM:
    RTLIBCASE(FMIN_F);
  case TargetOpcode::G_FMAXNUM:
    RTLIBCASE(FMAX_F);
  case TargetOpcode::G_FSQRT:
    RTLIBCASE(SQRT_F);
  case TargetOpcode::G_FRINT:
    RTLIBCASE(RINT_F);
  case TargetOpcode::G_FNEARBYINT:
    RTLIBCASE(NEARBYINT_F);
  case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
    RTLIBCASE(ROUNDEVEN_F);
  }
  llvm_unreachable("Unknown libcall function");
}
    491 
    492 /// True if an instruction is in tail position in its caller. Intended for
    493 /// legalizing libcalls as tail calls when possible.
    494 static bool isLibCallInTailPosition(const TargetInstrInfo &TII,
    495                                     MachineInstr &MI) {
    496   MachineBasicBlock &MBB = *MI.getParent();
    497   const Function &F = MBB.getParent()->getFunction();
    498 
    499   // Conservatively require the attributes of the call to match those of
    500   // the return. Ignore NoAlias and NonNull because they don't affect the
    501   // call sequence.
    502   AttributeList CallerAttrs = F.getAttributes();
    503   if (AttrBuilder(CallerAttrs, AttributeList::ReturnIndex)
    504           .removeAttribute(Attribute::NoAlias)
    505           .removeAttribute(Attribute::NonNull)
    506           .hasAttributes())
    507     return false;
    508 
    509   // It's not safe to eliminate the sign / zero extension of the return value.
    510   if (CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::ZExt) ||
    511       CallerAttrs.hasAttribute(AttributeList::ReturnIndex, Attribute::SExt))
    512     return false;
    513 
    514   // Only tail call if the following instruction is a standard return.
    515   auto Next = next_nodbg(MI.getIterator(), MBB.instr_end());
    516   if (Next == MBB.instr_end() || TII.isTailCall(*Next) || !Next->isReturn())
    517     return false;
    518 
    519   return true;
    520 }
    521 
    522 LegalizerHelper::LegalizeResult
    523 llvm::createLibcall(MachineIRBuilder &MIRBuilder, const char *Name,
    524                     const CallLowering::ArgInfo &Result,
    525                     ArrayRef<CallLowering::ArgInfo> Args,
    526                     const CallingConv::ID CC) {
    527   auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
    528 
    529   CallLowering::CallLoweringInfo Info;
    530   Info.CallConv = CC;
    531   Info.Callee = MachineOperand::CreateES(Name);
    532   Info.OrigRet = Result;
    533   std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
    534   if (!CLI.lowerCall(MIRBuilder, Info))
    535     return LegalizerHelper::UnableToLegalize;
    536 
    537   return LegalizerHelper::Legalized;
    538 }
    539 
    540 LegalizerHelper::LegalizeResult
    541 llvm::createLibcall(MachineIRBuilder &MIRBuilder, RTLIB::Libcall Libcall,
    542                     const CallLowering::ArgInfo &Result,
    543                     ArrayRef<CallLowering::ArgInfo> Args) {
    544   auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
    545   const char *Name = TLI.getLibcallName(Libcall);
    546   const CallingConv::ID CC = TLI.getLibcallCallingConv(Libcall);
    547   return createLibcall(MIRBuilder, Name, Result, Args, CC);
    548 }
    549 
    550 // Useful for libcalls where all operands have the same type.
    551 static LegalizerHelper::LegalizeResult
    552 simpleLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, unsigned Size,
    553               Type *OpType) {
    554   auto Libcall = getRTLibDesc(MI.getOpcode(), Size);
    555 
    556   SmallVector<CallLowering::ArgInfo, 3> Args;
    557   for (unsigned i = 1; i < MI.getNumOperands(); i++)
    558     Args.push_back({MI.getOperand(i).getReg(), OpType});
    559   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), OpType},
    560                        Args);
    561 }
    562 
/// Lower a G_BZERO, G_MEMCPY, G_MEMMOVE or G_MEMSET instruction \p MI to a call
/// to the corresponding runtime library routine.
///
/// Every operand of \p MI except the last becomes a call argument; the last
/// operand is an immediate requesting a tail call. The call returns void. If
/// the call was lowered as a tail call, the instructions following \p MI in
/// its block (the old return and any debug instructions) are erased.
///
/// \p MI itself is not erased here.
///
/// \returns UnableToLegalize if the opcode is not one of the four above, the
/// target has no name for the libcall, or call lowering fails; Legalized
/// otherwise.
LegalizerHelper::LegalizeResult
llvm::createMemLibcall(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                       MachineInstr &MI) {
  auto &Ctx = MIRBuilder.getMF().getFunction().getContext();

  SmallVector<CallLowering::ArgInfo, 3> Args;
  // Add all the args, except for the last which is an imm denoting 'tail'.
  for (unsigned i = 0; i < MI.getNumOperands() - 1; ++i) {
    Register Reg = MI.getOperand(i).getReg();

    // Need derive an IR type for call lowering.
    // Pointers become i8* in the operand's address space; everything else
    // becomes an integer of the operand's bit width.
    LLT OpLLT = MRI.getType(Reg);
    Type *OpTy = nullptr;
    if (OpLLT.isPointer())
      OpTy = Type::getInt8PtrTy(Ctx, OpLLT.getAddressSpace());
    else
      OpTy = IntegerType::get(Ctx, OpLLT.getSizeInBits());
    Args.push_back({Reg, OpTy});
  }

  auto &CLI = *MIRBuilder.getMF().getSubtarget().getCallLowering();
  auto &TLI = *MIRBuilder.getMF().getSubtarget().getTargetLowering();
  // Pick the runtime routine matching the memory opcode.
  RTLIB::Libcall RTLibcall;
  unsigned Opc = MI.getOpcode();
  switch (Opc) {
  case TargetOpcode::G_BZERO:
    RTLibcall = RTLIB::BZERO;
    break;
  case TargetOpcode::G_MEMCPY:
    RTLibcall = RTLIB::MEMCPY;
    break;
  case TargetOpcode::G_MEMMOVE:
    RTLibcall = RTLIB::MEMMOVE;
    break;
  case TargetOpcode::G_MEMSET:
    RTLibcall = RTLIB::MEMSET;
    break;
  default:
    return LegalizerHelper::UnableToLegalize;
  }
  const char *Name = TLI.getLibcallName(RTLibcall);

  // Unsupported libcall on the target.
  if (!Name) {
    LLVM_DEBUG(dbgs() << ".. .. Could not find libcall name for "
                      << MIRBuilder.getTII().getName(Opc) << "\n");
    return LegalizerHelper::UnableToLegalize;
  }

  CallLowering::CallLoweringInfo Info;
  Info.CallConv = TLI.getLibcallCallingConv(RTLibcall);
  Info.Callee = MachineOperand::CreateES(Name);
  Info.OrigRet = CallLowering::ArgInfo({0}, Type::getVoidTy(Ctx));
  // Only request a tail call when the instruction asks for one (last operand)
  // and it is actually in tail position.
  Info.IsTailCall = MI.getOperand(MI.getNumOperands() - 1).getImm() &&
                    isLibCallInTailPosition(MIRBuilder.getTII(), MI);

  std::copy(Args.begin(), Args.end(), std::back_inserter(Info.OrigArgs));
  if (!CLI.lowerCall(MIRBuilder, Info))
    return LegalizerHelper::UnableToLegalize;

  if (Info.LoweredTailCall) {
    assert(Info.IsTailCall && "Lowered tail call when it wasn't a tail call?");
    // We must have a return following the call (or debug insts) to get past
    // isLibCallInTailPosition.
    // Erase everything after MI until it is the last instruction in its block.
    do {
      MachineInstr *Next = MI.getNextNode();
      assert(Next && (Next->isReturn() || Next->isDebugInstr()) &&
             "Expected instr following MI to be return or debug inst?");
      // We lowered a tail call, so the call is now the return from the block.
      // Delete the old return.
      Next->eraseFromParent();
    } while (MI.getNextNode());
  }

  return LegalizerHelper::Legalized;
}
    639 
    640 static RTLIB::Libcall getConvRTLibDesc(unsigned Opcode, Type *ToType,
    641                                        Type *FromType) {
    642   auto ToMVT = MVT::getVT(ToType);
    643   auto FromMVT = MVT::getVT(FromType);
    644 
    645   switch (Opcode) {
    646   case TargetOpcode::G_FPEXT:
    647     return RTLIB::getFPEXT(FromMVT, ToMVT);
    648   case TargetOpcode::G_FPTRUNC:
    649     return RTLIB::getFPROUND(FromMVT, ToMVT);
    650   case TargetOpcode::G_FPTOSI:
    651     return RTLIB::getFPTOSINT(FromMVT, ToMVT);
    652   case TargetOpcode::G_FPTOUI:
    653     return RTLIB::getFPTOUINT(FromMVT, ToMVT);
    654   case TargetOpcode::G_SITOFP:
    655     return RTLIB::getSINTTOFP(FromMVT, ToMVT);
    656   case TargetOpcode::G_UITOFP:
    657     return RTLIB::getUINTTOFP(FromMVT, ToMVT);
    658   }
    659   llvm_unreachable("Unsupported libcall function");
    660 }
    661 
    662 static LegalizerHelper::LegalizeResult
    663 conversionLibcall(MachineInstr &MI, MachineIRBuilder &MIRBuilder, Type *ToType,
    664                   Type *FromType) {
    665   RTLIB::Libcall Libcall = getConvRTLibDesc(MI.getOpcode(), ToType, FromType);
    666   return createLibcall(MIRBuilder, Libcall, {MI.getOperand(0).getReg(), ToType},
    667                        {{MI.getOperand(1).getReg(), FromType}});
    668 }
    669 
    670 LegalizerHelper::LegalizeResult
    671 LegalizerHelper::libcall(MachineInstr &MI) {
    672   LLT LLTy = MRI.getType(MI.getOperand(0).getReg());
    673   unsigned Size = LLTy.getSizeInBits();
    674   auto &Ctx = MIRBuilder.getMF().getFunction().getContext();
    675 
    676   switch (MI.getOpcode()) {
    677   default:
    678     return UnableToLegalize;
    679   case TargetOpcode::G_SDIV:
    680   case TargetOpcode::G_UDIV:
    681   case TargetOpcode::G_SREM:
    682   case TargetOpcode::G_UREM:
    683   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
    684     Type *HLTy = IntegerType::get(Ctx, Size);
    685     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    686     if (Status != Legalized)
    687       return Status;
    688     break;
    689   }
    690   case TargetOpcode::G_FADD:
    691   case TargetOpcode::G_FSUB:
    692   case TargetOpcode::G_FMUL:
    693   case TargetOpcode::G_FDIV:
    694   case TargetOpcode::G_FMA:
    695   case TargetOpcode::G_FPOW:
    696   case TargetOpcode::G_FREM:
    697   case TargetOpcode::G_FCOS:
    698   case TargetOpcode::G_FSIN:
    699   case TargetOpcode::G_FLOG10:
    700   case TargetOpcode::G_FLOG:
    701   case TargetOpcode::G_FLOG2:
    702   case TargetOpcode::G_FEXP:
    703   case TargetOpcode::G_FEXP2:
    704   case TargetOpcode::G_FCEIL:
    705   case TargetOpcode::G_FFLOOR:
    706   case TargetOpcode::G_FMINNUM:
    707   case TargetOpcode::G_FMAXNUM:
    708   case TargetOpcode::G_FSQRT:
    709   case TargetOpcode::G_FRINT:
    710   case TargetOpcode::G_FNEARBYINT:
    711   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
    712     Type *HLTy = getFloatTypeForLLT(Ctx, LLTy);
    713     if (!HLTy || (Size != 32 && Size != 64 && Size != 80 && Size != 128)) {
    714       LLVM_DEBUG(dbgs() << "No libcall available for type " << LLTy << ".\n");
    715       return UnableToLegalize;
    716     }
    717     auto Status = simpleLibcall(MI, MIRBuilder, Size, HLTy);
    718     if (Status != Legalized)
    719       return Status;
    720     break;
    721   }
    722   case TargetOpcode::G_FPEXT:
    723   case TargetOpcode::G_FPTRUNC: {
    724     Type *FromTy = getFloatTypeForLLT(Ctx,  MRI.getType(MI.getOperand(1).getReg()));
    725     Type *ToTy = getFloatTypeForLLT(Ctx, MRI.getType(MI.getOperand(0).getReg()));
    726     if (!FromTy || !ToTy)
    727       return UnableToLegalize;
    728     LegalizeResult Status = conversionLibcall(MI, MIRBuilder, ToTy, FromTy );
    729     if (Status != Legalized)
    730       return Status;
    731     break;
    732   }
    733   case TargetOpcode::G_FPTOSI:
    734   case TargetOpcode::G_FPTOUI: {
    735     // FIXME: Support other types
    736     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    737     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    738     if ((ToSize != 32 && ToSize != 64) || (FromSize != 32 && FromSize != 64))
    739       return UnableToLegalize;
    740     LegalizeResult Status = conversionLibcall(
    741         MI, MIRBuilder,
    742         ToSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx),
    743         FromSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx));
    744     if (Status != Legalized)
    745       return Status;
    746     break;
    747   }
    748   case TargetOpcode::G_SITOFP:
    749   case TargetOpcode::G_UITOFP: {
    750     // FIXME: Support other types
    751     unsigned FromSize = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    752     unsigned ToSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    753     if ((FromSize != 32 && FromSize != 64) || (ToSize != 32 && ToSize != 64))
    754       return UnableToLegalize;
    755     LegalizeResult Status = conversionLibcall(
    756         MI, MIRBuilder,
    757         ToSize == 64 ? Type::getDoubleTy(Ctx) : Type::getFloatTy(Ctx),
    758         FromSize == 32 ? Type::getInt32Ty(Ctx) : Type::getInt64Ty(Ctx));
    759     if (Status != Legalized)
    760       return Status;
    761     break;
    762   }
    763   case TargetOpcode::G_BZERO:
    764   case TargetOpcode::G_MEMCPY:
    765   case TargetOpcode::G_MEMMOVE:
    766   case TargetOpcode::G_MEMSET: {
    767     LegalizeResult Result =
    768         createMemLibcall(MIRBuilder, *MIRBuilder.getMRI(), MI);
    769     if (Result != Legalized)
    770       return Result;
    771     MI.eraseFromParent();
    772     return Result;
    773   }
    774   }
    775 
    776   MI.eraseFromParent();
    777   return Legalized;
    778 }
    779 
    780 LegalizerHelper::LegalizeResult LegalizerHelper::narrowScalar(MachineInstr &MI,
    781                                                               unsigned TypeIdx,
    782                                                               LLT NarrowTy) {
    783   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
    784   uint64_t NarrowSize = NarrowTy.getSizeInBits();
    785 
    786   switch (MI.getOpcode()) {
    787   default:
    788     return UnableToLegalize;
    789   case TargetOpcode::G_IMPLICIT_DEF: {
    790     Register DstReg = MI.getOperand(0).getReg();
    791     LLT DstTy = MRI.getType(DstReg);
    792 
    793     // If SizeOp0 is not an exact multiple of NarrowSize, emit
    794     // G_ANYEXT(G_IMPLICIT_DEF). Cast result to vector if needed.
    795     // FIXME: Although this would also be legal for the general case, it causes
    796     //  a lot of regressions in the emitted code (superfluous COPYs, artifact
    797     //  combines not being hit). This seems to be a problem related to the
    798     //  artifact combiner.
    799     if (SizeOp0 % NarrowSize != 0) {
    800       LLT ImplicitTy = NarrowTy;
    801       if (DstTy.isVector())
    802         ImplicitTy = LLT::vector(DstTy.getNumElements(), ImplicitTy);
    803 
    804       Register ImplicitReg = MIRBuilder.buildUndef(ImplicitTy).getReg(0);
    805       MIRBuilder.buildAnyExt(DstReg, ImplicitReg);
    806 
    807       MI.eraseFromParent();
    808       return Legalized;
    809     }
    810 
    811     int NumParts = SizeOp0 / NarrowSize;
    812 
    813     SmallVector<Register, 2> DstRegs;
    814     for (int i = 0; i < NumParts; ++i)
    815       DstRegs.push_back(MIRBuilder.buildUndef(NarrowTy).getReg(0));
    816 
    817     if (DstTy.isVector())
    818       MIRBuilder.buildBuildVector(DstReg, DstRegs);
    819     else
    820       MIRBuilder.buildMerge(DstReg, DstRegs);
    821     MI.eraseFromParent();
    822     return Legalized;
    823   }
    824   case TargetOpcode::G_CONSTANT: {
    825     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
    826     const APInt &Val = MI.getOperand(1).getCImm()->getValue();
    827     unsigned TotalSize = Ty.getSizeInBits();
    828     unsigned NarrowSize = NarrowTy.getSizeInBits();
    829     int NumParts = TotalSize / NarrowSize;
    830 
    831     SmallVector<Register, 4> PartRegs;
    832     for (int I = 0; I != NumParts; ++I) {
    833       unsigned Offset = I * NarrowSize;
    834       auto K = MIRBuilder.buildConstant(NarrowTy,
    835                                         Val.lshr(Offset).trunc(NarrowSize));
    836       PartRegs.push_back(K.getReg(0));
    837     }
    838 
    839     LLT LeftoverTy;
    840     unsigned LeftoverBits = TotalSize - NumParts * NarrowSize;
    841     SmallVector<Register, 1> LeftoverRegs;
    842     if (LeftoverBits != 0) {
    843       LeftoverTy = LLT::scalar(LeftoverBits);
    844       auto K = MIRBuilder.buildConstant(
    845         LeftoverTy,
    846         Val.lshr(NumParts * NarrowSize).trunc(LeftoverBits));
    847       LeftoverRegs.push_back(K.getReg(0));
    848     }
    849 
    850     insertParts(MI.getOperand(0).getReg(),
    851                 Ty, NarrowTy, PartRegs, LeftoverTy, LeftoverRegs);
    852 
    853     MI.eraseFromParent();
    854     return Legalized;
    855   }
    856   case TargetOpcode::G_SEXT:
    857   case TargetOpcode::G_ZEXT:
    858   case TargetOpcode::G_ANYEXT:
    859     return narrowScalarExt(MI, TypeIdx, NarrowTy);
    860   case TargetOpcode::G_TRUNC: {
    861     if (TypeIdx != 1)
    862       return UnableToLegalize;
    863 
    864     uint64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
    865     if (NarrowTy.getSizeInBits() * 2 != SizeOp1) {
    866       LLVM_DEBUG(dbgs() << "Can't narrow trunc to type " << NarrowTy << "\n");
    867       return UnableToLegalize;
    868     }
    869 
    870     auto Unmerge = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
    871     MIRBuilder.buildCopy(MI.getOperand(0), Unmerge.getReg(0));
    872     MI.eraseFromParent();
    873     return Legalized;
    874   }
    875 
    876   case TargetOpcode::G_FREEZE:
    877     return reduceOperationWidth(MI, TypeIdx, NarrowTy);
    878   case TargetOpcode::G_ADD:
    879   case TargetOpcode::G_SUB:
    880   case TargetOpcode::G_SADDO:
    881   case TargetOpcode::G_SSUBO:
    882   case TargetOpcode::G_SADDE:
    883   case TargetOpcode::G_SSUBE:
    884   case TargetOpcode::G_UADDO:
    885   case TargetOpcode::G_USUBO:
    886   case TargetOpcode::G_UADDE:
    887   case TargetOpcode::G_USUBE:
    888     return narrowScalarAddSub(MI, TypeIdx, NarrowTy);
    889   case TargetOpcode::G_MUL:
    890   case TargetOpcode::G_UMULH:
    891     return narrowScalarMul(MI, NarrowTy);
    892   case TargetOpcode::G_EXTRACT:
    893     return narrowScalarExtract(MI, TypeIdx, NarrowTy);
    894   case TargetOpcode::G_INSERT:
    895     return narrowScalarInsert(MI, TypeIdx, NarrowTy);
    896   case TargetOpcode::G_LOAD: {
    897     auto &MMO = **MI.memoperands_begin();
    898     Register DstReg = MI.getOperand(0).getReg();
    899     LLT DstTy = MRI.getType(DstReg);
    900     if (DstTy.isVector())
    901       return UnableToLegalize;
    902 
    903     if (8 * MMO.getSize() != DstTy.getSizeInBits()) {
    904       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    905       MIRBuilder.buildLoad(TmpReg, MI.getOperand(1), MMO);
    906       MIRBuilder.buildAnyExt(DstReg, TmpReg);
    907       MI.eraseFromParent();
    908       return Legalized;
    909     }
    910 
    911     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
    912   }
    913   case TargetOpcode::G_ZEXTLOAD:
    914   case TargetOpcode::G_SEXTLOAD: {
    915     bool ZExt = MI.getOpcode() == TargetOpcode::G_ZEXTLOAD;
    916     Register DstReg = MI.getOperand(0).getReg();
    917     Register PtrReg = MI.getOperand(1).getReg();
    918 
    919     Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    920     auto &MMO = **MI.memoperands_begin();
    921     unsigned MemSize = MMO.getSizeInBits();
    922 
    923     if (MemSize == NarrowSize) {
    924       MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
    925     } else if (MemSize < NarrowSize) {
    926       MIRBuilder.buildLoadInstr(MI.getOpcode(), TmpReg, PtrReg, MMO);
    927     } else if (MemSize > NarrowSize) {
    928       // FIXME: Need to split the load.
    929       return UnableToLegalize;
    930     }
    931 
    932     if (ZExt)
    933       MIRBuilder.buildZExt(DstReg, TmpReg);
    934     else
    935       MIRBuilder.buildSExt(DstReg, TmpReg);
    936 
    937     MI.eraseFromParent();
    938     return Legalized;
    939   }
    940   case TargetOpcode::G_STORE: {
    941     const auto &MMO = **MI.memoperands_begin();
    942 
    943     Register SrcReg = MI.getOperand(0).getReg();
    944     LLT SrcTy = MRI.getType(SrcReg);
    945     if (SrcTy.isVector())
    946       return UnableToLegalize;
    947 
    948     int NumParts = SizeOp0 / NarrowSize;
    949     unsigned HandledSize = NumParts * NarrowTy.getSizeInBits();
    950     unsigned LeftoverBits = SrcTy.getSizeInBits() - HandledSize;
    951     if (SrcTy.isVector() && LeftoverBits != 0)
    952       return UnableToLegalize;
    953 
    954     if (8 * MMO.getSize() != SrcTy.getSizeInBits()) {
    955       Register TmpReg = MRI.createGenericVirtualRegister(NarrowTy);
    956       auto &MMO = **MI.memoperands_begin();
    957       MIRBuilder.buildTrunc(TmpReg, SrcReg);
    958       MIRBuilder.buildStore(TmpReg, MI.getOperand(1), MMO);
    959       MI.eraseFromParent();
    960       return Legalized;
    961     }
    962 
    963     return reduceLoadStoreWidth(MI, 0, NarrowTy);
    964   }
    965   case TargetOpcode::G_SELECT:
    966     return narrowScalarSelect(MI, TypeIdx, NarrowTy);
    967   case TargetOpcode::G_AND:
    968   case TargetOpcode::G_OR:
    969   case TargetOpcode::G_XOR: {
    970     // Legalize bitwise operation:
    971     // A = BinOp<Ty> B, C
    972     // into:
    973     // B1, ..., BN = G_UNMERGE_VALUES B
    974     // C1, ..., CN = G_UNMERGE_VALUES C
    975     // A1 = BinOp<Ty/N> B1, C2
    976     // ...
    977     // AN = BinOp<Ty/N> BN, CN
    978     // A = G_MERGE_VALUES A1, ..., AN
    979     return narrowScalarBasic(MI, TypeIdx, NarrowTy);
    980   }
    981   case TargetOpcode::G_SHL:
    982   case TargetOpcode::G_LSHR:
    983   case TargetOpcode::G_ASHR:
    984     return narrowScalarShift(MI, TypeIdx, NarrowTy);
    985   case TargetOpcode::G_CTLZ:
    986   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    987   case TargetOpcode::G_CTTZ:
    988   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
    989   case TargetOpcode::G_CTPOP:
    990     if (TypeIdx == 1)
    991       switch (MI.getOpcode()) {
    992       case TargetOpcode::G_CTLZ:
    993       case TargetOpcode::G_CTLZ_ZERO_UNDEF:
    994         return narrowScalarCTLZ(MI, TypeIdx, NarrowTy);
    995       case TargetOpcode::G_CTTZ:
    996       case TargetOpcode::G_CTTZ_ZERO_UNDEF:
    997         return narrowScalarCTTZ(MI, TypeIdx, NarrowTy);
    998       case TargetOpcode::G_CTPOP:
    999         return narrowScalarCTPOP(MI, TypeIdx, NarrowTy);
   1000       default:
   1001         return UnableToLegalize;
   1002       }
   1003 
   1004     Observer.changingInstr(MI);
   1005     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
   1006     Observer.changedInstr(MI);
   1007     return Legalized;
   1008   case TargetOpcode::G_INTTOPTR:
   1009     if (TypeIdx != 1)
   1010       return UnableToLegalize;
   1011 
   1012     Observer.changingInstr(MI);
   1013     narrowScalarSrc(MI, NarrowTy, 1);
   1014     Observer.changedInstr(MI);
   1015     return Legalized;
   1016   case TargetOpcode::G_PTRTOINT:
   1017     if (TypeIdx != 0)
   1018       return UnableToLegalize;
   1019 
   1020     Observer.changingInstr(MI);
   1021     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_ZEXT);
   1022     Observer.changedInstr(MI);
   1023     return Legalized;
   1024   case TargetOpcode::G_PHI: {
   1025     // FIXME: add support for when SizeOp0 isn't an exact multiple of
   1026     // NarrowSize.
   1027     if (SizeOp0 % NarrowSize != 0)
   1028       return UnableToLegalize;
   1029 
   1030     unsigned NumParts = SizeOp0 / NarrowSize;
   1031     SmallVector<Register, 2> DstRegs(NumParts);
   1032     SmallVector<SmallVector<Register, 2>, 2> SrcRegs(MI.getNumOperands() / 2);
   1033     Observer.changingInstr(MI);
   1034     for (unsigned i = 1; i < MI.getNumOperands(); i += 2) {
   1035       MachineBasicBlock &OpMBB = *MI.getOperand(i + 1).getMBB();
   1036       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
   1037       extractParts(MI.getOperand(i).getReg(), NarrowTy, NumParts,
   1038                    SrcRegs[i / 2]);
   1039     }
   1040     MachineBasicBlock &MBB = *MI.getParent();
   1041     MIRBuilder.setInsertPt(MBB, MI);
   1042     for (unsigned i = 0; i < NumParts; ++i) {
   1043       DstRegs[i] = MRI.createGenericVirtualRegister(NarrowTy);
   1044       MachineInstrBuilder MIB =
   1045           MIRBuilder.buildInstr(TargetOpcode::G_PHI).addDef(DstRegs[i]);
   1046       for (unsigned j = 1; j < MI.getNumOperands(); j += 2)
   1047         MIB.addUse(SrcRegs[j / 2][i]).add(MI.getOperand(j + 1));
   1048     }
   1049     MIRBuilder.setInsertPt(MBB, MBB.getFirstNonPHI());
   1050     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
   1051     Observer.changedInstr(MI);
   1052     MI.eraseFromParent();
   1053     return Legalized;
   1054   }
   1055   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
   1056   case TargetOpcode::G_INSERT_VECTOR_ELT: {
   1057     if (TypeIdx != 2)
   1058       return UnableToLegalize;
   1059 
   1060     int OpIdx = MI.getOpcode() == TargetOpcode::G_EXTRACT_VECTOR_ELT ? 2 : 3;
   1061     Observer.changingInstr(MI);
   1062     narrowScalarSrc(MI, NarrowTy, OpIdx);
   1063     Observer.changedInstr(MI);
   1064     return Legalized;
   1065   }
   1066   case TargetOpcode::G_ICMP: {
   1067     uint64_t SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
   1068     if (NarrowSize * 2 != SrcSize)
   1069       return UnableToLegalize;
   1070 
   1071     Observer.changingInstr(MI);
   1072     Register LHSL = MRI.createGenericVirtualRegister(NarrowTy);
   1073     Register LHSH = MRI.createGenericVirtualRegister(NarrowTy);
   1074     MIRBuilder.buildUnmerge({LHSL, LHSH}, MI.getOperand(2));
   1075 
   1076     Register RHSL = MRI.createGenericVirtualRegister(NarrowTy);
   1077     Register RHSH = MRI.createGenericVirtualRegister(NarrowTy);
   1078     MIRBuilder.buildUnmerge({RHSL, RHSH}, MI.getOperand(3));
   1079 
   1080     CmpInst::Predicate Pred =
   1081         static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
   1082     LLT ResTy = MRI.getType(MI.getOperand(0).getReg());
   1083 
   1084     if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) {
   1085       MachineInstrBuilder XorL = MIRBuilder.buildXor(NarrowTy, LHSL, RHSL);
   1086       MachineInstrBuilder XorH = MIRBuilder.buildXor(NarrowTy, LHSH, RHSH);
   1087       MachineInstrBuilder Or = MIRBuilder.buildOr(NarrowTy, XorL, XorH);
   1088       MachineInstrBuilder Zero = MIRBuilder.buildConstant(NarrowTy, 0);
   1089       MIRBuilder.buildICmp(Pred, MI.getOperand(0), Or, Zero);
   1090     } else {
   1091       MachineInstrBuilder CmpH = MIRBuilder.buildICmp(Pred, ResTy, LHSH, RHSH);
   1092       MachineInstrBuilder CmpHEQ =
   1093           MIRBuilder.buildICmp(CmpInst::Predicate::ICMP_EQ, ResTy, LHSH, RHSH);
   1094       MachineInstrBuilder CmpLU = MIRBuilder.buildICmp(
   1095           ICmpInst::getUnsignedPredicate(Pred), ResTy, LHSL, RHSL);
   1096       MIRBuilder.buildSelect(MI.getOperand(0), CmpHEQ, CmpLU, CmpH);
   1097     }
   1098     Observer.changedInstr(MI);
   1099     MI.eraseFromParent();
   1100     return Legalized;
   1101   }
   1102   case TargetOpcode::G_SEXT_INREG: {
   1103     if (TypeIdx != 0)
   1104       return UnableToLegalize;
   1105 
   1106     int64_t SizeInBits = MI.getOperand(2).getImm();
   1107 
   1108     // So long as the new type has more bits than the bits we're extending we
   1109     // don't need to break it apart.
   1110     if (NarrowTy.getScalarSizeInBits() >= SizeInBits) {
   1111       Observer.changingInstr(MI);
   1112       // We don't lose any non-extension bits by truncating the src and
   1113       // sign-extending the dst.
   1114       MachineOperand &MO1 = MI.getOperand(1);
   1115       auto TruncMIB = MIRBuilder.buildTrunc(NarrowTy, MO1);
   1116       MO1.setReg(TruncMIB.getReg(0));
   1117 
   1118       MachineOperand &MO2 = MI.getOperand(0);
   1119       Register DstExt = MRI.createGenericVirtualRegister(NarrowTy);
   1120       MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   1121       MIRBuilder.buildSExt(MO2, DstExt);
   1122       MO2.setReg(DstExt);
   1123       Observer.changedInstr(MI);
   1124       return Legalized;
   1125     }
   1126 
   1127     // Break it apart. Components below the extension point are unmodified. The
   1128     // component containing the extension point becomes a narrower SEXT_INREG.
   1129     // Components above it are ashr'd from the component containing the
   1130     // extension point.
   1131     if (SizeOp0 % NarrowSize != 0)
   1132       return UnableToLegalize;
   1133     int NumParts = SizeOp0 / NarrowSize;
   1134 
   1135     // List the registers where the destination will be scattered.
   1136     SmallVector<Register, 2> DstRegs;
   1137     // List the registers where the source will be split.
   1138     SmallVector<Register, 2> SrcRegs;
   1139 
   1140     // Create all the temporary registers.
   1141     for (int i = 0; i < NumParts; ++i) {
   1142       Register SrcReg = MRI.createGenericVirtualRegister(NarrowTy);
   1143 
   1144       SrcRegs.push_back(SrcReg);
   1145     }
   1146 
   1147     // Explode the big arguments into smaller chunks.
   1148     MIRBuilder.buildUnmerge(SrcRegs, MI.getOperand(1));
   1149 
   1150     Register AshrCstReg =
   1151         MIRBuilder.buildConstant(NarrowTy, NarrowTy.getScalarSizeInBits() - 1)
   1152             .getReg(0);
   1153     Register FullExtensionReg = 0;
   1154     Register PartialExtensionReg = 0;
   1155 
   1156     // Do the operation on each small part.
   1157     for (int i = 0; i < NumParts; ++i) {
   1158       if ((i + 1) * NarrowTy.getScalarSizeInBits() < SizeInBits)
   1159         DstRegs.push_back(SrcRegs[i]);
   1160       else if (i * NarrowTy.getScalarSizeInBits() > SizeInBits) {
   1161         assert(PartialExtensionReg &&
   1162                "Expected to visit partial extension before full");
   1163         if (FullExtensionReg) {
   1164           DstRegs.push_back(FullExtensionReg);
   1165           continue;
   1166         }
   1167         DstRegs.push_back(
   1168             MIRBuilder.buildAShr(NarrowTy, PartialExtensionReg, AshrCstReg)
   1169                 .getReg(0));
   1170         FullExtensionReg = DstRegs.back();
   1171       } else {
   1172         DstRegs.push_back(
   1173             MIRBuilder
   1174                 .buildInstr(
   1175                     TargetOpcode::G_SEXT_INREG, {NarrowTy},
   1176                     {SrcRegs[i], SizeInBits % NarrowTy.getScalarSizeInBits()})
   1177                 .getReg(0));
   1178         PartialExtensionReg = DstRegs.back();
   1179       }
   1180     }
   1181 
   1182     // Gather the destination registers into the final destination.
   1183     Register DstReg = MI.getOperand(0).getReg();
   1184     MIRBuilder.buildMerge(DstReg, DstRegs);
   1185     MI.eraseFromParent();
   1186     return Legalized;
   1187   }
   1188   case TargetOpcode::G_BSWAP:
   1189   case TargetOpcode::G_BITREVERSE: {
   1190     if (SizeOp0 % NarrowSize != 0)
   1191       return UnableToLegalize;
   1192 
   1193     Observer.changingInstr(MI);
   1194     SmallVector<Register, 2> SrcRegs, DstRegs;
   1195     unsigned NumParts = SizeOp0 / NarrowSize;
   1196     extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
   1197 
   1198     for (unsigned i = 0; i < NumParts; ++i) {
   1199       auto DstPart = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
   1200                                            {SrcRegs[NumParts - 1 - i]});
   1201       DstRegs.push_back(DstPart.getReg(0));
   1202     }
   1203 
   1204     MIRBuilder.buildMerge(MI.getOperand(0), DstRegs);
   1205 
   1206     Observer.changedInstr(MI);
   1207     MI.eraseFromParent();
   1208     return Legalized;
   1209   }
   1210   case TargetOpcode::G_PTR_ADD:
   1211   case TargetOpcode::G_PTRMASK: {
   1212     if (TypeIdx != 1)
   1213       return UnableToLegalize;
   1214     Observer.changingInstr(MI);
   1215     narrowScalarSrc(MI, NarrowTy, 2);
   1216     Observer.changedInstr(MI);
   1217     return Legalized;
   1218   }
   1219   case TargetOpcode::G_FPTOUI:
   1220   case TargetOpcode::G_FPTOSI:
   1221     return narrowScalarFPTOI(MI, TypeIdx, NarrowTy);
   1222   case TargetOpcode::G_FPEXT:
   1223     if (TypeIdx != 0)
   1224       return UnableToLegalize;
   1225     Observer.changingInstr(MI);
   1226     narrowScalarDst(MI, NarrowTy, 0, TargetOpcode::G_FPEXT);
   1227     Observer.changedInstr(MI);
   1228     return Legalized;
   1229   }
   1230 }
   1231 
   1232 Register LegalizerHelper::coerceToScalar(Register Val) {
   1233   LLT Ty = MRI.getType(Val);
   1234   if (Ty.isScalar())
   1235     return Val;
   1236 
   1237   const DataLayout &DL = MIRBuilder.getDataLayout();
   1238   LLT NewTy = LLT::scalar(Ty.getSizeInBits());
   1239   if (Ty.isPointer()) {
   1240     if (DL.isNonIntegralAddressSpace(Ty.getAddressSpace()))
   1241       return Register();
   1242     return MIRBuilder.buildPtrToInt(NewTy, Val).getReg(0);
   1243   }
   1244 
   1245   Register NewVal = Val;
   1246 
   1247   assert(Ty.isVector());
   1248   LLT EltTy = Ty.getElementType();
   1249   if (EltTy.isPointer())
   1250     NewVal = MIRBuilder.buildPtrToInt(NewTy, NewVal).getReg(0);
   1251   return MIRBuilder.buildBitcast(NewTy, NewVal).getReg(0);
   1252 }
   1253 
   1254 void LegalizerHelper::widenScalarSrc(MachineInstr &MI, LLT WideTy,
   1255                                      unsigned OpIdx, unsigned ExtOpcode) {
   1256   MachineOperand &MO = MI.getOperand(OpIdx);
   1257   auto ExtB = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MO});
   1258   MO.setReg(ExtB.getReg(0));
   1259 }
   1260 
   1261 void LegalizerHelper::narrowScalarSrc(MachineInstr &MI, LLT NarrowTy,
   1262                                       unsigned OpIdx) {
   1263   MachineOperand &MO = MI.getOperand(OpIdx);
   1264   auto ExtB = MIRBuilder.buildTrunc(NarrowTy, MO);
   1265   MO.setReg(ExtB.getReg(0));
   1266 }
   1267 
   1268 void LegalizerHelper::widenScalarDst(MachineInstr &MI, LLT WideTy,
   1269                                      unsigned OpIdx, unsigned TruncOpcode) {
   1270   MachineOperand &MO = MI.getOperand(OpIdx);
   1271   Register DstExt = MRI.createGenericVirtualRegister(WideTy);
   1272   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   1273   MIRBuilder.buildInstr(TruncOpcode, {MO}, {DstExt});
   1274   MO.setReg(DstExt);
   1275 }
   1276 
   1277 void LegalizerHelper::narrowScalarDst(MachineInstr &MI, LLT NarrowTy,
   1278                                       unsigned OpIdx, unsigned ExtOpcode) {
   1279   MachineOperand &MO = MI.getOperand(OpIdx);
   1280   Register DstTrunc = MRI.createGenericVirtualRegister(NarrowTy);
   1281   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   1282   MIRBuilder.buildInstr(ExtOpcode, {MO}, {DstTrunc});
   1283   MO.setReg(DstTrunc);
   1284 }
   1285 
   1286 void LegalizerHelper::moreElementsVectorDst(MachineInstr &MI, LLT WideTy,
   1287                                             unsigned OpIdx) {
   1288   MachineOperand &MO = MI.getOperand(OpIdx);
   1289   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   1290   MO.setReg(widenWithUnmerge(WideTy, MO.getReg()));
   1291 }
   1292 
   1293 void LegalizerHelper::moreElementsVectorSrc(MachineInstr &MI, LLT MoreTy,
   1294                                             unsigned OpIdx) {
   1295   MachineOperand &MO = MI.getOperand(OpIdx);
   1296 
   1297   LLT OldTy = MRI.getType(MO.getReg());
   1298   unsigned OldElts = OldTy.getNumElements();
   1299   unsigned NewElts = MoreTy.getNumElements();
   1300 
   1301   unsigned NumParts = NewElts / OldElts;
   1302 
   1303   // Use concat_vectors if the result is a multiple of the number of elements.
   1304   if (NumParts * OldElts == NewElts) {
   1305     SmallVector<Register, 8> Parts;
   1306     Parts.push_back(MO.getReg());
   1307 
   1308     Register ImpDef = MIRBuilder.buildUndef(OldTy).getReg(0);
   1309     for (unsigned I = 1; I != NumParts; ++I)
   1310       Parts.push_back(ImpDef);
   1311 
   1312     auto Concat = MIRBuilder.buildConcatVectors(MoreTy, Parts);
   1313     MO.setReg(Concat.getReg(0));
   1314     return;
   1315   }
   1316 
   1317   Register MoreReg = MRI.createGenericVirtualRegister(MoreTy);
   1318   Register ImpDef = MIRBuilder.buildUndef(MoreTy).getReg(0);
   1319   MIRBuilder.buildInsert(MoreReg, ImpDef, MO.getReg(), 0);
   1320   MO.setReg(MoreReg);
   1321 }
   1322 
   1323 void LegalizerHelper::bitcastSrc(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
   1324   MachineOperand &Op = MI.getOperand(OpIdx);
   1325   Op.setReg(MIRBuilder.buildBitcast(CastTy, Op).getReg(0));
   1326 }
   1327 
   1328 void LegalizerHelper::bitcastDst(MachineInstr &MI, LLT CastTy, unsigned OpIdx) {
   1329   MachineOperand &MO = MI.getOperand(OpIdx);
   1330   Register CastDst = MRI.createGenericVirtualRegister(CastTy);
   1331   MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   1332   MIRBuilder.buildBitcast(MO, CastDst);
   1333   MO.setReg(CastDst);
   1334 }
   1335 
   1336 LegalizerHelper::LegalizeResult
   1337 LegalizerHelper::widenScalarMergeValues(MachineInstr &MI, unsigned TypeIdx,
   1338                                         LLT WideTy) {
   1339   if (TypeIdx != 1)
   1340     return UnableToLegalize;
   1341 
   1342   Register DstReg = MI.getOperand(0).getReg();
   1343   LLT DstTy = MRI.getType(DstReg);
   1344   if (DstTy.isVector())
   1345     return UnableToLegalize;
   1346 
   1347   Register Src1 = MI.getOperand(1).getReg();
   1348   LLT SrcTy = MRI.getType(Src1);
   1349   const int DstSize = DstTy.getSizeInBits();
   1350   const int SrcSize = SrcTy.getSizeInBits();
   1351   const int WideSize = WideTy.getSizeInBits();
   1352   const int NumMerge = (DstSize + WideSize - 1) / WideSize;
   1353 
   1354   unsigned NumOps = MI.getNumOperands();
   1355   unsigned NumSrc = MI.getNumOperands() - 1;
   1356   unsigned PartSize = DstTy.getSizeInBits() / NumSrc;
   1357 
   1358   if (WideSize >= DstSize) {
   1359     // Directly pack the bits in the target type.
   1360     Register ResultReg = MIRBuilder.buildZExt(WideTy, Src1).getReg(0);
   1361 
   1362     for (unsigned I = 2; I != NumOps; ++I) {
   1363       const unsigned Offset = (I - 1) * PartSize;
   1364 
   1365       Register SrcReg = MI.getOperand(I).getReg();
   1366       assert(MRI.getType(SrcReg) == LLT::scalar(PartSize));
   1367 
   1368       auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
   1369 
   1370       Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
   1371         MRI.createGenericVirtualRegister(WideTy);
   1372 
   1373       auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
   1374       auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
   1375       MIRBuilder.buildOr(NextResult, ResultReg, Shl);
   1376       ResultReg = NextResult;
   1377     }
   1378 
   1379     if (WideSize > DstSize)
   1380       MIRBuilder.buildTrunc(DstReg, ResultReg);
   1381     else if (DstTy.isPointer())
   1382       MIRBuilder.buildIntToPtr(DstReg, ResultReg);
   1383 
   1384     MI.eraseFromParent();
   1385     return Legalized;
   1386   }
   1387 
   1388   // Unmerge the original values to the GCD type, and recombine to the next
   1389   // multiple greater than the original type.
   1390   //
   1391   // %3:_(s12) = G_MERGE_VALUES %0:_(s4), %1:_(s4), %2:_(s4) -> s6
   1392   // %4:_(s2), %5:_(s2) = G_UNMERGE_VALUES %0
   1393   // %6:_(s2), %7:_(s2) = G_UNMERGE_VALUES %1
   1394   // %8:_(s2), %9:_(s2) = G_UNMERGE_VALUES %2
   1395   // %10:_(s6) = G_MERGE_VALUES %4, %5, %6
   1396   // %11:_(s6) = G_MERGE_VALUES %7, %8, %9
   1397   // %12:_(s12) = G_MERGE_VALUES %10, %11
   1398   //
   1399   // Padding with undef if necessary:
   1400   //
   1401   // %2:_(s8) = G_MERGE_VALUES %0:_(s4), %1:_(s4) -> s6
   1402   // %3:_(s2), %4:_(s2) = G_UNMERGE_VALUES %0
   1403   // %5:_(s2), %6:_(s2) = G_UNMERGE_VALUES %1
   1404   // %7:_(s2) = G_IMPLICIT_DEF
   1405   // %8:_(s6) = G_MERGE_VALUES %3, %4, %5
   1406   // %9:_(s6) = G_MERGE_VALUES %6, %7, %7
   1407   // %10:_(s12) = G_MERGE_VALUES %8, %9
   1408 
   1409   const int GCD = greatestCommonDivisor(SrcSize, WideSize);
   1410   LLT GCDTy = LLT::scalar(GCD);
   1411 
   1412   SmallVector<Register, 8> Parts;
   1413   SmallVector<Register, 8> NewMergeRegs;
   1414   SmallVector<Register, 8> Unmerges;
   1415   LLT WideDstTy = LLT::scalar(NumMerge * WideSize);
   1416 
   1417   // Decompose the original operands if they don't evenly divide.
   1418   for (int I = 1, E = MI.getNumOperands(); I != E; ++I) {
   1419     Register SrcReg = MI.getOperand(I).getReg();
   1420     if (GCD == SrcSize) {
   1421       Unmerges.push_back(SrcReg);
   1422     } else {
   1423       auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
   1424       for (int J = 0, JE = Unmerge->getNumOperands() - 1; J != JE; ++J)
   1425         Unmerges.push_back(Unmerge.getReg(J));
   1426     }
   1427   }
   1428 
   1429   // Pad with undef to the next size that is a multiple of the requested size.
   1430   if (static_cast<int>(Unmerges.size()) != NumMerge * WideSize) {
   1431     Register UndefReg = MIRBuilder.buildUndef(GCDTy).getReg(0);
   1432     for (int I = Unmerges.size(); I != NumMerge * WideSize; ++I)
   1433       Unmerges.push_back(UndefReg);
   1434   }
   1435 
   1436   const int PartsPerGCD = WideSize / GCD;
   1437 
   1438   // Build merges of each piece.
   1439   ArrayRef<Register> Slicer(Unmerges);
   1440   for (int I = 0; I != NumMerge; ++I, Slicer = Slicer.drop_front(PartsPerGCD)) {
   1441     auto Merge = MIRBuilder.buildMerge(WideTy, Slicer.take_front(PartsPerGCD));
   1442     NewMergeRegs.push_back(Merge.getReg(0));
   1443   }
   1444 
   1445   // A truncate may be necessary if the requested type doesn't evenly divide the
   1446   // original result type.
   1447   if (DstTy.getSizeInBits() == WideDstTy.getSizeInBits()) {
   1448     MIRBuilder.buildMerge(DstReg, NewMergeRegs);
   1449   } else {
   1450     auto FinalMerge = MIRBuilder.buildMerge(WideDstTy, NewMergeRegs);
   1451     MIRBuilder.buildTrunc(DstReg, FinalMerge.getReg(0));
   1452   }
   1453 
   1454   MI.eraseFromParent();
   1455   return Legalized;
   1456 }
   1457 
   1458 Register LegalizerHelper::widenWithUnmerge(LLT WideTy, Register OrigReg) {
   1459   Register WideReg = MRI.createGenericVirtualRegister(WideTy);
   1460   LLT OrigTy = MRI.getType(OrigReg);
   1461   LLT LCMTy = getLCMType(WideTy, OrigTy);
   1462 
   1463   const int NumMergeParts = LCMTy.getSizeInBits() / WideTy.getSizeInBits();
   1464   const int NumUnmergeParts = LCMTy.getSizeInBits() / OrigTy.getSizeInBits();
   1465 
   1466   Register UnmergeSrc = WideReg;
   1467 
   1468   // Create a merge to the LCM type, padding with undef
   1469   // %0:_(<3 x s32>) = G_FOO => <4 x s32>
   1470   // =>
   1471   // %1:_(<4 x s32>) = G_FOO
   1472   // %2:_(<4 x s32>) = G_IMPLICIT_DEF
   1473   // %3:_(<12 x s32>) = G_CONCAT_VECTORS %1, %2, %2
   1474   // %0:_(<3 x s32>), %4:_, %5:_, %6:_ = G_UNMERGE_VALUES %3
   1475   if (NumMergeParts > 1) {
   1476     Register Undef = MIRBuilder.buildUndef(WideTy).getReg(0);
   1477     SmallVector<Register, 8> MergeParts(NumMergeParts, Undef);
   1478     MergeParts[0] = WideReg;
   1479     UnmergeSrc = MIRBuilder.buildMerge(LCMTy, MergeParts).getReg(0);
   1480   }
   1481 
   1482   // Unmerge to the original register and pad with dead defs.
   1483   SmallVector<Register, 8> UnmergeResults(NumUnmergeParts);
   1484   UnmergeResults[0] = OrigReg;
   1485   for (int I = 1; I != NumUnmergeParts; ++I)
   1486     UnmergeResults[I] = MRI.createGenericVirtualRegister(OrigTy);
   1487 
   1488   MIRBuilder.buildUnmerge(UnmergeResults, UnmergeSrc);
   1489   return WideReg;
   1490 }
   1491 
   1492 LegalizerHelper::LegalizeResult
   1493 LegalizerHelper::widenScalarUnmergeValues(MachineInstr &MI, unsigned TypeIdx,
   1494                                           LLT WideTy) {
   1495   if (TypeIdx != 0)
   1496     return UnableToLegalize;
   1497 
   1498   int NumDst = MI.getNumOperands() - 1;
   1499   Register SrcReg = MI.getOperand(NumDst).getReg();
   1500   LLT SrcTy = MRI.getType(SrcReg);
   1501   if (SrcTy.isVector())
   1502     return UnableToLegalize;
   1503 
   1504   Register Dst0Reg = MI.getOperand(0).getReg();
   1505   LLT DstTy = MRI.getType(Dst0Reg);
   1506   if (!DstTy.isScalar())
   1507     return UnableToLegalize;
   1508 
   1509   if (WideTy.getSizeInBits() >= SrcTy.getSizeInBits()) {
   1510     if (SrcTy.isPointer()) {
   1511       const DataLayout &DL = MIRBuilder.getDataLayout();
   1512       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace())) {
   1513         LLVM_DEBUG(
   1514             dbgs() << "Not casting non-integral address space integer\n");
   1515         return UnableToLegalize;
   1516       }
   1517 
   1518       SrcTy = LLT::scalar(SrcTy.getSizeInBits());
   1519       SrcReg = MIRBuilder.buildPtrToInt(SrcTy, SrcReg).getReg(0);
   1520     }
   1521 
   1522     // Widen SrcTy to WideTy. This does not affect the result, but since the
   1523     // user requested this size, it is probably better handled than SrcTy and
   1524     // should reduce the total number of legalization artifacts
   1525     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
   1526       SrcTy = WideTy;
   1527       SrcReg = MIRBuilder.buildAnyExt(WideTy, SrcReg).getReg(0);
   1528     }
   1529 
   1530     // Theres no unmerge type to target. Directly extract the bits from the
   1531     // source type
   1532     unsigned DstSize = DstTy.getSizeInBits();
   1533 
   1534     MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
   1535     for (int I = 1; I != NumDst; ++I) {
   1536       auto ShiftAmt = MIRBuilder.buildConstant(SrcTy, DstSize * I);
   1537       auto Shr = MIRBuilder.buildLShr(SrcTy, SrcReg, ShiftAmt);
   1538       MIRBuilder.buildTrunc(MI.getOperand(I), Shr);
   1539     }
   1540 
   1541     MI.eraseFromParent();
   1542     return Legalized;
   1543   }
   1544 
   1545   // Extend the source to a wider type.
   1546   LLT LCMTy = getLCMType(SrcTy, WideTy);
   1547 
   1548   Register WideSrc = SrcReg;
   1549   if (LCMTy.getSizeInBits() != SrcTy.getSizeInBits()) {
   1550     // TODO: If this is an integral address space, cast to integer and anyext.
   1551     if (SrcTy.isPointer()) {
   1552       LLVM_DEBUG(dbgs() << "Widening pointer source types not implemented\n");
   1553       return UnableToLegalize;
   1554     }
   1555 
   1556     WideSrc = MIRBuilder.buildAnyExt(LCMTy, WideSrc).getReg(0);
   1557   }
   1558 
   1559   auto Unmerge = MIRBuilder.buildUnmerge(WideTy, WideSrc);
   1560 
   1561   // Create a sequence of unmerges and merges to the original results. Since we
   1562   // may have widened the source, we will need to pad the results with dead defs
   1563   // to cover the source register.
   1564   // e.g. widen s48 to s64:
   1565   // %1:_(s48), %2:_(s48) = G_UNMERGE_VALUES %0:_(s96)
   1566   //
   1567   // =>
   1568   //  %4:_(s192) = G_ANYEXT %0:_(s96)
   1569   //  %5:_(s64), %6, %7 = G_UNMERGE_VALUES %4 ; Requested unmerge
   1570   //  ; unpack to GCD type, with extra dead defs
   1571   //  %8:_(s16), %9, %10, %11 = G_UNMERGE_VALUES %5:_(s64)
   1572   //  %12:_(s16), %13, dead %14, dead %15 = G_UNMERGE_VALUES %6:_(s64)
   1573   //  dead %16:_(s16), dead %17, dead %18, dead %18 = G_UNMERGE_VALUES %7:_(s64)
   1574   //  %1:_(s48) = G_MERGE_VALUES %8:_(s16), %9, %10   ; Remerge to destination
   1575   //  %2:_(s48) = G_MERGE_VALUES %11:_(s16), %12, %13 ; Remerge to destination
   1576   const LLT GCDTy = getGCDType(WideTy, DstTy);
   1577   const int NumUnmerge = Unmerge->getNumOperands() - 1;
   1578   const int PartsPerRemerge = DstTy.getSizeInBits() / GCDTy.getSizeInBits();
   1579 
   1580   // Directly unmerge to the destination without going through a GCD type
   1581   // if possible
   1582   if (PartsPerRemerge == 1) {
   1583     const int PartsPerUnmerge = WideTy.getSizeInBits() / DstTy.getSizeInBits();
   1584 
   1585     for (int I = 0; I != NumUnmerge; ++I) {
   1586       auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
   1587 
   1588       for (int J = 0; J != PartsPerUnmerge; ++J) {
   1589         int Idx = I * PartsPerUnmerge + J;
   1590         if (Idx < NumDst)
   1591           MIB.addDef(MI.getOperand(Idx).getReg());
   1592         else {
   1593           // Create dead def for excess components.
   1594           MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
   1595         }
   1596       }
   1597 
   1598       MIB.addUse(Unmerge.getReg(I));
   1599     }
   1600   } else {
   1601     SmallVector<Register, 16> Parts;
   1602     for (int J = 0; J != NumUnmerge; ++J)
   1603       extractGCDType(Parts, GCDTy, Unmerge.getReg(J));
   1604 
   1605     SmallVector<Register, 8> RemergeParts;
   1606     for (int I = 0; I != NumDst; ++I) {
   1607       for (int J = 0; J < PartsPerRemerge; ++J) {
   1608         const int Idx = I * PartsPerRemerge + J;
   1609         RemergeParts.emplace_back(Parts[Idx]);
   1610       }
   1611 
   1612       MIRBuilder.buildMerge(MI.getOperand(I).getReg(), RemergeParts);
   1613       RemergeParts.clear();
   1614     }
   1615   }
   1616 
   1617   MI.eraseFromParent();
   1618   return Legalized;
   1619 }
   1620 
   1621 LegalizerHelper::LegalizeResult
   1622 LegalizerHelper::widenScalarExtract(MachineInstr &MI, unsigned TypeIdx,
   1623                                     LLT WideTy) {
   1624   Register DstReg = MI.getOperand(0).getReg();
   1625   Register SrcReg = MI.getOperand(1).getReg();
   1626   LLT SrcTy = MRI.getType(SrcReg);
   1627 
   1628   LLT DstTy = MRI.getType(DstReg);
   1629   unsigned Offset = MI.getOperand(2).getImm();
   1630 
   1631   if (TypeIdx == 0) {
   1632     if (SrcTy.isVector() || DstTy.isVector())
   1633       return UnableToLegalize;
   1634 
   1635     SrcOp Src(SrcReg);
   1636     if (SrcTy.isPointer()) {
   1637       // Extracts from pointers can be handled only if they are really just
   1638       // simple integers.
   1639       const DataLayout &DL = MIRBuilder.getDataLayout();
   1640       if (DL.isNonIntegralAddressSpace(SrcTy.getAddressSpace()))
   1641         return UnableToLegalize;
   1642 
   1643       LLT SrcAsIntTy = LLT::scalar(SrcTy.getSizeInBits());
   1644       Src = MIRBuilder.buildPtrToInt(SrcAsIntTy, Src);
   1645       SrcTy = SrcAsIntTy;
   1646     }
   1647 
   1648     if (DstTy.isPointer())
   1649       return UnableToLegalize;
   1650 
   1651     if (Offset == 0) {
   1652       // Avoid a shift in the degenerate case.
   1653       MIRBuilder.buildTrunc(DstReg,
   1654                             MIRBuilder.buildAnyExtOrTrunc(WideTy, Src));
   1655       MI.eraseFromParent();
   1656       return Legalized;
   1657     }
   1658 
   1659     // Do a shift in the source type.
   1660     LLT ShiftTy = SrcTy;
   1661     if (WideTy.getSizeInBits() > SrcTy.getSizeInBits()) {
   1662       Src = MIRBuilder.buildAnyExt(WideTy, Src);
   1663       ShiftTy = WideTy;
   1664     }
   1665 
   1666     auto LShr = MIRBuilder.buildLShr(
   1667       ShiftTy, Src, MIRBuilder.buildConstant(ShiftTy, Offset));
   1668     MIRBuilder.buildTrunc(DstReg, LShr);
   1669     MI.eraseFromParent();
   1670     return Legalized;
   1671   }
   1672 
   1673   if (SrcTy.isScalar()) {
   1674     Observer.changingInstr(MI);
   1675     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   1676     Observer.changedInstr(MI);
   1677     return Legalized;
   1678   }
   1679 
   1680   if (!SrcTy.isVector())
   1681     return UnableToLegalize;
   1682 
   1683   if (DstTy != SrcTy.getElementType())
   1684     return UnableToLegalize;
   1685 
   1686   if (Offset % SrcTy.getScalarSizeInBits() != 0)
   1687     return UnableToLegalize;
   1688 
   1689   Observer.changingInstr(MI);
   1690   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   1691 
   1692   MI.getOperand(2).setImm((WideTy.getSizeInBits() / SrcTy.getSizeInBits()) *
   1693                           Offset);
   1694   widenScalarDst(MI, WideTy.getScalarType(), 0);
   1695   Observer.changedInstr(MI);
   1696   return Legalized;
   1697 }
   1698 
   1699 LegalizerHelper::LegalizeResult
   1700 LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx,
   1701                                    LLT WideTy) {
   1702   if (TypeIdx != 0 || WideTy.isVector())
   1703     return UnableToLegalize;
   1704   Observer.changingInstr(MI);
   1705   widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   1706   widenScalarDst(MI, WideTy);
   1707   Observer.changedInstr(MI);
   1708   return Legalized;
   1709 }
   1710 
   1711 LegalizerHelper::LegalizeResult
   1712 LegalizerHelper::widenScalarAddSubOverflow(MachineInstr &MI, unsigned TypeIdx,
   1713                                            LLT WideTy) {
   1714   if (TypeIdx == 1)
   1715     return UnableToLegalize; // TODO
   1716 
   1717   unsigned Opcode;
   1718   unsigned ExtOpcode;
   1719   Optional<Register> CarryIn = None;
   1720   switch (MI.getOpcode()) {
   1721   default:
   1722     llvm_unreachable("Unexpected opcode!");
   1723   case TargetOpcode::G_SADDO:
   1724     Opcode = TargetOpcode::G_ADD;
   1725     ExtOpcode = TargetOpcode::G_SEXT;
   1726     break;
   1727   case TargetOpcode::G_SSUBO:
   1728     Opcode = TargetOpcode::G_SUB;
   1729     ExtOpcode = TargetOpcode::G_SEXT;
   1730     break;
   1731   case TargetOpcode::G_UADDO:
   1732     Opcode = TargetOpcode::G_ADD;
   1733     ExtOpcode = TargetOpcode::G_ZEXT;
   1734     break;
   1735   case TargetOpcode::G_USUBO:
   1736     Opcode = TargetOpcode::G_SUB;
   1737     ExtOpcode = TargetOpcode::G_ZEXT;
   1738     break;
   1739   case TargetOpcode::G_SADDE:
   1740     Opcode = TargetOpcode::G_UADDE;
   1741     ExtOpcode = TargetOpcode::G_SEXT;
   1742     CarryIn = MI.getOperand(4).getReg();
   1743     break;
   1744   case TargetOpcode::G_SSUBE:
   1745     Opcode = TargetOpcode::G_USUBE;
   1746     ExtOpcode = TargetOpcode::G_SEXT;
   1747     CarryIn = MI.getOperand(4).getReg();
   1748     break;
   1749   case TargetOpcode::G_UADDE:
   1750     Opcode = TargetOpcode::G_UADDE;
   1751     ExtOpcode = TargetOpcode::G_ZEXT;
   1752     CarryIn = MI.getOperand(4).getReg();
   1753     break;
   1754   case TargetOpcode::G_USUBE:
   1755     Opcode = TargetOpcode::G_USUBE;
   1756     ExtOpcode = TargetOpcode::G_ZEXT;
   1757     CarryIn = MI.getOperand(4).getReg();
   1758     break;
   1759   }
   1760 
   1761   auto LHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(2)});
   1762   auto RHSExt = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {MI.getOperand(3)});
   1763   // Do the arithmetic in the larger type.
   1764   Register NewOp;
   1765   if (CarryIn) {
   1766     LLT CarryOutTy = MRI.getType(MI.getOperand(1).getReg());
   1767     NewOp = MIRBuilder
   1768                 .buildInstr(Opcode, {WideTy, CarryOutTy},
   1769                             {LHSExt, RHSExt, *CarryIn})
   1770                 .getReg(0);
   1771   } else {
   1772     NewOp = MIRBuilder.buildInstr(Opcode, {WideTy}, {LHSExt, RHSExt}).getReg(0);
   1773   }
   1774   LLT OrigTy = MRI.getType(MI.getOperand(0).getReg());
   1775   auto TruncOp = MIRBuilder.buildTrunc(OrigTy, NewOp);
   1776   auto ExtOp = MIRBuilder.buildInstr(ExtOpcode, {WideTy}, {TruncOp});
   1777   // There is no overflow if the ExtOp is the same as NewOp.
   1778   MIRBuilder.buildICmp(CmpInst::ICMP_NE, MI.getOperand(1), NewOp, ExtOp);
   1779   // Now trunc the NewOp to the original result.
   1780   MIRBuilder.buildTrunc(MI.getOperand(0), NewOp);
   1781   MI.eraseFromParent();
   1782   return Legalized;
   1783 }
   1784 
   1785 LegalizerHelper::LegalizeResult
   1786 LegalizerHelper::widenScalarAddSubShlSat(MachineInstr &MI, unsigned TypeIdx,
   1787                                          LLT WideTy) {
   1788   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT ||
   1789                   MI.getOpcode() == TargetOpcode::G_SSUBSAT ||
   1790                   MI.getOpcode() == TargetOpcode::G_SSHLSAT;
   1791   bool IsShift = MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
   1792                  MI.getOpcode() == TargetOpcode::G_USHLSAT;
   1793   // We can convert this to:
   1794   //   1. Any extend iN to iM
   1795   //   2. SHL by M-N
   1796   //   3. [US][ADD|SUB|SHL]SAT
   1797   //   4. L/ASHR by M-N
   1798   //
   1799   // It may be more efficient to lower this to a min and a max operation in
   1800   // the higher precision arithmetic if the promoted operation isn't legal,
   1801   // but this decision is up to the target's lowering request.
   1802   Register DstReg = MI.getOperand(0).getReg();
   1803 
   1804   unsigned NewBits = WideTy.getScalarSizeInBits();
   1805   unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits();
   1806 
   1807   // Shifts must zero-extend the RHS to preserve the unsigned quantity, and
   1808   // must not left shift the RHS to preserve the shift amount.
   1809   auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1));
   1810   auto RHS = IsShift ? MIRBuilder.buildZExt(WideTy, MI.getOperand(2))
   1811                      : MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2));
   1812   auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount);
   1813   auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK);
   1814   auto ShiftR = IsShift ? RHS : MIRBuilder.buildShl(WideTy, RHS, ShiftK);
   1815 
   1816   auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy},
   1817                                         {ShiftL, ShiftR}, MI.getFlags());
   1818 
   1819   // Use a shift that will preserve the number of sign bits when the trunc is
   1820   // folded away.
   1821   auto Result = IsSigned ? MIRBuilder.buildAShr(WideTy, WideInst, ShiftK)
   1822                          : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK);
   1823 
   1824   MIRBuilder.buildTrunc(DstReg, Result);
   1825   MI.eraseFromParent();
   1826   return Legalized;
   1827 }
   1828 
   1829 LegalizerHelper::LegalizeResult
   1830 LegalizerHelper::widenScalarMulo(MachineInstr &MI, unsigned TypeIdx,
   1831                                  LLT WideTy) {
   1832   if (TypeIdx == 1)
   1833     return UnableToLegalize;
   1834 
   1835   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULO;
   1836   Register Result = MI.getOperand(0).getReg();
   1837   Register OriginalOverflow = MI.getOperand(1).getReg();
   1838   Register LHS = MI.getOperand(2).getReg();
   1839   Register RHS = MI.getOperand(3).getReg();
   1840   LLT SrcTy = MRI.getType(LHS);
   1841   LLT OverflowTy = MRI.getType(OriginalOverflow);
   1842   unsigned SrcBitWidth = SrcTy.getScalarSizeInBits();
   1843 
   1844   // To determine if the result overflowed in the larger type, we extend the
   1845   // input to the larger type, do the multiply (checking if it overflows),
   1846   // then also check the high bits of the result to see if overflow happened
   1847   // there.
   1848   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
   1849   auto LeftOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {LHS});
   1850   auto RightOperand = MIRBuilder.buildInstr(ExtOp, {WideTy}, {RHS});
   1851 
   1852   auto Mulo = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy, OverflowTy},
   1853                                     {LeftOperand, RightOperand});
   1854   auto Mul = Mulo->getOperand(0);
   1855   MIRBuilder.buildTrunc(Result, Mul);
   1856 
   1857   MachineInstrBuilder ExtResult;
   1858   // Overflow occurred if it occurred in the larger type, or if the high part
   1859   // of the result does not zero/sign-extend the low part.  Check this second
   1860   // possibility first.
   1861   if (IsSigned) {
   1862     // For signed, overflow occurred when the high part does not sign-extend
   1863     // the low part.
   1864     ExtResult = MIRBuilder.buildSExtInReg(WideTy, Mul, SrcBitWidth);
   1865   } else {
   1866     // Unsigned overflow occurred when the high part does not zero-extend the
   1867     // low part.
   1868     ExtResult = MIRBuilder.buildZExtInReg(WideTy, Mul, SrcBitWidth);
   1869   }
   1870 
   1871   // Multiplication cannot overflow if the WideTy is >= 2 * original width,
   1872   // so we don't need to check the overflow result of larger type Mulo.
   1873   if (WideTy.getScalarSizeInBits() < 2 * SrcBitWidth) {
   1874     auto Overflow =
   1875         MIRBuilder.buildICmp(CmpInst::ICMP_NE, OverflowTy, Mul, ExtResult);
   1876     // Finally check if the multiplication in the larger type itself overflowed.
   1877     MIRBuilder.buildOr(OriginalOverflow, Mulo->getOperand(1), Overflow);
   1878   } else {
   1879     MIRBuilder.buildICmp(CmpInst::ICMP_NE, OriginalOverflow, Mul, ExtResult);
   1880   }
   1881   MI.eraseFromParent();
   1882   return Legalized;
   1883 }
   1884 
   1885 LegalizerHelper::LegalizeResult
   1886 LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
   1887   switch (MI.getOpcode()) {
   1888   default:
   1889     return UnableToLegalize;
   1890   case TargetOpcode::G_EXTRACT:
   1891     return widenScalarExtract(MI, TypeIdx, WideTy);
   1892   case TargetOpcode::G_INSERT:
   1893     return widenScalarInsert(MI, TypeIdx, WideTy);
   1894   case TargetOpcode::G_MERGE_VALUES:
   1895     return widenScalarMergeValues(MI, TypeIdx, WideTy);
   1896   case TargetOpcode::G_UNMERGE_VALUES:
   1897     return widenScalarUnmergeValues(MI, TypeIdx, WideTy);
   1898   case TargetOpcode::G_SADDO:
   1899   case TargetOpcode::G_SSUBO:
   1900   case TargetOpcode::G_UADDO:
   1901   case TargetOpcode::G_USUBO:
   1902   case TargetOpcode::G_SADDE:
   1903   case TargetOpcode::G_SSUBE:
   1904   case TargetOpcode::G_UADDE:
   1905   case TargetOpcode::G_USUBE:
   1906     return widenScalarAddSubOverflow(MI, TypeIdx, WideTy);
   1907   case TargetOpcode::G_UMULO:
   1908   case TargetOpcode::G_SMULO:
   1909     return widenScalarMulo(MI, TypeIdx, WideTy);
   1910   case TargetOpcode::G_SADDSAT:
   1911   case TargetOpcode::G_SSUBSAT:
   1912   case TargetOpcode::G_SSHLSAT:
   1913   case TargetOpcode::G_UADDSAT:
   1914   case TargetOpcode::G_USUBSAT:
   1915   case TargetOpcode::G_USHLSAT:
   1916     return widenScalarAddSubShlSat(MI, TypeIdx, WideTy);
   1917   case TargetOpcode::G_CTTZ:
   1918   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
   1919   case TargetOpcode::G_CTLZ:
   1920   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
   1921   case TargetOpcode::G_CTPOP: {
   1922     if (TypeIdx == 0) {
   1923       Observer.changingInstr(MI);
   1924       widenScalarDst(MI, WideTy, 0);
   1925       Observer.changedInstr(MI);
   1926       return Legalized;
   1927     }
   1928 
   1929     Register SrcReg = MI.getOperand(1).getReg();
   1930 
   1931     // First ZEXT the input.
   1932     auto MIBSrc = MIRBuilder.buildZExt(WideTy, SrcReg);
   1933     LLT CurTy = MRI.getType(SrcReg);
   1934     if (MI.getOpcode() == TargetOpcode::G_CTTZ) {
   1935       // The count is the same in the larger type except if the original
   1936       // value was zero.  This can be handled by setting the bit just off
   1937       // the top of the original type.
   1938       auto TopBit =
   1939           APInt::getOneBitSet(WideTy.getSizeInBits(), CurTy.getSizeInBits());
   1940       MIBSrc = MIRBuilder.buildOr(
   1941         WideTy, MIBSrc, MIRBuilder.buildConstant(WideTy, TopBit));
   1942     }
   1943 
   1944     // Perform the operation at the larger size.
   1945     auto MIBNewOp = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, {MIBSrc});
   1946     // This is already the correct result for CTPOP and CTTZs
   1947     if (MI.getOpcode() == TargetOpcode::G_CTLZ ||
   1948         MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF) {
   1949       // The correct result is NewOp - (Difference in widety and current ty).
   1950       unsigned SizeDiff = WideTy.getSizeInBits() - CurTy.getSizeInBits();
   1951       MIBNewOp = MIRBuilder.buildSub(
   1952           WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
   1953     }
   1954 
   1955     MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);
   1956     MI.eraseFromParent();
   1957     return Legalized;
   1958   }
   1959   case TargetOpcode::G_BSWAP: {
   1960     Observer.changingInstr(MI);
   1961     Register DstReg = MI.getOperand(0).getReg();
   1962 
   1963     Register ShrReg = MRI.createGenericVirtualRegister(WideTy);
   1964     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
   1965     Register ShiftAmtReg = MRI.createGenericVirtualRegister(WideTy);
   1966     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   1967 
   1968     MI.getOperand(0).setReg(DstExt);
   1969 
   1970     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   1971 
   1972     LLT Ty = MRI.getType(DstReg);
   1973     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
   1974     MIRBuilder.buildConstant(ShiftAmtReg, DiffBits);
   1975     MIRBuilder.buildLShr(ShrReg, DstExt, ShiftAmtReg);
   1976 
   1977     MIRBuilder.buildTrunc(DstReg, ShrReg);
   1978     Observer.changedInstr(MI);
   1979     return Legalized;
   1980   }
   1981   case TargetOpcode::G_BITREVERSE: {
   1982     Observer.changingInstr(MI);
   1983 
   1984     Register DstReg = MI.getOperand(0).getReg();
   1985     LLT Ty = MRI.getType(DstReg);
   1986     unsigned DiffBits = WideTy.getScalarSizeInBits() - Ty.getScalarSizeInBits();
   1987 
   1988     Register DstExt = MRI.createGenericVirtualRegister(WideTy);
   1989     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   1990     MI.getOperand(0).setReg(DstExt);
   1991     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   1992 
   1993     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, DiffBits);
   1994     auto Shift = MIRBuilder.buildLShr(WideTy, DstExt, ShiftAmt);
   1995     MIRBuilder.buildTrunc(DstReg, Shift);
   1996     Observer.changedInstr(MI);
   1997     return Legalized;
   1998   }
   1999   case TargetOpcode::G_FREEZE:
   2000     Observer.changingInstr(MI);
   2001     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   2002     widenScalarDst(MI, WideTy);
   2003     Observer.changedInstr(MI);
   2004     return Legalized;
   2005 
   2006   case TargetOpcode::G_ADD:
   2007   case TargetOpcode::G_AND:
   2008   case TargetOpcode::G_MUL:
   2009   case TargetOpcode::G_OR:
   2010   case TargetOpcode::G_XOR:
   2011   case TargetOpcode::G_SUB:
   2012     // Perform operation at larger width (any extension is fine here, high bits
   2013     // don't affect the result) and then truncate the result back to the
   2014     // original type.
   2015     Observer.changingInstr(MI);
   2016     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   2017     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
   2018     widenScalarDst(MI, WideTy);
   2019     Observer.changedInstr(MI);
   2020     return Legalized;
   2021 
   2022   case TargetOpcode::G_SHL:
   2023     Observer.changingInstr(MI);
   2024 
   2025     if (TypeIdx == 0) {
   2026       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   2027       widenScalarDst(MI, WideTy);
   2028     } else {
   2029       assert(TypeIdx == 1);
   2030       // The "number of bits to shift" operand must preserve its value as an
   2031       // unsigned integer:
   2032       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
   2033     }
   2034 
   2035     Observer.changedInstr(MI);
   2036     return Legalized;
   2037 
   2038   case TargetOpcode::G_SDIV:
   2039   case TargetOpcode::G_SREM:
   2040   case TargetOpcode::G_SMIN:
   2041   case TargetOpcode::G_SMAX:
   2042     Observer.changingInstr(MI);
   2043     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
   2044     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
   2045     widenScalarDst(MI, WideTy);
   2046     Observer.changedInstr(MI);
   2047     return Legalized;
   2048 
   2049   case TargetOpcode::G_ASHR:
   2050   case TargetOpcode::G_LSHR:
   2051     Observer.changingInstr(MI);
   2052 
   2053     if (TypeIdx == 0) {
   2054       unsigned CvtOp = MI.getOpcode() == TargetOpcode::G_ASHR ?
   2055         TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
   2056 
   2057       widenScalarSrc(MI, WideTy, 1, CvtOp);
   2058       widenScalarDst(MI, WideTy);
   2059     } else {
   2060       assert(TypeIdx == 1);
   2061       // The "number of bits to shift" operand must preserve its value as an
   2062       // unsigned integer:
   2063       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
   2064     }
   2065 
   2066     Observer.changedInstr(MI);
   2067     return Legalized;
   2068   case TargetOpcode::G_UDIV:
   2069   case TargetOpcode::G_UREM:
   2070   case TargetOpcode::G_UMIN:
   2071   case TargetOpcode::G_UMAX:
   2072     Observer.changingInstr(MI);
   2073     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
   2074     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
   2075     widenScalarDst(MI, WideTy);
   2076     Observer.changedInstr(MI);
   2077     return Legalized;
   2078 
   2079   case TargetOpcode::G_SELECT:
   2080     Observer.changingInstr(MI);
   2081     if (TypeIdx == 0) {
   2082       // Perform operation at larger width (any extension is fine here, high
   2083       // bits don't affect the result) and then truncate the result back to the
   2084       // original type.
   2085       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
   2086       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_ANYEXT);
   2087       widenScalarDst(MI, WideTy);
   2088     } else {
   2089       bool IsVec = MRI.getType(MI.getOperand(1).getReg()).isVector();
   2090       // Explicit extension is required here since high bits affect the result.
   2091       widenScalarSrc(MI, WideTy, 1, MIRBuilder.getBoolExtOp(IsVec, false));
   2092     }
   2093     Observer.changedInstr(MI);
   2094     return Legalized;
   2095 
   2096   case TargetOpcode::G_FPTOSI:
   2097   case TargetOpcode::G_FPTOUI:
   2098     Observer.changingInstr(MI);
   2099 
   2100     if (TypeIdx == 0)
   2101       widenScalarDst(MI, WideTy);
   2102     else
   2103       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
   2104 
   2105     Observer.changedInstr(MI);
   2106     return Legalized;
   2107   case TargetOpcode::G_SITOFP:
   2108     Observer.changingInstr(MI);
   2109 
   2110     if (TypeIdx == 0)
   2111       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
   2112     else
   2113       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_SEXT);
   2114 
   2115     Observer.changedInstr(MI);
   2116     return Legalized;
   2117   case TargetOpcode::G_UITOFP:
   2118     Observer.changingInstr(MI);
   2119 
   2120     if (TypeIdx == 0)
   2121       widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
   2122     else
   2123       widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
   2124 
   2125     Observer.changedInstr(MI);
   2126     return Legalized;
   2127   case TargetOpcode::G_LOAD:
   2128   case TargetOpcode::G_SEXTLOAD:
   2129   case TargetOpcode::G_ZEXTLOAD:
   2130     Observer.changingInstr(MI);
   2131     widenScalarDst(MI, WideTy);
   2132     Observer.changedInstr(MI);
   2133     return Legalized;
   2134 
   2135   case TargetOpcode::G_STORE: {
   2136     if (TypeIdx != 0)
   2137       return UnableToLegalize;
   2138 
   2139     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
   2140     if (!Ty.isScalar())
   2141       return UnableToLegalize;
   2142 
   2143     Observer.changingInstr(MI);
   2144 
   2145     unsigned ExtType = Ty.getScalarSizeInBits() == 1 ?
   2146       TargetOpcode::G_ZEXT : TargetOpcode::G_ANYEXT;
   2147     widenScalarSrc(MI, WideTy, 0, ExtType);
   2148 
   2149     Observer.changedInstr(MI);
   2150     return Legalized;
   2151   }
   2152   case TargetOpcode::G_CONSTANT: {
   2153     MachineOperand &SrcMO = MI.getOperand(1);
   2154     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
   2155     unsigned ExtOpc = LI.getExtOpcodeForWideningConstant(
   2156         MRI.getType(MI.getOperand(0).getReg()));
   2157     assert((ExtOpc == TargetOpcode::G_ZEXT || ExtOpc == TargetOpcode::G_SEXT ||
   2158             ExtOpc == TargetOpcode::G_ANYEXT) &&
   2159            "Illegal Extend");
   2160     const APInt &SrcVal = SrcMO.getCImm()->getValue();
   2161     const APInt &Val = (ExtOpc == TargetOpcode::G_SEXT)
   2162                            ? SrcVal.sext(WideTy.getSizeInBits())
   2163                            : SrcVal.zext(WideTy.getSizeInBits());
   2164     Observer.changingInstr(MI);
   2165     SrcMO.setCImm(ConstantInt::get(Ctx, Val));
   2166 
   2167     widenScalarDst(MI, WideTy);
   2168     Observer.changedInstr(MI);
   2169     return Legalized;
   2170   }
   2171   case TargetOpcode::G_FCONSTANT: {
   2172     MachineOperand &SrcMO = MI.getOperand(1);
   2173     LLVMContext &Ctx = MIRBuilder.getMF().getFunction().getContext();
   2174     APFloat Val = SrcMO.getFPImm()->getValueAPF();
   2175     bool LosesInfo;
   2176     switch (WideTy.getSizeInBits()) {
   2177     case 32:
   2178       Val.convert(APFloat::IEEEsingle(), APFloat::rmNearestTiesToEven,
   2179                   &LosesInfo);
   2180       break;
   2181     case 64:
   2182       Val.convert(APFloat::IEEEdouble(), APFloat::rmNearestTiesToEven,
   2183                   &LosesInfo);
   2184       break;
   2185     default:
   2186       return UnableToLegalize;
   2187     }
   2188 
   2189     assert(!LosesInfo && "extend should always be lossless");
   2190 
   2191     Observer.changingInstr(MI);
   2192     SrcMO.setFPImm(ConstantFP::get(Ctx, Val));
   2193 
   2194     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
   2195     Observer.changedInstr(MI);
   2196     return Legalized;
   2197   }
   2198   case TargetOpcode::G_IMPLICIT_DEF: {
   2199     Observer.changingInstr(MI);
   2200     widenScalarDst(MI, WideTy);
   2201     Observer.changedInstr(MI);
   2202     return Legalized;
   2203   }
   2204   case TargetOpcode::G_BRCOND:
   2205     Observer.changingInstr(MI);
   2206     widenScalarSrc(MI, WideTy, 0, MIRBuilder.getBoolExtOp(false, false));
   2207     Observer.changedInstr(MI);
   2208     return Legalized;
   2209 
   2210   case TargetOpcode::G_FCMP:
   2211     Observer.changingInstr(MI);
   2212     if (TypeIdx == 0)
   2213       widenScalarDst(MI, WideTy);
   2214     else {
   2215       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_FPEXT);
   2216       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_FPEXT);
   2217     }
   2218     Observer.changedInstr(MI);
   2219     return Legalized;
   2220 
   2221   case TargetOpcode::G_ICMP:
   2222     Observer.changingInstr(MI);
   2223     if (TypeIdx == 0)
   2224       widenScalarDst(MI, WideTy);
   2225     else {
   2226       unsigned ExtOpcode = CmpInst::isSigned(static_cast<CmpInst::Predicate>(
   2227                                MI.getOperand(1).getPredicate()))
   2228                                ? TargetOpcode::G_SEXT
   2229                                : TargetOpcode::G_ZEXT;
   2230       widenScalarSrc(MI, WideTy, 2, ExtOpcode);
   2231       widenScalarSrc(MI, WideTy, 3, ExtOpcode);
   2232     }
   2233     Observer.changedInstr(MI);
   2234     return Legalized;
   2235 
   2236   case TargetOpcode::G_PTR_ADD:
   2237     assert(TypeIdx == 1 && "unable to legalize pointer of G_PTR_ADD");
   2238     Observer.changingInstr(MI);
   2239     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
   2240     Observer.changedInstr(MI);
   2241     return Legalized;
   2242 
   2243   case TargetOpcode::G_PHI: {
   2244     assert(TypeIdx == 0 && "Expecting only Idx 0");
   2245 
   2246     Observer.changingInstr(MI);
   2247     for (unsigned I = 1; I < MI.getNumOperands(); I += 2) {
   2248       MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
   2249       MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
   2250       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_ANYEXT);
   2251     }
   2252 
   2253     MachineBasicBlock &MBB = *MI.getParent();
   2254     MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
   2255     widenScalarDst(MI, WideTy);
   2256     Observer.changedInstr(MI);
   2257     return Legalized;
   2258   }
   2259   case TargetOpcode::G_EXTRACT_VECTOR_ELT: {
   2260     if (TypeIdx == 0) {
   2261       Register VecReg = MI.getOperand(1).getReg();
   2262       LLT VecTy = MRI.getType(VecReg);
   2263       Observer.changingInstr(MI);
   2264 
   2265       widenScalarSrc(MI, LLT::vector(VecTy.getNumElements(),
   2266                                      WideTy.getSizeInBits()),
   2267                      1, TargetOpcode::G_SEXT);
   2268 
   2269       widenScalarDst(MI, WideTy, 0);
   2270       Observer.changedInstr(MI);
   2271       return Legalized;
   2272     }
   2273 
   2274     if (TypeIdx != 2)
   2275       return UnableToLegalize;
   2276     Observer.changingInstr(MI);
   2277     // TODO: Probably should be zext
   2278     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_SEXT);
   2279     Observer.changedInstr(MI);
   2280     return Legalized;
   2281   }
   2282   case TargetOpcode::G_INSERT_VECTOR_ELT: {
   2283     if (TypeIdx == 1) {
   2284       Observer.changingInstr(MI);
   2285 
   2286       Register VecReg = MI.getOperand(1).getReg();
   2287       LLT VecTy = MRI.getType(VecReg);
   2288       LLT WideVecTy = LLT::vector(VecTy.getNumElements(), WideTy);
   2289 
   2290       widenScalarSrc(MI, WideVecTy, 1, TargetOpcode::G_ANYEXT);
   2291       widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ANYEXT);
   2292       widenScalarDst(MI, WideVecTy, 0);
   2293       Observer.changedInstr(MI);
   2294       return Legalized;
   2295     }
   2296 
   2297     if (TypeIdx == 2) {
   2298       Observer.changingInstr(MI);
   2299       // TODO: Probably should be zext
   2300       widenScalarSrc(MI, WideTy, 3, TargetOpcode::G_SEXT);
   2301       Observer.changedInstr(MI);
   2302       return Legalized;
   2303     }
   2304 
   2305     return UnableToLegalize;
   2306   }
   2307   case TargetOpcode::G_FADD:
   2308   case TargetOpcode::G_FMUL:
   2309   case TargetOpcode::G_FSUB:
   2310   case TargetOpcode::G_FMA:
   2311   case TargetOpcode::G_FMAD:
   2312   case TargetOpcode::G_FNEG:
   2313   case TargetOpcode::G_FABS:
   2314   case TargetOpcode::G_FCANONICALIZE:
   2315   case TargetOpcode::G_FMINNUM:
   2316   case TargetOpcode::G_FMAXNUM:
   2317   case TargetOpcode::G_FMINNUM_IEEE:
   2318   case TargetOpcode::G_FMAXNUM_IEEE:
   2319   case TargetOpcode::G_FMINIMUM:
   2320   case TargetOpcode::G_FMAXIMUM:
   2321   case TargetOpcode::G_FDIV:
   2322   case TargetOpcode::G_FREM:
   2323   case TargetOpcode::G_FCEIL:
   2324   case TargetOpcode::G_FFLOOR:
   2325   case TargetOpcode::G_FCOS:
   2326   case TargetOpcode::G_FSIN:
   2327   case TargetOpcode::G_FLOG10:
   2328   case TargetOpcode::G_FLOG:
   2329   case TargetOpcode::G_FLOG2:
   2330   case TargetOpcode::G_FRINT:
   2331   case TargetOpcode::G_FNEARBYINT:
   2332   case TargetOpcode::G_FSQRT:
   2333   case TargetOpcode::G_FEXP:
   2334   case TargetOpcode::G_FEXP2:
   2335   case TargetOpcode::G_FPOW:
   2336   case TargetOpcode::G_INTRINSIC_TRUNC:
   2337   case TargetOpcode::G_INTRINSIC_ROUND:
   2338   case TargetOpcode::G_INTRINSIC_ROUNDEVEN:
   2339     assert(TypeIdx == 0);
   2340     Observer.changingInstr(MI);
   2341 
   2342     for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
   2343       widenScalarSrc(MI, WideTy, I, TargetOpcode::G_FPEXT);
   2344 
   2345     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
   2346     Observer.changedInstr(MI);
   2347     return Legalized;
   2348   case TargetOpcode::G_FPOWI: {
   2349     if (TypeIdx != 0)
   2350       return UnableToLegalize;
   2351     Observer.changingInstr(MI);
   2352     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_FPEXT);
   2353     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_FPTRUNC);
   2354     Observer.changedInstr(MI);
   2355     return Legalized;
   2356   }
   2357   case TargetOpcode::G_INTTOPTR:
   2358     if (TypeIdx != 1)
   2359       return UnableToLegalize;
   2360 
   2361     Observer.changingInstr(MI);
   2362     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ZEXT);
   2363     Observer.changedInstr(MI);
   2364     return Legalized;
   2365   case TargetOpcode::G_PTRTOINT:
   2366     if (TypeIdx != 0)
   2367       return UnableToLegalize;
   2368 
   2369     Observer.changingInstr(MI);
   2370     widenScalarDst(MI, WideTy, 0);
   2371     Observer.changedInstr(MI);
   2372     return Legalized;
   2373   case TargetOpcode::G_BUILD_VECTOR: {
   2374     Observer.changingInstr(MI);
   2375 
   2376     const LLT WideEltTy = TypeIdx == 1 ? WideTy : WideTy.getElementType();
   2377     for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
   2378       widenScalarSrc(MI, WideEltTy, I, TargetOpcode::G_ANYEXT);
   2379 
   2380     // Avoid changing the result vector type if the source element type was
   2381     // requested.
   2382     if (TypeIdx == 1) {
   2383       MI.setDesc(MIRBuilder.getTII().get(TargetOpcode::G_BUILD_VECTOR_TRUNC));
   2384     } else {
   2385       widenScalarDst(MI, WideTy, 0);
   2386     }
   2387 
   2388     Observer.changedInstr(MI);
   2389     return Legalized;
   2390   }
   2391   case TargetOpcode::G_SEXT_INREG:
   2392     if (TypeIdx != 0)
   2393       return UnableToLegalize;
   2394 
   2395     Observer.changingInstr(MI);
   2396     widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT);
   2397     widenScalarDst(MI, WideTy, 0, TargetOpcode::G_TRUNC);
   2398     Observer.changedInstr(MI);
   2399     return Legalized;
   2400   case TargetOpcode::G_PTRMASK: {
   2401     if (TypeIdx != 1)
   2402       return UnableToLegalize;
   2403     Observer.changingInstr(MI);
   2404     widenScalarSrc(MI, WideTy, 2, TargetOpcode::G_ZEXT);
   2405     Observer.changedInstr(MI);
   2406     return Legalized;
   2407   }
   2408   }
   2409 }
   2410 
   2411 static void getUnmergePieces(SmallVectorImpl<Register> &Pieces,
   2412                              MachineIRBuilder &B, Register Src, LLT Ty) {
   2413   auto Unmerge = B.buildUnmerge(Ty, Src);
   2414   for (int I = 0, E = Unmerge->getNumOperands() - 1; I != E; ++I)
   2415     Pieces.push_back(Unmerge.getReg(I));
   2416 }
   2417 
   2418 LegalizerHelper::LegalizeResult
   2419 LegalizerHelper::lowerBitcast(MachineInstr &MI) {
   2420   Register Dst = MI.getOperand(0).getReg();
   2421   Register Src = MI.getOperand(1).getReg();
   2422   LLT DstTy = MRI.getType(Dst);
   2423   LLT SrcTy = MRI.getType(Src);
   2424 
   2425   if (SrcTy.isVector()) {
   2426     LLT SrcEltTy = SrcTy.getElementType();
   2427     SmallVector<Register, 8> SrcRegs;
   2428 
   2429     if (DstTy.isVector()) {
   2430       int NumDstElt = DstTy.getNumElements();
   2431       int NumSrcElt = SrcTy.getNumElements();
   2432 
   2433       LLT DstEltTy = DstTy.getElementType();
   2434       LLT DstCastTy = DstEltTy; // Intermediate bitcast result type
   2435       LLT SrcPartTy = SrcEltTy; // Original unmerge result type.
   2436 
   2437       // If there's an element size mismatch, insert intermediate casts to match
   2438       // the result element type.
   2439       if (NumSrcElt < NumDstElt) { // Source element type is larger.
   2440         // %1:_(<4 x s8>) = G_BITCAST %0:_(<2 x s16>)
   2441         //
   2442         // =>
   2443         //
   2444         // %2:_(s16), %3:_(s16) = G_UNMERGE_VALUES %0
   2445         // %3:_(<2 x s8>) = G_BITCAST %2
   2446         // %4:_(<2 x s8>) = G_BITCAST %3
   2447         // %1:_(<4 x s16>) = G_CONCAT_VECTORS %3, %4
   2448         DstCastTy = LLT::vector(NumDstElt / NumSrcElt, DstEltTy);
   2449         SrcPartTy = SrcEltTy;
   2450       } else if (NumSrcElt > NumDstElt) { // Source element type is smaller.
   2451         //
   2452         // %1:_(<2 x s16>) = G_BITCAST %0:_(<4 x s8>)
   2453         //
   2454         // =>
   2455         //
   2456         // %2:_(<2 x s8>), %3:_(<2 x s8>) = G_UNMERGE_VALUES %0
   2457         // %3:_(s16) = G_BITCAST %2
   2458         // %4:_(s16) = G_BITCAST %3
   2459         // %1:_(<2 x s16>) = G_BUILD_VECTOR %3, %4
   2460         SrcPartTy = LLT::vector(NumSrcElt / NumDstElt, SrcEltTy);
   2461         DstCastTy = DstEltTy;
   2462       }
   2463 
   2464       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcPartTy);
   2465       for (Register &SrcReg : SrcRegs)
   2466         SrcReg = MIRBuilder.buildBitcast(DstCastTy, SrcReg).getReg(0);
   2467     } else
   2468       getUnmergePieces(SrcRegs, MIRBuilder, Src, SrcEltTy);
   2469 
   2470     MIRBuilder.buildMerge(Dst, SrcRegs);
   2471     MI.eraseFromParent();
   2472     return Legalized;
   2473   }
   2474 
   2475   if (DstTy.isVector()) {
   2476     SmallVector<Register, 8> SrcRegs;
   2477     getUnmergePieces(SrcRegs, MIRBuilder, Src, DstTy.getElementType());
   2478     MIRBuilder.buildMerge(Dst, SrcRegs);
   2479     MI.eraseFromParent();
   2480     return Legalized;
   2481   }
   2482 
   2483   return UnableToLegalize;
   2484 }
   2485 
   2486 /// Figure out the bit offset into a register when coercing a vector index for
   2487 /// the wide element type. This is only for the case when promoting vector to
   2488 /// one with larger elements.
   2489 //
   2490 ///
   2491 /// %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
   2492 /// %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
   2493 static Register getBitcastWiderVectorElementOffset(MachineIRBuilder &B,
   2494                                                    Register Idx,
   2495                                                    unsigned NewEltSize,
   2496                                                    unsigned OldEltSize) {
   2497   const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
   2498   LLT IdxTy = B.getMRI()->getType(Idx);
   2499 
   2500   // Now figure out the amount we need to shift to get the target bits.
   2501   auto OffsetMask = B.buildConstant(
   2502     IdxTy, ~(APInt::getAllOnesValue(IdxTy.getSizeInBits()) << Log2EltRatio));
   2503   auto OffsetIdx = B.buildAnd(IdxTy, Idx, OffsetMask);
   2504   return B.buildShl(IdxTy, OffsetIdx,
   2505                     B.buildConstant(IdxTy, Log2_32(OldEltSize))).getReg(0);
   2506 }
   2507 
   2508 /// Perform a G_EXTRACT_VECTOR_ELT in a different sized vector element. If this
   2509 /// is casting to a vector with a smaller element size, perform multiple element
   2510 /// extracts and merge the results. If this is coercing to a vector with larger
   2511 /// elements, index the bitcasted vector and extract the target element with bit
   2512 /// operations. This is intended to force the indexing in the native register
   2513 /// size for architectures that can dynamically index the register file.
   2514 LegalizerHelper::LegalizeResult
   2515 LegalizerHelper::bitcastExtractVectorElt(MachineInstr &MI, unsigned TypeIdx,
   2516                                          LLT CastTy) {
   2517   if (TypeIdx != 1)
   2518     return UnableToLegalize;
   2519 
   2520   Register Dst = MI.getOperand(0).getReg();
   2521   Register SrcVec = MI.getOperand(1).getReg();
   2522   Register Idx = MI.getOperand(2).getReg();
   2523   LLT SrcVecTy = MRI.getType(SrcVec);
   2524   LLT IdxTy = MRI.getType(Idx);
   2525 
   2526   LLT SrcEltTy = SrcVecTy.getElementType();
   2527   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
   2528   unsigned OldNumElts = SrcVecTy.getNumElements();
   2529 
   2530   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
   2531   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
   2532 
   2533   const unsigned NewEltSize = NewEltTy.getSizeInBits();
   2534   const unsigned OldEltSize = SrcEltTy.getSizeInBits();
   2535   if (NewNumElts > OldNumElts) {
   2536     // Decreasing the vector element size
   2537     //
   2538     // e.g. i64 = extract_vector_elt x:v2i64, y:i32
   2539     //  =>
   2540     //  v4i32:castx = bitcast x:v2i64
   2541     //
   2542     // i64 = bitcast
   2543     //   (v2i32 build_vector (i32 (extract_vector_elt castx, (2 * y))),
   2544     //                       (i32 (extract_vector_elt castx, (2 * y + 1)))
   2545     //
   2546     if (NewNumElts % OldNumElts != 0)
   2547       return UnableToLegalize;
   2548 
   2549     // Type of the intermediate result vector.
   2550     const unsigned NewEltsPerOldElt = NewNumElts / OldNumElts;
   2551     LLT MidTy = LLT::scalarOrVector(NewEltsPerOldElt, NewEltTy);
   2552 
   2553     auto NewEltsPerOldEltK = MIRBuilder.buildConstant(IdxTy, NewEltsPerOldElt);
   2554 
   2555     SmallVector<Register, 8> NewOps(NewEltsPerOldElt);
   2556     auto NewBaseIdx = MIRBuilder.buildMul(IdxTy, Idx, NewEltsPerOldEltK);
   2557 
   2558     for (unsigned I = 0; I < NewEltsPerOldElt; ++I) {
   2559       auto IdxOffset = MIRBuilder.buildConstant(IdxTy, I);
   2560       auto TmpIdx = MIRBuilder.buildAdd(IdxTy, NewBaseIdx, IdxOffset);
   2561       auto Elt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec, TmpIdx);
   2562       NewOps[I] = Elt.getReg(0);
   2563     }
   2564 
   2565     auto NewVec = MIRBuilder.buildBuildVector(MidTy, NewOps);
   2566     MIRBuilder.buildBitcast(Dst, NewVec);
   2567     MI.eraseFromParent();
   2568     return Legalized;
   2569   }
   2570 
   2571   if (NewNumElts < OldNumElts) {
   2572     if (NewEltSize % OldEltSize != 0)
   2573       return UnableToLegalize;
   2574 
   2575     // This only depends on powers of 2 because we use bit tricks to figure out
   2576     // the bit offset we need to shift to get the target element. A general
   2577     // expansion could emit division/multiply.
   2578     if (!isPowerOf2_32(NewEltSize / OldEltSize))
   2579       return UnableToLegalize;
   2580 
   2581     // Increasing the vector element size.
   2582     // %elt:_(small_elt) = G_EXTRACT_VECTOR_ELT %vec:_(<N x small_elt>), %idx
   2583     //
   2584     //   =>
   2585     //
   2586     // %cast = G_BITCAST %vec
   2587     // %scaled_idx = G_LSHR %idx, Log2(DstEltSize / SrcEltSize)
   2588     // %wide_elt  = G_EXTRACT_VECTOR_ELT %cast, %scaled_idx
   2589     // %offset_idx = G_AND %idx, ~(-1 << Log2(DstEltSize / SrcEltSize))
   2590     // %offset_bits = G_SHL %offset_idx, Log2(SrcEltSize)
   2591     // %elt_bits = G_LSHR %wide_elt, %offset_bits
   2592     // %elt = G_TRUNC %elt_bits
   2593 
   2594     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
   2595     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
   2596 
   2597     // Divide to get the index in the wider element type.
   2598     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
   2599 
   2600     Register WideElt = CastVec;
   2601     if (CastTy.isVector()) {
   2602       WideElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
   2603                                                      ScaledIdx).getReg(0);
   2604     }
   2605 
   2606     // Compute the bit offset into the register of the target element.
   2607     Register OffsetBits = getBitcastWiderVectorElementOffset(
   2608       MIRBuilder, Idx, NewEltSize, OldEltSize);
   2609 
   2610     // Shift the wide element to get the target element.
   2611     auto ExtractedBits = MIRBuilder.buildLShr(NewEltTy, WideElt, OffsetBits);
   2612     MIRBuilder.buildTrunc(Dst, ExtractedBits);
   2613     MI.eraseFromParent();
   2614     return Legalized;
   2615   }
   2616 
   2617   return UnableToLegalize;
   2618 }
   2619 
   2620 /// Emit code to insert \p InsertReg into \p TargetRet at \p OffsetBits in \p
   2621 /// TargetReg, while preserving other bits in \p TargetReg.
   2622 ///
   2623 /// (InsertReg << Offset) | (TargetReg & ~(-1 >> InsertReg.size()) << Offset)
   2624 static Register buildBitFieldInsert(MachineIRBuilder &B,
   2625                                     Register TargetReg, Register InsertReg,
   2626                                     Register OffsetBits) {
   2627   LLT TargetTy = B.getMRI()->getType(TargetReg);
   2628   LLT InsertTy = B.getMRI()->getType(InsertReg);
   2629   auto ZextVal = B.buildZExt(TargetTy, InsertReg);
   2630   auto ShiftedInsertVal = B.buildShl(TargetTy, ZextVal, OffsetBits);
   2631 
   2632   // Produce a bitmask of the value to insert
   2633   auto EltMask = B.buildConstant(
   2634     TargetTy, APInt::getLowBitsSet(TargetTy.getSizeInBits(),
   2635                                    InsertTy.getSizeInBits()));
   2636   // Shift it into position
   2637   auto ShiftedMask = B.buildShl(TargetTy, EltMask, OffsetBits);
   2638   auto InvShiftedMask = B.buildNot(TargetTy, ShiftedMask);
   2639 
   2640   // Clear out the bits in the wide element
   2641   auto MaskedOldElt = B.buildAnd(TargetTy, TargetReg, InvShiftedMask);
   2642 
   2643   // The value to insert has all zeros already, so stick it into the masked
   2644   // wide element.
   2645   return B.buildOr(TargetTy, MaskedOldElt, ShiftedInsertVal).getReg(0);
   2646 }
   2647 
   2648 /// Perform a G_INSERT_VECTOR_ELT in a different sized vector element. If this
   2649 /// is increasing the element size, perform the indexing in the target element
   2650 /// type, and use bit operations to insert at the element position. This is
   2651 /// intended for architectures that can dynamically index the register file and
   2652 /// want to force indexing in the native register size.
   2653 LegalizerHelper::LegalizeResult
   2654 LegalizerHelper::bitcastInsertVectorElt(MachineInstr &MI, unsigned TypeIdx,
   2655                                         LLT CastTy) {
   2656   if (TypeIdx != 0)
   2657     return UnableToLegalize;
   2658 
   2659   Register Dst = MI.getOperand(0).getReg();
   2660   Register SrcVec = MI.getOperand(1).getReg();
   2661   Register Val = MI.getOperand(2).getReg();
   2662   Register Idx = MI.getOperand(3).getReg();
   2663 
   2664   LLT VecTy = MRI.getType(Dst);
   2665   LLT IdxTy = MRI.getType(Idx);
   2666 
   2667   LLT VecEltTy = VecTy.getElementType();
   2668   LLT NewEltTy = CastTy.isVector() ? CastTy.getElementType() : CastTy;
   2669   const unsigned NewEltSize = NewEltTy.getSizeInBits();
   2670   const unsigned OldEltSize = VecEltTy.getSizeInBits();
   2671 
   2672   unsigned NewNumElts = CastTy.isVector() ? CastTy.getNumElements() : 1;
   2673   unsigned OldNumElts = VecTy.getNumElements();
   2674 
   2675   Register CastVec = MIRBuilder.buildBitcast(CastTy, SrcVec).getReg(0);
   2676   if (NewNumElts < OldNumElts) {
   2677     if (NewEltSize % OldEltSize != 0)
   2678       return UnableToLegalize;
   2679 
   2680     // This only depends on powers of 2 because we use bit tricks to figure out
   2681     // the bit offset we need to shift to get the target element. A general
   2682     // expansion could emit division/multiply.
   2683     if (!isPowerOf2_32(NewEltSize / OldEltSize))
   2684       return UnableToLegalize;
   2685 
   2686     const unsigned Log2EltRatio = Log2_32(NewEltSize / OldEltSize);
   2687     auto Log2Ratio = MIRBuilder.buildConstant(IdxTy, Log2EltRatio);
   2688 
   2689     // Divide to get the index in the wider element type.
   2690     auto ScaledIdx = MIRBuilder.buildLShr(IdxTy, Idx, Log2Ratio);
   2691 
   2692     Register ExtractedElt = CastVec;
   2693     if (CastTy.isVector()) {
   2694       ExtractedElt = MIRBuilder.buildExtractVectorElement(NewEltTy, CastVec,
   2695                                                           ScaledIdx).getReg(0);
   2696     }
   2697 
   2698     // Compute the bit offset into the register of the target element.
   2699     Register OffsetBits = getBitcastWiderVectorElementOffset(
   2700       MIRBuilder, Idx, NewEltSize, OldEltSize);
   2701 
   2702     Register InsertedElt = buildBitFieldInsert(MIRBuilder, ExtractedElt,
   2703                                                Val, OffsetBits);
   2704     if (CastTy.isVector()) {
   2705       InsertedElt = MIRBuilder.buildInsertVectorElement(
   2706         CastTy, CastVec, InsertedElt, ScaledIdx).getReg(0);
   2707     }
   2708 
   2709     MIRBuilder.buildBitcast(Dst, InsertedElt);
   2710     MI.eraseFromParent();
   2711     return Legalized;
   2712   }
   2713 
   2714   return UnableToLegalize;
   2715 }
   2716 
   2717 LegalizerHelper::LegalizeResult
   2718 LegalizerHelper::lowerLoad(MachineInstr &MI) {
   2719   // Lower to a memory-width G_LOAD and a G_SEXT/G_ZEXT/G_ANYEXT
   2720   Register DstReg = MI.getOperand(0).getReg();
   2721   Register PtrReg = MI.getOperand(1).getReg();
   2722   LLT DstTy = MRI.getType(DstReg);
   2723   auto &MMO = **MI.memoperands_begin();
   2724 
   2725   if (DstTy.getSizeInBits() == MMO.getSizeInBits()) {
   2726     if (MI.getOpcode() == TargetOpcode::G_LOAD) {
   2727       // This load needs splitting into power of 2 sized loads.
   2728       if (DstTy.isVector())
   2729         return UnableToLegalize;
   2730       if (isPowerOf2_32(DstTy.getSizeInBits()))
   2731         return UnableToLegalize; // Don't know what we're being asked to do.
   2732 
   2733       // Our strategy here is to generate anyextending loads for the smaller
   2734       // types up to next power-2 result type, and then combine the two larger
   2735       // result values together, before truncating back down to the non-pow-2
   2736       // type.
   2737       // E.g. v1 = i24 load =>
   2738       // v2 = i32 zextload (2 byte)
   2739       // v3 = i32 load (1 byte)
   2740       // v4 = i32 shl v3, 16
   2741       // v5 = i32 or v4, v2
   2742       // v1 = i24 trunc v5
   2743       // By doing this we generate the correct truncate which should get
   2744       // combined away as an artifact with a matching extend.
   2745       uint64_t LargeSplitSize = PowerOf2Floor(DstTy.getSizeInBits());
   2746       uint64_t SmallSplitSize = DstTy.getSizeInBits() - LargeSplitSize;
   2747 
   2748       MachineFunction &MF = MIRBuilder.getMF();
   2749       MachineMemOperand *LargeMMO =
   2750         MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
   2751       MachineMemOperand *SmallMMO = MF.getMachineMemOperand(
   2752         &MMO, LargeSplitSize / 8, SmallSplitSize / 8);
   2753 
   2754       LLT PtrTy = MRI.getType(PtrReg);
   2755       unsigned AnyExtSize = NextPowerOf2(DstTy.getSizeInBits());
   2756       LLT AnyExtTy = LLT::scalar(AnyExtSize);
   2757       Register LargeLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
   2758       Register SmallLdReg = MRI.createGenericVirtualRegister(AnyExtTy);
   2759       auto LargeLoad = MIRBuilder.buildLoadInstr(
   2760         TargetOpcode::G_ZEXTLOAD, LargeLdReg, PtrReg, *LargeMMO);
   2761 
   2762       auto OffsetCst = MIRBuilder.buildConstant(
   2763         LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
   2764       Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
   2765       auto SmallPtr =
   2766         MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
   2767       auto SmallLoad = MIRBuilder.buildLoad(SmallLdReg, SmallPtr.getReg(0),
   2768                                             *SmallMMO);
   2769 
   2770       auto ShiftAmt = MIRBuilder.buildConstant(AnyExtTy, LargeSplitSize);
   2771       auto Shift = MIRBuilder.buildShl(AnyExtTy, SmallLoad, ShiftAmt);
   2772       auto Or = MIRBuilder.buildOr(AnyExtTy, Shift, LargeLoad);
   2773       MIRBuilder.buildTrunc(DstReg, {Or.getReg(0)});
   2774       MI.eraseFromParent();
   2775       return Legalized;
   2776     }
   2777 
   2778     MIRBuilder.buildLoad(DstReg, PtrReg, MMO);
   2779     MI.eraseFromParent();
   2780     return Legalized;
   2781   }
   2782 
   2783   if (DstTy.isScalar()) {
   2784     Register TmpReg =
   2785       MRI.createGenericVirtualRegister(LLT::scalar(MMO.getSizeInBits()));
   2786     MIRBuilder.buildLoad(TmpReg, PtrReg, MMO);
   2787     switch (MI.getOpcode()) {
   2788     default:
   2789       llvm_unreachable("Unexpected opcode");
   2790     case TargetOpcode::G_LOAD:
   2791       MIRBuilder.buildAnyExtOrTrunc(DstReg, TmpReg);
   2792       break;
   2793     case TargetOpcode::G_SEXTLOAD:
   2794       MIRBuilder.buildSExt(DstReg, TmpReg);
   2795       break;
   2796     case TargetOpcode::G_ZEXTLOAD:
   2797       MIRBuilder.buildZExt(DstReg, TmpReg);
   2798       break;
   2799     }
   2800 
   2801     MI.eraseFromParent();
   2802     return Legalized;
   2803   }
   2804 
   2805   return UnableToLegalize;
   2806 }
   2807 
   2808 LegalizerHelper::LegalizeResult
   2809 LegalizerHelper::lowerStore(MachineInstr &MI) {
   2810   // Lower a non-power of 2 store into multiple pow-2 stores.
   2811   // E.g. split an i24 store into an i16 store + i8 store.
   2812   // We do this by first extending the stored value to the next largest power
   2813   // of 2 type, and then using truncating stores to store the components.
   2814   // By doing this, likewise with G_LOAD, generate an extend that can be
   2815   // artifact-combined away instead of leaving behind extracts.
   2816   Register SrcReg = MI.getOperand(0).getReg();
   2817   Register PtrReg = MI.getOperand(1).getReg();
   2818   LLT SrcTy = MRI.getType(SrcReg);
   2819   MachineMemOperand &MMO = **MI.memoperands_begin();
   2820   if (SrcTy.getSizeInBits() != MMO.getSizeInBits())
   2821     return UnableToLegalize;
   2822   if (SrcTy.isVector())
   2823     return UnableToLegalize;
   2824   if (isPowerOf2_32(SrcTy.getSizeInBits()))
   2825     return UnableToLegalize; // Don't know what we're being asked to do.
   2826 
   2827   // Extend to the next pow-2.
   2828   const LLT ExtendTy = LLT::scalar(NextPowerOf2(SrcTy.getSizeInBits()));
   2829   auto ExtVal = MIRBuilder.buildAnyExt(ExtendTy, SrcReg);
   2830 
   2831   // Obtain the smaller value by shifting away the larger value.
   2832   uint64_t LargeSplitSize = PowerOf2Floor(SrcTy.getSizeInBits());
   2833   uint64_t SmallSplitSize = SrcTy.getSizeInBits() - LargeSplitSize;
   2834   auto ShiftAmt = MIRBuilder.buildConstant(ExtendTy, LargeSplitSize);
   2835   auto SmallVal = MIRBuilder.buildLShr(ExtendTy, ExtVal, ShiftAmt);
   2836 
   2837   // Generate the PtrAdd and truncating stores.
   2838   LLT PtrTy = MRI.getType(PtrReg);
   2839   auto OffsetCst = MIRBuilder.buildConstant(
   2840     LLT::scalar(PtrTy.getSizeInBits()), LargeSplitSize / 8);
   2841   Register PtrAddReg = MRI.createGenericVirtualRegister(PtrTy);
   2842   auto SmallPtr =
   2843     MIRBuilder.buildPtrAdd(PtrAddReg, PtrReg, OffsetCst.getReg(0));
   2844 
   2845   MachineFunction &MF = MIRBuilder.getMF();
   2846   MachineMemOperand *LargeMMO =
   2847     MF.getMachineMemOperand(&MMO, 0, LargeSplitSize / 8);
   2848   MachineMemOperand *SmallMMO =
   2849     MF.getMachineMemOperand(&MMO, LargeSplitSize / 8, SmallSplitSize / 8);
   2850   MIRBuilder.buildStore(ExtVal.getReg(0), PtrReg, *LargeMMO);
   2851   MIRBuilder.buildStore(SmallVal.getReg(0), SmallPtr.getReg(0), *SmallMMO);
   2852   MI.eraseFromParent();
   2853   return Legalized;
   2854 }
   2855 
   2856 LegalizerHelper::LegalizeResult
   2857 LegalizerHelper::bitcast(MachineInstr &MI, unsigned TypeIdx, LLT CastTy) {
   2858   switch (MI.getOpcode()) {
   2859   case TargetOpcode::G_LOAD: {
   2860     if (TypeIdx != 0)
   2861       return UnableToLegalize;
   2862 
   2863     Observer.changingInstr(MI);
   2864     bitcastDst(MI, CastTy, 0);
   2865     Observer.changedInstr(MI);
   2866     return Legalized;
   2867   }
   2868   case TargetOpcode::G_STORE: {
   2869     if (TypeIdx != 0)
   2870       return UnableToLegalize;
   2871 
   2872     Observer.changingInstr(MI);
   2873     bitcastSrc(MI, CastTy, 0);
   2874     Observer.changedInstr(MI);
   2875     return Legalized;
   2876   }
   2877   case TargetOpcode::G_SELECT: {
   2878     if (TypeIdx != 0)
   2879       return UnableToLegalize;
   2880 
   2881     if (MRI.getType(MI.getOperand(1).getReg()).isVector()) {
   2882       LLVM_DEBUG(
   2883           dbgs() << "bitcast action not implemented for vector select\n");
   2884       return UnableToLegalize;
   2885     }
   2886 
   2887     Observer.changingInstr(MI);
   2888     bitcastSrc(MI, CastTy, 2);
   2889     bitcastSrc(MI, CastTy, 3);
   2890     bitcastDst(MI, CastTy, 0);
   2891     Observer.changedInstr(MI);
   2892     return Legalized;
   2893   }
   2894   case TargetOpcode::G_AND:
   2895   case TargetOpcode::G_OR:
   2896   case TargetOpcode::G_XOR: {
   2897     Observer.changingInstr(MI);
   2898     bitcastSrc(MI, CastTy, 1);
   2899     bitcastSrc(MI, CastTy, 2);
   2900     bitcastDst(MI, CastTy, 0);
   2901     Observer.changedInstr(MI);
   2902     return Legalized;
   2903   }
   2904   case TargetOpcode::G_EXTRACT_VECTOR_ELT:
   2905     return bitcastExtractVectorElt(MI, TypeIdx, CastTy);
   2906   case TargetOpcode::G_INSERT_VECTOR_ELT:
   2907     return bitcastInsertVectorElt(MI, TypeIdx, CastTy);
   2908   default:
   2909     return UnableToLegalize;
   2910   }
   2911 }
   2912 
   2913 // Legalize an instruction by changing the opcode in place.
   2914 void LegalizerHelper::changeOpcode(MachineInstr &MI, unsigned NewOpcode) {
   2915     Observer.changingInstr(MI);
   2916     MI.setDesc(MIRBuilder.getTII().get(NewOpcode));
   2917     Observer.changedInstr(MI);
   2918 }
   2919 
   2920 LegalizerHelper::LegalizeResult
   2921 LegalizerHelper::lower(MachineInstr &MI, unsigned TypeIdx, LLT LowerHintTy) {
   2922   using namespace TargetOpcode;
   2923 
   2924   switch(MI.getOpcode()) {
   2925   default:
   2926     return UnableToLegalize;
   2927   case TargetOpcode::G_BITCAST:
   2928     return lowerBitcast(MI);
   2929   case TargetOpcode::G_SREM:
   2930   case TargetOpcode::G_UREM: {
   2931     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
   2932     auto Quot =
   2933         MIRBuilder.buildInstr(MI.getOpcode() == G_SREM ? G_SDIV : G_UDIV, {Ty},
   2934                               {MI.getOperand(1), MI.getOperand(2)});
   2935 
   2936     auto Prod = MIRBuilder.buildMul(Ty, Quot, MI.getOperand(2));
   2937     MIRBuilder.buildSub(MI.getOperand(0), MI.getOperand(1), Prod);
   2938     MI.eraseFromParent();
   2939     return Legalized;
   2940   }
   2941   case TargetOpcode::G_SADDO:
   2942   case TargetOpcode::G_SSUBO:
   2943     return lowerSADDO_SSUBO(MI);
   2944   case TargetOpcode::G_UMULH:
   2945   case TargetOpcode::G_SMULH:
   2946     return lowerSMULH_UMULH(MI);
   2947   case TargetOpcode::G_SMULO:
   2948   case TargetOpcode::G_UMULO: {
   2949     // Generate G_UMULH/G_SMULH to check for overflow and a normal G_MUL for the
   2950     // result.
   2951     Register Res = MI.getOperand(0).getReg();
   2952     Register Overflow = MI.getOperand(1).getReg();
   2953     Register LHS = MI.getOperand(2).getReg();
   2954     Register RHS = MI.getOperand(3).getReg();
   2955     LLT Ty = MRI.getType(Res);
   2956 
   2957     unsigned Opcode = MI.getOpcode() == TargetOpcode::G_SMULO
   2958                           ? TargetOpcode::G_SMULH
   2959                           : TargetOpcode::G_UMULH;
   2960 
   2961     Observer.changingInstr(MI);
   2962     const auto &TII = MIRBuilder.getTII();
   2963     MI.setDesc(TII.get(TargetOpcode::G_MUL));
   2964     MI.RemoveOperand(1);
   2965     Observer.changedInstr(MI);
   2966 
   2967     auto HiPart = MIRBuilder.buildInstr(Opcode, {Ty}, {LHS, RHS});
   2968     auto Zero = MIRBuilder.buildConstant(Ty, 0);
   2969 
   2970     // Move insert point forward so we can use the Res register if needed.
   2971     MIRBuilder.setInsertPt(MIRBuilder.getMBB(), ++MIRBuilder.getInsertPt());
   2972 
   2973     // For *signed* multiply, overflow is detected by checking:
   2974     // (hi != (lo >> bitwidth-1))
   2975     if (Opcode == TargetOpcode::G_SMULH) {
   2976       auto ShiftAmt = MIRBuilder.buildConstant(Ty, Ty.getSizeInBits() - 1);
   2977       auto Shifted = MIRBuilder.buildAShr(Ty, Res, ShiftAmt);
   2978       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Shifted);
   2979     } else {
   2980       MIRBuilder.buildICmp(CmpInst::ICMP_NE, Overflow, HiPart, Zero);
   2981     }
   2982     return Legalized;
   2983   }
   2984   case TargetOpcode::G_FNEG: {
   2985     Register Res = MI.getOperand(0).getReg();
   2986     LLT Ty = MRI.getType(Res);
   2987 
   2988     // TODO: Handle vector types once we are able to
   2989     // represent them.
   2990     if (Ty.isVector())
   2991       return UnableToLegalize;
   2992     auto SignMask =
   2993         MIRBuilder.buildConstant(Ty, APInt::getSignMask(Ty.getSizeInBits()));
   2994     Register SubByReg = MI.getOperand(1).getReg();
   2995     MIRBuilder.buildXor(Res, SubByReg, SignMask);
   2996     MI.eraseFromParent();
   2997     return Legalized;
   2998   }
   2999   case TargetOpcode::G_FSUB: {
   3000     Register Res = MI.getOperand(0).getReg();
   3001     LLT Ty = MRI.getType(Res);
   3002 
   3003     // Lower (G_FSUB LHS, RHS) to (G_FADD LHS, (G_FNEG RHS)).
   3004     // First, check if G_FNEG is marked as Lower. If so, we may
   3005     // end up with an infinite loop as G_FSUB is used to legalize G_FNEG.
   3006     if (LI.getAction({G_FNEG, {Ty}}).Action == Lower)
   3007       return UnableToLegalize;
   3008     Register LHS = MI.getOperand(1).getReg();
   3009     Register RHS = MI.getOperand(2).getReg();
   3010     Register Neg = MRI.createGenericVirtualRegister(Ty);
   3011     MIRBuilder.buildFNeg(Neg, RHS);
   3012     MIRBuilder.buildFAdd(Res, LHS, Neg, MI.getFlags());
   3013     MI.eraseFromParent();
   3014     return Legalized;
   3015   }
   3016   case TargetOpcode::G_FMAD:
   3017     return lowerFMad(MI);
   3018   case TargetOpcode::G_FFLOOR:
   3019     return lowerFFloor(MI);
   3020   case TargetOpcode::G_INTRINSIC_ROUND:
   3021     return lowerIntrinsicRound(MI);
   3022   case TargetOpcode::G_INTRINSIC_ROUNDEVEN: {
   3023     // Since round even is the assumed rounding mode for unconstrained FP
   3024     // operations, rint and roundeven are the same operation.
   3025     changeOpcode(MI, TargetOpcode::G_FRINT);
   3026     return Legalized;
   3027   }
   3028   case TargetOpcode::G_ATOMIC_CMPXCHG_WITH_SUCCESS: {
   3029     Register OldValRes = MI.getOperand(0).getReg();
   3030     Register SuccessRes = MI.getOperand(1).getReg();
   3031     Register Addr = MI.getOperand(2).getReg();
   3032     Register CmpVal = MI.getOperand(3).getReg();
   3033     Register NewVal = MI.getOperand(4).getReg();
   3034     MIRBuilder.buildAtomicCmpXchg(OldValRes, Addr, CmpVal, NewVal,
   3035                                   **MI.memoperands_begin());
   3036     MIRBuilder.buildICmp(CmpInst::ICMP_EQ, SuccessRes, OldValRes, CmpVal);
   3037     MI.eraseFromParent();
   3038     return Legalized;
   3039   }
   3040   case TargetOpcode::G_LOAD:
   3041   case TargetOpcode::G_SEXTLOAD:
   3042   case TargetOpcode::G_ZEXTLOAD:
   3043     return lowerLoad(MI);
   3044   case TargetOpcode::G_STORE:
   3045     return lowerStore(MI);
   3046   case TargetOpcode::G_CTLZ_ZERO_UNDEF:
   3047   case TargetOpcode::G_CTTZ_ZERO_UNDEF:
   3048   case TargetOpcode::G_CTLZ:
   3049   case TargetOpcode::G_CTTZ:
   3050   case TargetOpcode::G_CTPOP:
   3051     return lowerBitCount(MI);
   3052   case G_UADDO: {
   3053     Register Res = MI.getOperand(0).getReg();
   3054     Register CarryOut = MI.getOperand(1).getReg();
   3055     Register LHS = MI.getOperand(2).getReg();
   3056     Register RHS = MI.getOperand(3).getReg();
   3057 
   3058     MIRBuilder.buildAdd(Res, LHS, RHS);
   3059     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, RHS);
   3060 
   3061     MI.eraseFromParent();
   3062     return Legalized;
   3063   }
   3064   case G_UADDE: {
   3065     Register Res = MI.getOperand(0).getReg();
   3066     Register CarryOut = MI.getOperand(1).getReg();
   3067     Register LHS = MI.getOperand(2).getReg();
   3068     Register RHS = MI.getOperand(3).getReg();
   3069     Register CarryIn = MI.getOperand(4).getReg();
   3070     LLT Ty = MRI.getType(Res);
   3071 
   3072     auto TmpRes = MIRBuilder.buildAdd(Ty, LHS, RHS);
   3073     auto ZExtCarryIn = MIRBuilder.buildZExt(Ty, CarryIn);
   3074     MIRBuilder.buildAdd(Res, TmpRes, ZExtCarryIn);
   3075     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CarryOut, Res, LHS);
   3076 
   3077     MI.eraseFromParent();
   3078     return Legalized;
   3079   }
   3080   case G_USUBO: {
   3081     Register Res = MI.getOperand(0).getReg();
   3082     Register BorrowOut = MI.getOperand(1).getReg();
   3083     Register LHS = MI.getOperand(2).getReg();
   3084     Register RHS = MI.getOperand(3).getReg();
   3085 
   3086     MIRBuilder.buildSub(Res, LHS, RHS);
   3087     MIRBuilder.buildICmp(CmpInst::ICMP_ULT, BorrowOut, LHS, RHS);
   3088 
   3089     MI.eraseFromParent();
   3090     return Legalized;
   3091   }
   3092   case G_USUBE: {
   3093     Register Res = MI.getOperand(0).getReg();
   3094     Register BorrowOut = MI.getOperand(1).getReg();
   3095     Register LHS = MI.getOperand(2).getReg();
   3096     Register RHS = MI.getOperand(3).getReg();
   3097     Register BorrowIn = MI.getOperand(4).getReg();
   3098     const LLT CondTy = MRI.getType(BorrowOut);
   3099     const LLT Ty = MRI.getType(Res);
   3100 
   3101     auto TmpRes = MIRBuilder.buildSub(Ty, LHS, RHS);
   3102     auto ZExtBorrowIn = MIRBuilder.buildZExt(Ty, BorrowIn);
   3103     MIRBuilder.buildSub(Res, TmpRes, ZExtBorrowIn);
   3104 
   3105     auto LHS_EQ_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, CondTy, LHS, RHS);
   3106     auto LHS_ULT_RHS = MIRBuilder.buildICmp(CmpInst::ICMP_ULT, CondTy, LHS, RHS);
   3107     MIRBuilder.buildSelect(BorrowOut, LHS_EQ_RHS, BorrowIn, LHS_ULT_RHS);
   3108 
   3109     MI.eraseFromParent();
   3110     return Legalized;
   3111   }
   3112   case G_UITOFP:
   3113     return lowerUITOFP(MI);
   3114   case G_SITOFP:
   3115     return lowerSITOFP(MI);
   3116   case G_FPTOUI:
   3117     return lowerFPTOUI(MI);
   3118   case G_FPTOSI:
   3119     return lowerFPTOSI(MI);
   3120   case G_FPTRUNC:
   3121     return lowerFPTRUNC(MI);
   3122   case G_FPOWI:
   3123     return lowerFPOWI(MI);
   3124   case G_SMIN:
   3125   case G_SMAX:
   3126   case G_UMIN:
   3127   case G_UMAX:
   3128     return lowerMinMax(MI);
   3129   case G_FCOPYSIGN:
   3130     return lowerFCopySign(MI);
   3131   case G_FMINNUM:
   3132   case G_FMAXNUM:
   3133     return lowerFMinNumMaxNum(MI);
   3134   case G_MERGE_VALUES:
   3135     return lowerMergeValues(MI);
   3136   case G_UNMERGE_VALUES:
   3137     return lowerUnmergeValues(MI);
   3138   case TargetOpcode::G_SEXT_INREG: {
   3139     assert(MI.getOperand(2).isImm() && "Expected immediate");
   3140     int64_t SizeInBits = MI.getOperand(2).getImm();
   3141 
   3142     Register DstReg = MI.getOperand(0).getReg();
   3143     Register SrcReg = MI.getOperand(1).getReg();
   3144     LLT DstTy = MRI.getType(DstReg);
   3145     Register TmpRes = MRI.createGenericVirtualRegister(DstTy);
   3146 
   3147     auto MIBSz = MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - SizeInBits);
   3148     MIRBuilder.buildShl(TmpRes, SrcReg, MIBSz->getOperand(0));
   3149     MIRBuilder.buildAShr(DstReg, TmpRes, MIBSz->getOperand(0));
   3150     MI.eraseFromParent();
   3151     return Legalized;
   3152   }
   3153   case G_EXTRACT_VECTOR_ELT:
   3154   case G_INSERT_VECTOR_ELT:
   3155     return lowerExtractInsertVectorElt(MI);
   3156   case G_SHUFFLE_VECTOR:
   3157     return lowerShuffleVector(MI);
   3158   case G_DYN_STACKALLOC:
   3159     return lowerDynStackAlloc(MI);
   3160   case G_EXTRACT:
   3161     return lowerExtract(MI);
   3162   case G_INSERT:
   3163     return lowerInsert(MI);
   3164   case G_BSWAP:
   3165     return lowerBswap(MI);
   3166   case G_BITREVERSE:
   3167     return lowerBitreverse(MI);
   3168   case G_READ_REGISTER:
   3169   case G_WRITE_REGISTER:
   3170     return lowerReadWriteRegister(MI);
   3171   case G_UADDSAT:
   3172   case G_USUBSAT: {
   3173     // Try to make a reasonable guess about which lowering strategy to use. The
   3174     // target can override this with custom lowering and calling the
   3175     // implementation functions.
   3176     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
   3177     if (LI.isLegalOrCustom({G_UMIN, Ty}))
   3178       return lowerAddSubSatToMinMax(MI);
   3179     return lowerAddSubSatToAddoSubo(MI);
   3180   }
   3181   case G_SADDSAT:
   3182   case G_SSUBSAT: {
   3183     LLT Ty = MRI.getType(MI.getOperand(0).getReg());
   3184 
   3185     // FIXME: It would probably make more sense to see if G_SADDO is preferred,
   3186     // since it's a shorter expansion. However, we would need to figure out the
   3187     // preferred boolean type for the carry out for the query.
   3188     if (LI.isLegalOrCustom({G_SMIN, Ty}) && LI.isLegalOrCustom({G_SMAX, Ty}))
   3189       return lowerAddSubSatToMinMax(MI);
   3190     return lowerAddSubSatToAddoSubo(MI);
   3191   }
   3192   case G_SSHLSAT:
   3193   case G_USHLSAT:
   3194     return lowerShlSat(MI);
   3195   case G_ABS: {
   3196     // Expand %res = G_ABS %a into:
   3197     // %v1 = G_ASHR %a, scalar_size-1
   3198     // %v2 = G_ADD %a, %v1
   3199     // %res = G_XOR %v2, %v1
   3200     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
   3201     Register OpReg = MI.getOperand(1).getReg();
   3202     auto ShiftAmt =
   3203         MIRBuilder.buildConstant(DstTy, DstTy.getScalarSizeInBits() - 1);
   3204     auto Shift =
   3205         MIRBuilder.buildAShr(DstTy, OpReg, ShiftAmt);
   3206     auto Add = MIRBuilder.buildAdd(DstTy, OpReg, Shift);
   3207     MIRBuilder.buildXor(MI.getOperand(0).getReg(), Add, Shift);
   3208     MI.eraseFromParent();
   3209     return Legalized;
   3210   }
   3211   case G_SELECT:
   3212     return lowerSelect(MI);
   3213   case G_SDIVREM:
   3214   case G_UDIVREM:
   3215     return lowerDIVREM(MI);
   3216   case G_FSHL:
   3217   case G_FSHR:
   3218     return lowerFunnelShift(MI);
   3219   case G_ROTL:
   3220   case G_ROTR:
   3221     return lowerRotate(MI);
   3222   }
   3223 }
   3224 
   3225 Align LegalizerHelper::getStackTemporaryAlignment(LLT Ty,
   3226                                                   Align MinAlign) const {
   3227   // FIXME: We're missing a way to go back from LLT to llvm::Type to query the
   3228   // datalayout for the preferred alignment. Also there should be a target hook
   3229   // for this to allow targets to reduce the alignment and ignore the
   3230   // datalayout. e.g. AMDGPU should always use a 4-byte alignment, regardless of
   3231   // the type.
   3232   return std::max(Align(PowerOf2Ceil(Ty.getSizeInBytes())), MinAlign);
   3233 }
   3234 
   3235 MachineInstrBuilder
   3236 LegalizerHelper::createStackTemporary(TypeSize Bytes, Align Alignment,
   3237                                       MachinePointerInfo &PtrInfo) {
   3238   MachineFunction &MF = MIRBuilder.getMF();
   3239   const DataLayout &DL = MIRBuilder.getDataLayout();
   3240   int FrameIdx = MF.getFrameInfo().CreateStackObject(Bytes, Alignment, false);
   3241 
   3242   unsigned AddrSpace = DL.getAllocaAddrSpace();
   3243   LLT FramePtrTy = LLT::pointer(AddrSpace, DL.getPointerSizeInBits(AddrSpace));
   3244 
   3245   PtrInfo = MachinePointerInfo::getFixedStack(MF, FrameIdx);
   3246   return MIRBuilder.buildFrameIndex(FramePtrTy, FrameIdx);
   3247 }
   3248 
   3249 static Register clampDynamicVectorIndex(MachineIRBuilder &B, Register IdxReg,
   3250                                         LLT VecTy) {
   3251   int64_t IdxVal;
   3252   if (mi_match(IdxReg, *B.getMRI(), m_ICst(IdxVal)))
   3253     return IdxReg;
   3254 
   3255   LLT IdxTy = B.getMRI()->getType(IdxReg);
   3256   unsigned NElts = VecTy.getNumElements();
   3257   if (isPowerOf2_32(NElts)) {
   3258     APInt Imm = APInt::getLowBitsSet(IdxTy.getSizeInBits(), Log2_32(NElts));
   3259     return B.buildAnd(IdxTy, IdxReg, B.buildConstant(IdxTy, Imm)).getReg(0);
   3260   }
   3261 
   3262   return B.buildUMin(IdxTy, IdxReg, B.buildConstant(IdxTy, NElts - 1))
   3263       .getReg(0);
   3264 }
   3265 
   3266 Register LegalizerHelper::getVectorElementPointer(Register VecPtr, LLT VecTy,
   3267                                                   Register Index) {
   3268   LLT EltTy = VecTy.getElementType();
   3269 
   3270   // Calculate the element offset and add it to the pointer.
   3271   unsigned EltSize = EltTy.getSizeInBits() / 8; // FIXME: should be ABI size.
   3272   assert(EltSize * 8 == EltTy.getSizeInBits() &&
   3273          "Converting bits to bytes lost precision");
   3274 
   3275   Index = clampDynamicVectorIndex(MIRBuilder, Index, VecTy);
   3276 
   3277   LLT IdxTy = MRI.getType(Index);
   3278   auto Mul = MIRBuilder.buildMul(IdxTy, Index,
   3279                                  MIRBuilder.buildConstant(IdxTy, EltSize));
   3280 
   3281   LLT PtrTy = MRI.getType(VecPtr);
   3282   return MIRBuilder.buildPtrAdd(PtrTy, VecPtr, Mul).getReg(0);
   3283 }
   3284 
   3285 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorImplicitDef(
   3286     MachineInstr &MI, unsigned TypeIdx, LLT NarrowTy) {
   3287   Register DstReg = MI.getOperand(0).getReg();
   3288   LLT DstTy = MRI.getType(DstReg);
   3289   LLT LCMTy = getLCMType(DstTy, NarrowTy);
   3290 
   3291   unsigned NumParts = LCMTy.getSizeInBits() / NarrowTy.getSizeInBits();
   3292 
   3293   auto NewUndef = MIRBuilder.buildUndef(NarrowTy);
   3294   SmallVector<Register, 8> Parts(NumParts, NewUndef.getReg(0));
   3295 
   3296   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
   3297   MI.eraseFromParent();
   3298   return Legalized;
   3299 }
   3300 
   3301 // Handle splitting vector operations which need to have the same number of
   3302 // elements in each type index, but each type index may have a different element
   3303 // type.
   3304 //
   3305 // e.g.  <4 x s64> = G_SHL <4 x s64>, <4 x s32> ->
   3306 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
   3307 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
   3308 //
   3309 // Also handles some irregular breakdown cases, e.g.
   3310 // e.g.  <3 x s64> = G_SHL <3 x s64>, <3 x s32> ->
   3311 //       <2 x s64> = G_SHL <2 x s64>, <2 x s32>
   3312 //             s64 = G_SHL s64, s32
   3313 LegalizerHelper::LegalizeResult
   3314 LegalizerHelper::fewerElementsVectorMultiEltType(
   3315   MachineInstr &MI, unsigned TypeIdx, LLT NarrowTyArg) {
   3316   if (TypeIdx != 0)
   3317     return UnableToLegalize;
   3318 
   3319   const LLT NarrowTy0 = NarrowTyArg;
   3320   const unsigned NewNumElts =
   3321       NarrowTy0.isVector() ? NarrowTy0.getNumElements() : 1;
   3322 
   3323   const Register DstReg = MI.getOperand(0).getReg();
   3324   LLT DstTy = MRI.getType(DstReg);
   3325   LLT LeftoverTy0;
   3326 
   3327   // All of the operands need to have the same number of elements, so if we can
   3328   // determine a type breakdown for the result type, we can for all of the
   3329   // source types.
   3330   int NumParts = getNarrowTypeBreakDown(DstTy, NarrowTy0, LeftoverTy0).first;
   3331   if (NumParts < 0)
   3332     return UnableToLegalize;
   3333 
   3334   SmallVector<MachineInstrBuilder, 4> NewInsts;
   3335 
   3336   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
   3337   SmallVector<Register, 4> PartRegs, LeftoverRegs;
   3338 
   3339   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I) {
   3340     Register SrcReg = MI.getOperand(I).getReg();
   3341     LLT SrcTyI = MRI.getType(SrcReg);
   3342     LLT NarrowTyI = LLT::scalarOrVector(NewNumElts, SrcTyI.getScalarType());
   3343     LLT LeftoverTyI;
   3344 
   3345     // Split this operand into the requested typed registers, and any leftover
   3346     // required to reproduce the original type.
   3347     if (!extractParts(SrcReg, SrcTyI, NarrowTyI, LeftoverTyI, PartRegs,
   3348                       LeftoverRegs))
   3349       return UnableToLegalize;
   3350 
   3351     if (I == 1) {
   3352       // For the first operand, create an instruction for each part and setup
   3353       // the result.
   3354       for (Register PartReg : PartRegs) {
   3355         Register PartDstReg = MRI.createGenericVirtualRegister(NarrowTy0);
   3356         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
   3357                                .addDef(PartDstReg)
   3358                                .addUse(PartReg));
   3359         DstRegs.push_back(PartDstReg);
   3360       }
   3361 
   3362       for (Register LeftoverReg : LeftoverRegs) {
   3363         Register PartDstReg = MRI.createGenericVirtualRegister(LeftoverTy0);
   3364         NewInsts.push_back(MIRBuilder.buildInstrNoInsert(MI.getOpcode())
   3365                                .addDef(PartDstReg)
   3366                                .addUse(LeftoverReg));
   3367         LeftoverDstRegs.push_back(PartDstReg);
   3368       }
   3369     } else {
   3370       assert(NewInsts.size() == PartRegs.size() + LeftoverRegs.size());
   3371 
   3372       // Add the newly created operand splits to the existing instructions. The
   3373       // odd-sized pieces are ordered after the requested NarrowTyArg sized
   3374       // pieces.
   3375       unsigned InstCount = 0;
   3376       for (unsigned J = 0, JE = PartRegs.size(); J != JE; ++J)
   3377         NewInsts[InstCount++].addUse(PartRegs[J]);
   3378       for (unsigned J = 0, JE = LeftoverRegs.size(); J != JE; ++J)
   3379         NewInsts[InstCount++].addUse(LeftoverRegs[J]);
   3380     }
   3381 
   3382     PartRegs.clear();
   3383     LeftoverRegs.clear();
   3384   }
   3385 
   3386   // Insert the newly built operations and rebuild the result register.
   3387   for (auto &MIB : NewInsts)
   3388     MIRBuilder.insertInstr(MIB);
   3389 
   3390   insertParts(DstReg, DstTy, NarrowTy0, DstRegs, LeftoverTy0, LeftoverDstRegs);
   3391 
   3392   MI.eraseFromParent();
   3393   return Legalized;
   3394 }
   3395 
   3396 LegalizerHelper::LegalizeResult
   3397 LegalizerHelper::fewerElementsVectorCasts(MachineInstr &MI, unsigned TypeIdx,
   3398                                           LLT NarrowTy) {
   3399   if (TypeIdx != 0)
   3400     return UnableToLegalize;
   3401 
   3402   Register DstReg = MI.getOperand(0).getReg();
   3403   Register SrcReg = MI.getOperand(1).getReg();
   3404   LLT DstTy = MRI.getType(DstReg);
   3405   LLT SrcTy = MRI.getType(SrcReg);
   3406 
   3407   LLT NarrowTy0 = NarrowTy;
   3408   LLT NarrowTy1;
   3409   unsigned NumParts;
   3410 
   3411   if (NarrowTy.isVector()) {
   3412     // Uneven breakdown not handled.
   3413     NumParts = DstTy.getNumElements() / NarrowTy.getNumElements();
   3414     if (NumParts * NarrowTy.getNumElements() != DstTy.getNumElements())
   3415       return UnableToLegalize;
   3416 
   3417     NarrowTy1 = LLT::vector(NarrowTy.getNumElements(), SrcTy.getElementType());
   3418   } else {
   3419     NumParts = DstTy.getNumElements();
   3420     NarrowTy1 = SrcTy.getElementType();
   3421   }
   3422 
   3423   SmallVector<Register, 4> SrcRegs, DstRegs;
   3424   extractParts(SrcReg, NarrowTy1, NumParts, SrcRegs);
   3425 
   3426   for (unsigned I = 0; I < NumParts; ++I) {
   3427     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
   3428     MachineInstr *NewInst =
   3429         MIRBuilder.buildInstr(MI.getOpcode(), {DstReg}, {SrcRegs[I]});
   3430 
   3431     NewInst->setFlags(MI.getFlags());
   3432     DstRegs.push_back(DstReg);
   3433   }
   3434 
   3435   if (NarrowTy.isVector())
   3436     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
   3437   else
   3438     MIRBuilder.buildBuildVector(DstReg, DstRegs);
   3439 
   3440   MI.eraseFromParent();
   3441   return Legalized;
   3442 }
   3443 
   3444 LegalizerHelper::LegalizeResult
   3445 LegalizerHelper::fewerElementsVectorCmp(MachineInstr &MI, unsigned TypeIdx,
   3446                                         LLT NarrowTy) {
   3447   Register DstReg = MI.getOperand(0).getReg();
   3448   Register Src0Reg = MI.getOperand(2).getReg();
   3449   LLT DstTy = MRI.getType(DstReg);
   3450   LLT SrcTy = MRI.getType(Src0Reg);
   3451 
   3452   unsigned NumParts;
   3453   LLT NarrowTy0, NarrowTy1;
   3454 
   3455   if (TypeIdx == 0) {
   3456     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
   3457     unsigned OldElts = DstTy.getNumElements();
   3458 
   3459     NarrowTy0 = NarrowTy;
   3460     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) : DstTy.getNumElements();
   3461     NarrowTy1 = NarrowTy.isVector() ?
   3462       LLT::vector(NarrowTy.getNumElements(), SrcTy.getScalarSizeInBits()) :
   3463       SrcTy.getElementType();
   3464 
   3465   } else {
   3466     unsigned NewElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
   3467     unsigned OldElts = SrcTy.getNumElements();
   3468 
   3469     NumParts = NarrowTy.isVector() ? (OldElts / NewElts) :
   3470       NarrowTy.getNumElements();
   3471     NarrowTy0 = LLT::vector(NarrowTy.getNumElements(),
   3472                             DstTy.getScalarSizeInBits());
   3473     NarrowTy1 = NarrowTy;
   3474   }
   3475 
   3476   // FIXME: Don't know how to handle the situation where the small vectors
   3477   // aren't all the same size yet.
   3478   if (NarrowTy1.isVector() &&
   3479       NarrowTy1.getNumElements() * NumParts != DstTy.getNumElements())
   3480     return UnableToLegalize;
   3481 
   3482   CmpInst::Predicate Pred
   3483     = static_cast<CmpInst::Predicate>(MI.getOperand(1).getPredicate());
   3484 
   3485   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
   3486   extractParts(MI.getOperand(2).getReg(), NarrowTy1, NumParts, Src1Regs);
   3487   extractParts(MI.getOperand(3).getReg(), NarrowTy1, NumParts, Src2Regs);
   3488 
   3489   for (unsigned I = 0; I < NumParts; ++I) {
   3490     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
   3491     DstRegs.push_back(DstReg);
   3492 
   3493     if (MI.getOpcode() == TargetOpcode::G_ICMP)
   3494       MIRBuilder.buildICmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
   3495     else {
   3496       MachineInstr *NewCmp
   3497         = MIRBuilder.buildFCmp(Pred, DstReg, Src1Regs[I], Src2Regs[I]);
   3498       NewCmp->setFlags(MI.getFlags());
   3499     }
   3500   }
   3501 
   3502   if (NarrowTy1.isVector())
   3503     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
   3504   else
   3505     MIRBuilder.buildBuildVector(DstReg, DstRegs);
   3506 
   3507   MI.eraseFromParent();
   3508   return Legalized;
   3509 }
   3510 
   3511 LegalizerHelper::LegalizeResult
   3512 LegalizerHelper::fewerElementsVectorSelect(MachineInstr &MI, unsigned TypeIdx,
   3513                                            LLT NarrowTy) {
   3514   Register DstReg = MI.getOperand(0).getReg();
   3515   Register CondReg = MI.getOperand(1).getReg();
   3516 
   3517   unsigned NumParts = 0;
   3518   LLT NarrowTy0, NarrowTy1;
   3519 
   3520   LLT DstTy = MRI.getType(DstReg);
   3521   LLT CondTy = MRI.getType(CondReg);
   3522   unsigned Size = DstTy.getSizeInBits();
   3523 
   3524   assert(TypeIdx == 0 || CondTy.isVector());
   3525 
   3526   if (TypeIdx == 0) {
   3527     NarrowTy0 = NarrowTy;
   3528     NarrowTy1 = CondTy;
   3529 
   3530     unsigned NarrowSize = NarrowTy0.getSizeInBits();
   3531     // FIXME: Don't know how to handle the situation where the small vectors
   3532     // aren't all the same size yet.
   3533     if (Size % NarrowSize != 0)
   3534       return UnableToLegalize;
   3535 
   3536     NumParts = Size / NarrowSize;
   3537 
   3538     // Need to break down the condition type
   3539     if (CondTy.isVector()) {
   3540       if (CondTy.getNumElements() == NumParts)
   3541         NarrowTy1 = CondTy.getElementType();
   3542       else
   3543         NarrowTy1 = LLT::vector(CondTy.getNumElements() / NumParts,
   3544                                 CondTy.getScalarSizeInBits());
   3545     }
   3546   } else {
   3547     NumParts = CondTy.getNumElements();
   3548     if (NarrowTy.isVector()) {
   3549       // TODO: Handle uneven breakdown.
   3550       if (NumParts * NarrowTy.getNumElements() != CondTy.getNumElements())
   3551         return UnableToLegalize;
   3552 
   3553       return UnableToLegalize;
   3554     } else {
   3555       NarrowTy0 = DstTy.getElementType();
   3556       NarrowTy1 = NarrowTy;
   3557     }
   3558   }
   3559 
   3560   SmallVector<Register, 2> DstRegs, Src0Regs, Src1Regs, Src2Regs;
   3561   if (CondTy.isVector())
   3562     extractParts(MI.getOperand(1).getReg(), NarrowTy1, NumParts, Src0Regs);
   3563 
   3564   extractParts(MI.getOperand(2).getReg(), NarrowTy0, NumParts, Src1Regs);
   3565   extractParts(MI.getOperand(3).getReg(), NarrowTy0, NumParts, Src2Regs);
   3566 
   3567   for (unsigned i = 0; i < NumParts; ++i) {
   3568     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy0);
   3569     MIRBuilder.buildSelect(DstReg, CondTy.isVector() ? Src0Regs[i] : CondReg,
   3570                            Src1Regs[i], Src2Regs[i]);
   3571     DstRegs.push_back(DstReg);
   3572   }
   3573 
   3574   if (NarrowTy0.isVector())
   3575     MIRBuilder.buildConcatVectors(DstReg, DstRegs);
   3576   else
   3577     MIRBuilder.buildBuildVector(DstReg, DstRegs);
   3578 
   3579   MI.eraseFromParent();
   3580   return Legalized;
   3581 }
   3582 
   3583 LegalizerHelper::LegalizeResult
   3584 LegalizerHelper::fewerElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
   3585                                         LLT NarrowTy) {
   3586   const Register DstReg = MI.getOperand(0).getReg();
   3587   LLT PhiTy = MRI.getType(DstReg);
   3588   LLT LeftoverTy;
   3589 
   3590   // All of the operands need to have the same number of elements, so if we can
   3591   // determine a type breakdown for the result type, we can for all of the
   3592   // source types.
   3593   int NumParts, NumLeftover;
   3594   std::tie(NumParts, NumLeftover)
   3595     = getNarrowTypeBreakDown(PhiTy, NarrowTy, LeftoverTy);
   3596   if (NumParts < 0)
   3597     return UnableToLegalize;
   3598 
   3599   SmallVector<Register, 4> DstRegs, LeftoverDstRegs;
   3600   SmallVector<MachineInstrBuilder, 4> NewInsts;
   3601 
   3602   const int TotalNumParts = NumParts + NumLeftover;
   3603 
   3604   // Insert the new phis in the result block first.
   3605   for (int I = 0; I != TotalNumParts; ++I) {
   3606     LLT Ty = I < NumParts ? NarrowTy : LeftoverTy;
   3607     Register PartDstReg = MRI.createGenericVirtualRegister(Ty);
   3608     NewInsts.push_back(MIRBuilder.buildInstr(TargetOpcode::G_PHI)
   3609                        .addDef(PartDstReg));
   3610     if (I < NumParts)
   3611       DstRegs.push_back(PartDstReg);
   3612     else
   3613       LeftoverDstRegs.push_back(PartDstReg);
   3614   }
   3615 
   3616   MachineBasicBlock *MBB = MI.getParent();
   3617   MIRBuilder.setInsertPt(*MBB, MBB->getFirstNonPHI());
   3618   insertParts(DstReg, PhiTy, NarrowTy, DstRegs, LeftoverTy, LeftoverDstRegs);
   3619 
   3620   SmallVector<Register, 4> PartRegs, LeftoverRegs;
   3621 
   3622   // Insert code to extract the incoming values in each predecessor block.
   3623   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
   3624     PartRegs.clear();
   3625     LeftoverRegs.clear();
   3626 
   3627     Register SrcReg = MI.getOperand(I).getReg();
   3628     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
   3629     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
   3630 
   3631     LLT Unused;
   3632     if (!extractParts(SrcReg, PhiTy, NarrowTy, Unused, PartRegs,
   3633                       LeftoverRegs))
   3634       return UnableToLegalize;
   3635 
   3636     // Add the newly created operand splits to the existing instructions. The
   3637     // odd-sized pieces are ordered after the requested NarrowTyArg sized
   3638     // pieces.
   3639     for (int J = 0; J != TotalNumParts; ++J) {
   3640       MachineInstrBuilder MIB = NewInsts[J];
   3641       MIB.addUse(J < NumParts ? PartRegs[J] : LeftoverRegs[J - NumParts]);
   3642       MIB.addMBB(&OpMBB);
   3643     }
   3644   }
   3645 
   3646   MI.eraseFromParent();
   3647   return Legalized;
   3648 }
   3649 
   3650 LegalizerHelper::LegalizeResult
   3651 LegalizerHelper::fewerElementsVectorUnmergeValues(MachineInstr &MI,
   3652                                                   unsigned TypeIdx,
   3653                                                   LLT NarrowTy) {
   3654   if (TypeIdx != 1)
   3655     return UnableToLegalize;
   3656 
   3657   const int NumDst = MI.getNumOperands() - 1;
   3658   const Register SrcReg = MI.getOperand(NumDst).getReg();
   3659   LLT SrcTy = MRI.getType(SrcReg);
   3660 
   3661   LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
   3662 
   3663   // TODO: Create sequence of extracts.
   3664   if (DstTy == NarrowTy)
   3665     return UnableToLegalize;
   3666 
   3667   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
   3668   if (DstTy == GCDTy) {
   3669     // This would just be a copy of the same unmerge.
   3670     // TODO: Create extracts, pad with undef and create intermediate merges.
   3671     return UnableToLegalize;
   3672   }
   3673 
   3674   auto Unmerge = MIRBuilder.buildUnmerge(GCDTy, SrcReg);
   3675   const int NumUnmerge = Unmerge->getNumOperands() - 1;
   3676   const int PartsPerUnmerge = NumDst / NumUnmerge;
   3677 
   3678   for (int I = 0; I != NumUnmerge; ++I) {
   3679     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
   3680 
   3681     for (int J = 0; J != PartsPerUnmerge; ++J)
   3682       MIB.addDef(MI.getOperand(I * PartsPerUnmerge + J).getReg());
   3683     MIB.addUse(Unmerge.getReg(I));
   3684   }
   3685 
   3686   MI.eraseFromParent();
   3687   return Legalized;
   3688 }
   3689 
   3690 LegalizerHelper::LegalizeResult
   3691 LegalizerHelper::fewerElementsVectorMulo(MachineInstr &MI, unsigned TypeIdx,
   3692                                          LLT NarrowTy) {
   3693   Register Result = MI.getOperand(0).getReg();
   3694   Register Overflow = MI.getOperand(1).getReg();
   3695   Register LHS = MI.getOperand(2).getReg();
   3696   Register RHS = MI.getOperand(3).getReg();
   3697 
   3698   LLT SrcTy = MRI.getType(LHS);
   3699   if (!SrcTy.isVector())
   3700     return UnableToLegalize;
   3701 
   3702   LLT ElementType = SrcTy.getElementType();
   3703   LLT OverflowElementTy = MRI.getType(Overflow).getElementType();
   3704   const int NumResult = SrcTy.getNumElements();
   3705   LLT GCDTy = getGCDType(SrcTy, NarrowTy);
   3706 
   3707   // Unmerge the operands to smaller parts of GCD type.
   3708   auto UnmergeLHS = MIRBuilder.buildUnmerge(GCDTy, LHS);
   3709   auto UnmergeRHS = MIRBuilder.buildUnmerge(GCDTy, RHS);
   3710 
   3711   const int NumOps = UnmergeLHS->getNumOperands() - 1;
   3712   const int PartsPerUnmerge = NumResult / NumOps;
   3713   LLT OverflowTy = LLT::scalarOrVector(PartsPerUnmerge, OverflowElementTy);
   3714   LLT ResultTy = LLT::scalarOrVector(PartsPerUnmerge, ElementType);
   3715 
   3716   // Perform the operation over unmerged parts.
   3717   SmallVector<Register, 8> ResultParts;
   3718   SmallVector<Register, 8> OverflowParts;
   3719   for (int I = 0; I != NumOps; ++I) {
   3720     Register Operand1 = UnmergeLHS->getOperand(I).getReg();
   3721     Register Operand2 = UnmergeRHS->getOperand(I).getReg();
   3722     auto PartMul = MIRBuilder.buildInstr(MI.getOpcode(), {ResultTy, OverflowTy},
   3723                                          {Operand1, Operand2});
   3724     ResultParts.push_back(PartMul->getOperand(0).getReg());
   3725     OverflowParts.push_back(PartMul->getOperand(1).getReg());
   3726   }
   3727 
   3728   LLT ResultLCMTy = buildLCMMergePieces(SrcTy, NarrowTy, GCDTy, ResultParts);
   3729   LLT OverflowLCMTy =
   3730       LLT::scalarOrVector(ResultLCMTy.getNumElements(), OverflowElementTy);
   3731 
   3732   // Recombine the pieces to the original result and overflow registers.
   3733   buildWidenedRemergeToDst(Result, ResultLCMTy, ResultParts);
   3734   buildWidenedRemergeToDst(Overflow, OverflowLCMTy, OverflowParts);
   3735   MI.eraseFromParent();
   3736   return Legalized;
   3737 }
   3738 
   3739 // Handle FewerElementsVector a G_BUILD_VECTOR or G_CONCAT_VECTORS that produces
   3740 // a vector
   3741 //
   3742 // Create a G_BUILD_VECTOR or G_CONCAT_VECTORS of NarrowTy pieces, padding with
   3743 // undef as necessary.
   3744 //
   3745 // %3:_(<3 x s16>) = G_BUILD_VECTOR %0, %1, %2
   3746 //   -> <2 x s16>
   3747 //
   3748 // %4:_(s16) = G_IMPLICIT_DEF
   3749 // %5:_(<2 x s16>) = G_BUILD_VECTOR %0, %1
   3750 // %6:_(<2 x s16>) = G_BUILD_VECTOR %2, %4
   3751 // %7:_(<2 x s16>) = G_IMPLICIT_DEF
   3752 // %8:_(<6 x s16>) = G_CONCAT_VECTORS %5, %6, %7
   3753 // %3:_(<3 x s16>), %8:_(<3 x s16>) = G_UNMERGE_VALUES %8
   3754 LegalizerHelper::LegalizeResult
   3755 LegalizerHelper::fewerElementsVectorMerge(MachineInstr &MI, unsigned TypeIdx,
   3756                                           LLT NarrowTy) {
   3757   Register DstReg = MI.getOperand(0).getReg();
   3758   LLT DstTy = MRI.getType(DstReg);
   3759   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
   3760   LLT GCDTy = getGCDType(getGCDType(SrcTy, NarrowTy), DstTy);
   3761 
   3762   // Break into a common type
   3763   SmallVector<Register, 16> Parts;
   3764   for (unsigned I = 1, E = MI.getNumOperands(); I != E; ++I)
   3765     extractGCDType(Parts, GCDTy, MI.getOperand(I).getReg());
   3766 
   3767   // Build the requested new merge, padding with undef.
   3768   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts,
   3769                                   TargetOpcode::G_ANYEXT);
   3770 
   3771   // Pack into the original result register.
   3772   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
   3773 
   3774   MI.eraseFromParent();
   3775   return Legalized;
   3776 }
   3777 
   3778 LegalizerHelper::LegalizeResult
   3779 LegalizerHelper::fewerElementsVectorExtractInsertVectorElt(MachineInstr &MI,
   3780                                                            unsigned TypeIdx,
   3781                                                            LLT NarrowVecTy) {
   3782   Register DstReg = MI.getOperand(0).getReg();
   3783   Register SrcVec = MI.getOperand(1).getReg();
   3784   Register InsertVal;
   3785   bool IsInsert = MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT;
   3786 
   3787   assert((IsInsert ? TypeIdx == 0 : TypeIdx == 1) && "not a vector type index");
   3788   if (IsInsert)
   3789     InsertVal = MI.getOperand(2).getReg();
   3790 
   3791   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
   3792 
   3793   // TODO: Handle total scalarization case.
   3794   if (!NarrowVecTy.isVector())
   3795     return UnableToLegalize;
   3796 
   3797   LLT VecTy = MRI.getType(SrcVec);
   3798 
   3799   // If the index is a constant, we can really break this down as you would
   3800   // expect, and index into the target size pieces.
   3801   int64_t IdxVal;
   3802   if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
   3803     // Avoid out of bounds indexing the pieces.
   3804     if (IdxVal >= VecTy.getNumElements()) {
   3805       MIRBuilder.buildUndef(DstReg);
   3806       MI.eraseFromParent();
   3807       return Legalized;
   3808     }
   3809 
   3810     SmallVector<Register, 8> VecParts;
   3811     LLT GCDTy = extractGCDType(VecParts, VecTy, NarrowVecTy, SrcVec);
   3812 
   3813     // Build a sequence of NarrowTy pieces in VecParts for this operand.
   3814     LLT LCMTy = buildLCMMergePieces(VecTy, NarrowVecTy, GCDTy, VecParts,
   3815                                     TargetOpcode::G_ANYEXT);
   3816 
   3817     unsigned NewNumElts = NarrowVecTy.getNumElements();
   3818 
   3819     LLT IdxTy = MRI.getType(Idx);
   3820     int64_t PartIdx = IdxVal / NewNumElts;
   3821     auto NewIdx =
   3822         MIRBuilder.buildConstant(IdxTy, IdxVal - NewNumElts * PartIdx);
   3823 
   3824     if (IsInsert) {
   3825       LLT PartTy = MRI.getType(VecParts[PartIdx]);
   3826 
   3827       // Use the adjusted index to insert into one of the subvectors.
   3828       auto InsertPart = MIRBuilder.buildInsertVectorElement(
   3829           PartTy, VecParts[PartIdx], InsertVal, NewIdx);
   3830       VecParts[PartIdx] = InsertPart.getReg(0);
   3831 
   3832       // Recombine the inserted subvector with the others to reform the result
   3833       // vector.
   3834       buildWidenedRemergeToDst(DstReg, LCMTy, VecParts);
   3835     } else {
   3836       MIRBuilder.buildExtractVectorElement(DstReg, VecParts[PartIdx], NewIdx);
   3837     }
   3838 
   3839     MI.eraseFromParent();
   3840     return Legalized;
   3841   }
   3842 
   3843   // With a variable index, we can't perform the operation in a smaller type, so
   3844   // we're forced to expand this.
   3845   //
   3846   // TODO: We could emit a chain of compare/select to figure out which piece to
   3847   // index.
   3848   return lowerExtractInsertVectorElt(MI);
   3849 }
   3850 
   3851 LegalizerHelper::LegalizeResult
   3852 LegalizerHelper::reduceLoadStoreWidth(MachineInstr &MI, unsigned TypeIdx,
   3853                                       LLT NarrowTy) {
   3854   // FIXME: Don't know how to handle secondary types yet.
   3855   if (TypeIdx != 0)
   3856     return UnableToLegalize;
   3857 
   3858   MachineMemOperand *MMO = *MI.memoperands_begin();
   3859 
   3860   // This implementation doesn't work for atomics. Give up instead of doing
   3861   // something invalid.
   3862   if (MMO->getOrdering() != AtomicOrdering::NotAtomic ||
   3863       MMO->getFailureOrdering() != AtomicOrdering::NotAtomic)
   3864     return UnableToLegalize;
   3865 
   3866   bool IsLoad = MI.getOpcode() == TargetOpcode::G_LOAD;
   3867   Register ValReg = MI.getOperand(0).getReg();
   3868   Register AddrReg = MI.getOperand(1).getReg();
   3869   LLT ValTy = MRI.getType(ValReg);
   3870 
   3871   // FIXME: Do we need a distinct NarrowMemory legalize action?
   3872   if (ValTy.getSizeInBits() != 8 * MMO->getSize()) {
   3873     LLVM_DEBUG(dbgs() << "Can't narrow extload/truncstore\n");
   3874     return UnableToLegalize;
   3875   }
   3876 
   3877   int NumParts = -1;
   3878   int NumLeftover = -1;
   3879   LLT LeftoverTy;
   3880   SmallVector<Register, 8> NarrowRegs, NarrowLeftoverRegs;
   3881   if (IsLoad) {
   3882     std::tie(NumParts, NumLeftover) = getNarrowTypeBreakDown(ValTy, NarrowTy, LeftoverTy);
   3883   } else {
   3884     if (extractParts(ValReg, ValTy, NarrowTy, LeftoverTy, NarrowRegs,
   3885                      NarrowLeftoverRegs)) {
   3886       NumParts = NarrowRegs.size();
   3887       NumLeftover = NarrowLeftoverRegs.size();
   3888     }
   3889   }
   3890 
   3891   if (NumParts == -1)
   3892     return UnableToLegalize;
   3893 
   3894   LLT PtrTy = MRI.getType(AddrReg);
   3895   const LLT OffsetTy = LLT::scalar(PtrTy.getSizeInBits());
   3896 
   3897   unsigned TotalSize = ValTy.getSizeInBits();
   3898 
   3899   // Split the load/store into PartTy sized pieces starting at Offset. If this
   3900   // is a load, return the new registers in ValRegs. For a store, each elements
   3901   // of ValRegs should be PartTy. Returns the next offset that needs to be
   3902   // handled.
   3903   auto splitTypePieces = [=](LLT PartTy, SmallVectorImpl<Register> &ValRegs,
   3904                              unsigned Offset) -> unsigned {
   3905     MachineFunction &MF = MIRBuilder.getMF();
   3906     unsigned PartSize = PartTy.getSizeInBits();
   3907     for (unsigned Idx = 0, E = NumParts; Idx != E && Offset < TotalSize;
   3908          Offset += PartSize, ++Idx) {
   3909       unsigned ByteSize = PartSize / 8;
   3910       unsigned ByteOffset = Offset / 8;
   3911       Register NewAddrReg;
   3912 
   3913       MIRBuilder.materializePtrAdd(NewAddrReg, AddrReg, OffsetTy, ByteOffset);
   3914 
   3915       MachineMemOperand *NewMMO =
   3916         MF.getMachineMemOperand(MMO, ByteOffset, ByteSize);
   3917 
   3918       if (IsLoad) {
   3919         Register Dst = MRI.createGenericVirtualRegister(PartTy);
   3920         ValRegs.push_back(Dst);
   3921         MIRBuilder.buildLoad(Dst, NewAddrReg, *NewMMO);
   3922       } else {
   3923         MIRBuilder.buildStore(ValRegs[Idx], NewAddrReg, *NewMMO);
   3924       }
   3925     }
   3926 
   3927     return Offset;
   3928   };
   3929 
   3930   unsigned HandledOffset = splitTypePieces(NarrowTy, NarrowRegs, 0);
   3931 
   3932   // Handle the rest of the register if this isn't an even type breakdown.
   3933   if (LeftoverTy.isValid())
   3934     splitTypePieces(LeftoverTy, NarrowLeftoverRegs, HandledOffset);
   3935 
   3936   if (IsLoad) {
   3937     insertParts(ValReg, ValTy, NarrowTy, NarrowRegs,
   3938                 LeftoverTy, NarrowLeftoverRegs);
   3939   }
   3940 
   3941   MI.eraseFromParent();
   3942   return Legalized;
   3943 }
   3944 
   3945 LegalizerHelper::LegalizeResult
   3946 LegalizerHelper::reduceOperationWidth(MachineInstr &MI, unsigned int TypeIdx,
   3947                                       LLT NarrowTy) {
   3948   assert(TypeIdx == 0 && "only one type index expected");
   3949 
   3950   const unsigned Opc = MI.getOpcode();
   3951   const int NumDefOps = MI.getNumExplicitDefs();
   3952   const int NumSrcOps = MI.getNumOperands() - NumDefOps;
   3953   const unsigned Flags = MI.getFlags();
   3954   const unsigned NarrowSize = NarrowTy.getSizeInBits();
   3955   const LLT NarrowScalarTy = LLT::scalar(NarrowSize);
   3956 
   3957   assert(MI.getNumOperands() <= 4 && "expected instruction with either 1 "
   3958                                      "result and 1-3 sources or 2 results and "
   3959                                      "1-2 sources");
   3960 
   3961   SmallVector<Register, 2> DstRegs;
   3962   for (int I = 0; I < NumDefOps; ++I)
   3963     DstRegs.push_back(MI.getOperand(I).getReg());
   3964 
   3965   // First of all check whether we are narrowing (changing the element type)
   3966   // or reducing the vector elements
   3967   const LLT DstTy = MRI.getType(DstRegs[0]);
   3968   const bool IsNarrow = NarrowTy.getScalarType() != DstTy.getScalarType();
   3969 
   3970   SmallVector<Register, 8> ExtractedRegs[3];
   3971   SmallVector<Register, 8> Parts;
   3972 
   3973   unsigned NarrowElts = NarrowTy.isVector() ? NarrowTy.getNumElements() : 1;
   3974 
   3975   // Break down all the sources into NarrowTy pieces we can operate on. This may
   3976   // involve creating merges to a wider type, padded with undef.
   3977   for (int I = 0; I != NumSrcOps; ++I) {
   3978     Register SrcReg = MI.getOperand(I + NumDefOps).getReg();
   3979     LLT SrcTy = MRI.getType(SrcReg);
   3980 
   3981     // The type to narrow SrcReg to. For narrowing, this is a smaller scalar.
   3982     // For fewerElements, this is a smaller vector with the same element type.
   3983     LLT OpNarrowTy;
   3984     if (IsNarrow) {
   3985       OpNarrowTy = NarrowScalarTy;
   3986 
   3987       // In case of narrowing, we need to cast vectors to scalars for this to
   3988       // work properly
   3989       // FIXME: Can we do without the bitcast here if we're narrowing?
   3990       if (SrcTy.isVector()) {
   3991         SrcTy = LLT::scalar(SrcTy.getSizeInBits());
   3992         SrcReg = MIRBuilder.buildBitcast(SrcTy, SrcReg).getReg(0);
   3993       }
   3994     } else {
   3995       OpNarrowTy = LLT::scalarOrVector(NarrowElts, SrcTy.getScalarType());
   3996     }
   3997 
   3998     LLT GCDTy = extractGCDType(ExtractedRegs[I], SrcTy, OpNarrowTy, SrcReg);
   3999 
   4000     // Build a sequence of NarrowTy pieces in ExtractedRegs for this operand.
   4001     buildLCMMergePieces(SrcTy, OpNarrowTy, GCDTy, ExtractedRegs[I],
   4002                         TargetOpcode::G_ANYEXT);
   4003   }
   4004 
   4005   SmallVector<Register, 8> ResultRegs[2];
   4006 
   4007   // Input operands for each sub-instruction.
   4008   SmallVector<SrcOp, 4> InputRegs(NumSrcOps, Register());
   4009 
   4010   int NumParts = ExtractedRegs[0].size();
   4011   const unsigned DstSize = DstTy.getSizeInBits();
   4012   const LLT DstScalarTy = LLT::scalar(DstSize);
   4013 
   4014   // Narrowing needs to use scalar types
   4015   LLT DstLCMTy, NarrowDstTy;
   4016   if (IsNarrow) {
   4017     DstLCMTy = getLCMType(DstScalarTy, NarrowScalarTy);
   4018     NarrowDstTy = NarrowScalarTy;
   4019   } else {
   4020     DstLCMTy = getLCMType(DstTy, NarrowTy);
   4021     NarrowDstTy = NarrowTy;
   4022   }
   4023 
   4024   // We widened the source registers to satisfy merge/unmerge size
   4025   // constraints. We'll have some extra fully undef parts.
   4026   const int NumRealParts = (DstSize + NarrowSize - 1) / NarrowSize;
   4027 
   4028   for (int I = 0; I != NumRealParts; ++I) {
   4029     // Emit this instruction on each of the split pieces.
   4030     for (int J = 0; J != NumSrcOps; ++J)
   4031       InputRegs[J] = ExtractedRegs[J][I];
   4032 
   4033     MachineInstrBuilder Inst;
   4034     if (NumDefOps == 1)
   4035       Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy}, InputRegs, Flags);
   4036     else
   4037       Inst = MIRBuilder.buildInstr(Opc, {NarrowDstTy, NarrowDstTy}, InputRegs,
   4038                                    Flags);
   4039 
   4040     for (int J = 0; J != NumDefOps; ++J)
   4041       ResultRegs[J].push_back(Inst.getReg(J));
   4042   }
   4043 
   4044   // Fill out the widened result with undef instead of creating instructions
   4045   // with undef inputs.
   4046   int NumUndefParts = NumParts - NumRealParts;
   4047   if (NumUndefParts != 0) {
   4048     Register Undef = MIRBuilder.buildUndef(NarrowDstTy).getReg(0);
   4049     for (int I = 0; I != NumDefOps; ++I)
   4050       ResultRegs[I].append(NumUndefParts, Undef);
   4051   }
   4052 
   4053   // Extract the possibly padded result. Use a scratch register if we need to do
   4054   // a final bitcast, otherwise use the original result register.
   4055   Register MergeDstReg;
   4056   for (int I = 0; I != NumDefOps; ++I) {
   4057     if (IsNarrow && DstTy.isVector())
   4058       MergeDstReg = MRI.createGenericVirtualRegister(DstScalarTy);
   4059     else
   4060       MergeDstReg = DstRegs[I];
   4061 
   4062     buildWidenedRemergeToDst(MergeDstReg, DstLCMTy, ResultRegs[I]);
   4063 
   4064     // Recast to vector if we narrowed a vector
   4065     if (IsNarrow && DstTy.isVector())
   4066       MIRBuilder.buildBitcast(DstRegs[I], MergeDstReg);
   4067   }
   4068 
   4069   MI.eraseFromParent();
   4070   return Legalized;
   4071 }
   4072 
   4073 LegalizerHelper::LegalizeResult
   4074 LegalizerHelper::fewerElementsVectorSextInReg(MachineInstr &MI, unsigned TypeIdx,
   4075                                               LLT NarrowTy) {
   4076   Register DstReg = MI.getOperand(0).getReg();
   4077   Register SrcReg = MI.getOperand(1).getReg();
   4078   int64_t Imm = MI.getOperand(2).getImm();
   4079 
   4080   LLT DstTy = MRI.getType(DstReg);
   4081 
   4082   SmallVector<Register, 8> Parts;
   4083   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
   4084   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts);
   4085 
   4086   for (Register &R : Parts)
   4087     R = MIRBuilder.buildSExtInReg(NarrowTy, R, Imm).getReg(0);
   4088 
   4089   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
   4090 
   4091   MI.eraseFromParent();
   4092   return Legalized;
   4093 }
   4094 
   4095 LegalizerHelper::LegalizeResult
   4096 LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx,
   4097                                      LLT NarrowTy) {
   4098   using namespace TargetOpcode;
   4099 
   4100   switch (MI.getOpcode()) {
   4101   case G_IMPLICIT_DEF:
   4102     return fewerElementsVectorImplicitDef(MI, TypeIdx, NarrowTy);
   4103   case G_TRUNC:
   4104   case G_AND:
   4105   case G_OR:
   4106   case G_XOR:
   4107   case G_ADD:
   4108   case G_SUB:
   4109   case G_MUL:
   4110   case G_PTR_ADD:
   4111   case G_SMULH:
   4112   case G_UMULH:
   4113   case G_FADD:
   4114   case G_FMUL:
   4115   case G_FSUB:
   4116   case G_FNEG:
   4117   case G_FABS:
   4118   case G_FCANONICALIZE:
   4119   case G_FDIV:
   4120   case G_FREM:
   4121   case G_FMA:
   4122   case G_FMAD:
   4123   case G_FPOW:
   4124   case G_FEXP:
   4125   case G_FEXP2:
   4126   case G_FLOG:
   4127   case G_FLOG2:
   4128   case G_FLOG10:
   4129   case G_FNEARBYINT:
   4130   case G_FCEIL:
   4131   case G_FFLOOR:
   4132   case G_FRINT:
   4133   case G_INTRINSIC_ROUND:
   4134   case G_INTRINSIC_ROUNDEVEN:
   4135   case G_INTRINSIC_TRUNC:
   4136   case G_FCOS:
   4137   case G_FSIN:
   4138   case G_FSQRT:
   4139   case G_BSWAP:
   4140   case G_BITREVERSE:
   4141   case G_SDIV:
   4142   case G_UDIV:
   4143   case G_SREM:
   4144   case G_UREM:
   4145   case G_SMIN:
   4146   case G_SMAX:
   4147   case G_UMIN:
   4148   case G_UMAX:
   4149   case G_FMINNUM:
   4150   case G_FMAXNUM:
   4151   case G_FMINNUM_IEEE:
   4152   case G_FMAXNUM_IEEE:
   4153   case G_FMINIMUM:
   4154   case G_FMAXIMUM:
   4155   case G_FSHL:
   4156   case G_FSHR:
   4157   case G_FREEZE:
   4158   case G_SADDSAT:
   4159   case G_SSUBSAT:
   4160   case G_UADDSAT:
   4161   case G_USUBSAT:
   4162     return reduceOperationWidth(MI, TypeIdx, NarrowTy);
   4163   case G_UMULO:
   4164   case G_SMULO:
   4165     return fewerElementsVectorMulo(MI, TypeIdx, NarrowTy);
   4166   case G_SHL:
   4167   case G_LSHR:
   4168   case G_ASHR:
   4169   case G_SSHLSAT:
   4170   case G_USHLSAT:
   4171   case G_CTLZ:
   4172   case G_CTLZ_ZERO_UNDEF:
   4173   case G_CTTZ:
   4174   case G_CTTZ_ZERO_UNDEF:
   4175   case G_CTPOP:
   4176   case G_FCOPYSIGN:
   4177     return fewerElementsVectorMultiEltType(MI, TypeIdx, NarrowTy);
   4178   case G_ZEXT:
   4179   case G_SEXT:
   4180   case G_ANYEXT:
   4181   case G_FPEXT:
   4182   case G_FPTRUNC:
   4183   case G_SITOFP:
   4184   case G_UITOFP:
   4185   case G_FPTOSI:
   4186   case G_FPTOUI:
   4187   case G_INTTOPTR:
   4188   case G_PTRTOINT:
   4189   case G_ADDRSPACE_CAST:
   4190     return fewerElementsVectorCasts(MI, TypeIdx, NarrowTy);
   4191   case G_ICMP:
   4192   case G_FCMP:
   4193     return fewerElementsVectorCmp(MI, TypeIdx, NarrowTy);
   4194   case G_SELECT:
   4195     return fewerElementsVectorSelect(MI, TypeIdx, NarrowTy);
   4196   case G_PHI:
   4197     return fewerElementsVectorPhi(MI, TypeIdx, NarrowTy);
   4198   case G_UNMERGE_VALUES:
   4199     return fewerElementsVectorUnmergeValues(MI, TypeIdx, NarrowTy);
   4200   case G_BUILD_VECTOR:
   4201     assert(TypeIdx == 0 && "not a vector type index");
   4202     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
   4203   case G_CONCAT_VECTORS:
   4204     if (TypeIdx != 1) // TODO: This probably does work as expected already.
   4205       return UnableToLegalize;
   4206     return fewerElementsVectorMerge(MI, TypeIdx, NarrowTy);
   4207   case G_EXTRACT_VECTOR_ELT:
   4208   case G_INSERT_VECTOR_ELT:
   4209     return fewerElementsVectorExtractInsertVectorElt(MI, TypeIdx, NarrowTy);
   4210   case G_LOAD:
   4211   case G_STORE:
   4212     return reduceLoadStoreWidth(MI, TypeIdx, NarrowTy);
   4213   case G_SEXT_INREG:
   4214     return fewerElementsVectorSextInReg(MI, TypeIdx, NarrowTy);
   4215   GISEL_VECREDUCE_CASES_NONSEQ
   4216     return fewerElementsVectorReductions(MI, TypeIdx, NarrowTy);
   4217   default:
   4218     return UnableToLegalize;
   4219   }
   4220 }
   4221 
   4222 LegalizerHelper::LegalizeResult LegalizerHelper::fewerElementsVectorReductions(
   4223     MachineInstr &MI, unsigned int TypeIdx, LLT NarrowTy) {
   4224   unsigned Opc = MI.getOpcode();
   4225   assert(Opc != TargetOpcode::G_VECREDUCE_SEQ_FADD &&
   4226          Opc != TargetOpcode::G_VECREDUCE_SEQ_FMUL &&
   4227          "Sequential reductions not expected");
   4228 
   4229   if (TypeIdx != 1)
   4230     return UnableToLegalize;
   4231 
   4232   // The semantics of the normal non-sequential reductions allow us to freely
   4233   // re-associate the operation.
   4234   Register SrcReg = MI.getOperand(1).getReg();
   4235   LLT SrcTy = MRI.getType(SrcReg);
   4236   Register DstReg = MI.getOperand(0).getReg();
   4237   LLT DstTy = MRI.getType(DstReg);
   4238 
   4239   if (SrcTy.getNumElements() % NarrowTy.getNumElements() != 0)
   4240     return UnableToLegalize;
   4241 
   4242   SmallVector<Register> SplitSrcs;
   4243   const unsigned NumParts = SrcTy.getNumElements() / NarrowTy.getNumElements();
   4244   extractParts(SrcReg, NarrowTy, NumParts, SplitSrcs);
   4245   SmallVector<Register> PartialReductions;
   4246   for (unsigned Part = 0; Part < NumParts; ++Part) {
   4247     PartialReductions.push_back(
   4248         MIRBuilder.buildInstr(Opc, {DstTy}, {SplitSrcs[Part]}).getReg(0));
   4249   }
   4250 
   4251   unsigned ScalarOpc;
   4252   switch (Opc) {
   4253   case TargetOpcode::G_VECREDUCE_FADD:
   4254     ScalarOpc = TargetOpcode::G_FADD;
   4255     break;
   4256   case TargetOpcode::G_VECREDUCE_FMUL:
   4257     ScalarOpc = TargetOpcode::G_FMUL;
   4258     break;
   4259   case TargetOpcode::G_VECREDUCE_FMAX:
   4260     ScalarOpc = TargetOpcode::G_FMAXNUM;
   4261     break;
   4262   case TargetOpcode::G_VECREDUCE_FMIN:
   4263     ScalarOpc = TargetOpcode::G_FMINNUM;
   4264     break;
   4265   case TargetOpcode::G_VECREDUCE_ADD:
   4266     ScalarOpc = TargetOpcode::G_ADD;
   4267     break;
   4268   case TargetOpcode::G_VECREDUCE_MUL:
   4269     ScalarOpc = TargetOpcode::G_MUL;
   4270     break;
   4271   case TargetOpcode::G_VECREDUCE_AND:
   4272     ScalarOpc = TargetOpcode::G_AND;
   4273     break;
   4274   case TargetOpcode::G_VECREDUCE_OR:
   4275     ScalarOpc = TargetOpcode::G_OR;
   4276     break;
   4277   case TargetOpcode::G_VECREDUCE_XOR:
   4278     ScalarOpc = TargetOpcode::G_XOR;
   4279     break;
   4280   case TargetOpcode::G_VECREDUCE_SMAX:
   4281     ScalarOpc = TargetOpcode::G_SMAX;
   4282     break;
   4283   case TargetOpcode::G_VECREDUCE_SMIN:
   4284     ScalarOpc = TargetOpcode::G_SMIN;
   4285     break;
   4286   case TargetOpcode::G_VECREDUCE_UMAX:
   4287     ScalarOpc = TargetOpcode::G_UMAX;
   4288     break;
   4289   case TargetOpcode::G_VECREDUCE_UMIN:
   4290     ScalarOpc = TargetOpcode::G_UMIN;
   4291     break;
   4292   default:
   4293     LLVM_DEBUG(dbgs() << "Can't legalize: unknown reduction kind.\n");
   4294     return UnableToLegalize;
   4295   }
   4296 
   4297   // If the types involved are powers of 2, we can generate intermediate vector
   4298   // ops, before generating a final reduction operation.
   4299   if (isPowerOf2_32(SrcTy.getNumElements()) &&
   4300       isPowerOf2_32(NarrowTy.getNumElements())) {
   4301     return tryNarrowPow2Reduction(MI, SrcReg, SrcTy, NarrowTy, ScalarOpc);
   4302   }
   4303 
   4304   Register Acc = PartialReductions[0];
   4305   for (unsigned Part = 1; Part < NumParts; ++Part) {
   4306     if (Part == NumParts - 1) {
   4307       MIRBuilder.buildInstr(ScalarOpc, {DstReg},
   4308                             {Acc, PartialReductions[Part]});
   4309     } else {
   4310       Acc = MIRBuilder
   4311                 .buildInstr(ScalarOpc, {DstTy}, {Acc, PartialReductions[Part]})
   4312                 .getReg(0);
   4313     }
   4314   }
   4315   MI.eraseFromParent();
   4316   return Legalized;
   4317 }
   4318 
   4319 LegalizerHelper::LegalizeResult
   4320 LegalizerHelper::tryNarrowPow2Reduction(MachineInstr &MI, Register SrcReg,
   4321                                         LLT SrcTy, LLT NarrowTy,
   4322                                         unsigned ScalarOpc) {
   4323   SmallVector<Register> SplitSrcs;
   4324   // Split the sources into NarrowTy size pieces.
   4325   extractParts(SrcReg, NarrowTy,
   4326                SrcTy.getNumElements() / NarrowTy.getNumElements(), SplitSrcs);
   4327   // We're going to do a tree reduction using vector operations until we have
   4328   // one NarrowTy size value left.
   4329   while (SplitSrcs.size() > 1) {
   4330     SmallVector<Register> PartialRdxs;
   4331     for (unsigned Idx = 0; Idx < SplitSrcs.size()-1; Idx += 2) {
   4332       Register LHS = SplitSrcs[Idx];
   4333       Register RHS = SplitSrcs[Idx + 1];
   4334       // Create the intermediate vector op.
   4335       Register Res =
   4336           MIRBuilder.buildInstr(ScalarOpc, {NarrowTy}, {LHS, RHS}).getReg(0);
   4337       PartialRdxs.push_back(Res);
   4338     }
   4339     SplitSrcs = std::move(PartialRdxs);
   4340   }
   4341   // Finally generate the requested NarrowTy based reduction.
   4342   Observer.changingInstr(MI);
   4343   MI.getOperand(1).setReg(SplitSrcs[0]);
   4344   Observer.changedInstr(MI);
   4345   return Legalized;
   4346 }
   4347 
   4348 LegalizerHelper::LegalizeResult
   4349 LegalizerHelper::narrowScalarShiftByConstant(MachineInstr &MI, const APInt &Amt,
   4350                                              const LLT HalfTy, const LLT AmtTy) {
   4351 
   4352   Register InL = MRI.createGenericVirtualRegister(HalfTy);
   4353   Register InH = MRI.createGenericVirtualRegister(HalfTy);
   4354   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
   4355 
   4356   if (Amt.isNullValue()) {
   4357     MIRBuilder.buildMerge(MI.getOperand(0), {InL, InH});
   4358     MI.eraseFromParent();
   4359     return Legalized;
   4360   }
   4361 
   4362   LLT NVT = HalfTy;
   4363   unsigned NVTBits = HalfTy.getSizeInBits();
   4364   unsigned VTBits = 2 * NVTBits;
   4365 
   4366   SrcOp Lo(Register(0)), Hi(Register(0));
   4367   if (MI.getOpcode() == TargetOpcode::G_SHL) {
   4368     if (Amt.ugt(VTBits)) {
   4369       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
   4370     } else if (Amt.ugt(NVTBits)) {
   4371       Lo = MIRBuilder.buildConstant(NVT, 0);
   4372       Hi = MIRBuilder.buildShl(NVT, InL,
   4373                                MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
   4374     } else if (Amt == NVTBits) {
   4375       Lo = MIRBuilder.buildConstant(NVT, 0);
   4376       Hi = InL;
   4377     } else {
   4378       Lo = MIRBuilder.buildShl(NVT, InL, MIRBuilder.buildConstant(AmtTy, Amt));
   4379       auto OrLHS =
   4380           MIRBuilder.buildShl(NVT, InH, MIRBuilder.buildConstant(AmtTy, Amt));
   4381       auto OrRHS = MIRBuilder.buildLShr(
   4382           NVT, InL, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
   4383       Hi = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
   4384     }
   4385   } else if (MI.getOpcode() == TargetOpcode::G_LSHR) {
   4386     if (Amt.ugt(VTBits)) {
   4387       Lo = Hi = MIRBuilder.buildConstant(NVT, 0);
   4388     } else if (Amt.ugt(NVTBits)) {
   4389       Lo = MIRBuilder.buildLShr(NVT, InH,
   4390                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
   4391       Hi = MIRBuilder.buildConstant(NVT, 0);
   4392     } else if (Amt == NVTBits) {
   4393       Lo = InH;
   4394       Hi = MIRBuilder.buildConstant(NVT, 0);
   4395     } else {
   4396       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
   4397 
   4398       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
   4399       auto OrRHS = MIRBuilder.buildShl(
   4400           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
   4401 
   4402       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
   4403       Hi = MIRBuilder.buildLShr(NVT, InH, ShiftAmtConst);
   4404     }
   4405   } else {
   4406     if (Amt.ugt(VTBits)) {
   4407       Hi = Lo = MIRBuilder.buildAShr(
   4408           NVT, InH, MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
   4409     } else if (Amt.ugt(NVTBits)) {
   4410       Lo = MIRBuilder.buildAShr(NVT, InH,
   4411                                 MIRBuilder.buildConstant(AmtTy, Amt - NVTBits));
   4412       Hi = MIRBuilder.buildAShr(NVT, InH,
   4413                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
   4414     } else if (Amt == NVTBits) {
   4415       Lo = InH;
   4416       Hi = MIRBuilder.buildAShr(NVT, InH,
   4417                                 MIRBuilder.buildConstant(AmtTy, NVTBits - 1));
   4418     } else {
   4419       auto ShiftAmtConst = MIRBuilder.buildConstant(AmtTy, Amt);
   4420 
   4421       auto OrLHS = MIRBuilder.buildLShr(NVT, InL, ShiftAmtConst);
   4422       auto OrRHS = MIRBuilder.buildShl(
   4423           NVT, InH, MIRBuilder.buildConstant(AmtTy, -Amt + NVTBits));
   4424 
   4425       Lo = MIRBuilder.buildOr(NVT, OrLHS, OrRHS);
   4426       Hi = MIRBuilder.buildAShr(NVT, InH, ShiftAmtConst);
   4427     }
   4428   }
   4429 
   4430   MIRBuilder.buildMerge(MI.getOperand(0), {Lo, Hi});
   4431   MI.eraseFromParent();
   4432 
   4433   return Legalized;
   4434 }
   4435 
   4436 // TODO: Optimize if constant shift amount.
   4437 LegalizerHelper::LegalizeResult
   4438 LegalizerHelper::narrowScalarShift(MachineInstr &MI, unsigned TypeIdx,
   4439                                    LLT RequestedTy) {
   4440   if (TypeIdx == 1) {
   4441     Observer.changingInstr(MI);
   4442     narrowScalarSrc(MI, RequestedTy, 2);
   4443     Observer.changedInstr(MI);
   4444     return Legalized;
   4445   }
   4446 
   4447   Register DstReg = MI.getOperand(0).getReg();
   4448   LLT DstTy = MRI.getType(DstReg);
   4449   if (DstTy.isVector())
   4450     return UnableToLegalize;
   4451 
   4452   Register Amt = MI.getOperand(2).getReg();
   4453   LLT ShiftAmtTy = MRI.getType(Amt);
   4454   const unsigned DstEltSize = DstTy.getScalarSizeInBits();
   4455   if (DstEltSize % 2 != 0)
   4456     return UnableToLegalize;
   4457 
   4458   // Ignore the input type. We can only go to exactly half the size of the
   4459   // input. If that isn't small enough, the resulting pieces will be further
   4460   // legalized.
   4461   const unsigned NewBitSize = DstEltSize / 2;
   4462   const LLT HalfTy = LLT::scalar(NewBitSize);
   4463   const LLT CondTy = LLT::scalar(1);
   4464 
   4465   if (const MachineInstr *KShiftAmt =
   4466           getOpcodeDef(TargetOpcode::G_CONSTANT, Amt, MRI)) {
   4467     return narrowScalarShiftByConstant(
   4468         MI, KShiftAmt->getOperand(1).getCImm()->getValue(), HalfTy, ShiftAmtTy);
   4469   }
   4470 
   4471   // TODO: Expand with known bits.
   4472 
   4473   // Handle the fully general expansion by an unknown amount.
   4474   auto NewBits = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize);
   4475 
   4476   Register InL = MRI.createGenericVirtualRegister(HalfTy);
   4477   Register InH = MRI.createGenericVirtualRegister(HalfTy);
   4478   MIRBuilder.buildUnmerge({InL, InH}, MI.getOperand(1));
   4479 
   4480   auto AmtExcess = MIRBuilder.buildSub(ShiftAmtTy, Amt, NewBits);
   4481   auto AmtLack = MIRBuilder.buildSub(ShiftAmtTy, NewBits, Amt);
   4482 
   4483   auto Zero = MIRBuilder.buildConstant(ShiftAmtTy, 0);
   4484   auto IsShort = MIRBuilder.buildICmp(ICmpInst::ICMP_ULT, CondTy, Amt, NewBits);
   4485   auto IsZero = MIRBuilder.buildICmp(ICmpInst::ICMP_EQ, CondTy, Amt, Zero);
   4486 
   4487   Register ResultRegs[2];
   4488   switch (MI.getOpcode()) {
   4489   case TargetOpcode::G_SHL: {
   4490     // Short: ShAmt < NewBitSize
   4491     auto LoS = MIRBuilder.buildShl(HalfTy, InL, Amt);
   4492 
   4493     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, AmtLack);
   4494     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, Amt);
   4495     auto HiS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
   4496 
   4497     // Long: ShAmt >= NewBitSize
   4498     auto LoL = MIRBuilder.buildConstant(HalfTy, 0);         // Lo part is zero.
   4499     auto HiL = MIRBuilder.buildShl(HalfTy, InL, AmtExcess); // Hi from Lo part.
   4500 
   4501     auto Lo = MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL);
   4502     auto Hi = MIRBuilder.buildSelect(
   4503         HalfTy, IsZero, InH, MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL));
   4504 
   4505     ResultRegs[0] = Lo.getReg(0);
   4506     ResultRegs[1] = Hi.getReg(0);
   4507     break;
   4508   }
   4509   case TargetOpcode::G_LSHR:
   4510   case TargetOpcode::G_ASHR: {
   4511     // Short: ShAmt < NewBitSize
   4512     auto HiS = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy}, {InH, Amt});
   4513 
   4514     auto LoOr = MIRBuilder.buildLShr(HalfTy, InL, Amt);
   4515     auto HiOr = MIRBuilder.buildShl(HalfTy, InH, AmtLack);
   4516     auto LoS = MIRBuilder.buildOr(HalfTy, LoOr, HiOr);
   4517 
   4518     // Long: ShAmt >= NewBitSize
   4519     MachineInstrBuilder HiL;
   4520     if (MI.getOpcode() == TargetOpcode::G_LSHR) {
   4521       HiL = MIRBuilder.buildConstant(HalfTy, 0);            // Hi part is zero.
   4522     } else {
   4523       auto ShiftAmt = MIRBuilder.buildConstant(ShiftAmtTy, NewBitSize - 1);
   4524       HiL = MIRBuilder.buildAShr(HalfTy, InH, ShiftAmt);    // Sign of Hi part.
   4525     }
   4526     auto LoL = MIRBuilder.buildInstr(MI.getOpcode(), {HalfTy},
   4527                                      {InH, AmtExcess});     // Lo from Hi part.
   4528 
   4529     auto Lo = MIRBuilder.buildSelect(
   4530         HalfTy, IsZero, InL, MIRBuilder.buildSelect(HalfTy, IsShort, LoS, LoL));
   4531 
   4532     auto Hi = MIRBuilder.buildSelect(HalfTy, IsShort, HiS, HiL);
   4533 
   4534     ResultRegs[0] = Lo.getReg(0);
   4535     ResultRegs[1] = Hi.getReg(0);
   4536     break;
   4537   }
   4538   default:
   4539     llvm_unreachable("not a shift");
   4540   }
   4541 
   4542   MIRBuilder.buildMerge(DstReg, ResultRegs);
   4543   MI.eraseFromParent();
   4544   return Legalized;
   4545 }
   4546 
   4547 LegalizerHelper::LegalizeResult
   4548 LegalizerHelper::moreElementsVectorPhi(MachineInstr &MI, unsigned TypeIdx,
   4549                                        LLT MoreTy) {
   4550   assert(TypeIdx == 0 && "Expecting only Idx 0");
   4551 
   4552   Observer.changingInstr(MI);
   4553   for (unsigned I = 1, E = MI.getNumOperands(); I != E; I += 2) {
   4554     MachineBasicBlock &OpMBB = *MI.getOperand(I + 1).getMBB();
   4555     MIRBuilder.setInsertPt(OpMBB, OpMBB.getFirstTerminator());
   4556     moreElementsVectorSrc(MI, MoreTy, I);
   4557   }
   4558 
   4559   MachineBasicBlock &MBB = *MI.getParent();
   4560   MIRBuilder.setInsertPt(MBB, --MBB.getFirstNonPHI());
   4561   moreElementsVectorDst(MI, MoreTy, 0);
   4562   Observer.changedInstr(MI);
   4563   return Legalized;
   4564 }
   4565 
   4566 LegalizerHelper::LegalizeResult
   4567 LegalizerHelper::moreElementsVector(MachineInstr &MI, unsigned TypeIdx,
   4568                                     LLT MoreTy) {
   4569   unsigned Opc = MI.getOpcode();
   4570   switch (Opc) {
   4571   case TargetOpcode::G_IMPLICIT_DEF:
   4572   case TargetOpcode::G_LOAD: {
   4573     if (TypeIdx != 0)
   4574       return UnableToLegalize;
   4575     Observer.changingInstr(MI);
   4576     moreElementsVectorDst(MI, MoreTy, 0);
   4577     Observer.changedInstr(MI);
   4578     return Legalized;
   4579   }
   4580   case TargetOpcode::G_STORE:
   4581     if (TypeIdx != 0)
   4582       return UnableToLegalize;
   4583     Observer.changingInstr(MI);
   4584     moreElementsVectorSrc(MI, MoreTy, 0);
   4585     Observer.changedInstr(MI);
   4586     return Legalized;
   4587   case TargetOpcode::G_AND:
   4588   case TargetOpcode::G_OR:
   4589   case TargetOpcode::G_XOR:
   4590   case TargetOpcode::G_SMIN:
   4591   case TargetOpcode::G_SMAX:
   4592   case TargetOpcode::G_UMIN:
   4593   case TargetOpcode::G_UMAX:
   4594   case TargetOpcode::G_FMINNUM:
   4595   case TargetOpcode::G_FMAXNUM:
   4596   case TargetOpcode::G_FMINNUM_IEEE:
   4597   case TargetOpcode::G_FMAXNUM_IEEE:
   4598   case TargetOpcode::G_FMINIMUM:
   4599   case TargetOpcode::G_FMAXIMUM: {
   4600     Observer.changingInstr(MI);
   4601     moreElementsVectorSrc(MI, MoreTy, 1);
   4602     moreElementsVectorSrc(MI, MoreTy, 2);
   4603     moreElementsVectorDst(MI, MoreTy, 0);
   4604     Observer.changedInstr(MI);
   4605     return Legalized;
   4606   }
   4607   case TargetOpcode::G_EXTRACT:
   4608     if (TypeIdx != 1)
   4609       return UnableToLegalize;
   4610     Observer.changingInstr(MI);
   4611     moreElementsVectorSrc(MI, MoreTy, 1);
   4612     Observer.changedInstr(MI);
   4613     return Legalized;
   4614   case TargetOpcode::G_INSERT:
   4615   case TargetOpcode::G_FREEZE:
   4616     if (TypeIdx != 0)
   4617       return UnableToLegalize;
   4618     Observer.changingInstr(MI);
   4619     moreElementsVectorSrc(MI, MoreTy, 1);
   4620     moreElementsVectorDst(MI, MoreTy, 0);
   4621     Observer.changedInstr(MI);
   4622     return Legalized;
   4623   case TargetOpcode::G_SELECT:
   4624     if (TypeIdx != 0)
   4625       return UnableToLegalize;
   4626     if (MRI.getType(MI.getOperand(1).getReg()).isVector())
   4627       return UnableToLegalize;
   4628 
   4629     Observer.changingInstr(MI);
   4630     moreElementsVectorSrc(MI, MoreTy, 2);
   4631     moreElementsVectorSrc(MI, MoreTy, 3);
   4632     moreElementsVectorDst(MI, MoreTy, 0);
   4633     Observer.changedInstr(MI);
   4634     return Legalized;
   4635   case TargetOpcode::G_UNMERGE_VALUES: {
   4636     if (TypeIdx != 1)
   4637       return UnableToLegalize;
   4638 
   4639     LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
   4640     int NumDst = MI.getNumOperands() - 1;
   4641     moreElementsVectorSrc(MI, MoreTy, NumDst);
   4642 
   4643     auto MIB = MIRBuilder.buildInstr(TargetOpcode::G_UNMERGE_VALUES);
   4644     for (int I = 0; I != NumDst; ++I)
   4645       MIB.addDef(MI.getOperand(I).getReg());
   4646 
   4647     int NewNumDst = MoreTy.getSizeInBits() / DstTy.getSizeInBits();
   4648     for (int I = NumDst; I != NewNumDst; ++I)
   4649       MIB.addDef(MRI.createGenericVirtualRegister(DstTy));
   4650 
   4651     MIB.addUse(MI.getOperand(NumDst).getReg());
   4652     MI.eraseFromParent();
   4653     return Legalized;
   4654   }
   4655   case TargetOpcode::G_PHI:
   4656     return moreElementsVectorPhi(MI, TypeIdx, MoreTy);
   4657   default:
   4658     return UnableToLegalize;
   4659   }
   4660 }
   4661 
   4662 void LegalizerHelper::multiplyRegisters(SmallVectorImpl<Register> &DstRegs,
   4663                                         ArrayRef<Register> Src1Regs,
   4664                                         ArrayRef<Register> Src2Regs,
   4665                                         LLT NarrowTy) {
   4666   MachineIRBuilder &B = MIRBuilder;
   4667   unsigned SrcParts = Src1Regs.size();
   4668   unsigned DstParts = DstRegs.size();
   4669 
   4670   unsigned DstIdx = 0; // Low bits of the result.
   4671   Register FactorSum =
   4672       B.buildMul(NarrowTy, Src1Regs[DstIdx], Src2Regs[DstIdx]).getReg(0);
   4673   DstRegs[DstIdx] = FactorSum;
   4674 
   4675   unsigned CarrySumPrevDstIdx;
   4676   SmallVector<Register, 4> Factors;
   4677 
   4678   for (DstIdx = 1; DstIdx < DstParts; DstIdx++) {
   4679     // Collect low parts of muls for DstIdx.
   4680     for (unsigned i = DstIdx + 1 < SrcParts ? 0 : DstIdx - SrcParts + 1;
   4681          i <= std::min(DstIdx, SrcParts - 1); ++i) {
   4682       MachineInstrBuilder Mul =
   4683           B.buildMul(NarrowTy, Src1Regs[DstIdx - i], Src2Regs[i]);
   4684       Factors.push_back(Mul.getReg(0));
   4685     }
   4686     // Collect high parts of muls from previous DstIdx.
   4687     for (unsigned i = DstIdx < SrcParts ? 0 : DstIdx - SrcParts;
   4688          i <= std::min(DstIdx - 1, SrcParts - 1); ++i) {
   4689       MachineInstrBuilder Umulh =
   4690           B.buildUMulH(NarrowTy, Src1Regs[DstIdx - 1 - i], Src2Regs[i]);
   4691       Factors.push_back(Umulh.getReg(0));
   4692     }
   4693     // Add CarrySum from additions calculated for previous DstIdx.
   4694     if (DstIdx != 1) {
   4695       Factors.push_back(CarrySumPrevDstIdx);
   4696     }
   4697 
   4698     Register CarrySum;
   4699     // Add all factors and accumulate all carries into CarrySum.
   4700     if (DstIdx != DstParts - 1) {
   4701       MachineInstrBuilder Uaddo =
   4702           B.buildUAddo(NarrowTy, LLT::scalar(1), Factors[0], Factors[1]);
   4703       FactorSum = Uaddo.getReg(0);
   4704       CarrySum = B.buildZExt(NarrowTy, Uaddo.getReg(1)).getReg(0);
   4705       for (unsigned i = 2; i < Factors.size(); ++i) {
   4706         MachineInstrBuilder Uaddo =
   4707             B.buildUAddo(NarrowTy, LLT::scalar(1), FactorSum, Factors[i]);
   4708         FactorSum = Uaddo.getReg(0);
   4709         MachineInstrBuilder Carry = B.buildZExt(NarrowTy, Uaddo.getReg(1));
   4710         CarrySum = B.buildAdd(NarrowTy, CarrySum, Carry).getReg(0);
   4711       }
   4712     } else {
   4713       // Since value for the next index is not calculated, neither is CarrySum.
   4714       FactorSum = B.buildAdd(NarrowTy, Factors[0], Factors[1]).getReg(0);
   4715       for (unsigned i = 2; i < Factors.size(); ++i)
   4716         FactorSum = B.buildAdd(NarrowTy, FactorSum, Factors[i]).getReg(0);
   4717     }
   4718 
   4719     CarrySumPrevDstIdx = CarrySum;
   4720     DstRegs[DstIdx] = FactorSum;
   4721     Factors.clear();
   4722   }
   4723 }
   4724 
   4725 LegalizerHelper::LegalizeResult
   4726 LegalizerHelper::narrowScalarAddSub(MachineInstr &MI, unsigned TypeIdx,
   4727                                     LLT NarrowTy) {
   4728   if (TypeIdx != 0)
   4729     return UnableToLegalize;
   4730 
   4731   Register DstReg = MI.getOperand(0).getReg();
   4732   LLT DstType = MRI.getType(DstReg);
   4733   // FIXME: add support for vector types
   4734   if (DstType.isVector())
   4735     return UnableToLegalize;
   4736 
   4737   uint64_t SizeOp0 = DstType.getSizeInBits();
   4738   uint64_t NarrowSize = NarrowTy.getSizeInBits();
   4739 
   4740   // FIXME: add support for when SizeOp0 isn't an exact multiple of
   4741   // NarrowSize.
   4742   if (SizeOp0 % NarrowSize != 0)
   4743     return UnableToLegalize;
   4744 
   4745   // Expand in terms of carry-setting/consuming G_<Op>E instructions.
   4746   int NumParts = SizeOp0 / NarrowTy.getSizeInBits();
   4747 
   4748   unsigned Opcode = MI.getOpcode();
   4749   unsigned OpO, OpE, OpF;
   4750   switch (Opcode) {
   4751   case TargetOpcode::G_SADDO:
   4752   case TargetOpcode::G_SADDE:
   4753   case TargetOpcode::G_UADDO:
   4754   case TargetOpcode::G_UADDE:
   4755   case TargetOpcode::G_ADD:
   4756     OpO = TargetOpcode::G_UADDO;
   4757     OpE = TargetOpcode::G_UADDE;
   4758     OpF = TargetOpcode::G_UADDE;
   4759     if (Opcode == TargetOpcode::G_SADDO || Opcode == TargetOpcode::G_SADDE)
   4760       OpF = TargetOpcode::G_SADDE;
   4761     break;
   4762   case TargetOpcode::G_SSUBO:
   4763   case TargetOpcode::G_SSUBE:
   4764   case TargetOpcode::G_USUBO:
   4765   case TargetOpcode::G_USUBE:
   4766   case TargetOpcode::G_SUB:
   4767     OpO = TargetOpcode::G_USUBO;
   4768     OpE = TargetOpcode::G_USUBE;
   4769     OpF = TargetOpcode::G_USUBE;
   4770     if (Opcode == TargetOpcode::G_SSUBO || Opcode == TargetOpcode::G_SSUBE)
   4771       OpF = TargetOpcode::G_SSUBE;
   4772     break;
   4773   default:
   4774     llvm_unreachable("Unexpected add/sub opcode!");
   4775   }
   4776 
   4777   // 1 for a plain add/sub, 2 if this is an operation with a carry-out.
   4778   unsigned NumDefs = MI.getNumExplicitDefs();
   4779   Register Src1 = MI.getOperand(NumDefs).getReg();
   4780   Register Src2 = MI.getOperand(NumDefs + 1).getReg();
   4781   Register CarryDst;
   4782   if (NumDefs == 2)
   4783     CarryDst = MI.getOperand(1).getReg();
   4784   Register CarryIn;
   4785   if (MI.getNumOperands() == NumDefs + 3)
   4786     CarryIn = MI.getOperand(NumDefs + 2).getReg();
   4787 
   4788   SmallVector<Register, 2> Src1Regs, Src2Regs, DstRegs;
   4789   extractParts(Src1, NarrowTy, NumParts, Src1Regs);
   4790   extractParts(Src2, NarrowTy, NumParts, Src2Regs);
   4791 
   4792   for (int i = 0; i < NumParts; ++i) {
   4793     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
   4794     Register CarryOut = MRI.createGenericVirtualRegister(LLT::scalar(1));
   4795     // Forward the final carry-out to the destination register
   4796     if (i == NumParts - 1 && CarryDst)
   4797       CarryOut = CarryDst;
   4798 
   4799     if (!CarryIn) {
   4800       MIRBuilder.buildInstr(OpO, {DstReg, CarryOut},
   4801                             {Src1Regs[i], Src2Regs[i]});
   4802     } else if (i == NumParts - 1) {
   4803       MIRBuilder.buildInstr(OpF, {DstReg, CarryOut},
   4804                             {Src1Regs[i], Src2Regs[i], CarryIn});
   4805     } else {
   4806       MIRBuilder.buildInstr(OpE, {DstReg, CarryOut},
   4807                             {Src1Regs[i], Src2Regs[i], CarryIn});
   4808     }
   4809 
   4810     DstRegs.push_back(DstReg);
   4811     CarryIn = CarryOut;
   4812   }
   4813   MIRBuilder.buildMerge(DstReg, DstRegs);
   4814   MI.eraseFromParent();
   4815   return Legalized;
   4816 }
   4817 
   4818 LegalizerHelper::LegalizeResult
   4819 LegalizerHelper::narrowScalarMul(MachineInstr &MI, LLT NarrowTy) {
   4820   Register DstReg = MI.getOperand(0).getReg();
   4821   Register Src1 = MI.getOperand(1).getReg();
   4822   Register Src2 = MI.getOperand(2).getReg();
   4823 
   4824   LLT Ty = MRI.getType(DstReg);
   4825   if (Ty.isVector())
   4826     return UnableToLegalize;
   4827 
   4828   unsigned SrcSize = MRI.getType(Src1).getSizeInBits();
   4829   unsigned DstSize = Ty.getSizeInBits();
   4830   unsigned NarrowSize = NarrowTy.getSizeInBits();
   4831   if (DstSize % NarrowSize != 0 || SrcSize % NarrowSize != 0)
   4832     return UnableToLegalize;
   4833 
   4834   unsigned NumDstParts = DstSize / NarrowSize;
   4835   unsigned NumSrcParts = SrcSize / NarrowSize;
   4836   bool IsMulHigh = MI.getOpcode() == TargetOpcode::G_UMULH;
   4837   unsigned DstTmpParts = NumDstParts * (IsMulHigh ? 2 : 1);
   4838 
   4839   SmallVector<Register, 2> Src1Parts, Src2Parts;
   4840   SmallVector<Register, 2> DstTmpRegs(DstTmpParts);
   4841   extractParts(Src1, NarrowTy, NumSrcParts, Src1Parts);
   4842   extractParts(Src2, NarrowTy, NumSrcParts, Src2Parts);
   4843   multiplyRegisters(DstTmpRegs, Src1Parts, Src2Parts, NarrowTy);
   4844 
   4845   // Take only high half of registers if this is high mul.
   4846   ArrayRef<Register> DstRegs(
   4847       IsMulHigh ? &DstTmpRegs[DstTmpParts / 2] : &DstTmpRegs[0], NumDstParts);
   4848   MIRBuilder.buildMerge(DstReg, DstRegs);
   4849   MI.eraseFromParent();
   4850   return Legalized;
   4851 }
   4852 
   4853 LegalizerHelper::LegalizeResult
   4854 LegalizerHelper::narrowScalarFPTOI(MachineInstr &MI, unsigned TypeIdx,
   4855                                    LLT NarrowTy) {
   4856   if (TypeIdx != 0)
   4857     return UnableToLegalize;
   4858 
   4859   bool IsSigned = MI.getOpcode() == TargetOpcode::G_FPTOSI;
   4860 
   4861   Register Src = MI.getOperand(1).getReg();
   4862   LLT SrcTy = MRI.getType(Src);
   4863 
   4864   // If all finite floats fit into the narrowed integer type, we can just swap
   4865   // out the result type. This is practically only useful for conversions from
   4866   // half to at least 16-bits, so just handle the one case.
   4867   if (SrcTy.getScalarType() != LLT::scalar(16) ||
   4868       NarrowTy.getScalarSizeInBits() < (IsSigned ? 17u : 16u))
   4869     return UnableToLegalize;
   4870 
   4871   Observer.changingInstr(MI);
   4872   narrowScalarDst(MI, NarrowTy, 0,
   4873                   IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT);
   4874   Observer.changedInstr(MI);
   4875   return Legalized;
   4876 }
   4877 
   4878 LegalizerHelper::LegalizeResult
   4879 LegalizerHelper::narrowScalarExtract(MachineInstr &MI, unsigned TypeIdx,
   4880                                      LLT NarrowTy) {
   4881   if (TypeIdx != 1)
   4882     return UnableToLegalize;
   4883 
   4884   uint64_t NarrowSize = NarrowTy.getSizeInBits();
   4885 
   4886   int64_t SizeOp1 = MRI.getType(MI.getOperand(1).getReg()).getSizeInBits();
   4887   // FIXME: add support for when SizeOp1 isn't an exact multiple of
   4888   // NarrowSize.
   4889   if (SizeOp1 % NarrowSize != 0)
   4890     return UnableToLegalize;
   4891   int NumParts = SizeOp1 / NarrowSize;
   4892 
   4893   SmallVector<Register, 2> SrcRegs, DstRegs;
   4894   SmallVector<uint64_t, 2> Indexes;
   4895   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
   4896 
   4897   Register OpReg = MI.getOperand(0).getReg();
   4898   uint64_t OpStart = MI.getOperand(2).getImm();
   4899   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
   4900   for (int i = 0; i < NumParts; ++i) {
   4901     unsigned SrcStart = i * NarrowSize;
   4902 
   4903     if (SrcStart + NarrowSize <= OpStart || SrcStart >= OpStart + OpSize) {
   4904       // No part of the extract uses this subregister, ignore it.
   4905       continue;
   4906     } else if (SrcStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
   4907       // The entire subregister is extracted, forward the value.
   4908       DstRegs.push_back(SrcRegs[i]);
   4909       continue;
   4910     }
   4911 
   4912     // OpSegStart is where this destination segment would start in OpReg if it
   4913     // extended infinitely in both directions.
   4914     int64_t ExtractOffset;
   4915     uint64_t SegSize;
   4916     if (OpStart < SrcStart) {
   4917       ExtractOffset = 0;
   4918       SegSize = std::min(NarrowSize, OpStart + OpSize - SrcStart);
   4919     } else {
   4920       ExtractOffset = OpStart - SrcStart;
   4921       SegSize = std::min(SrcStart + NarrowSize - OpStart, OpSize);
   4922     }
   4923 
   4924     Register SegReg = SrcRegs[i];
   4925     if (ExtractOffset != 0 || SegSize != NarrowSize) {
   4926       // A genuine extract is needed.
   4927       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
   4928       MIRBuilder.buildExtract(SegReg, SrcRegs[i], ExtractOffset);
   4929     }
   4930 
   4931     DstRegs.push_back(SegReg);
   4932   }
   4933 
   4934   Register DstReg = MI.getOperand(0).getReg();
   4935   if (MRI.getType(DstReg).isVector())
   4936     MIRBuilder.buildBuildVector(DstReg, DstRegs);
   4937   else if (DstRegs.size() > 1)
   4938     MIRBuilder.buildMerge(DstReg, DstRegs);
   4939   else
   4940     MIRBuilder.buildCopy(DstReg, DstRegs[0]);
   4941   MI.eraseFromParent();
   4942   return Legalized;
   4943 }
   4944 
   4945 LegalizerHelper::LegalizeResult
   4946 LegalizerHelper::narrowScalarInsert(MachineInstr &MI, unsigned TypeIdx,
   4947                                     LLT NarrowTy) {
   4948   // FIXME: Don't know how to handle secondary types yet.
   4949   if (TypeIdx != 0)
   4950     return UnableToLegalize;
   4951 
   4952   uint64_t SizeOp0 = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
   4953   uint64_t NarrowSize = NarrowTy.getSizeInBits();
   4954 
   4955   // FIXME: add support for when SizeOp0 isn't an exact multiple of
   4956   // NarrowSize.
   4957   if (SizeOp0 % NarrowSize != 0)
   4958     return UnableToLegalize;
   4959 
   4960   int NumParts = SizeOp0 / NarrowSize;
   4961 
   4962   SmallVector<Register, 2> SrcRegs, DstRegs;
   4963   SmallVector<uint64_t, 2> Indexes;
   4964   extractParts(MI.getOperand(1).getReg(), NarrowTy, NumParts, SrcRegs);
   4965 
   4966   Register OpReg = MI.getOperand(2).getReg();
   4967   uint64_t OpStart = MI.getOperand(3).getImm();
   4968   uint64_t OpSize = MRI.getType(OpReg).getSizeInBits();
   4969   for (int i = 0; i < NumParts; ++i) {
   4970     unsigned DstStart = i * NarrowSize;
   4971 
   4972     if (DstStart + NarrowSize <= OpStart || DstStart >= OpStart + OpSize) {
   4973       // No part of the insert affects this subregister, forward the original.
   4974       DstRegs.push_back(SrcRegs[i]);
   4975       continue;
   4976     } else if (DstStart == OpStart && NarrowTy == MRI.getType(OpReg)) {
   4977       // The entire subregister is defined by this insert, forward the new
   4978       // value.
   4979       DstRegs.push_back(OpReg);
   4980       continue;
   4981     }
   4982 
   4983     // OpSegStart is where this destination segment would start in OpReg if it
   4984     // extended infinitely in both directions.
   4985     int64_t ExtractOffset, InsertOffset;
   4986     uint64_t SegSize;
   4987     if (OpStart < DstStart) {
   4988       InsertOffset = 0;
   4989       ExtractOffset = DstStart - OpStart;
   4990       SegSize = std::min(NarrowSize, OpStart + OpSize - DstStart);
   4991     } else {
   4992       InsertOffset = OpStart - DstStart;
   4993       ExtractOffset = 0;
   4994       SegSize =
   4995         std::min(NarrowSize - InsertOffset, OpStart + OpSize - DstStart);
   4996     }
   4997 
   4998     Register SegReg = OpReg;
   4999     if (ExtractOffset != 0 || SegSize != OpSize) {
   5000       // A genuine extract is needed.
   5001       SegReg = MRI.createGenericVirtualRegister(LLT::scalar(SegSize));
   5002       MIRBuilder.buildExtract(SegReg, OpReg, ExtractOffset);
   5003     }
   5004 
   5005     Register DstReg = MRI.createGenericVirtualRegister(NarrowTy);
   5006     MIRBuilder.buildInsert(DstReg, SrcRegs[i], SegReg, InsertOffset);
   5007     DstRegs.push_back(DstReg);
   5008   }
   5009 
   5010   assert(DstRegs.size() == (unsigned)NumParts && "not all parts covered");
   5011   Register DstReg = MI.getOperand(0).getReg();
   5012   if(MRI.getType(DstReg).isVector())
   5013     MIRBuilder.buildBuildVector(DstReg, DstRegs);
   5014   else
   5015     MIRBuilder.buildMerge(DstReg, DstRegs);
   5016   MI.eraseFromParent();
   5017   return Legalized;
   5018 }
   5019 
   5020 LegalizerHelper::LegalizeResult
   5021 LegalizerHelper::narrowScalarBasic(MachineInstr &MI, unsigned TypeIdx,
   5022                                    LLT NarrowTy) {
   5023   Register DstReg = MI.getOperand(0).getReg();
   5024   LLT DstTy = MRI.getType(DstReg);
   5025 
   5026   assert(MI.getNumOperands() == 3 && TypeIdx == 0);
   5027 
   5028   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
   5029   SmallVector<Register, 4> Src0Regs, Src0LeftoverRegs;
   5030   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
   5031   LLT LeftoverTy;
   5032   if (!extractParts(MI.getOperand(1).getReg(), DstTy, NarrowTy, LeftoverTy,
   5033                     Src0Regs, Src0LeftoverRegs))
   5034     return UnableToLegalize;
   5035 
   5036   LLT Unused;
   5037   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, Unused,
   5038                     Src1Regs, Src1LeftoverRegs))
   5039     llvm_unreachable("inconsistent extractParts result");
   5040 
   5041   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
   5042     auto Inst = MIRBuilder.buildInstr(MI.getOpcode(), {NarrowTy},
   5043                                         {Src0Regs[I], Src1Regs[I]});
   5044     DstRegs.push_back(Inst.getReg(0));
   5045   }
   5046 
   5047   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
   5048     auto Inst = MIRBuilder.buildInstr(
   5049       MI.getOpcode(),
   5050       {LeftoverTy}, {Src0LeftoverRegs[I], Src1LeftoverRegs[I]});
   5051     DstLeftoverRegs.push_back(Inst.getReg(0));
   5052   }
   5053 
   5054   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
   5055               LeftoverTy, DstLeftoverRegs);
   5056 
   5057   MI.eraseFromParent();
   5058   return Legalized;
   5059 }
   5060 
   5061 LegalizerHelper::LegalizeResult
   5062 LegalizerHelper::narrowScalarExt(MachineInstr &MI, unsigned TypeIdx,
   5063                                  LLT NarrowTy) {
   5064   if (TypeIdx != 0)
   5065     return UnableToLegalize;
   5066 
   5067   Register DstReg = MI.getOperand(0).getReg();
   5068   Register SrcReg = MI.getOperand(1).getReg();
   5069 
   5070   LLT DstTy = MRI.getType(DstReg);
   5071   if (DstTy.isVector())
   5072     return UnableToLegalize;
   5073 
   5074   SmallVector<Register, 8> Parts;
   5075   LLT GCDTy = extractGCDType(Parts, DstTy, NarrowTy, SrcReg);
   5076   LLT LCMTy = buildLCMMergePieces(DstTy, NarrowTy, GCDTy, Parts, MI.getOpcode());
   5077   buildWidenedRemergeToDst(DstReg, LCMTy, Parts);
   5078 
   5079   MI.eraseFromParent();
   5080   return Legalized;
   5081 }
   5082 
   5083 LegalizerHelper::LegalizeResult
   5084 LegalizerHelper::narrowScalarSelect(MachineInstr &MI, unsigned TypeIdx,
   5085                                     LLT NarrowTy) {
   5086   if (TypeIdx != 0)
   5087     return UnableToLegalize;
   5088 
   5089   Register CondReg = MI.getOperand(1).getReg();
   5090   LLT CondTy = MRI.getType(CondReg);
   5091   if (CondTy.isVector()) // TODO: Handle vselect
   5092     return UnableToLegalize;
   5093 
   5094   Register DstReg = MI.getOperand(0).getReg();
   5095   LLT DstTy = MRI.getType(DstReg);
   5096 
   5097   SmallVector<Register, 4> DstRegs, DstLeftoverRegs;
   5098   SmallVector<Register, 4> Src1Regs, Src1LeftoverRegs;
   5099   SmallVector<Register, 4> Src2Regs, Src2LeftoverRegs;
   5100   LLT LeftoverTy;
   5101   if (!extractParts(MI.getOperand(2).getReg(), DstTy, NarrowTy, LeftoverTy,
   5102                     Src1Regs, Src1LeftoverRegs))
   5103     return UnableToLegalize;
   5104 
   5105   LLT Unused;
   5106   if (!extractParts(MI.getOperand(3).getReg(), DstTy, NarrowTy, Unused,
   5107                     Src2Regs, Src2LeftoverRegs))
   5108     llvm_unreachable("inconsistent extractParts result");
   5109 
   5110   for (unsigned I = 0, E = Src1Regs.size(); I != E; ++I) {
   5111     auto Select = MIRBuilder.buildSelect(NarrowTy,
   5112                                          CondReg, Src1Regs[I], Src2Regs[I]);
   5113     DstRegs.push_back(Select.getReg(0));
   5114   }
   5115 
   5116   for (unsigned I = 0, E = Src1LeftoverRegs.size(); I != E; ++I) {
   5117     auto Select = MIRBuilder.buildSelect(
   5118       LeftoverTy, CondReg, Src1LeftoverRegs[I], Src2LeftoverRegs[I]);
   5119     DstLeftoverRegs.push_back(Select.getReg(0));
   5120   }
   5121 
   5122   insertParts(DstReg, DstTy, NarrowTy, DstRegs,
   5123               LeftoverTy, DstLeftoverRegs);
   5124 
   5125   MI.eraseFromParent();
   5126   return Legalized;
   5127 }
   5128 
   5129 LegalizerHelper::LegalizeResult
   5130 LegalizerHelper::narrowScalarCTLZ(MachineInstr &MI, unsigned TypeIdx,
   5131                                   LLT NarrowTy) {
   5132   if (TypeIdx != 1)
   5133     return UnableToLegalize;
   5134 
   5135   Register DstReg = MI.getOperand(0).getReg();
   5136   Register SrcReg = MI.getOperand(1).getReg();
   5137   LLT DstTy = MRI.getType(DstReg);
   5138   LLT SrcTy = MRI.getType(SrcReg);
   5139   unsigned NarrowSize = NarrowTy.getSizeInBits();
   5140 
   5141   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
   5142     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTLZ_ZERO_UNDEF;
   5143 
   5144     MachineIRBuilder &B = MIRBuilder;
   5145     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
   5146     // ctlz(Hi:Lo) -> Hi == 0 ? (NarrowSize + ctlz(Lo)) : ctlz(Hi)
   5147     auto C_0 = B.buildConstant(NarrowTy, 0);
   5148     auto HiIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
   5149                                 UnmergeSrc.getReg(1), C_0);
   5150     auto LoCTLZ = IsUndef ?
   5151       B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0)) :
   5152       B.buildCTLZ(DstTy, UnmergeSrc.getReg(0));
   5153     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
   5154     auto HiIsZeroCTLZ = B.buildAdd(DstTy, LoCTLZ, C_NarrowSize);
   5155     auto HiCTLZ = B.buildCTLZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1));
   5156     B.buildSelect(DstReg, HiIsZero, HiIsZeroCTLZ, HiCTLZ);
   5157 
   5158     MI.eraseFromParent();
   5159     return Legalized;
   5160   }
   5161 
   5162   return UnableToLegalize;
   5163 }
   5164 
   5165 LegalizerHelper::LegalizeResult
   5166 LegalizerHelper::narrowScalarCTTZ(MachineInstr &MI, unsigned TypeIdx,
   5167                                   LLT NarrowTy) {
   5168   if (TypeIdx != 1)
   5169     return UnableToLegalize;
   5170 
   5171   Register DstReg = MI.getOperand(0).getReg();
   5172   Register SrcReg = MI.getOperand(1).getReg();
   5173   LLT DstTy = MRI.getType(DstReg);
   5174   LLT SrcTy = MRI.getType(SrcReg);
   5175   unsigned NarrowSize = NarrowTy.getSizeInBits();
   5176 
   5177   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
   5178     const bool IsUndef = MI.getOpcode() == TargetOpcode::G_CTTZ_ZERO_UNDEF;
   5179 
   5180     MachineIRBuilder &B = MIRBuilder;
   5181     auto UnmergeSrc = B.buildUnmerge(NarrowTy, SrcReg);
   5182     // cttz(Hi:Lo) -> Lo == 0 ? (cttz(Hi) + NarrowSize) : cttz(Lo)
   5183     auto C_0 = B.buildConstant(NarrowTy, 0);
   5184     auto LoIsZero = B.buildICmp(CmpInst::ICMP_EQ, LLT::scalar(1),
   5185                                 UnmergeSrc.getReg(0), C_0);
   5186     auto HiCTTZ = IsUndef ?
   5187       B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(1)) :
   5188       B.buildCTTZ(DstTy, UnmergeSrc.getReg(1));
   5189     auto C_NarrowSize = B.buildConstant(DstTy, NarrowSize);
   5190     auto LoIsZeroCTTZ = B.buildAdd(DstTy, HiCTTZ, C_NarrowSize);
   5191     auto LoCTTZ = B.buildCTTZ_ZERO_UNDEF(DstTy, UnmergeSrc.getReg(0));
   5192     B.buildSelect(DstReg, LoIsZero, LoIsZeroCTTZ, LoCTTZ);
   5193 
   5194     MI.eraseFromParent();
   5195     return Legalized;
   5196   }
   5197 
   5198   return UnableToLegalize;
   5199 }
   5200 
   5201 LegalizerHelper::LegalizeResult
   5202 LegalizerHelper::narrowScalarCTPOP(MachineInstr &MI, unsigned TypeIdx,
   5203                                    LLT NarrowTy) {
   5204   if (TypeIdx != 1)
   5205     return UnableToLegalize;
   5206 
   5207   Register DstReg = MI.getOperand(0).getReg();
   5208   LLT DstTy = MRI.getType(DstReg);
   5209   LLT SrcTy = MRI.getType(MI.getOperand(1).getReg());
   5210   unsigned NarrowSize = NarrowTy.getSizeInBits();
   5211 
   5212   if (SrcTy.isScalar() && SrcTy.getSizeInBits() == 2 * NarrowSize) {
   5213     auto UnmergeSrc = MIRBuilder.buildUnmerge(NarrowTy, MI.getOperand(1));
   5214 
   5215     auto LoCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(0));
   5216     auto HiCTPOP = MIRBuilder.buildCTPOP(DstTy, UnmergeSrc.getReg(1));
   5217     MIRBuilder.buildAdd(DstReg, HiCTPOP, LoCTPOP);
   5218 
   5219     MI.eraseFromParent();
   5220     return Legalized;
   5221   }
   5222 
   5223   return UnableToLegalize;
   5224 }
   5225 
   5226 LegalizerHelper::LegalizeResult
   5227 LegalizerHelper::lowerBitCount(MachineInstr &MI) {
   5228   unsigned Opc = MI.getOpcode();
   5229   const auto &TII = MIRBuilder.getTII();
   5230   auto isSupported = [this](const LegalityQuery &Q) {
   5231     auto QAction = LI.getAction(Q).Action;
   5232     return QAction == Legal || QAction == Libcall || QAction == Custom;
   5233   };
   5234   switch (Opc) {
   5235   default:
   5236     return UnableToLegalize;
   5237   case TargetOpcode::G_CTLZ_ZERO_UNDEF: {
   5238     // This trivially expands to CTLZ.
   5239     Observer.changingInstr(MI);
   5240     MI.setDesc(TII.get(TargetOpcode::G_CTLZ));
   5241     Observer.changedInstr(MI);
   5242     return Legalized;
   5243   }
   5244   case TargetOpcode::G_CTLZ: {
   5245     Register DstReg = MI.getOperand(0).getReg();
   5246     Register SrcReg = MI.getOperand(1).getReg();
   5247     LLT DstTy = MRI.getType(DstReg);
   5248     LLT SrcTy = MRI.getType(SrcReg);
   5249     unsigned Len = SrcTy.getSizeInBits();
   5250 
   5251     if (isSupported({TargetOpcode::G_CTLZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
   5252       // If CTLZ_ZERO_UNDEF is supported, emit that and a select for zero.
   5253       auto CtlzZU = MIRBuilder.buildCTLZ_ZERO_UNDEF(DstTy, SrcReg);
   5254       auto ZeroSrc = MIRBuilder.buildConstant(SrcTy, 0);
   5255       auto ICmp = MIRBuilder.buildICmp(
   5256           CmpInst::ICMP_EQ, SrcTy.changeElementSize(1), SrcReg, ZeroSrc);
   5257       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
   5258       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CtlzZU);
   5259       MI.eraseFromParent();
   5260       return Legalized;
   5261     }
   5262     // for now, we do this:
   5263     // NewLen = NextPowerOf2(Len);
   5264     // x = x | (x >> 1);
   5265     // x = x | (x >> 2);
   5266     // ...
   5267     // x = x | (x >>16);
   5268     // x = x | (x >>32); // for 64-bit input
   5269     // Upto NewLen/2
   5270     // return Len - popcount(x);
   5271     //
   5272     // Ref: "Hacker's Delight" by Henry Warren
   5273     Register Op = SrcReg;
   5274     unsigned NewLen = PowerOf2Ceil(Len);
   5275     for (unsigned i = 0; (1U << i) <= (NewLen / 2); ++i) {
   5276       auto MIBShiftAmt = MIRBuilder.buildConstant(SrcTy, 1ULL << i);
   5277       auto MIBOp = MIRBuilder.buildOr(
   5278           SrcTy, Op, MIRBuilder.buildLShr(SrcTy, Op, MIBShiftAmt));
   5279       Op = MIBOp.getReg(0);
   5280     }
   5281     auto MIBPop = MIRBuilder.buildCTPOP(DstTy, Op);
   5282     MIRBuilder.buildSub(MI.getOperand(0), MIRBuilder.buildConstant(DstTy, Len),
   5283                         MIBPop);
   5284     MI.eraseFromParent();
   5285     return Legalized;
   5286   }
   5287   case TargetOpcode::G_CTTZ_ZERO_UNDEF: {
   5288     // This trivially expands to CTTZ.
   5289     Observer.changingInstr(MI);
   5290     MI.setDesc(TII.get(TargetOpcode::G_CTTZ));
   5291     Observer.changedInstr(MI);
   5292     return Legalized;
   5293   }
   5294   case TargetOpcode::G_CTTZ: {
   5295     Register DstReg = MI.getOperand(0).getReg();
   5296     Register SrcReg = MI.getOperand(1).getReg();
   5297     LLT DstTy = MRI.getType(DstReg);
   5298     LLT SrcTy = MRI.getType(SrcReg);
   5299 
   5300     unsigned Len = SrcTy.getSizeInBits();
   5301     if (isSupported({TargetOpcode::G_CTTZ_ZERO_UNDEF, {DstTy, SrcTy}})) {
   5302       // If CTTZ_ZERO_UNDEF is legal or custom, emit that and a select with
   5303       // zero.
   5304       auto CttzZU = MIRBuilder.buildCTTZ_ZERO_UNDEF(DstTy, SrcReg);
   5305       auto Zero = MIRBuilder.buildConstant(SrcTy, 0);
   5306       auto ICmp = MIRBuilder.buildICmp(
   5307           CmpInst::ICMP_EQ, DstTy.changeElementSize(1), SrcReg, Zero);
   5308       auto LenConst = MIRBuilder.buildConstant(DstTy, Len);
   5309       MIRBuilder.buildSelect(DstReg, ICmp, LenConst, CttzZU);
   5310       MI.eraseFromParent();
   5311       return Legalized;
   5312     }
   5313     // for now, we use: { return popcount(~x & (x - 1)); }
   5314     // unless the target has ctlz but not ctpop, in which case we use:
   5315     // { return 32 - nlz(~x & (x-1)); }
   5316     // Ref: "Hacker's Delight" by Henry Warren
   5317     auto MIBCstNeg1 = MIRBuilder.buildConstant(SrcTy, -1);
   5318     auto MIBNot = MIRBuilder.buildXor(SrcTy, SrcReg, MIBCstNeg1);
   5319     auto MIBTmp = MIRBuilder.buildAnd(
   5320         SrcTy, MIBNot, MIRBuilder.buildAdd(SrcTy, SrcReg, MIBCstNeg1));
   5321     if (!isSupported({TargetOpcode::G_CTPOP, {SrcTy, SrcTy}}) &&
   5322         isSupported({TargetOpcode::G_CTLZ, {SrcTy, SrcTy}})) {
   5323       auto MIBCstLen = MIRBuilder.buildConstant(SrcTy, Len);
   5324       MIRBuilder.buildSub(MI.getOperand(0), MIBCstLen,
   5325                           MIRBuilder.buildCTLZ(SrcTy, MIBTmp));
   5326       MI.eraseFromParent();
   5327       return Legalized;
   5328     }
   5329     MI.setDesc(TII.get(TargetOpcode::G_CTPOP));
   5330     MI.getOperand(1).setReg(MIBTmp.getReg(0));
   5331     return Legalized;
   5332   }
   5333   case TargetOpcode::G_CTPOP: {
   5334     Register SrcReg = MI.getOperand(1).getReg();
   5335     LLT Ty = MRI.getType(SrcReg);
   5336     unsigned Size = Ty.getSizeInBits();
   5337     MachineIRBuilder &B = MIRBuilder;
   5338 
   5339     // Count set bits in blocks of 2 bits. Default approach would be
   5340     // B2Count = { val & 0x55555555 } + { (val >> 1) & 0x55555555 }
   5341     // We use following formula instead:
   5342     // B2Count = val - { (val >> 1) & 0x55555555 }
   5343     // since it gives same result in blocks of 2 with one instruction less.
   5344     auto C_1 = B.buildConstant(Ty, 1);
   5345     auto B2Set1LoTo1Hi = B.buildLShr(Ty, SrcReg, C_1);
   5346     APInt B2Mask1HiTo0 = APInt::getSplat(Size, APInt(8, 0x55));
   5347     auto C_B2Mask1HiTo0 = B.buildConstant(Ty, B2Mask1HiTo0);
   5348     auto B2Count1Hi = B.buildAnd(Ty, B2Set1LoTo1Hi, C_B2Mask1HiTo0);
   5349     auto B2Count = B.buildSub(Ty, SrcReg, B2Count1Hi);
   5350 
   5351     // In order to get count in blocks of 4 add values from adjacent block of 2.
   5352     // B4Count = { B2Count & 0x33333333 } + { (B2Count >> 2) & 0x33333333 }
   5353     auto C_2 = B.buildConstant(Ty, 2);
   5354     auto B4Set2LoTo2Hi = B.buildLShr(Ty, B2Count, C_2);
   5355     APInt B4Mask2HiTo0 = APInt::getSplat(Size, APInt(8, 0x33));
   5356     auto C_B4Mask2HiTo0 = B.buildConstant(Ty, B4Mask2HiTo0);
   5357     auto B4HiB2Count = B.buildAnd(Ty, B4Set2LoTo2Hi, C_B4Mask2HiTo0);
   5358     auto B4LoB2Count = B.buildAnd(Ty, B2Count, C_B4Mask2HiTo0);
   5359     auto B4Count = B.buildAdd(Ty, B4HiB2Count, B4LoB2Count);
   5360 
   5361     // For count in blocks of 8 bits we don't have to mask high 4 bits before
   5362     // addition since count value sits in range {0,...,8} and 4 bits are enough
   5363     // to hold such binary values. After addition high 4 bits still hold count
   5364     // of set bits in high 4 bit block, set them to zero and get 8 bit result.
   5365     // B8Count = { B4Count + (B4Count >> 4) } & 0x0F0F0F0F
   5366     auto C_4 = B.buildConstant(Ty, 4);
   5367     auto B8HiB4Count = B.buildLShr(Ty, B4Count, C_4);
   5368     auto B8CountDirty4Hi = B.buildAdd(Ty, B8HiB4Count, B4Count);
   5369     APInt B8Mask4HiTo0 = APInt::getSplat(Size, APInt(8, 0x0F));
   5370     auto C_B8Mask4HiTo0 = B.buildConstant(Ty, B8Mask4HiTo0);
   5371     auto B8Count = B.buildAnd(Ty, B8CountDirty4Hi, C_B8Mask4HiTo0);
   5372 
   5373     assert(Size<=128 && "Scalar size is too large for CTPOP lower algorithm");
   5374     // 8 bits can hold CTPOP result of 128 bit int or smaller. Mul with this
   5375     // bitmask will set 8 msb in ResTmp to sum of all B8Counts in 8 bit blocks.
   5376     auto MulMask = B.buildConstant(Ty, APInt::getSplat(Size, APInt(8, 0x01)));
   5377     auto ResTmp = B.buildMul(Ty, B8Count, MulMask);
   5378 
   5379     // Shift count result from 8 high bits to low bits.
   5380     auto C_SizeM8 = B.buildConstant(Ty, Size - 8);
   5381     B.buildLShr(MI.getOperand(0).getReg(), ResTmp, C_SizeM8);
   5382 
   5383     MI.eraseFromParent();
   5384     return Legalized;
   5385   }
   5386   }
   5387 }
   5388 
   5389 // Check that (every element of) Reg is undef or not an exact multiple of BW.
   5390 static bool isNonZeroModBitWidthOrUndef(const MachineRegisterInfo &MRI,
   5391                                         Register Reg, unsigned BW) {
   5392   return matchUnaryPredicate(
   5393       MRI, Reg,
   5394       [=](const Constant *C) {
   5395         // Null constant here means an undef.
   5396         const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(C);
   5397         return !CI || CI->getValue().urem(BW) != 0;
   5398       },
   5399       /*AllowUndefs*/ true);
   5400 }
   5401 
   5402 LegalizerHelper::LegalizeResult
   5403 LegalizerHelper::lowerFunnelShiftWithInverse(MachineInstr &MI) {
   5404   Register Dst = MI.getOperand(0).getReg();
   5405   Register X = MI.getOperand(1).getReg();
   5406   Register Y = MI.getOperand(2).getReg();
   5407   Register Z = MI.getOperand(3).getReg();
   5408   LLT Ty = MRI.getType(Dst);
   5409   LLT ShTy = MRI.getType(Z);
   5410 
   5411   unsigned BW = Ty.getScalarSizeInBits();
   5412 
   5413   if (!isPowerOf2_32(BW))
   5414     return UnableToLegalize;
   5415 
   5416   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
   5417   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
   5418 
   5419   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
   5420     // fshl X, Y, Z -> fshr X, Y, -Z
   5421     // fshr X, Y, Z -> fshl X, Y, -Z
   5422     auto Zero = MIRBuilder.buildConstant(ShTy, 0);
   5423     Z = MIRBuilder.buildSub(Ty, Zero, Z).getReg(0);
   5424   } else {
   5425     // fshl X, Y, Z -> fshr (srl X, 1), (fshr X, Y, 1), ~Z
   5426     // fshr X, Y, Z -> fshl (fshl X, Y, 1), (shl Y, 1), ~Z
   5427     auto One = MIRBuilder.buildConstant(ShTy, 1);
   5428     if (IsFSHL) {
   5429       Y = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
   5430       X = MIRBuilder.buildLShr(Ty, X, One).getReg(0);
   5431     } else {
   5432       X = MIRBuilder.buildInstr(RevOpcode, {Ty}, {X, Y, One}).getReg(0);
   5433       Y = MIRBuilder.buildShl(Ty, Y, One).getReg(0);
   5434     }
   5435 
   5436     Z = MIRBuilder.buildNot(ShTy, Z).getReg(0);
   5437   }
   5438 
   5439   MIRBuilder.buildInstr(RevOpcode, {Dst}, {X, Y, Z});
   5440   MI.eraseFromParent();
   5441   return Legalized;
   5442 }
   5443 
   5444 LegalizerHelper::LegalizeResult
   5445 LegalizerHelper::lowerFunnelShiftAsShifts(MachineInstr &MI) {
   5446   Register Dst = MI.getOperand(0).getReg();
   5447   Register X = MI.getOperand(1).getReg();
   5448   Register Y = MI.getOperand(2).getReg();
   5449   Register Z = MI.getOperand(3).getReg();
   5450   LLT Ty = MRI.getType(Dst);
   5451   LLT ShTy = MRI.getType(Z);
   5452 
   5453   const unsigned BW = Ty.getScalarSizeInBits();
   5454   const bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
   5455 
   5456   Register ShX, ShY;
   5457   Register ShAmt, InvShAmt;
   5458 
   5459   // FIXME: Emit optimized urem by constant instead of letting it expand later.
   5460   if (isNonZeroModBitWidthOrUndef(MRI, Z, BW)) {
   5461     // fshl: X << C | Y >> (BW - C)
   5462     // fshr: X << (BW - C) | Y >> C
   5463     // where C = Z % BW is not zero
   5464     auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
   5465     ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
   5466     InvShAmt = MIRBuilder.buildSub(ShTy, BitWidthC, ShAmt).getReg(0);
   5467     ShX = MIRBuilder.buildShl(Ty, X, IsFSHL ? ShAmt : InvShAmt).getReg(0);
   5468     ShY = MIRBuilder.buildLShr(Ty, Y, IsFSHL ? InvShAmt : ShAmt).getReg(0);
   5469   } else {
   5470     // fshl: X << (Z % BW) | Y >> 1 >> (BW - 1 - (Z % BW))
   5471     // fshr: X << 1 << (BW - 1 - (Z % BW)) | Y >> (Z % BW)
   5472     auto Mask = MIRBuilder.buildConstant(ShTy, BW - 1);
   5473     if (isPowerOf2_32(BW)) {
   5474       // Z % BW -> Z & (BW - 1)
   5475       ShAmt = MIRBuilder.buildAnd(ShTy, Z, Mask).getReg(0);
   5476       // (BW - 1) - (Z % BW) -> ~Z & (BW - 1)
   5477       auto NotZ = MIRBuilder.buildNot(ShTy, Z);
   5478       InvShAmt = MIRBuilder.buildAnd(ShTy, NotZ, Mask).getReg(0);
   5479     } else {
   5480       auto BitWidthC = MIRBuilder.buildConstant(ShTy, BW);
   5481       ShAmt = MIRBuilder.buildURem(ShTy, Z, BitWidthC).getReg(0);
   5482       InvShAmt = MIRBuilder.buildSub(ShTy, Mask, ShAmt).getReg(0);
   5483     }
   5484 
   5485     auto One = MIRBuilder.buildConstant(ShTy, 1);
   5486     if (IsFSHL) {
   5487       ShX = MIRBuilder.buildShl(Ty, X, ShAmt).getReg(0);
   5488       auto ShY1 = MIRBuilder.buildLShr(Ty, Y, One);
   5489       ShY = MIRBuilder.buildLShr(Ty, ShY1, InvShAmt).getReg(0);
   5490     } else {
   5491       auto ShX1 = MIRBuilder.buildShl(Ty, X, One);
   5492       ShX = MIRBuilder.buildShl(Ty, ShX1, InvShAmt).getReg(0);
   5493       ShY = MIRBuilder.buildLShr(Ty, Y, ShAmt).getReg(0);
   5494     }
   5495   }
   5496 
   5497   MIRBuilder.buildOr(Dst, ShX, ShY);
   5498   MI.eraseFromParent();
   5499   return Legalized;
   5500 }
   5501 
   5502 LegalizerHelper::LegalizeResult
   5503 LegalizerHelper::lowerFunnelShift(MachineInstr &MI) {
   5504   // These operations approximately do the following (while avoiding undefined
   5505   // shifts by BW):
   5506   // G_FSHL: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
   5507   // G_FSHR: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
   5508   Register Dst = MI.getOperand(0).getReg();
   5509   LLT Ty = MRI.getType(Dst);
   5510   LLT ShTy = MRI.getType(MI.getOperand(3).getReg());
   5511 
   5512   bool IsFSHL = MI.getOpcode() == TargetOpcode::G_FSHL;
   5513   unsigned RevOpcode = IsFSHL ? TargetOpcode::G_FSHR : TargetOpcode::G_FSHL;
   5514 
   5515   // TODO: Use smarter heuristic that accounts for vector legalization.
   5516   if (LI.getAction({RevOpcode, {Ty, ShTy}}).Action == Lower)
   5517     return lowerFunnelShiftAsShifts(MI);
   5518 
   5519   // This only works for powers of 2, fallback to shifts if it fails.
   5520   LegalizerHelper::LegalizeResult Result = lowerFunnelShiftWithInverse(MI);
   5521   if (Result == UnableToLegalize)
   5522     return lowerFunnelShiftAsShifts(MI);
   5523   return Result;
   5524 }
   5525 
   5526 LegalizerHelper::LegalizeResult
   5527 LegalizerHelper::lowerRotateWithReverseRotate(MachineInstr &MI) {
   5528   Register Dst = MI.getOperand(0).getReg();
   5529   Register Src = MI.getOperand(1).getReg();
   5530   Register Amt = MI.getOperand(2).getReg();
   5531   LLT AmtTy = MRI.getType(Amt);
   5532   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
   5533   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
   5534   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
   5535   auto Neg = MIRBuilder.buildSub(AmtTy, Zero, Amt);
   5536   MIRBuilder.buildInstr(RevRot, {Dst}, {Src, Neg});
   5537   MI.eraseFromParent();
   5538   return Legalized;
   5539 }
   5540 
   5541 LegalizerHelper::LegalizeResult LegalizerHelper::lowerRotate(MachineInstr &MI) {
   5542   Register Dst = MI.getOperand(0).getReg();
   5543   Register Src = MI.getOperand(1).getReg();
   5544   Register Amt = MI.getOperand(2).getReg();
   5545   LLT DstTy = MRI.getType(Dst);
   5546   LLT SrcTy = MRI.getType(Dst);
   5547   LLT AmtTy = MRI.getType(Amt);
   5548 
   5549   unsigned EltSizeInBits = DstTy.getScalarSizeInBits();
   5550   bool IsLeft = MI.getOpcode() == TargetOpcode::G_ROTL;
   5551 
   5552   MIRBuilder.setInstrAndDebugLoc(MI);
   5553 
   5554   // If a rotate in the other direction is supported, use it.
   5555   unsigned RevRot = IsLeft ? TargetOpcode::G_ROTR : TargetOpcode::G_ROTL;
   5556   if (LI.isLegalOrCustom({RevRot, {DstTy, SrcTy}}) &&
   5557       isPowerOf2_32(EltSizeInBits))
   5558     return lowerRotateWithReverseRotate(MI);
   5559 
   5560   auto Zero = MIRBuilder.buildConstant(AmtTy, 0);
   5561   unsigned ShOpc = IsLeft ? TargetOpcode::G_SHL : TargetOpcode::G_LSHR;
   5562   unsigned RevShiftOpc = IsLeft ? TargetOpcode::G_LSHR : TargetOpcode::G_SHL;
   5563   auto BitWidthMinusOneC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits - 1);
   5564   Register ShVal;
   5565   Register RevShiftVal;
   5566   if (isPowerOf2_32(EltSizeInBits)) {
   5567     // (rotl x, c) -> x << (c & (w - 1)) | x >> (-c & (w - 1))
   5568     // (rotr x, c) -> x >> (c & (w - 1)) | x << (-c & (w - 1))
   5569     auto NegAmt = MIRBuilder.buildSub(AmtTy, Zero, Amt);
   5570     auto ShAmt = MIRBuilder.buildAnd(AmtTy, Amt, BitWidthMinusOneC);
   5571     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
   5572     auto RevAmt = MIRBuilder.buildAnd(AmtTy, NegAmt, BitWidthMinusOneC);
   5573     RevShiftVal =
   5574         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, RevAmt}).getReg(0);
   5575   } else {
   5576     // (rotl x, c) -> x << (c % w) | x >> 1 >> (w - 1 - (c % w))
   5577     // (rotr x, c) -> x >> (c % w) | x << 1 << (w - 1 - (c % w))
   5578     auto BitWidthC = MIRBuilder.buildConstant(AmtTy, EltSizeInBits);
   5579     auto ShAmt = MIRBuilder.buildURem(AmtTy, Amt, BitWidthC);
   5580     ShVal = MIRBuilder.buildInstr(ShOpc, {DstTy}, {Src, ShAmt}).getReg(0);
   5581     auto RevAmt = MIRBuilder.buildSub(AmtTy, BitWidthMinusOneC, ShAmt);
   5582     auto One = MIRBuilder.buildConstant(AmtTy, 1);
   5583     auto Inner = MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Src, One});
   5584     RevShiftVal =
   5585         MIRBuilder.buildInstr(RevShiftOpc, {DstTy}, {Inner, RevAmt}).getReg(0);
   5586   }
   5587   MIRBuilder.buildOr(Dst, ShVal, RevShiftVal);
   5588   MI.eraseFromParent();
   5589   return Legalized;
   5590 }
   5591 
   5592 // Expand s32 = G_UITOFP s64 using bit operations to an IEEE float
   5593 // representation.
   5594 LegalizerHelper::LegalizeResult
   5595 LegalizerHelper::lowerU64ToF32BitOps(MachineInstr &MI) {
   5596   Register Dst = MI.getOperand(0).getReg();
   5597   Register Src = MI.getOperand(1).getReg();
   5598   const LLT S64 = LLT::scalar(64);
   5599   const LLT S32 = LLT::scalar(32);
   5600   const LLT S1 = LLT::scalar(1);
   5601 
   5602   assert(MRI.getType(Src) == S64 && MRI.getType(Dst) == S32);
   5603 
   5604   // unsigned cul2f(ulong u) {
   5605   //   uint lz = clz(u);
   5606   //   uint e = (u != 0) ? 127U + 63U - lz : 0;
   5607   //   u = (u << lz) & 0x7fffffffffffffffUL;
   5608   //   ulong t = u & 0xffffffffffUL;
   5609   //   uint v = (e << 23) | (uint)(u >> 40);
   5610   //   uint r = t > 0x8000000000UL ? 1U : (t == 0x8000000000UL ? v & 1U : 0U);
   5611   //   return as_float(v + r);
   5612   // }
   5613 
   5614   auto Zero32 = MIRBuilder.buildConstant(S32, 0);
   5615   auto Zero64 = MIRBuilder.buildConstant(S64, 0);
   5616 
   5617   auto LZ = MIRBuilder.buildCTLZ_ZERO_UNDEF(S32, Src);
   5618 
   5619   auto K = MIRBuilder.buildConstant(S32, 127U + 63U);
   5620   auto Sub = MIRBuilder.buildSub(S32, K, LZ);
   5621 
   5622   auto NotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, Src, Zero64);
   5623   auto E = MIRBuilder.buildSelect(S32, NotZero, Sub, Zero32);
   5624 
   5625   auto Mask0 = MIRBuilder.buildConstant(S64, (-1ULL) >> 1);
   5626   auto ShlLZ = MIRBuilder.buildShl(S64, Src, LZ);
   5627 
   5628   auto U = MIRBuilder.buildAnd(S64, ShlLZ, Mask0);
   5629 
   5630   auto Mask1 = MIRBuilder.buildConstant(S64, 0xffffffffffULL);
   5631   auto T = MIRBuilder.buildAnd(S64, U, Mask1);
   5632 
   5633   auto UShl = MIRBuilder.buildLShr(S64, U, MIRBuilder.buildConstant(S64, 40));
   5634   auto ShlE = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 23));
   5635   auto V = MIRBuilder.buildOr(S32, ShlE, MIRBuilder.buildTrunc(S32, UShl));
   5636 
   5637   auto C = MIRBuilder.buildConstant(S64, 0x8000000000ULL);
   5638   auto RCmp = MIRBuilder.buildICmp(CmpInst::ICMP_UGT, S1, T, C);
   5639   auto TCmp = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, T, C);
   5640   auto One = MIRBuilder.buildConstant(S32, 1);
   5641 
   5642   auto VTrunc1 = MIRBuilder.buildAnd(S32, V, One);
   5643   auto Select0 = MIRBuilder.buildSelect(S32, TCmp, VTrunc1, Zero32);
   5644   auto R = MIRBuilder.buildSelect(S32, RCmp, One, Select0);
   5645   MIRBuilder.buildAdd(Dst, V, R);
   5646 
   5647   MI.eraseFromParent();
   5648   return Legalized;
   5649 }
   5650 
   5651 LegalizerHelper::LegalizeResult LegalizerHelper::lowerUITOFP(MachineInstr &MI) {
   5652   Register Dst = MI.getOperand(0).getReg();
   5653   Register Src = MI.getOperand(1).getReg();
   5654   LLT DstTy = MRI.getType(Dst);
   5655   LLT SrcTy = MRI.getType(Src);
   5656 
   5657   if (SrcTy == LLT::scalar(1)) {
   5658     auto True = MIRBuilder.buildFConstant(DstTy, 1.0);
   5659     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
   5660     MIRBuilder.buildSelect(Dst, Src, True, False);
   5661     MI.eraseFromParent();
   5662     return Legalized;
   5663   }
   5664 
   5665   if (SrcTy != LLT::scalar(64))
   5666     return UnableToLegalize;
   5667 
   5668   if (DstTy == LLT::scalar(32)) {
   5669     // TODO: SelectionDAG has several alternative expansions to port which may
   5670     // be more reasonble depending on the available instructions. If a target
   5671     // has sitofp, does not have CTLZ, or can efficiently use f64 as an
   5672     // intermediate type, this is probably worse.
   5673     return lowerU64ToF32BitOps(MI);
   5674   }
   5675 
   5676   return UnableToLegalize;
   5677 }
   5678 
   5679 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSITOFP(MachineInstr &MI) {
   5680   Register Dst = MI.getOperand(0).getReg();
   5681   Register Src = MI.getOperand(1).getReg();
   5682   LLT DstTy = MRI.getType(Dst);
   5683   LLT SrcTy = MRI.getType(Src);
   5684 
   5685   const LLT S64 = LLT::scalar(64);
   5686   const LLT S32 = LLT::scalar(32);
   5687   const LLT S1 = LLT::scalar(1);
   5688 
   5689   if (SrcTy == S1) {
   5690     auto True = MIRBuilder.buildFConstant(DstTy, -1.0);
   5691     auto False = MIRBuilder.buildFConstant(DstTy, 0.0);
   5692     MIRBuilder.buildSelect(Dst, Src, True, False);
   5693     MI.eraseFromParent();
   5694     return Legalized;
   5695   }
   5696 
   5697   if (SrcTy != S64)
   5698     return UnableToLegalize;
   5699 
   5700   if (DstTy == S32) {
   5701     // signed cl2f(long l) {
   5702     //   long s = l >> 63;
   5703     //   float r = cul2f((l + s) ^ s);
   5704     //   return s ? -r : r;
   5705     // }
   5706     Register L = Src;
   5707     auto SignBit = MIRBuilder.buildConstant(S64, 63);
   5708     auto S = MIRBuilder.buildAShr(S64, L, SignBit);
   5709 
   5710     auto LPlusS = MIRBuilder.buildAdd(S64, L, S);
   5711     auto Xor = MIRBuilder.buildXor(S64, LPlusS, S);
   5712     auto R = MIRBuilder.buildUITOFP(S32, Xor);
   5713 
   5714     auto RNeg = MIRBuilder.buildFNeg(S32, R);
   5715     auto SignNotZero = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, S,
   5716                                             MIRBuilder.buildConstant(S64, 0));
   5717     MIRBuilder.buildSelect(Dst, SignNotZero, RNeg, R);
   5718     MI.eraseFromParent();
   5719     return Legalized;
   5720   }
   5721 
   5722   return UnableToLegalize;
   5723 }
   5724 
   5725 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOUI(MachineInstr &MI) {
   5726   Register Dst = MI.getOperand(0).getReg();
   5727   Register Src = MI.getOperand(1).getReg();
   5728   LLT DstTy = MRI.getType(Dst);
   5729   LLT SrcTy = MRI.getType(Src);
   5730   const LLT S64 = LLT::scalar(64);
   5731   const LLT S32 = LLT::scalar(32);
   5732 
   5733   if (SrcTy != S64 && SrcTy != S32)
   5734     return UnableToLegalize;
   5735   if (DstTy != S32 && DstTy != S64)
   5736     return UnableToLegalize;
   5737 
   5738   // FPTOSI gives same result as FPTOUI for positive signed integers.
   5739   // FPTOUI needs to deal with fp values that convert to unsigned integers
   5740   // greater or equal to 2^31 for float or 2^63 for double. For brevity 2^Exp.
   5741 
   5742   APInt TwoPExpInt = APInt::getSignMask(DstTy.getSizeInBits());
   5743   APFloat TwoPExpFP(SrcTy.getSizeInBits() == 32 ? APFloat::IEEEsingle()
   5744                                                 : APFloat::IEEEdouble(),
   5745                     APInt::getNullValue(SrcTy.getSizeInBits()));
   5746   TwoPExpFP.convertFromAPInt(TwoPExpInt, false, APFloat::rmNearestTiesToEven);
   5747 
   5748   MachineInstrBuilder FPTOSI = MIRBuilder.buildFPTOSI(DstTy, Src);
   5749 
   5750   MachineInstrBuilder Threshold = MIRBuilder.buildFConstant(SrcTy, TwoPExpFP);
   5751   // For fp Value greater or equal to Threshold(2^Exp), we use FPTOSI on
   5752   // (Value - 2^Exp) and add 2^Exp by setting highest bit in result to 1.
   5753   MachineInstrBuilder FSub = MIRBuilder.buildFSub(SrcTy, Src, Threshold);
   5754   MachineInstrBuilder ResLowBits = MIRBuilder.buildFPTOSI(DstTy, FSub);
   5755   MachineInstrBuilder ResHighBit = MIRBuilder.buildConstant(DstTy, TwoPExpInt);
   5756   MachineInstrBuilder Res = MIRBuilder.buildXor(DstTy, ResLowBits, ResHighBit);
   5757 
   5758   const LLT S1 = LLT::scalar(1);
   5759 
   5760   MachineInstrBuilder FCMP =
   5761       MIRBuilder.buildFCmp(CmpInst::FCMP_ULT, S1, Src, Threshold);
   5762   MIRBuilder.buildSelect(Dst, FCMP, FPTOSI, Res);
   5763 
   5764   MI.eraseFromParent();
   5765   return Legalized;
   5766 }
   5767 
   5768 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPTOSI(MachineInstr &MI) {
   5769   Register Dst = MI.getOperand(0).getReg();
   5770   Register Src = MI.getOperand(1).getReg();
   5771   LLT DstTy = MRI.getType(Dst);
   5772   LLT SrcTy = MRI.getType(Src);
   5773   const LLT S64 = LLT::scalar(64);
   5774   const LLT S32 = LLT::scalar(32);
   5775 
   5776   // FIXME: Only f32 to i64 conversions are supported.
   5777   if (SrcTy.getScalarType() != S32 || DstTy.getScalarType() != S64)
   5778     return UnableToLegalize;
   5779 
   5780   // Expand f32 -> i64 conversion
   5781   // This algorithm comes from compiler-rt's implementation of fixsfdi:
   5782   // https://github.com/llvm/llvm-project/blob/main/compiler-rt/lib/builtins/fixsfdi.c
   5783 
   5784   unsigned SrcEltBits = SrcTy.getScalarSizeInBits();
   5785 
   5786   auto ExponentMask = MIRBuilder.buildConstant(SrcTy, 0x7F800000);
   5787   auto ExponentLoBit = MIRBuilder.buildConstant(SrcTy, 23);
   5788 
   5789   auto AndExpMask = MIRBuilder.buildAnd(SrcTy, Src, ExponentMask);
   5790   auto ExponentBits = MIRBuilder.buildLShr(SrcTy, AndExpMask, ExponentLoBit);
   5791 
   5792   auto SignMask = MIRBuilder.buildConstant(SrcTy,
   5793                                            APInt::getSignMask(SrcEltBits));
   5794   auto AndSignMask = MIRBuilder.buildAnd(SrcTy, Src, SignMask);
   5795   auto SignLowBit = MIRBuilder.buildConstant(SrcTy, SrcEltBits - 1);
   5796   auto Sign = MIRBuilder.buildAShr(SrcTy, AndSignMask, SignLowBit);
   5797   Sign = MIRBuilder.buildSExt(DstTy, Sign);
   5798 
   5799   auto MantissaMask = MIRBuilder.buildConstant(SrcTy, 0x007FFFFF);
   5800   auto AndMantissaMask = MIRBuilder.buildAnd(SrcTy, Src, MantissaMask);
   5801   auto K = MIRBuilder.buildConstant(SrcTy, 0x00800000);
   5802 
   5803   auto R = MIRBuilder.buildOr(SrcTy, AndMantissaMask, K);
   5804   R = MIRBuilder.buildZExt(DstTy, R);
   5805 
   5806   auto Bias = MIRBuilder.buildConstant(SrcTy, 127);
   5807   auto Exponent = MIRBuilder.buildSub(SrcTy, ExponentBits, Bias);
   5808   auto SubExponent = MIRBuilder.buildSub(SrcTy, Exponent, ExponentLoBit);
   5809   auto ExponentSub = MIRBuilder.buildSub(SrcTy, ExponentLoBit, Exponent);
   5810 
   5811   auto Shl = MIRBuilder.buildShl(DstTy, R, SubExponent);
   5812   auto Srl = MIRBuilder.buildLShr(DstTy, R, ExponentSub);
   5813 
   5814   const LLT S1 = LLT::scalar(1);
   5815   auto CmpGt = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,
   5816                                     S1, Exponent, ExponentLoBit);
   5817 
   5818   R = MIRBuilder.buildSelect(DstTy, CmpGt, Shl, Srl);
   5819 
   5820   auto XorSign = MIRBuilder.buildXor(DstTy, R, Sign);
   5821   auto Ret = MIRBuilder.buildSub(DstTy, XorSign, Sign);
   5822 
   5823   auto ZeroSrcTy = MIRBuilder.buildConstant(SrcTy, 0);
   5824 
   5825   auto ExponentLt0 = MIRBuilder.buildICmp(CmpInst::ICMP_SLT,
   5826                                           S1, Exponent, ZeroSrcTy);
   5827 
   5828   auto ZeroDstTy = MIRBuilder.buildConstant(DstTy, 0);
   5829   MIRBuilder.buildSelect(Dst, ExponentLt0, ZeroDstTy, Ret);
   5830 
   5831   MI.eraseFromParent();
   5832   return Legalized;
   5833 }
   5834 
   5835 // f64 -> f16 conversion using round-to-nearest-even rounding mode.
   5836 LegalizerHelper::LegalizeResult
   5837 LegalizerHelper::lowerFPTRUNC_F64_TO_F16(MachineInstr &MI) {
   5838   Register Dst = MI.getOperand(0).getReg();
   5839   Register Src = MI.getOperand(1).getReg();
   5840 
   5841   if (MRI.getType(Src).isVector()) // TODO: Handle vectors directly.
   5842     return UnableToLegalize;
   5843 
   5844   const unsigned ExpMask = 0x7ff;
   5845   const unsigned ExpBiasf64 = 1023;
   5846   const unsigned ExpBiasf16 = 15;
   5847   const LLT S32 = LLT::scalar(32);
   5848   const LLT S1 = LLT::scalar(1);
   5849 
   5850   auto Unmerge = MIRBuilder.buildUnmerge(S32, Src);
   5851   Register U = Unmerge.getReg(0);
   5852   Register UH = Unmerge.getReg(1);
   5853 
   5854   auto E = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 20));
   5855   E = MIRBuilder.buildAnd(S32, E, MIRBuilder.buildConstant(S32, ExpMask));
   5856 
   5857   // Subtract the fp64 exponent bias (1023) to get the real exponent and
   5858   // add the f16 bias (15) to get the biased exponent for the f16 format.
   5859   E = MIRBuilder.buildAdd(
   5860     S32, E, MIRBuilder.buildConstant(S32, -ExpBiasf64 + ExpBiasf16));
   5861 
   5862   auto M = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 8));
   5863   M = MIRBuilder.buildAnd(S32, M, MIRBuilder.buildConstant(S32, 0xffe));
   5864 
   5865   auto MaskedSig = MIRBuilder.buildAnd(S32, UH,
   5866                                        MIRBuilder.buildConstant(S32, 0x1ff));
   5867   MaskedSig = MIRBuilder.buildOr(S32, MaskedSig, U);
   5868 
   5869   auto Zero = MIRBuilder.buildConstant(S32, 0);
   5870   auto SigCmpNE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, MaskedSig, Zero);
   5871   auto Lo40Set = MIRBuilder.buildZExt(S32, SigCmpNE0);
   5872   M = MIRBuilder.buildOr(S32, M, Lo40Set);
   5873 
   5874   // (M != 0 ? 0x0200 : 0) | 0x7c00;
   5875   auto Bits0x200 = MIRBuilder.buildConstant(S32, 0x0200);
   5876   auto CmpM_NE0 = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1, M, Zero);
   5877   auto SelectCC = MIRBuilder.buildSelect(S32, CmpM_NE0, Bits0x200, Zero);
   5878 
   5879   auto Bits0x7c00 = MIRBuilder.buildConstant(S32, 0x7c00);
   5880   auto I = MIRBuilder.buildOr(S32, SelectCC, Bits0x7c00);
   5881 
   5882   // N = M | (E << 12);
   5883   auto EShl12 = MIRBuilder.buildShl(S32, E, MIRBuilder.buildConstant(S32, 12));
   5884   auto N = MIRBuilder.buildOr(S32, M, EShl12);
   5885 
   5886   // B = clamp(1-E, 0, 13);
   5887   auto One = MIRBuilder.buildConstant(S32, 1);
   5888   auto OneSubExp = MIRBuilder.buildSub(S32, One, E);
   5889   auto B = MIRBuilder.buildSMax(S32, OneSubExp, Zero);
   5890   B = MIRBuilder.buildSMin(S32, B, MIRBuilder.buildConstant(S32, 13));
   5891 
   5892   auto SigSetHigh = MIRBuilder.buildOr(S32, M,
   5893                                        MIRBuilder.buildConstant(S32, 0x1000));
   5894 
   5895   auto D = MIRBuilder.buildLShr(S32, SigSetHigh, B);
   5896   auto D0 = MIRBuilder.buildShl(S32, D, B);
   5897 
   5898   auto D0_NE_SigSetHigh = MIRBuilder.buildICmp(CmpInst::ICMP_NE, S1,
   5899                                              D0, SigSetHigh);
   5900   auto D1 = MIRBuilder.buildZExt(S32, D0_NE_SigSetHigh);
   5901   D = MIRBuilder.buildOr(S32, D, D1);
   5902 
   5903   auto CmpELtOne = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, S1, E, One);
   5904   auto V = MIRBuilder.buildSelect(S32, CmpELtOne, D, N);
   5905 
   5906   auto VLow3 = MIRBuilder.buildAnd(S32, V, MIRBuilder.buildConstant(S32, 7));
   5907   V = MIRBuilder.buildLShr(S32, V, MIRBuilder.buildConstant(S32, 2));
   5908 
   5909   auto VLow3Eq3 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1, VLow3,
   5910                                        MIRBuilder.buildConstant(S32, 3));
   5911   auto V0 = MIRBuilder.buildZExt(S32, VLow3Eq3);
   5912 
   5913   auto VLow3Gt5 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT, S1, VLow3,
   5914                                        MIRBuilder.buildConstant(S32, 5));
   5915   auto V1 = MIRBuilder.buildZExt(S32, VLow3Gt5);
   5916 
   5917   V1 = MIRBuilder.buildOr(S32, V0, V1);
   5918   V = MIRBuilder.buildAdd(S32, V, V1);
   5919 
   5920   auto CmpEGt30 = MIRBuilder.buildICmp(CmpInst::ICMP_SGT,  S1,
   5921                                        E, MIRBuilder.buildConstant(S32, 30));
   5922   V = MIRBuilder.buildSelect(S32, CmpEGt30,
   5923                              MIRBuilder.buildConstant(S32, 0x7c00), V);
   5924 
   5925   auto CmpEGt1039 = MIRBuilder.buildICmp(CmpInst::ICMP_EQ, S1,
   5926                                          E, MIRBuilder.buildConstant(S32, 1039));
   5927   V = MIRBuilder.buildSelect(S32, CmpEGt1039, I, V);
   5928 
   5929   // Extract the sign bit.
   5930   auto Sign = MIRBuilder.buildLShr(S32, UH, MIRBuilder.buildConstant(S32, 16));
   5931   Sign = MIRBuilder.buildAnd(S32, Sign, MIRBuilder.buildConstant(S32, 0x8000));
   5932 
   5933   // Insert the sign bit
   5934   V = MIRBuilder.buildOr(S32, Sign, V);
   5935 
   5936   MIRBuilder.buildTrunc(Dst, V);
   5937   MI.eraseFromParent();
   5938   return Legalized;
   5939 }
   5940 
   5941 LegalizerHelper::LegalizeResult
   5942 LegalizerHelper::lowerFPTRUNC(MachineInstr &MI) {
   5943   Register Dst = MI.getOperand(0).getReg();
   5944   Register Src = MI.getOperand(1).getReg();
   5945 
   5946   LLT DstTy = MRI.getType(Dst);
   5947   LLT SrcTy = MRI.getType(Src);
   5948   const LLT S64 = LLT::scalar(64);
   5949   const LLT S16 = LLT::scalar(16);
   5950 
   5951   if (DstTy.getScalarType() == S16 && SrcTy.getScalarType() == S64)
   5952     return lowerFPTRUNC_F64_TO_F16(MI);
   5953 
   5954   return UnableToLegalize;
   5955 }
   5956 
   5957 // TODO: If RHS is a constant SelectionDAGBuilder expands this into a
   5958 // multiplication tree.
   5959 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFPOWI(MachineInstr &MI) {
   5960   Register Dst = MI.getOperand(0).getReg();
   5961   Register Src0 = MI.getOperand(1).getReg();
   5962   Register Src1 = MI.getOperand(2).getReg();
   5963   LLT Ty = MRI.getType(Dst);
   5964 
   5965   auto CvtSrc1 = MIRBuilder.buildSITOFP(Ty, Src1);
   5966   MIRBuilder.buildFPow(Dst, Src0, CvtSrc1, MI.getFlags());
   5967   MI.eraseFromParent();
   5968   return Legalized;
   5969 }
   5970 
   5971 static CmpInst::Predicate minMaxToCompare(unsigned Opc) {
   5972   switch (Opc) {
   5973   case TargetOpcode::G_SMIN:
   5974     return CmpInst::ICMP_SLT;
   5975   case TargetOpcode::G_SMAX:
   5976     return CmpInst::ICMP_SGT;
   5977   case TargetOpcode::G_UMIN:
   5978     return CmpInst::ICMP_ULT;
   5979   case TargetOpcode::G_UMAX:
   5980     return CmpInst::ICMP_UGT;
   5981   default:
   5982     llvm_unreachable("not in integer min/max");
   5983   }
   5984 }
   5985 
   5986 LegalizerHelper::LegalizeResult LegalizerHelper::lowerMinMax(MachineInstr &MI) {
   5987   Register Dst = MI.getOperand(0).getReg();
   5988   Register Src0 = MI.getOperand(1).getReg();
   5989   Register Src1 = MI.getOperand(2).getReg();
   5990 
   5991   const CmpInst::Predicate Pred = minMaxToCompare(MI.getOpcode());
   5992   LLT CmpType = MRI.getType(Dst).changeElementSize(1);
   5993 
   5994   auto Cmp = MIRBuilder.buildICmp(Pred, CmpType, Src0, Src1);
   5995   MIRBuilder.buildSelect(Dst, Cmp, Src0, Src1);
   5996 
   5997   MI.eraseFromParent();
   5998   return Legalized;
   5999 }
   6000 
   6001 LegalizerHelper::LegalizeResult
   6002 LegalizerHelper::lowerFCopySign(MachineInstr &MI) {
   6003   Register Dst = MI.getOperand(0).getReg();
   6004   Register Src0 = MI.getOperand(1).getReg();
   6005   Register Src1 = MI.getOperand(2).getReg();
   6006 
   6007   const LLT Src0Ty = MRI.getType(Src0);
   6008   const LLT Src1Ty = MRI.getType(Src1);
   6009 
   6010   const int Src0Size = Src0Ty.getScalarSizeInBits();
   6011   const int Src1Size = Src1Ty.getScalarSizeInBits();
   6012 
   6013   auto SignBitMask = MIRBuilder.buildConstant(
   6014     Src0Ty, APInt::getSignMask(Src0Size));
   6015 
   6016   auto NotSignBitMask = MIRBuilder.buildConstant(
   6017     Src0Ty, APInt::getLowBitsSet(Src0Size, Src0Size - 1));
   6018 
   6019   Register And0 = MIRBuilder.buildAnd(Src0Ty, Src0, NotSignBitMask).getReg(0);
   6020   Register And1;
   6021   if (Src0Ty == Src1Ty) {
   6022     And1 = MIRBuilder.buildAnd(Src1Ty, Src1, SignBitMask).getReg(0);
   6023   } else if (Src0Size > Src1Size) {
   6024     auto ShiftAmt = MIRBuilder.buildConstant(Src0Ty, Src0Size - Src1Size);
   6025     auto Zext = MIRBuilder.buildZExt(Src0Ty, Src1);
   6026     auto Shift = MIRBuilder.buildShl(Src0Ty, Zext, ShiftAmt);
   6027     And1 = MIRBuilder.buildAnd(Src0Ty, Shift, SignBitMask).getReg(0);
   6028   } else {
   6029     auto ShiftAmt = MIRBuilder.buildConstant(Src1Ty, Src1Size - Src0Size);
   6030     auto Shift = MIRBuilder.buildLShr(Src1Ty, Src1, ShiftAmt);
   6031     auto Trunc = MIRBuilder.buildTrunc(Src0Ty, Shift);
   6032     And1 = MIRBuilder.buildAnd(Src0Ty, Trunc, SignBitMask).getReg(0);
   6033   }
   6034 
   6035   // Be careful about setting nsz/nnan/ninf on every instruction, since the
   6036   // constants are a nan and -0.0, but the final result should preserve
   6037   // everything.
   6038   unsigned Flags = MI.getFlags();
   6039   MIRBuilder.buildOr(Dst, And0, And1, Flags);
   6040 
   6041   MI.eraseFromParent();
   6042   return Legalized;
   6043 }
   6044 
   6045 LegalizerHelper::LegalizeResult
   6046 LegalizerHelper::lowerFMinNumMaxNum(MachineInstr &MI) {
   6047   unsigned NewOp = MI.getOpcode() == TargetOpcode::G_FMINNUM ?
   6048     TargetOpcode::G_FMINNUM_IEEE : TargetOpcode::G_FMAXNUM_IEEE;
   6049 
   6050   Register Dst = MI.getOperand(0).getReg();
   6051   Register Src0 = MI.getOperand(1).getReg();
   6052   Register Src1 = MI.getOperand(2).getReg();
   6053   LLT Ty = MRI.getType(Dst);
   6054 
   6055   if (!MI.getFlag(MachineInstr::FmNoNans)) {
   6056     // Insert canonicalizes if it's possible we need to quiet to get correct
   6057     // sNaN behavior.
   6058 
   6059     // Note this must be done here, and not as an optimization combine in the
   6060     // absence of a dedicate quiet-snan instruction as we're using an
   6061     // omni-purpose G_FCANONICALIZE.
   6062     if (!isKnownNeverSNaN(Src0, MRI))
   6063       Src0 = MIRBuilder.buildFCanonicalize(Ty, Src0, MI.getFlags()).getReg(0);
   6064 
   6065     if (!isKnownNeverSNaN(Src1, MRI))
   6066       Src1 = MIRBuilder.buildFCanonicalize(Ty, Src1, MI.getFlags()).getReg(0);
   6067   }
   6068 
   6069   // If there are no nans, it's safe to simply replace this with the non-IEEE
   6070   // version.
   6071   MIRBuilder.buildInstr(NewOp, {Dst}, {Src0, Src1}, MI.getFlags());
   6072   MI.eraseFromParent();
   6073   return Legalized;
   6074 }
   6075 
   6076 LegalizerHelper::LegalizeResult LegalizerHelper::lowerFMad(MachineInstr &MI) {
   6077   // Expand G_FMAD a, b, c -> G_FADD (G_FMUL a, b), c
   6078   Register DstReg = MI.getOperand(0).getReg();
   6079   LLT Ty = MRI.getType(DstReg);
   6080   unsigned Flags = MI.getFlags();
   6081 
   6082   auto Mul = MIRBuilder.buildFMul(Ty, MI.getOperand(1), MI.getOperand(2),
   6083                                   Flags);
   6084   MIRBuilder.buildFAdd(DstReg, Mul, MI.getOperand(3), Flags);
   6085   MI.eraseFromParent();
   6086   return Legalized;
   6087 }
   6088 
   6089 LegalizerHelper::LegalizeResult
   6090 LegalizerHelper::lowerIntrinsicRound(MachineInstr &MI) {
   6091   Register DstReg = MI.getOperand(0).getReg();
   6092   Register X = MI.getOperand(1).getReg();
   6093   const unsigned Flags = MI.getFlags();
   6094   const LLT Ty = MRI.getType(DstReg);
   6095   const LLT CondTy = Ty.changeElementSize(1);
   6096 
   6097   // round(x) =>
   6098   //  t = trunc(x);
   6099   //  d = fabs(x - t);
   6100   //  o = copysign(1.0f, x);
   6101   //  return t + (d >= 0.5 ? o : 0.0);
   6102 
   6103   auto T = MIRBuilder.buildIntrinsicTrunc(Ty, X, Flags);
   6104 
   6105   auto Diff = MIRBuilder.buildFSub(Ty, X, T, Flags);
   6106   auto AbsDiff = MIRBuilder.buildFAbs(Ty, Diff, Flags);
   6107   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
   6108   auto One = MIRBuilder.buildFConstant(Ty, 1.0);
   6109   auto Half = MIRBuilder.buildFConstant(Ty, 0.5);
   6110   auto SignOne = MIRBuilder.buildFCopysign(Ty, One, X);
   6111 
   6112   auto Cmp = MIRBuilder.buildFCmp(CmpInst::FCMP_OGE, CondTy, AbsDiff, Half,
   6113                                   Flags);
   6114   auto Sel = MIRBuilder.buildSelect(Ty, Cmp, SignOne, Zero, Flags);
   6115 
   6116   MIRBuilder.buildFAdd(DstReg, T, Sel, Flags);
   6117 
   6118   MI.eraseFromParent();
   6119   return Legalized;
   6120 }
   6121 
   6122 LegalizerHelper::LegalizeResult
   6123 LegalizerHelper::lowerFFloor(MachineInstr &MI) {
   6124   Register DstReg = MI.getOperand(0).getReg();
   6125   Register SrcReg = MI.getOperand(1).getReg();
   6126   unsigned Flags = MI.getFlags();
   6127   LLT Ty = MRI.getType(DstReg);
   6128   const LLT CondTy = Ty.changeElementSize(1);
   6129 
   6130   // result = trunc(src);
   6131   // if (src < 0.0 && src != result)
   6132   //   result += -1.0.
   6133 
   6134   auto Trunc = MIRBuilder.buildIntrinsicTrunc(Ty, SrcReg, Flags);
   6135   auto Zero = MIRBuilder.buildFConstant(Ty, 0.0);
   6136 
   6137   auto Lt0 = MIRBuilder.buildFCmp(CmpInst::FCMP_OLT, CondTy,
   6138                                   SrcReg, Zero, Flags);
   6139   auto NeTrunc = MIRBuilder.buildFCmp(CmpInst::FCMP_ONE, CondTy,
   6140                                       SrcReg, Trunc, Flags);
   6141   auto And = MIRBuilder.buildAnd(CondTy, Lt0, NeTrunc);
   6142   auto AddVal = MIRBuilder.buildSITOFP(Ty, And);
   6143 
   6144   MIRBuilder.buildFAdd(DstReg, Trunc, AddVal, Flags);
   6145   MI.eraseFromParent();
   6146   return Legalized;
   6147 }
   6148 
   6149 LegalizerHelper::LegalizeResult
   6150 LegalizerHelper::lowerMergeValues(MachineInstr &MI) {
   6151   const unsigned NumOps = MI.getNumOperands();
   6152   Register DstReg = MI.getOperand(0).getReg();
   6153   Register Src0Reg = MI.getOperand(1).getReg();
   6154   LLT DstTy = MRI.getType(DstReg);
   6155   LLT SrcTy = MRI.getType(Src0Reg);
   6156   unsigned PartSize = SrcTy.getSizeInBits();
   6157 
   6158   LLT WideTy = LLT::scalar(DstTy.getSizeInBits());
   6159   Register ResultReg = MIRBuilder.buildZExt(WideTy, Src0Reg).getReg(0);
   6160 
   6161   for (unsigned I = 2; I != NumOps; ++I) {
   6162     const unsigned Offset = (I - 1) * PartSize;
   6163 
   6164     Register SrcReg = MI.getOperand(I).getReg();
   6165     auto ZextInput = MIRBuilder.buildZExt(WideTy, SrcReg);
   6166 
   6167     Register NextResult = I + 1 == NumOps && WideTy == DstTy ? DstReg :
   6168       MRI.createGenericVirtualRegister(WideTy);
   6169 
   6170     auto ShiftAmt = MIRBuilder.buildConstant(WideTy, Offset);
   6171     auto Shl = MIRBuilder.buildShl(WideTy, ZextInput, ShiftAmt);
   6172     MIRBuilder.buildOr(NextResult, ResultReg, Shl);
   6173     ResultReg = NextResult;
   6174   }
   6175 
   6176   if (DstTy.isPointer()) {
   6177     if (MIRBuilder.getDataLayout().isNonIntegralAddressSpace(
   6178           DstTy.getAddressSpace())) {
   6179       LLVM_DEBUG(dbgs() << "Not casting nonintegral address space\n");
   6180       return UnableToLegalize;
   6181     }
   6182 
   6183     MIRBuilder.buildIntToPtr(DstReg, ResultReg);
   6184   }
   6185 
   6186   MI.eraseFromParent();
   6187   return Legalized;
   6188 }
   6189 
   6190 LegalizerHelper::LegalizeResult
   6191 LegalizerHelper::lowerUnmergeValues(MachineInstr &MI) {
   6192   const unsigned NumDst = MI.getNumOperands() - 1;
   6193   Register SrcReg = MI.getOperand(NumDst).getReg();
   6194   Register Dst0Reg = MI.getOperand(0).getReg();
   6195   LLT DstTy = MRI.getType(Dst0Reg);
   6196   if (DstTy.isPointer())
   6197     return UnableToLegalize; // TODO
   6198 
   6199   SrcReg = coerceToScalar(SrcReg);
   6200   if (!SrcReg)
   6201     return UnableToLegalize;
   6202 
   6203   // Expand scalarizing unmerge as bitcast to integer and shift.
   6204   LLT IntTy = MRI.getType(SrcReg);
   6205 
   6206   MIRBuilder.buildTrunc(Dst0Reg, SrcReg);
   6207 
   6208   const unsigned DstSize = DstTy.getSizeInBits();
   6209   unsigned Offset = DstSize;
   6210   for (unsigned I = 1; I != NumDst; ++I, Offset += DstSize) {
   6211     auto ShiftAmt = MIRBuilder.buildConstant(IntTy, Offset);
   6212     auto Shift = MIRBuilder.buildLShr(IntTy, SrcReg, ShiftAmt);
   6213     MIRBuilder.buildTrunc(MI.getOperand(I), Shift);
   6214   }
   6215 
   6216   MI.eraseFromParent();
   6217   return Legalized;
   6218 }
   6219 
   6220 /// Lower a vector extract or insert by writing the vector to a stack temporary
   6221 /// and reloading the element or vector.
   6222 ///
   6223 /// %dst = G_EXTRACT_VECTOR_ELT %vec, %idx
   6224 ///  =>
   6225 ///  %stack_temp = G_FRAME_INDEX
   6226 ///  G_STORE %vec, %stack_temp
   6227 ///  %idx = clamp(%idx, %vec.getNumElements())
   6228 ///  %element_ptr = G_PTR_ADD %stack_temp, %idx
   6229 ///  %dst = G_LOAD %element_ptr
   6230 LegalizerHelper::LegalizeResult
   6231 LegalizerHelper::lowerExtractInsertVectorElt(MachineInstr &MI) {
   6232   Register DstReg = MI.getOperand(0).getReg();
   6233   Register SrcVec = MI.getOperand(1).getReg();
   6234   Register InsertVal;
   6235   if (MI.getOpcode() == TargetOpcode::G_INSERT_VECTOR_ELT)
   6236     InsertVal = MI.getOperand(2).getReg();
   6237 
   6238   Register Idx = MI.getOperand(MI.getNumOperands() - 1).getReg();
   6239 
   6240   LLT VecTy = MRI.getType(SrcVec);
   6241   LLT EltTy = VecTy.getElementType();
   6242   if (!EltTy.isByteSized()) { // Not implemented.
   6243     LLVM_DEBUG(dbgs() << "Can't handle non-byte element vectors yet\n");
   6244     return UnableToLegalize;
   6245   }
   6246 
   6247   unsigned EltBytes = EltTy.getSizeInBytes();
   6248   Align VecAlign = getStackTemporaryAlignment(VecTy);
   6249   Align EltAlign;
   6250 
   6251   MachinePointerInfo PtrInfo;
   6252   auto StackTemp = createStackTemporary(TypeSize::Fixed(VecTy.getSizeInBytes()),
   6253                                         VecAlign, PtrInfo);
   6254   MIRBuilder.buildStore(SrcVec, StackTemp, PtrInfo, VecAlign);
   6255 
   6256   // Get the pointer to the element, and be sure not to hit undefined behavior
   6257   // if the index is out of bounds.
   6258   Register EltPtr = getVectorElementPointer(StackTemp.getReg(0), VecTy, Idx);
   6259 
   6260   int64_t IdxVal;
   6261   if (mi_match(Idx, MRI, m_ICst(IdxVal))) {
   6262     int64_t Offset = IdxVal * EltBytes;
   6263     PtrInfo = PtrInfo.getWithOffset(Offset);
   6264     EltAlign = commonAlignment(VecAlign, Offset);
   6265   } else {
   6266     // We lose information with a variable offset.
   6267     EltAlign = getStackTemporaryAlignment(EltTy);
   6268     PtrInfo = MachinePointerInfo(MRI.getType(EltPtr).getAddressSpace());
   6269   }
   6270 
   6271   if (InsertVal) {
   6272     // Write the inserted element
   6273     MIRBuilder.buildStore(InsertVal, EltPtr, PtrInfo, EltAlign);
   6274 
   6275     // Reload the whole vector.
   6276     MIRBuilder.buildLoad(DstReg, StackTemp, PtrInfo, VecAlign);
   6277   } else {
   6278     MIRBuilder.buildLoad(DstReg, EltPtr, PtrInfo, EltAlign);
   6279   }
   6280 
   6281   MI.eraseFromParent();
   6282   return Legalized;
   6283 }
   6284 
   6285 LegalizerHelper::LegalizeResult
   6286 LegalizerHelper::lowerShuffleVector(MachineInstr &MI) {
   6287   Register DstReg = MI.getOperand(0).getReg();
   6288   Register Src0Reg = MI.getOperand(1).getReg();
   6289   Register Src1Reg = MI.getOperand(2).getReg();
   6290   LLT Src0Ty = MRI.getType(Src0Reg);
   6291   LLT DstTy = MRI.getType(DstReg);
   6292   LLT IdxTy = LLT::scalar(32);
   6293 
   6294   ArrayRef<int> Mask = MI.getOperand(3).getShuffleMask();
   6295 
   6296   if (DstTy.isScalar()) {
   6297     if (Src0Ty.isVector())
   6298       return UnableToLegalize;
   6299 
   6300     // This is just a SELECT.
   6301     assert(Mask.size() == 1 && "Expected a single mask element");
   6302     Register Val;
   6303     if (Mask[0] < 0 || Mask[0] > 1)
   6304       Val = MIRBuilder.buildUndef(DstTy).getReg(0);
   6305     else
   6306       Val = Mask[0] == 0 ? Src0Reg : Src1Reg;
   6307     MIRBuilder.buildCopy(DstReg, Val);
   6308     MI.eraseFromParent();
   6309     return Legalized;
   6310   }
   6311 
   6312   Register Undef;
   6313   SmallVector<Register, 32> BuildVec;
   6314   LLT EltTy = DstTy.getElementType();
   6315 
   6316   for (int Idx : Mask) {
   6317     if (Idx < 0) {
   6318       if (!Undef.isValid())
   6319         Undef = MIRBuilder.buildUndef(EltTy).getReg(0);
   6320       BuildVec.push_back(Undef);
   6321       continue;
   6322     }
   6323 
   6324     if (Src0Ty.isScalar()) {
   6325       BuildVec.push_back(Idx == 0 ? Src0Reg : Src1Reg);
   6326     } else {
   6327       int NumElts = Src0Ty.getNumElements();
   6328       Register SrcVec = Idx < NumElts ? Src0Reg : Src1Reg;
   6329       int ExtractIdx = Idx < NumElts ? Idx : Idx - NumElts;
   6330       auto IdxK = MIRBuilder.buildConstant(IdxTy, ExtractIdx);
   6331       auto Extract = MIRBuilder.buildExtractVectorElement(EltTy, SrcVec, IdxK);
   6332       BuildVec.push_back(Extract.getReg(0));
   6333     }
   6334   }
   6335 
   6336   MIRBuilder.buildBuildVector(DstReg, BuildVec);
   6337   MI.eraseFromParent();
   6338   return Legalized;
   6339 }
   6340 
   6341 LegalizerHelper::LegalizeResult
   6342 LegalizerHelper::lowerDynStackAlloc(MachineInstr &MI) {
   6343   const auto &MF = *MI.getMF();
   6344   const auto &TFI = *MF.getSubtarget().getFrameLowering();
   6345   if (TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsUp)
   6346     return UnableToLegalize;
   6347 
   6348   Register Dst = MI.getOperand(0).getReg();
   6349   Register AllocSize = MI.getOperand(1).getReg();
   6350   Align Alignment = assumeAligned(MI.getOperand(2).getImm());
   6351 
   6352   LLT PtrTy = MRI.getType(Dst);
   6353   LLT IntPtrTy = LLT::scalar(PtrTy.getSizeInBits());
   6354 
   6355   Register SPReg = TLI.getStackPointerRegisterToSaveRestore();
   6356   auto SPTmp = MIRBuilder.buildCopy(PtrTy, SPReg);
   6357   SPTmp = MIRBuilder.buildCast(IntPtrTy, SPTmp);
   6358 
   6359   // Subtract the final alloc from the SP. We use G_PTRTOINT here so we don't
   6360   // have to generate an extra instruction to negate the alloc and then use
   6361   // G_PTR_ADD to add the negative offset.
   6362   auto Alloc = MIRBuilder.buildSub(IntPtrTy, SPTmp, AllocSize);
   6363   if (Alignment > Align(1)) {
   6364     APInt AlignMask(IntPtrTy.getSizeInBits(), Alignment.value(), true);
   6365     AlignMask.negate();
   6366     auto AlignCst = MIRBuilder.buildConstant(IntPtrTy, AlignMask);
   6367     Alloc = MIRBuilder.buildAnd(IntPtrTy, Alloc, AlignCst);
   6368   }
   6369 
   6370   SPTmp = MIRBuilder.buildCast(PtrTy, Alloc);
   6371   MIRBuilder.buildCopy(SPReg, SPTmp);
   6372   MIRBuilder.buildCopy(Dst, SPTmp);
   6373 
   6374   MI.eraseFromParent();
   6375   return Legalized;
   6376 }
   6377 
   6378 LegalizerHelper::LegalizeResult
   6379 LegalizerHelper::lowerExtract(MachineInstr &MI) {
   6380   Register Dst = MI.getOperand(0).getReg();
   6381   Register Src = MI.getOperand(1).getReg();
   6382   unsigned Offset = MI.getOperand(2).getImm();
   6383 
   6384   LLT DstTy = MRI.getType(Dst);
   6385   LLT SrcTy = MRI.getType(Src);
   6386 
   6387   if (DstTy.isScalar() &&
   6388       (SrcTy.isScalar() ||
   6389        (SrcTy.isVector() && DstTy == SrcTy.getElementType()))) {
   6390     LLT SrcIntTy = SrcTy;
   6391     if (!SrcTy.isScalar()) {
   6392       SrcIntTy = LLT::scalar(SrcTy.getSizeInBits());
   6393       Src = MIRBuilder.buildBitcast(SrcIntTy, Src).getReg(0);
   6394     }
   6395 
   6396     if (Offset == 0)
   6397       MIRBuilder.buildTrunc(Dst, Src);
   6398     else {
   6399       auto ShiftAmt = MIRBuilder.buildConstant(SrcIntTy, Offset);
   6400       auto Shr = MIRBuilder.buildLShr(SrcIntTy, Src, ShiftAmt);
   6401       MIRBuilder.buildTrunc(Dst, Shr);
   6402     }
   6403 
   6404     MI.eraseFromParent();
   6405     return Legalized;
   6406   }
   6407 
   6408   return UnableToLegalize;
   6409 }
   6410 
   6411 LegalizerHelper::LegalizeResult LegalizerHelper::lowerInsert(MachineInstr &MI) {
   6412   Register Dst = MI.getOperand(0).getReg();
   6413   Register Src = MI.getOperand(1).getReg();
   6414   Register InsertSrc = MI.getOperand(2).getReg();
   6415   uint64_t Offset = MI.getOperand(3).getImm();
   6416 
   6417   LLT DstTy = MRI.getType(Src);
   6418   LLT InsertTy = MRI.getType(InsertSrc);
   6419 
   6420   if (InsertTy.isVector() ||
   6421       (DstTy.isVector() && DstTy.getElementType() != InsertTy))
   6422     return UnableToLegalize;
   6423 
   6424   const DataLayout &DL = MIRBuilder.getDataLayout();
   6425   if ((DstTy.isPointer() &&
   6426        DL.isNonIntegralAddressSpace(DstTy.getAddressSpace())) ||
   6427       (InsertTy.isPointer() &&
   6428        DL.isNonIntegralAddressSpace(InsertTy.getAddressSpace()))) {
   6429     LLVM_DEBUG(dbgs() << "Not casting non-integral address space integer\n");
   6430     return UnableToLegalize;
   6431   }
   6432 
   6433   LLT IntDstTy = DstTy;
   6434 
   6435   if (!DstTy.isScalar()) {
   6436     IntDstTy = LLT::scalar(DstTy.getSizeInBits());
   6437     Src = MIRBuilder.buildCast(IntDstTy, Src).getReg(0);
   6438   }
   6439 
   6440   if (!InsertTy.isScalar()) {
   6441     const LLT IntInsertTy = LLT::scalar(InsertTy.getSizeInBits());
   6442     InsertSrc = MIRBuilder.buildPtrToInt(IntInsertTy, InsertSrc).getReg(0);
   6443   }
   6444 
   6445   Register ExtInsSrc = MIRBuilder.buildZExt(IntDstTy, InsertSrc).getReg(0);
   6446   if (Offset != 0) {
   6447     auto ShiftAmt = MIRBuilder.buildConstant(IntDstTy, Offset);
   6448     ExtInsSrc = MIRBuilder.buildShl(IntDstTy, ExtInsSrc, ShiftAmt).getReg(0);
   6449   }
   6450 
   6451   APInt MaskVal = APInt::getBitsSetWithWrap(
   6452       DstTy.getSizeInBits(), Offset + InsertTy.getSizeInBits(), Offset);
   6453 
   6454   auto Mask = MIRBuilder.buildConstant(IntDstTy, MaskVal);
   6455   auto MaskedSrc = MIRBuilder.buildAnd(IntDstTy, Src, Mask);
   6456   auto Or = MIRBuilder.buildOr(IntDstTy, MaskedSrc, ExtInsSrc);
   6457 
   6458   MIRBuilder.buildCast(Dst, Or);
   6459   MI.eraseFromParent();
   6460   return Legalized;
   6461 }
   6462 
   6463 LegalizerHelper::LegalizeResult
   6464 LegalizerHelper::lowerSADDO_SSUBO(MachineInstr &MI) {
   6465   Register Dst0 = MI.getOperand(0).getReg();
   6466   Register Dst1 = MI.getOperand(1).getReg();
   6467   Register LHS = MI.getOperand(2).getReg();
   6468   Register RHS = MI.getOperand(3).getReg();
   6469   const bool IsAdd = MI.getOpcode() == TargetOpcode::G_SADDO;
   6470 
   6471   LLT Ty = MRI.getType(Dst0);
   6472   LLT BoolTy = MRI.getType(Dst1);
   6473 
   6474   if (IsAdd)
   6475     MIRBuilder.buildAdd(Dst0, LHS, RHS);
   6476   else
   6477     MIRBuilder.buildSub(Dst0, LHS, RHS);
   6478 
   6479   // TODO: If SADDSAT/SSUBSAT is legal, compare results to detect overflow.
   6480 
   6481   auto Zero = MIRBuilder.buildConstant(Ty, 0);
   6482 
   6483   // For an addition, the result should be less than one of the operands (LHS)
   6484   // if and only if the other operand (RHS) is negative, otherwise there will
   6485   // be overflow.
   6486   // For a subtraction, the result should be less than one of the operands
   6487   // (LHS) if and only if the other operand (RHS) is (non-zero) positive,
   6488   // otherwise there will be overflow.
   6489   auto ResultLowerThanLHS =
   6490       MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, Dst0, LHS);
   6491   auto ConditionRHS = MIRBuilder.buildICmp(
   6492       IsAdd ? CmpInst::ICMP_SLT : CmpInst::ICMP_SGT, BoolTy, RHS, Zero);
   6493 
   6494   MIRBuilder.buildXor(Dst1, ConditionRHS, ResultLowerThanLHS);
   6495   MI.eraseFromParent();
   6496   return Legalized;
   6497 }
   6498 
   6499 LegalizerHelper::LegalizeResult
   6500 LegalizerHelper::lowerAddSubSatToMinMax(MachineInstr &MI) {
   6501   Register Res = MI.getOperand(0).getReg();
   6502   Register LHS = MI.getOperand(1).getReg();
   6503   Register RHS = MI.getOperand(2).getReg();
   6504   LLT Ty = MRI.getType(Res);
   6505   bool IsSigned;
   6506   bool IsAdd;
   6507   unsigned BaseOp;
   6508   switch (MI.getOpcode()) {
   6509   default:
   6510     llvm_unreachable("unexpected addsat/subsat opcode");
   6511   case TargetOpcode::G_UADDSAT:
   6512     IsSigned = false;
   6513     IsAdd = true;
   6514     BaseOp = TargetOpcode::G_ADD;
   6515     break;
   6516   case TargetOpcode::G_SADDSAT:
   6517     IsSigned = true;
   6518     IsAdd = true;
   6519     BaseOp = TargetOpcode::G_ADD;
   6520     break;
   6521   case TargetOpcode::G_USUBSAT:
   6522     IsSigned = false;
   6523     IsAdd = false;
   6524     BaseOp = TargetOpcode::G_SUB;
   6525     break;
   6526   case TargetOpcode::G_SSUBSAT:
   6527     IsSigned = true;
   6528     IsAdd = false;
   6529     BaseOp = TargetOpcode::G_SUB;
   6530     break;
   6531   }
   6532 
   6533   if (IsSigned) {
   6534     // sadd.sat(a, b) ->
   6535     //   hi = 0x7fffffff - smax(a, 0)
   6536     //   lo = 0x80000000 - smin(a, 0)
   6537     //   a + smin(smax(lo, b), hi)
   6538     // ssub.sat(a, b) ->
   6539     //   lo = smax(a, -1) - 0x7fffffff
   6540     //   hi = smin(a, -1) - 0x80000000
   6541     //   a - smin(smax(lo, b), hi)
   6542     // TODO: AMDGPU can use a "median of 3" instruction here:
   6543     //   a +/- med3(lo, b, hi)
   6544     uint64_t NumBits = Ty.getScalarSizeInBits();
   6545     auto MaxVal =
   6546         MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(NumBits));
   6547     auto MinVal =
   6548         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
   6549     MachineInstrBuilder Hi, Lo;
   6550     if (IsAdd) {
   6551       auto Zero = MIRBuilder.buildConstant(Ty, 0);
   6552       Hi = MIRBuilder.buildSub(Ty, MaxVal, MIRBuilder.buildSMax(Ty, LHS, Zero));
   6553       Lo = MIRBuilder.buildSub(Ty, MinVal, MIRBuilder.buildSMin(Ty, LHS, Zero));
   6554     } else {
   6555       auto NegOne = MIRBuilder.buildConstant(Ty, -1);
   6556       Lo = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMax(Ty, LHS, NegOne),
   6557                                MaxVal);
   6558       Hi = MIRBuilder.buildSub(Ty, MIRBuilder.buildSMin(Ty, LHS, NegOne),
   6559                                MinVal);
   6560     }
   6561     auto RHSClamped =
   6562         MIRBuilder.buildSMin(Ty, MIRBuilder.buildSMax(Ty, Lo, RHS), Hi);
   6563     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, RHSClamped});
   6564   } else {
   6565     // uadd.sat(a, b) -> a + umin(~a, b)
   6566     // usub.sat(a, b) -> a - umin(a, b)
   6567     Register Not = IsAdd ? MIRBuilder.buildNot(Ty, LHS).getReg(0) : LHS;
   6568     auto Min = MIRBuilder.buildUMin(Ty, Not, RHS);
   6569     MIRBuilder.buildInstr(BaseOp, {Res}, {LHS, Min});
   6570   }
   6571 
   6572   MI.eraseFromParent();
   6573   return Legalized;
   6574 }
   6575 
   6576 LegalizerHelper::LegalizeResult
   6577 LegalizerHelper::lowerAddSubSatToAddoSubo(MachineInstr &MI) {
   6578   Register Res = MI.getOperand(0).getReg();
   6579   Register LHS = MI.getOperand(1).getReg();
   6580   Register RHS = MI.getOperand(2).getReg();
   6581   LLT Ty = MRI.getType(Res);
   6582   LLT BoolTy = Ty.changeElementSize(1);
   6583   bool IsSigned;
   6584   bool IsAdd;
   6585   unsigned OverflowOp;
   6586   switch (MI.getOpcode()) {
   6587   default:
   6588     llvm_unreachable("unexpected addsat/subsat opcode");
   6589   case TargetOpcode::G_UADDSAT:
   6590     IsSigned = false;
   6591     IsAdd = true;
   6592     OverflowOp = TargetOpcode::G_UADDO;
   6593     break;
   6594   case TargetOpcode::G_SADDSAT:
   6595     IsSigned = true;
   6596     IsAdd = true;
   6597     OverflowOp = TargetOpcode::G_SADDO;
   6598     break;
   6599   case TargetOpcode::G_USUBSAT:
   6600     IsSigned = false;
   6601     IsAdd = false;
   6602     OverflowOp = TargetOpcode::G_USUBO;
   6603     break;
   6604   case TargetOpcode::G_SSUBSAT:
   6605     IsSigned = true;
   6606     IsAdd = false;
   6607     OverflowOp = TargetOpcode::G_SSUBO;
   6608     break;
   6609   }
   6610 
   6611   auto OverflowRes =
   6612       MIRBuilder.buildInstr(OverflowOp, {Ty, BoolTy}, {LHS, RHS});
   6613   Register Tmp = OverflowRes.getReg(0);
   6614   Register Ov = OverflowRes.getReg(1);
   6615   MachineInstrBuilder Clamp;
   6616   if (IsSigned) {
   6617     // sadd.sat(a, b) ->
   6618     //   {tmp, ov} = saddo(a, b)
   6619     //   ov ? (tmp >>s 31) + 0x80000000 : r
   6620     // ssub.sat(a, b) ->
   6621     //   {tmp, ov} = ssubo(a, b)
   6622     //   ov ? (tmp >>s 31) + 0x80000000 : r
   6623     uint64_t NumBits = Ty.getScalarSizeInBits();
   6624     auto ShiftAmount = MIRBuilder.buildConstant(Ty, NumBits - 1);
   6625     auto Sign = MIRBuilder.buildAShr(Ty, Tmp, ShiftAmount);
   6626     auto MinVal =
   6627         MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(NumBits));
   6628     Clamp = MIRBuilder.buildAdd(Ty, Sign, MinVal);
   6629   } else {
   6630     // uadd.sat(a, b) ->
   6631     //   {tmp, ov} = uaddo(a, b)
   6632     //   ov ? 0xffffffff : tmp
   6633     // usub.sat(a, b) ->
   6634     //   {tmp, ov} = usubo(a, b)
   6635     //   ov ? 0 : tmp
   6636     Clamp = MIRBuilder.buildConstant(Ty, IsAdd ? -1 : 0);
   6637   }
   6638   MIRBuilder.buildSelect(Res, Ov, Clamp, Tmp);
   6639 
   6640   MI.eraseFromParent();
   6641   return Legalized;
   6642 }
   6643 
   6644 LegalizerHelper::LegalizeResult
   6645 LegalizerHelper::lowerShlSat(MachineInstr &MI) {
   6646   assert((MI.getOpcode() == TargetOpcode::G_SSHLSAT ||
   6647           MI.getOpcode() == TargetOpcode::G_USHLSAT) &&
   6648          "Expected shlsat opcode!");
   6649   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SSHLSAT;
   6650   Register Res = MI.getOperand(0).getReg();
   6651   Register LHS = MI.getOperand(1).getReg();
   6652   Register RHS = MI.getOperand(2).getReg();
   6653   LLT Ty = MRI.getType(Res);
   6654   LLT BoolTy = Ty.changeElementSize(1);
   6655 
   6656   unsigned BW = Ty.getScalarSizeInBits();
   6657   auto Result = MIRBuilder.buildShl(Ty, LHS, RHS);
   6658   auto Orig = IsSigned ? MIRBuilder.buildAShr(Ty, Result, RHS)
   6659                        : MIRBuilder.buildLShr(Ty, Result, RHS);
   6660 
   6661   MachineInstrBuilder SatVal;
   6662   if (IsSigned) {
   6663     auto SatMin = MIRBuilder.buildConstant(Ty, APInt::getSignedMinValue(BW));
   6664     auto SatMax = MIRBuilder.buildConstant(Ty, APInt::getSignedMaxValue(BW));
   6665     auto Cmp = MIRBuilder.buildICmp(CmpInst::ICMP_SLT, BoolTy, LHS,
   6666                                     MIRBuilder.buildConstant(Ty, 0));
   6667     SatVal = MIRBuilder.buildSelect(Ty, Cmp, SatMin, SatMax);
   6668   } else {
   6669     SatVal = MIRBuilder.buildConstant(Ty, APInt::getMaxValue(BW));
   6670   }
   6671   auto Ov = MIRBuilder.buildICmp(CmpInst::ICMP_NE, BoolTy, LHS, Orig);
   6672   MIRBuilder.buildSelect(Res, Ov, SatVal, Result);
   6673 
   6674   MI.eraseFromParent();
   6675   return Legalized;
   6676 }
   6677 
   6678 LegalizerHelper::LegalizeResult
   6679 LegalizerHelper::lowerBswap(MachineInstr &MI) {
   6680   Register Dst = MI.getOperand(0).getReg();
   6681   Register Src = MI.getOperand(1).getReg();
   6682   const LLT Ty = MRI.getType(Src);
   6683   unsigned SizeInBytes = (Ty.getScalarSizeInBits() + 7) / 8;
   6684   unsigned BaseShiftAmt = (SizeInBytes - 1) * 8;
   6685 
   6686   // Swap most and least significant byte, set remaining bytes in Res to zero.
   6687   auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt);
   6688   auto LSByteShiftedLeft = MIRBuilder.buildShl(Ty, Src, ShiftAmt);
   6689   auto MSByteShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
   6690   auto Res = MIRBuilder.buildOr(Ty, MSByteShiftedRight, LSByteShiftedLeft);
   6691 
   6692   // Set i-th high/low byte in Res to i-th low/high byte from Src.
   6693   for (unsigned i = 1; i < SizeInBytes / 2; ++i) {
   6694     // AND with Mask leaves byte i unchanged and sets remaining bytes to 0.
   6695     APInt APMask(SizeInBytes * 8, 0xFF << (i * 8));
   6696     auto Mask = MIRBuilder.buildConstant(Ty, APMask);
   6697     auto ShiftAmt = MIRBuilder.buildConstant(Ty, BaseShiftAmt - 16 * i);
   6698     // Low byte shifted left to place of high byte: (Src & Mask) << ShiftAmt.
   6699     auto LoByte = MIRBuilder.buildAnd(Ty, Src, Mask);
   6700     auto LoShiftedLeft = MIRBuilder.buildShl(Ty, LoByte, ShiftAmt);
   6701     Res = MIRBuilder.buildOr(Ty, Res, LoShiftedLeft);
   6702     // High byte shifted right to place of low byte: (Src >> ShiftAmt) & Mask.
   6703     auto SrcShiftedRight = MIRBuilder.buildLShr(Ty, Src, ShiftAmt);
   6704     auto HiShiftedRight = MIRBuilder.buildAnd(Ty, SrcShiftedRight, Mask);
   6705     Res = MIRBuilder.buildOr(Ty, Res, HiShiftedRight);
   6706   }
   6707   Res.getInstr()->getOperand(0).setReg(Dst);
   6708 
   6709   MI.eraseFromParent();
   6710   return Legalized;
   6711 }
   6712 
   6713 //{ (Src & Mask) >> N } | { (Src << N) & Mask }
   6714 static MachineInstrBuilder SwapN(unsigned N, DstOp Dst, MachineIRBuilder &B,
   6715                                  MachineInstrBuilder Src, APInt Mask) {
   6716   const LLT Ty = Dst.getLLTTy(*B.getMRI());
   6717   MachineInstrBuilder C_N = B.buildConstant(Ty, N);
   6718   MachineInstrBuilder MaskLoNTo0 = B.buildConstant(Ty, Mask);
   6719   auto LHS = B.buildLShr(Ty, B.buildAnd(Ty, Src, MaskLoNTo0), C_N);
   6720   auto RHS = B.buildAnd(Ty, B.buildShl(Ty, Src, C_N), MaskLoNTo0);
   6721   return B.buildOr(Dst, LHS, RHS);
   6722 }
   6723 
   6724 LegalizerHelper::LegalizeResult
   6725 LegalizerHelper::lowerBitreverse(MachineInstr &MI) {
   6726   Register Dst = MI.getOperand(0).getReg();
   6727   Register Src = MI.getOperand(1).getReg();
   6728   const LLT Ty = MRI.getType(Src);
   6729   unsigned Size = Ty.getSizeInBits();
   6730 
   6731   MachineInstrBuilder BSWAP =
   6732       MIRBuilder.buildInstr(TargetOpcode::G_BSWAP, {Ty}, {Src});
   6733 
   6734   // swap high and low 4 bits in 8 bit blocks 7654|3210 -> 3210|7654
   6735   //    [(val & 0xF0F0F0F0) >> 4] | [(val & 0x0F0F0F0F) << 4]
   6736   // -> [(val & 0xF0F0F0F0) >> 4] | [(val << 4) & 0xF0F0F0F0]
   6737   MachineInstrBuilder Swap4 =
   6738       SwapN(4, Ty, MIRBuilder, BSWAP, APInt::getSplat(Size, APInt(8, 0xF0)));
   6739 
   6740   // swap high and low 2 bits in 4 bit blocks 32|10 76|54 -> 10|32 54|76
   6741   //    [(val & 0xCCCCCCCC) >> 2] & [(val & 0x33333333) << 2]
   6742   // -> [(val & 0xCCCCCCCC) >> 2] & [(val << 2) & 0xCCCCCCCC]
   6743   MachineInstrBuilder Swap2 =
   6744       SwapN(2, Ty, MIRBuilder, Swap4, APInt::getSplat(Size, APInt(8, 0xCC)));
   6745 
   6746   // swap high and low 1 bit in 2 bit blocks 1|0 3|2 5|4 7|6 -> 0|1 2|3 4|5 6|7
   6747   //    [(val & 0xAAAAAAAA) >> 1] & [(val & 0x55555555) << 1]
   6748   // -> [(val & 0xAAAAAAAA) >> 1] & [(val << 1) & 0xAAAAAAAA]
   6749   SwapN(1, Dst, MIRBuilder, Swap2, APInt::getSplat(Size, APInt(8, 0xAA)));
   6750 
   6751   MI.eraseFromParent();
   6752   return Legalized;
   6753 }
   6754 
   6755 LegalizerHelper::LegalizeResult
   6756 LegalizerHelper::lowerReadWriteRegister(MachineInstr &MI) {
   6757   MachineFunction &MF = MIRBuilder.getMF();
   6758 
   6759   bool IsRead = MI.getOpcode() == TargetOpcode::G_READ_REGISTER;
   6760   int NameOpIdx = IsRead ? 1 : 0;
   6761   int ValRegIndex = IsRead ? 0 : 1;
   6762 
   6763   Register ValReg = MI.getOperand(ValRegIndex).getReg();
   6764   const LLT Ty = MRI.getType(ValReg);
   6765   const MDString *RegStr = cast<MDString>(
   6766     cast<MDNode>(MI.getOperand(NameOpIdx).getMetadata())->getOperand(0));
   6767 
   6768   Register PhysReg = TLI.getRegisterByName(RegStr->getString().data(), Ty, MF);
   6769   if (!PhysReg.isValid())
   6770     return UnableToLegalize;
   6771 
   6772   if (IsRead)
   6773     MIRBuilder.buildCopy(ValReg, PhysReg);
   6774   else
   6775     MIRBuilder.buildCopy(PhysReg, ValReg);
   6776 
   6777   MI.eraseFromParent();
   6778   return Legalized;
   6779 }
   6780 
   6781 LegalizerHelper::LegalizeResult
   6782 LegalizerHelper::lowerSMULH_UMULH(MachineInstr &MI) {
   6783   bool IsSigned = MI.getOpcode() == TargetOpcode::G_SMULH;
   6784   unsigned ExtOp = IsSigned ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
   6785   Register Result = MI.getOperand(0).getReg();
   6786   LLT OrigTy = MRI.getType(Result);
   6787   auto SizeInBits = OrigTy.getScalarSizeInBits();
   6788   LLT WideTy = OrigTy.changeElementSize(SizeInBits * 2);
   6789 
   6790   auto LHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(1)});
   6791   auto RHS = MIRBuilder.buildInstr(ExtOp, {WideTy}, {MI.getOperand(2)});
   6792   auto Mul = MIRBuilder.buildMul(WideTy, LHS, RHS);
   6793   unsigned ShiftOp = IsSigned ? TargetOpcode::G_ASHR : TargetOpcode::G_LSHR;
   6794 
   6795   auto ShiftAmt = MIRBuilder.buildConstant(WideTy, SizeInBits);
   6796   auto Shifted = MIRBuilder.buildInstr(ShiftOp, {WideTy}, {Mul, ShiftAmt});
   6797   MIRBuilder.buildTrunc(Result, Shifted);
   6798 
   6799   MI.eraseFromParent();
   6800   return Legalized;
   6801 }
   6802 
   6803 LegalizerHelper::LegalizeResult LegalizerHelper::lowerSelect(MachineInstr &MI) {
   6804   // Implement vector G_SELECT in terms of XOR, AND, OR.
   6805   Register DstReg = MI.getOperand(0).getReg();
   6806   Register MaskReg = MI.getOperand(1).getReg();
   6807   Register Op1Reg = MI.getOperand(2).getReg();
   6808   Register Op2Reg = MI.getOperand(3).getReg();
   6809   LLT DstTy = MRI.getType(DstReg);
   6810   LLT MaskTy = MRI.getType(MaskReg);
   6811   LLT Op1Ty = MRI.getType(Op1Reg);
   6812   if (!DstTy.isVector())
   6813     return UnableToLegalize;
   6814 
   6815   // Vector selects can have a scalar predicate. If so, splat into a vector and
   6816   // finish for later legalization attempts to try again.
   6817   if (MaskTy.isScalar()) {
   6818     Register MaskElt = MaskReg;
   6819     if (MaskTy.getSizeInBits() < DstTy.getScalarSizeInBits())
   6820       MaskElt = MIRBuilder.buildSExt(DstTy.getElementType(), MaskElt).getReg(0);
   6821     // Generate a vector splat idiom to be pattern matched later.
   6822     auto ShufSplat = MIRBuilder.buildShuffleSplat(DstTy, MaskElt);
   6823     Observer.changingInstr(MI);
   6824     MI.getOperand(1).setReg(ShufSplat.getReg(0));
   6825     Observer.changedInstr(MI);
   6826     return Legalized;
   6827   }
   6828 
   6829   if (MaskTy.getSizeInBits() != Op1Ty.getSizeInBits()) {
   6830     return UnableToLegalize;
   6831   }
   6832 
   6833   auto NotMask = MIRBuilder.buildNot(MaskTy, MaskReg);
   6834   auto NewOp1 = MIRBuilder.buildAnd(MaskTy, Op1Reg, MaskReg);
   6835   auto NewOp2 = MIRBuilder.buildAnd(MaskTy, Op2Reg, NotMask);
   6836   MIRBuilder.buildOr(DstReg, NewOp1, NewOp2);
   6837   MI.eraseFromParent();
   6838   return Legalized;
   6839 }
   6840 
   6841 LegalizerHelper::LegalizeResult LegalizerHelper::lowerDIVREM(MachineInstr &MI) {
   6842   // Split DIVREM into individual instructions.
   6843   unsigned Opcode = MI.getOpcode();
   6844 
   6845   MIRBuilder.buildInstr(
   6846       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SDIV
   6847                                         : TargetOpcode::G_UDIV,
   6848       {MI.getOperand(0).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
   6849   MIRBuilder.buildInstr(
   6850       Opcode == TargetOpcode::G_SDIVREM ? TargetOpcode::G_SREM
   6851                                         : TargetOpcode::G_UREM,
   6852       {MI.getOperand(1).getReg()}, {MI.getOperand(2), MI.getOperand(3)});
   6853   MI.eraseFromParent();
   6854   return Legalized;
   6855 }
   6856