Home | History | Annotate | Line # | Download | only in Hexagon
      1 //===-- HexagonISelLowering.cpp - Hexagon DAG Lowering Implementation -----===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // This file implements the interfaces that Hexagon uses to lower LLVM code
     10 // into a selection DAG.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "HexagonISelLowering.h"
     15 #include "Hexagon.h"
     16 #include "HexagonMachineFunctionInfo.h"
     17 #include "HexagonRegisterInfo.h"
     18 #include "HexagonSubtarget.h"
     19 #include "HexagonTargetMachine.h"
     20 #include "HexagonTargetObjectFile.h"
     21 #include "llvm/ADT/APInt.h"
     22 #include "llvm/ADT/ArrayRef.h"
     23 #include "llvm/ADT/SmallVector.h"
     24 #include "llvm/ADT/StringSwitch.h"
     25 #include "llvm/CodeGen/CallingConvLower.h"
     26 #include "llvm/CodeGen/MachineFrameInfo.h"
     27 #include "llvm/CodeGen/MachineFunction.h"
     28 #include "llvm/CodeGen/MachineMemOperand.h"
     29 #include "llvm/CodeGen/MachineRegisterInfo.h"
     30 #include "llvm/CodeGen/RuntimeLibcalls.h"
     31 #include "llvm/CodeGen/SelectionDAG.h"
     32 #include "llvm/CodeGen/TargetCallingConv.h"
     33 #include "llvm/CodeGen/ValueTypes.h"
     34 #include "llvm/IR/BasicBlock.h"
     35 #include "llvm/IR/CallingConv.h"
     36 #include "llvm/IR/DataLayout.h"
     37 #include "llvm/IR/DerivedTypes.h"
     38 #include "llvm/IR/Function.h"
     39 #include "llvm/IR/GlobalValue.h"
     40 #include "llvm/IR/InlineAsm.h"
     41 #include "llvm/IR/Instructions.h"
     42 #include "llvm/IR/IntrinsicInst.h"
     43 #include "llvm/IR/Intrinsics.h"
     44 #include "llvm/IR/IntrinsicsHexagon.h"
     45 #include "llvm/IR/Module.h"
     46 #include "llvm/IR/Type.h"
     47 #include "llvm/IR/Value.h"
     48 #include "llvm/MC/MCRegisterInfo.h"
     49 #include "llvm/Support/Casting.h"
     50 #include "llvm/Support/CodeGen.h"
     51 #include "llvm/Support/CommandLine.h"
     52 #include "llvm/Support/Debug.h"
     53 #include "llvm/Support/ErrorHandling.h"
     54 #include "llvm/Support/MathExtras.h"
     55 #include "llvm/Support/raw_ostream.h"
     56 #include "llvm/Target/TargetMachine.h"
     57 #include <algorithm>
     58 #include <cassert>
     59 #include <cstddef>
     60 #include <cstdint>
     61 #include <limits>
     62 #include <utility>
     63 
     64 using namespace llvm;
     65 
     66 #define DEBUG_TYPE "hexagon-lowering"
     67 
     68 static cl::opt<bool> EmitJumpTables("hexagon-emit-jump-tables",
     69   cl::init(true), cl::Hidden,
     70   cl::desc("Control jump table emission on Hexagon target"));
     71 
     72 static cl::opt<bool> EnableHexSDNodeSched("enable-hexagon-sdnode-sched",
     73   cl::Hidden, cl::ZeroOrMore, cl::init(false),
     74   cl::desc("Enable Hexagon SDNode scheduling"));
     75 
     76 static cl::opt<bool> EnableFastMath("ffast-math",
     77   cl::Hidden, cl::ZeroOrMore, cl::init(false),
     78   cl::desc("Enable Fast Math processing"));
     79 
     80 static cl::opt<int> MinimumJumpTables("minimum-jump-tables",
     81   cl::Hidden, cl::ZeroOrMore, cl::init(5),
     82   cl::desc("Set minimum jump tables"));
     83 
     84 static cl::opt<int> MaxStoresPerMemcpyCL("max-store-memcpy",
     85   cl::Hidden, cl::ZeroOrMore, cl::init(6),
     86   cl::desc("Max #stores to inline memcpy"));
     87 
     88 static cl::opt<int> MaxStoresPerMemcpyOptSizeCL("max-store-memcpy-Os",
     89   cl::Hidden, cl::ZeroOrMore, cl::init(4),
     90   cl::desc("Max #stores to inline memcpy"));
     91 
     92 static cl::opt<int> MaxStoresPerMemmoveCL("max-store-memmove",
     93   cl::Hidden, cl::ZeroOrMore, cl::init(6),
     94   cl::desc("Max #stores to inline memmove"));
     95 
     96 static cl::opt<int> MaxStoresPerMemmoveOptSizeCL("max-store-memmove-Os",
     97   cl::Hidden, cl::ZeroOrMore, cl::init(4),
     98   cl::desc("Max #stores to inline memmove"));
     99 
    100 static cl::opt<int> MaxStoresPerMemsetCL("max-store-memset",
    101   cl::Hidden, cl::ZeroOrMore, cl::init(8),
    102   cl::desc("Max #stores to inline memset"));
    103 
    104 static cl::opt<int> MaxStoresPerMemsetOptSizeCL("max-store-memset-Os",
    105   cl::Hidden, cl::ZeroOrMore, cl::init(4),
    106   cl::desc("Max #stores to inline memset"));
    107 
    108 static cl::opt<bool> AlignLoads("hexagon-align-loads",
    109   cl::Hidden, cl::init(false),
    110   cl::desc("Rewrite unaligned loads as a pair of aligned loads"));
    111 
    112 static cl::opt<bool>
    113     DisableArgsMinAlignment("hexagon-disable-args-min-alignment", cl::Hidden,
    114                             cl::init(false),
    115                             cl::desc("Disable minimum alignment of 1 for "
    116                                      "arguments passed by value on stack"));
    117 
    118 namespace {
    119 
    120   class HexagonCCState : public CCState {
    121     unsigned NumNamedVarArgParams = 0;
    122 
    123   public:
    124     HexagonCCState(CallingConv::ID CC, bool IsVarArg, MachineFunction &MF,
    125                    SmallVectorImpl<CCValAssign> &locs, LLVMContext &C,
    126                    unsigned NumNamedArgs)
    127         : CCState(CC, IsVarArg, MF, locs, C),
    128           NumNamedVarArgParams(NumNamedArgs) {}
    129     unsigned getNumNamedVarArgParams() const { return NumNamedVarArgParams; }
    130   };
    131 
    132 } // end anonymous namespace
    133 
    134 
    135 // Implement calling convention for Hexagon.
    136 
    137 static bool CC_SkipOdd(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
    138                        CCValAssign::LocInfo &LocInfo,
    139                        ISD::ArgFlagsTy &ArgFlags, CCState &State) {
    140   static const MCPhysReg ArgRegs[] = {
    141     Hexagon::R0, Hexagon::R1, Hexagon::R2,
    142     Hexagon::R3, Hexagon::R4, Hexagon::R5
    143   };
    144   const unsigned NumArgRegs = array_lengthof(ArgRegs);
    145   unsigned RegNum = State.getFirstUnallocated(ArgRegs);
    146 
    147   // RegNum is an index into ArgRegs: skip a register if RegNum is odd.
    148   if (RegNum != NumArgRegs && RegNum % 2 == 1)
    149     State.AllocateReg(ArgRegs[RegNum]);
    150 
    151   // Always return false here, as this function only makes sure that the first
    152   // unallocated register has an even register number and does not actually
    153   // allocate a register for the current argument.
    154   return false;
    155 }
    156 
    157 #include "HexagonGenCallingConv.inc"
    158 
    159 
    160 SDValue
    161 HexagonTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG)
    162       const {
    163   return SDValue();
    164 }
    165 
    166 /// CreateCopyOfByValArgument - Make a copy of an aggregate at address specified
    167 /// by "Src" to address "Dst" of size "Size".  Alignment information is
    168 /// specified by the specific parameter attribute. The copy will be passed as
    169 /// a byval function parameter.  Sometimes what we are copying is the end of a
    170 /// larger object, the part that does not fit in registers.
    171 static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
    172                                          SDValue Chain, ISD::ArgFlagsTy Flags,
    173                                          SelectionDAG &DAG, const SDLoc &dl) {
    174   SDValue SizeNode = DAG.getConstant(Flags.getByValSize(), dl, MVT::i32);
    175   return DAG.getMemcpy(
    176       Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
    177       /*isVolatile=*/false, /*AlwaysInline=*/false,
    178       /*isTailCall=*/false, MachinePointerInfo(), MachinePointerInfo());
    179 }
    180 
    181 bool
    182 HexagonTargetLowering::CanLowerReturn(
    183     CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
    184     const SmallVectorImpl<ISD::OutputArg> &Outs,
    185     LLVMContext &Context) const {
    186   SmallVector<CCValAssign, 16> RVLocs;
    187   CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
    188 
    189   if (MF.getSubtarget<HexagonSubtarget>().useHVXOps())
    190     return CCInfo.CheckReturn(Outs, RetCC_Hexagon_HVX);
    191   return CCInfo.CheckReturn(Outs, RetCC_Hexagon);
    192 }
    193 
    194 // LowerReturn - Lower ISD::RET. If a struct is larger than 8 bytes and is
    195 // passed by value, the function prototype is modified to return void and
    196 // the value is stored in memory pointed by a pointer passed by caller.
    197 SDValue
    198 HexagonTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
    199                                    bool IsVarArg,
    200                                    const SmallVectorImpl<ISD::OutputArg> &Outs,
    201                                    const SmallVectorImpl<SDValue> &OutVals,
    202                                    const SDLoc &dl, SelectionDAG &DAG) const {
    203   // CCValAssign - represent the assignment of the return value to locations.
    204   SmallVector<CCValAssign, 16> RVLocs;
    205 
    206   // CCState - Info about the registers and stack slot.
    207   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
    208                  *DAG.getContext());
    209 
    210   // Analyze return values of ISD::RET
    211   if (Subtarget.useHVXOps())
    212     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon_HVX);
    213   else
    214     CCInfo.AnalyzeReturn(Outs, RetCC_Hexagon);
    215 
    216   SDValue Flag;
    217   SmallVector<SDValue, 4> RetOps(1, Chain);
    218 
    219   // Copy the result values into the output registers.
    220   for (unsigned i = 0; i != RVLocs.size(); ++i) {
    221     CCValAssign &VA = RVLocs[i];
    222     SDValue Val = OutVals[i];
    223 
    224     switch (VA.getLocInfo()) {
    225       default:
    226         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
    227         llvm_unreachable("Unknown loc info!");
    228       case CCValAssign::Full:
    229         break;
    230       case CCValAssign::BCvt:
    231         Val = DAG.getBitcast(VA.getLocVT(), Val);
    232         break;
    233       case CCValAssign::SExt:
    234         Val = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Val);
    235         break;
    236       case CCValAssign::ZExt:
    237         Val = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Val);
    238         break;
    239       case CCValAssign::AExt:
    240         Val = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Val);
    241         break;
    242     }
    243 
    244     Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), Val, Flag);
    245 
    246     // Guarantee that all emitted copies are stuck together with flags.
    247     Flag = Chain.getValue(1);
    248     RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
    249   }
    250 
    251   RetOps[0] = Chain;  // Update chain.
    252 
    253   // Add the flag if we have it.
    254   if (Flag.getNode())
    255     RetOps.push_back(Flag);
    256 
    257   return DAG.getNode(HexagonISD::RET_FLAG, dl, MVT::Other, RetOps);
    258 }
    259 
    260 bool HexagonTargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
    261   // If either no tail call or told not to tail call at all, don't.
    262   return CI->isTailCall();
    263 }
    264 
    265 Register HexagonTargetLowering::getRegisterByName(
    266       const char* RegName, LLT VT, const MachineFunction &) const {
    267   // Just support r19, the linux kernel uses it.
    268   Register Reg = StringSwitch<Register>(RegName)
    269                      .Case("r0", Hexagon::R0)
    270                      .Case("r1", Hexagon::R1)
    271                      .Case("r2", Hexagon::R2)
    272                      .Case("r3", Hexagon::R3)
    273                      .Case("r4", Hexagon::R4)
    274                      .Case("r5", Hexagon::R5)
    275                      .Case("r6", Hexagon::R6)
    276                      .Case("r7", Hexagon::R7)
    277                      .Case("r8", Hexagon::R8)
    278                      .Case("r9", Hexagon::R9)
    279                      .Case("r10", Hexagon::R10)
    280                      .Case("r11", Hexagon::R11)
    281                      .Case("r12", Hexagon::R12)
    282                      .Case("r13", Hexagon::R13)
    283                      .Case("r14", Hexagon::R14)
    284                      .Case("r15", Hexagon::R15)
    285                      .Case("r16", Hexagon::R16)
    286                      .Case("r17", Hexagon::R17)
    287                      .Case("r18", Hexagon::R18)
    288                      .Case("r19", Hexagon::R19)
    289                      .Case("r20", Hexagon::R20)
    290                      .Case("r21", Hexagon::R21)
    291                      .Case("r22", Hexagon::R22)
    292                      .Case("r23", Hexagon::R23)
    293                      .Case("r24", Hexagon::R24)
    294                      .Case("r25", Hexagon::R25)
    295                      .Case("r26", Hexagon::R26)
    296                      .Case("r27", Hexagon::R27)
    297                      .Case("r28", Hexagon::R28)
    298                      .Case("r29", Hexagon::R29)
    299                      .Case("r30", Hexagon::R30)
    300                      .Case("r31", Hexagon::R31)
    301                      .Case("r1:0", Hexagon::D0)
    302                      .Case("r3:2", Hexagon::D1)
    303                      .Case("r5:4", Hexagon::D2)
    304                      .Case("r7:6", Hexagon::D3)
    305                      .Case("r9:8", Hexagon::D4)
    306                      .Case("r11:10", Hexagon::D5)
    307                      .Case("r13:12", Hexagon::D6)
    308                      .Case("r15:14", Hexagon::D7)
    309                      .Case("r17:16", Hexagon::D8)
    310                      .Case("r19:18", Hexagon::D9)
    311                      .Case("r21:20", Hexagon::D10)
    312                      .Case("r23:22", Hexagon::D11)
    313                      .Case("r25:24", Hexagon::D12)
    314                      .Case("r27:26", Hexagon::D13)
    315                      .Case("r29:28", Hexagon::D14)
    316                      .Case("r31:30", Hexagon::D15)
    317                      .Case("sp", Hexagon::R29)
    318                      .Case("fp", Hexagon::R30)
    319                      .Case("lr", Hexagon::R31)
    320                      .Case("p0", Hexagon::P0)
    321                      .Case("p1", Hexagon::P1)
    322                      .Case("p2", Hexagon::P2)
    323                      .Case("p3", Hexagon::P3)
    324                      .Case("sa0", Hexagon::SA0)
    325                      .Case("lc0", Hexagon::LC0)
    326                      .Case("sa1", Hexagon::SA1)
    327                      .Case("lc1", Hexagon::LC1)
    328                      .Case("m0", Hexagon::M0)
    329                      .Case("m1", Hexagon::M1)
    330                      .Case("usr", Hexagon::USR)
    331                      .Case("ugp", Hexagon::UGP)
    332                      .Case("cs0", Hexagon::CS0)
    333                      .Case("cs1", Hexagon::CS1)
    334                      .Default(Register());
    335   if (Reg)
    336     return Reg;
    337 
    338   report_fatal_error("Invalid register name global variable");
    339 }
    340 
    341 /// LowerCallResult - Lower the result values of an ISD::CALL into the
    342 /// appropriate copies out of appropriate physical registers.  This assumes that
    343 /// Chain/Glue are the input chain/glue to use, and that TheCall is the call
    344 /// being lowered. Returns a SDNode with the same number of values as the
    345 /// ISD::CALL.
    346 SDValue HexagonTargetLowering::LowerCallResult(
    347     SDValue Chain, SDValue Glue, CallingConv::ID CallConv, bool IsVarArg,
    348     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    349     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    350     const SmallVectorImpl<SDValue> &OutVals, SDValue Callee) const {
    351   // Assign locations to each value returned by this call.
    352   SmallVector<CCValAssign, 16> RVLocs;
    353 
    354   CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
    355                  *DAG.getContext());
    356 
    357   if (Subtarget.useHVXOps())
    358     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon_HVX);
    359   else
    360     CCInfo.AnalyzeCallResult(Ins, RetCC_Hexagon);
    361 
    362   // Copy all of the result registers out of their specified physreg.
    363   for (unsigned i = 0; i != RVLocs.size(); ++i) {
    364     SDValue RetVal;
    365     if (RVLocs[i].getValVT() == MVT::i1) {
    366       // Return values of type MVT::i1 require special handling. The reason
    367       // is that MVT::i1 is associated with the PredRegs register class, but
    368       // values of that type are still returned in R0. Generate an explicit
    369       // copy into a predicate register from R0, and treat the value of the
    370       // predicate register as the call result.
    371       auto &MRI = DAG.getMachineFunction().getRegInfo();
    372       SDValue FR0 = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
    373                                        MVT::i32, Glue);
    374       // FR0 = (Value, Chain, Glue)
    375       Register PredR = MRI.createVirtualRegister(&Hexagon::PredRegsRegClass);
    376       SDValue TPR = DAG.getCopyToReg(FR0.getValue(1), dl, PredR,
    377                                      FR0.getValue(0), FR0.getValue(2));
    378       // TPR = (Chain, Glue)
    379       // Don't glue this CopyFromReg, because it copies from a virtual
    380       // register. If it is glued to the call, InstrEmitter will add it
    381       // as an implicit def to the call (EmitMachineNode).
    382       RetVal = DAG.getCopyFromReg(TPR.getValue(0), dl, PredR, MVT::i1);
    383       Glue = TPR.getValue(1);
    384       Chain = TPR.getValue(0);
    385     } else {
    386       RetVal = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
    387                                   RVLocs[i].getValVT(), Glue);
    388       Glue = RetVal.getValue(2);
    389       Chain = RetVal.getValue(1);
    390     }
    391     InVals.push_back(RetVal.getValue(0));
    392   }
    393 
    394   return Chain;
    395 }
    396 
    397 /// LowerCall - Functions arguments are copied from virtual regs to
    398 /// (physical regs)/(stack frame), CALLSEQ_START and CALLSEQ_END are emitted.
    399 SDValue
    400 HexagonTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
    401                                  SmallVectorImpl<SDValue> &InVals) const {
    402   SelectionDAG &DAG                     = CLI.DAG;
    403   SDLoc &dl                             = CLI.DL;
    404   SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
    405   SmallVectorImpl<SDValue> &OutVals     = CLI.OutVals;
    406   SmallVectorImpl<ISD::InputArg> &Ins   = CLI.Ins;
    407   SDValue Chain                         = CLI.Chain;
    408   SDValue Callee                        = CLI.Callee;
    409   CallingConv::ID CallConv              = CLI.CallConv;
    410   bool IsVarArg                         = CLI.IsVarArg;
    411   bool DoesNotReturn                    = CLI.DoesNotReturn;
    412 
    413   bool IsStructRet    = Outs.empty() ? false : Outs[0].Flags.isSRet();
    414   MachineFunction &MF = DAG.getMachineFunction();
    415   MachineFrameInfo &MFI = MF.getFrameInfo();
    416   auto PtrVT = getPointerTy(MF.getDataLayout());
    417 
    418   unsigned NumParams = CLI.CB ? CLI.CB->getFunctionType()->getNumParams() : 0;
    419   if (GlobalAddressSDNode *GAN = dyn_cast<GlobalAddressSDNode>(Callee))
    420     Callee = DAG.getTargetGlobalAddress(GAN->getGlobal(), dl, MVT::i32);
    421 
    422   // Linux ABI treats var-arg calls the same way as regular ones.
    423   bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
    424 
    425   // Analyze operands of the call, assigning locations to each operand.
    426   SmallVector<CCValAssign, 16> ArgLocs;
    427   HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs, *DAG.getContext(),
    428                         NumParams);
    429 
    430   if (Subtarget.useHVXOps())
    431     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_HVX);
    432   else if (DisableArgsMinAlignment)
    433     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon_Legacy);
    434   else
    435     CCInfo.AnalyzeCallOperands(Outs, CC_Hexagon);
    436 
    437   if (CLI.IsTailCall) {
    438     bool StructAttrFlag = MF.getFunction().hasStructRetAttr();
    439     CLI.IsTailCall = IsEligibleForTailCallOptimization(Callee, CallConv,
    440                         IsVarArg, IsStructRet, StructAttrFlag, Outs,
    441                         OutVals, Ins, DAG);
    442     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    443       CCValAssign &VA = ArgLocs[i];
    444       if (VA.isMemLoc()) {
    445         CLI.IsTailCall = false;
    446         break;
    447       }
    448     }
    449     LLVM_DEBUG(dbgs() << (CLI.IsTailCall ? "Eligible for Tail Call\n"
    450                                          : "Argument must be passed on stack. "
    451                                            "Not eligible for Tail Call\n"));
    452   }
    453   // Get a count of how many bytes are to be pushed on the stack.
    454   unsigned NumBytes = CCInfo.getNextStackOffset();
    455   SmallVector<std::pair<unsigned, SDValue>, 16> RegsToPass;
    456   SmallVector<SDValue, 8> MemOpChains;
    457 
    458   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
    459   SDValue StackPtr =
    460       DAG.getCopyFromReg(Chain, dl, HRI.getStackRegister(), PtrVT);
    461 
    462   bool NeedsArgAlign = false;
    463   Align LargestAlignSeen;
    464   // Walk the register/memloc assignments, inserting copies/loads.
    465   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    466     CCValAssign &VA = ArgLocs[i];
    467     SDValue Arg = OutVals[i];
    468     ISD::ArgFlagsTy Flags = Outs[i].Flags;
    469     // Record if we need > 8 byte alignment on an argument.
    470     bool ArgAlign = Subtarget.isHVXVectorType(VA.getValVT());
    471     NeedsArgAlign |= ArgAlign;
    472 
    473     // Promote the value if needed.
    474     switch (VA.getLocInfo()) {
    475       default:
    476         // Loc info must be one of Full, BCvt, SExt, ZExt, or AExt.
    477         llvm_unreachable("Unknown loc info!");
    478       case CCValAssign::Full:
    479         break;
    480       case CCValAssign::BCvt:
    481         Arg = DAG.getBitcast(VA.getLocVT(), Arg);
    482         break;
    483       case CCValAssign::SExt:
    484         Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), Arg);
    485         break;
    486       case CCValAssign::ZExt:
    487         Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), Arg);
    488         break;
    489       case CCValAssign::AExt:
    490         Arg = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), Arg);
    491         break;
    492     }
    493 
    494     if (VA.isMemLoc()) {
    495       unsigned LocMemOffset = VA.getLocMemOffset();
    496       SDValue MemAddr = DAG.getConstant(LocMemOffset, dl,
    497                                         StackPtr.getValueType());
    498       MemAddr = DAG.getNode(ISD::ADD, dl, MVT::i32, StackPtr, MemAddr);
    499       if (ArgAlign)
    500         LargestAlignSeen = std::max(
    501             LargestAlignSeen, Align(VA.getLocVT().getStoreSizeInBits() / 8));
    502       if (Flags.isByVal()) {
    503         // The argument is a struct passed by value. According to LLVM, "Arg"
    504         // is a pointer.
    505         MemOpChains.push_back(CreateCopyOfByValArgument(Arg, MemAddr, Chain,
    506                                                         Flags, DAG, dl));
    507       } else {
    508         MachinePointerInfo LocPI = MachinePointerInfo::getStack(
    509             DAG.getMachineFunction(), LocMemOffset);
    510         SDValue S = DAG.getStore(Chain, dl, Arg, MemAddr, LocPI);
    511         MemOpChains.push_back(S);
    512       }
    513       continue;
    514     }
    515 
    516     // Arguments that can be passed on register must be kept at RegsToPass
    517     // vector.
    518     if (VA.isRegLoc())
    519       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    520   }
    521 
    522   if (NeedsArgAlign && Subtarget.hasV60Ops()) {
    523     LLVM_DEBUG(dbgs() << "Function needs byte stack align due to call args\n");
    524     Align VecAlign = HRI.getSpillAlign(Hexagon::HvxVRRegClass);
    525     LargestAlignSeen = std::max(LargestAlignSeen, VecAlign);
    526     MFI.ensureMaxAlignment(LargestAlignSeen);
    527   }
    528   // Transform all store nodes into one single node because all store
    529   // nodes are independent of each other.
    530   if (!MemOpChains.empty())
    531     Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
    532 
    533   SDValue Glue;
    534   if (!CLI.IsTailCall) {
    535     Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, dl);
    536     Glue = Chain.getValue(1);
    537   }
    538 
    539   // Build a sequence of copy-to-reg nodes chained together with token
    540   // chain and flag operands which copy the outgoing args into registers.
    541   // The Glue is necessary since all emitted instructions must be
    542   // stuck together.
    543   if (!CLI.IsTailCall) {
    544     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    545       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
    546                                RegsToPass[i].second, Glue);
    547       Glue = Chain.getValue(1);
    548     }
    549   } else {
    550     // For tail calls lower the arguments to the 'real' stack slot.
    551     //
    552     // Force all the incoming stack arguments to be loaded from the stack
    553     // before any new outgoing arguments are stored to the stack, because the
    554     // outgoing stack slots may alias the incoming argument stack slots, and
    555     // the alias isn't otherwise explicit. This is slightly more conservative
    556     // than necessary, because it means that each store effectively depends
    557     // on every argument instead of just those arguments it would clobber.
    558     //
    559     // Do not flag preceding copytoreg stuff together with the following stuff.
    560     Glue = SDValue();
    561     for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    562       Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
    563                                RegsToPass[i].second, Glue);
    564       Glue = Chain.getValue(1);
    565     }
    566     Glue = SDValue();
    567   }
    568 
    569   bool LongCalls = MF.getSubtarget<HexagonSubtarget>().useLongCalls();
    570   unsigned Flags = LongCalls ? HexagonII::HMOTF_ConstExtended : 0;
    571 
    572   // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
    573   // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
    574   // node so that legalize doesn't hack it.
    575   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    576     Callee = DAG.getTargetGlobalAddress(G->getGlobal(), dl, PtrVT, 0, Flags);
    577   } else if (ExternalSymbolSDNode *S =
    578              dyn_cast<ExternalSymbolSDNode>(Callee)) {
    579     Callee = DAG.getTargetExternalSymbol(S->getSymbol(), PtrVT, Flags);
    580   }
    581 
    582   // Returns a chain & a flag for retval copy to use.
    583   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
    584   SmallVector<SDValue, 8> Ops;
    585   Ops.push_back(Chain);
    586   Ops.push_back(Callee);
    587 
    588   // Add argument registers to the end of the list so that they are
    589   // known live into the call.
    590   for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
    591     Ops.push_back(DAG.getRegister(RegsToPass[i].first,
    592                                   RegsToPass[i].second.getValueType()));
    593   }
    594 
    595   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallConv);
    596   assert(Mask && "Missing call preserved mask for calling convention");
    597   Ops.push_back(DAG.getRegisterMask(Mask));
    598 
    599   if (Glue.getNode())
    600     Ops.push_back(Glue);
    601 
    602   if (CLI.IsTailCall) {
    603     MFI.setHasTailCall();
    604     return DAG.getNode(HexagonISD::TC_RETURN, dl, NodeTys, Ops);
    605   }
    606 
    607   // Set this here because we need to know this for "hasFP" in frame lowering.
    608   // The target-independent code calls getFrameRegister before setting it, and
    609   // getFrameRegister uses hasFP to determine whether the function has FP.
    610   MFI.setHasCalls(true);
    611 
    612   unsigned OpCode = DoesNotReturn ? HexagonISD::CALLnr : HexagonISD::CALL;
    613   Chain = DAG.getNode(OpCode, dl, NodeTys, Ops);
    614   Glue = Chain.getValue(1);
    615 
    616   // Create the CALLSEQ_END node.
    617   Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, true),
    618                              DAG.getIntPtrConstant(0, dl, true), Glue, dl);
    619   Glue = Chain.getValue(1);
    620 
    621   // Handle result values, copying them out of physregs into vregs that we
    622   // return.
    623   return LowerCallResult(Chain, Glue, CallConv, IsVarArg, Ins, dl, DAG,
    624                          InVals, OutVals, Callee);
    625 }
    626 
    627 /// Returns true by value, base pointer and offset pointer and addressing
    628 /// mode by reference if this node can be combined with a load / store to
    629 /// form a post-indexed load / store.
    630 bool HexagonTargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
    631       SDValue &Base, SDValue &Offset, ISD::MemIndexedMode &AM,
    632       SelectionDAG &DAG) const {
    633   LSBaseSDNode *LSN = dyn_cast<LSBaseSDNode>(N);
    634   if (!LSN)
    635     return false;
    636   EVT VT = LSN->getMemoryVT();
    637   if (!VT.isSimple())
    638     return false;
    639   bool IsLegalType = VT == MVT::i8 || VT == MVT::i16 || VT == MVT::i32 ||
    640                      VT == MVT::i64 || VT == MVT::f32 || VT == MVT::f64 ||
    641                      VT == MVT::v2i16 || VT == MVT::v2i32 || VT == MVT::v4i8 ||
    642                      VT == MVT::v4i16 || VT == MVT::v8i8 ||
    643                      Subtarget.isHVXVectorType(VT.getSimpleVT());
    644   if (!IsLegalType)
    645     return false;
    646 
    647   if (Op->getOpcode() != ISD::ADD)
    648     return false;
    649   Base = Op->getOperand(0);
    650   Offset = Op->getOperand(1);
    651   if (!isa<ConstantSDNode>(Offset.getNode()))
    652     return false;
    653   AM = ISD::POST_INC;
    654 
    655   int32_t V = cast<ConstantSDNode>(Offset.getNode())->getSExtValue();
    656   return Subtarget.getInstrInfo()->isValidAutoIncImm(VT, V);
    657 }
    658 
        /// Inspects an INLINEASM / INLINEASM_BR node for operands that define or
        /// clobber the return-address register and, if one is found, records that
        /// fact in HexagonMachineFunctionInfo via setHasClobberLR(true).
        ///
        /// The node itself is never modified: \p Op is returned unchanged on every
        /// path. Nothing is scanned if the opcode is not an inline-asm opcode or if
        /// the function is already marked as clobbering LR.
    659 SDValue
    660 HexagonTargetLowering::LowerINLINEASM(SDValue Op, SelectionDAG &DAG) const {
    661   MachineFunction &MF = DAG.getMachineFunction();
    662   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
    663   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
    664   unsigned LR = HRI.getRARegister();
    665
    666   if ((Op.getOpcode() != ISD::INLINEASM &&
    667        Op.getOpcode() != ISD::INLINEASM_BR) || HMFI.hasClobberLR())
    668     return Op;
    669
    670   unsigned NumOps = Op.getNumOperands();
    671   if (Op.getOperand(NumOps-1).getValueType() == MVT::Glue)
    672     --NumOps;  // Ignore the flag operand.
    673
          // Walk the operand groups. Each group starts with a flag word that encodes
          // the operand kind and how many register operands follow it; the loop
          // index is advanced inside the body, group by group.
    674   for (unsigned i = InlineAsm::Op_FirstOperand; i != NumOps;) {
    675     unsigned Flags = cast<ConstantSDNode>(Op.getOperand(i))->getZExtValue();
    676     unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags);
    677     ++i;  // Skip the ID value.
    678
    679     switch (InlineAsm::getKind(Flags)) {
    680       default:
    681         llvm_unreachable("Bad flags!");
              // Uses, immediates and memory operands cannot clobber LR: skip them.
    682       case InlineAsm::Kind_RegUse:
    683       case InlineAsm::Kind_Imm:
    684       case InlineAsm::Kind_Mem:
    685         i += NumVals;
    686         break;
              // Clobbers and definitions: check every register in the group.
    687       case InlineAsm::Kind_Clobber:
    688       case InlineAsm::Kind_RegDef:
    689       case InlineAsm::Kind_RegDefEarlyClobber: {
    690         for (; NumVals; --NumVals, ++i) {
    691           unsigned Reg = cast<RegisterSDNode>(Op.getOperand(i))->getReg();
    692           if (Reg != LR)
    693             continue;
                  // LR is written by the asm; remember it and stop scanning.
    694           HMFI.setHasClobberLR(true);
    695           return Op;
    696         }
    697         break;
    698       }
    699     }
    700   }
    701
    702   return Op;
    703 }
    704 
    705 // Need to transform ISD::PREFETCH into something that doesn't inherit
    706 // all of the properties of ISD::PREFETCH, specifically SDNPMayLoad and
    707 // SDNPMayStore.
        //
        // The result is a HexagonISD::DCFETCH node that takes the original chain
        // (operand 0), the prefetch address (operand 1) and a zero i32 offset, and
        // produces only a chain (MVT::Other). Any further operands of the PREFETCH
        // node are not looked at.
    708 SDValue HexagonTargetLowering::LowerPREFETCH(SDValue Op,
    709                                              SelectionDAG &DAG) const {
    710   SDValue Chain = Op.getOperand(0);
    711   SDValue Addr = Op.getOperand(1);
    712   // Lower it to DCFETCH($reg, #0).  A "pat" will try to merge the offset in,
    713   // if the "reg" is fed by an "add".
    714   SDLoc DL(Op);
    715   SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    716   return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
    717 }
    718 
    719 // Custom-handle ISD::READCYCLECOUNTER because the target-independent SDNode
    720 // is marked as having side-effects, while the register read on Hexagon does
    721 // not have any. TableGen refuses to accept the direct pattern from that node
    722 // to the A4_tfrcpp.
        //
        // The replacement is a HexagonISD::READCYCLE node fed by the original chain
        // (operand 0); it produces an i64 value and an output chain.
    723 SDValue HexagonTargetLowering::LowerREADCYCLECOUNTER(SDValue Op,
    724                                                      SelectionDAG &DAG) const {
    725   SDValue Chain = Op.getOperand(0);
    726   SDLoc dl(Op);
          // Result 0: the i64 counter value; result 1: the chain.
    727   SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other);
    728   return DAG.getNode(HexagonISD::READCYCLE, dl, VTs, Chain);
    729 }
    730 
        /// Custom lowering for intrinsics that have a chain but no result value.
        ///
        /// Only Intrinsic::hexagon_prefetch is handled here: it becomes a
        /// HexagonISD::DCFETCH of the address in operand 2 with a zero offset. For
        /// every other intrinsic an empty SDValue is returned.
    731 SDValue HexagonTargetLowering::LowerINTRINSIC_VOID(SDValue Op,
    732       SelectionDAG &DAG) const {
    733   SDValue Chain = Op.getOperand(0);
          // Operand 1 holds the intrinsic ID.
    734   unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    735   // Lower the hexagon_prefetch builtin to DCFETCH, as above.
    736   if (IntNo == Intrinsic::hexagon_prefetch) {
    737     SDValue Addr = Op.getOperand(2);
    738     SDLoc DL(Op);
    739     SDValue Zero = DAG.getConstant(0, DL, MVT::i32);
    740     return DAG.getNode(HexagonISD::DCFETCH, DL, MVT::Other, Chain, Addr, Zero);
    741   }
    742   return SDValue();
    743 }
    744 
        /// Lowers ISD::DYNAMIC_STACKALLOC to a HexagonISD::ALLOCA node.
        ///
        /// The operands are the chain (0), the allocation size (1) and the requested
        /// alignment (2), which is expected to be a constant. An alignment of zero
        /// is replaced by the stack alignment reported by the frame lowering. The
        /// new node yields an i32 value and a chain; all uses of \p Op are
        /// redirected to it and it is returned.
    745 SDValue
    746 HexagonTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
    747                                                SelectionDAG &DAG) const {
    748   SDValue Chain = Op.getOperand(0);
    749   SDValue Size = Op.getOperand(1);
    750   SDValue Align = Op.getOperand(2);
    751   SDLoc dl(Op);
    752
          // NOTE(review): the null check is assert-only; with assertions disabled a
          // non-constant alignment would be dereferenced as a null pointer below.
    753   ConstantSDNode *AlignConst = dyn_cast<ConstantSDNode>(Align);
    754   assert(AlignConst && "Non-constant Align in LowerDYNAMIC_STACKALLOC");
    755
    756   unsigned A = AlignConst->getSExtValue();
    757   auto &HFI = *Subtarget.getFrameLowering();
    758   // "Zero" means natural stack alignment.
    759   if (A == 0)
    760     A = HFI.getStackAlign().value();
    761
    762   LLVM_DEBUG({
    763     dbgs () << __func__ << " Align: " << A << " Size: ";
    764     Size.getNode()->dump(&DAG);
    765     dbgs() << "\n";
    766   });
    767
          // ALLOCA(chain, size, align) -> (i32 value, chain).
    768   SDValue AC = DAG.getConstant(A, dl, MVT::i32);
    769   SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
    770   SDValue AA = DAG.getNode(HexagonISD::ALLOCA, dl, VTs, Chain, Size, AC);
    771
    772   DAG.ReplaceAllUsesOfValueWith(Op, AA);
    773   return AA;
    774 }
    775 
    776 SDValue HexagonTargetLowering::LowerFormalArguments(
    777     SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    778     const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    779     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
    780   MachineFunction &MF = DAG.getMachineFunction();
    781   MachineFrameInfo &MFI = MF.getFrameInfo();
    782   MachineRegisterInfo &MRI = MF.getRegInfo();
    783 
    784   // Linux ABI treats var-arg calls the same way as regular ones.
    785   bool TreatAsVarArg = !Subtarget.isEnvironmentMusl() && IsVarArg;
    786 
    787   // Assign locations to all of the incoming arguments.
    788   SmallVector<CCValAssign, 16> ArgLocs;
    789   HexagonCCState CCInfo(CallConv, TreatAsVarArg, MF, ArgLocs,
    790                         *DAG.getContext(),
    791                         MF.getFunction().getFunctionType()->getNumParams());
    792 
    793   if (Subtarget.useHVXOps())
    794     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_HVX);
    795   else if (DisableArgsMinAlignment)
    796     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon_Legacy);
    797   else
    798     CCInfo.AnalyzeFormalArguments(Ins, CC_Hexagon);
    799 
    800   // For LLVM, in the case when returning a struct by value (>8byte),
    801   // the first argument is a pointer that points to the location on caller's
    802   // stack where the return value will be stored. For Hexagon, the location on
    803   // caller's stack is passed only when the struct size is smaller than (and
    804   // equal to) 8 bytes. If not, no address will be passed into callee and
    805   // callee return the result direclty through R0/R1.
    806   auto NextSingleReg = [] (const TargetRegisterClass &RC, unsigned Reg) {
    807     switch (RC.getID()) {
    808     case Hexagon::IntRegsRegClassID:
    809       return Reg - Hexagon::R0 + 1;
    810     case Hexagon::DoubleRegsRegClassID:
    811       return (Reg - Hexagon::D0 + 1) * 2;
    812     case Hexagon::HvxVRRegClassID:
    813       return Reg - Hexagon::V0 + 1;
    814     case Hexagon::HvxWRRegClassID:
    815       return (Reg - Hexagon::W0 + 1) * 2;
    816     }
    817     llvm_unreachable("Unexpected register class");
    818   };
    819 
    820   auto &HFL = const_cast<HexagonFrameLowering&>(*Subtarget.getFrameLowering());
    821   auto &HMFI = *MF.getInfo<HexagonMachineFunctionInfo>();
    822   HFL.FirstVarArgSavedReg = 0;
    823   HMFI.setFirstNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
    824 
    825   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
    826     CCValAssign &VA = ArgLocs[i];
    827     ISD::ArgFlagsTy Flags = Ins[i].Flags;
    828     bool ByVal = Flags.isByVal();
    829 
    830     // Arguments passed in registers:
    831     // 1. 32- and 64-bit values and HVX vectors are passed directly,
    832     // 2. Large structs are passed via an address, and the address is
    833     //    passed in a register.
    834     if (VA.isRegLoc() && ByVal && Flags.getByValSize() <= 8)
    835       llvm_unreachable("ByValSize must be bigger than 8 bytes");
    836 
    837     bool InReg = VA.isRegLoc() &&
    838                  (!ByVal || (ByVal && Flags.getByValSize() > 8));
    839 
    840     if (InReg) {
    841       MVT RegVT = VA.getLocVT();
    842       if (VA.getLocInfo() == CCValAssign::BCvt)
    843         RegVT = VA.getValVT();
    844 
    845       const TargetRegisterClass *RC = getRegClassFor(RegVT);
    846       Register VReg = MRI.createVirtualRegister(RC);
    847       SDValue Copy = DAG.getCopyFromReg(Chain, dl, VReg, RegVT);
    848 
    849       // Treat values of type MVT::i1 specially: they are passed in
    850       // registers of type i32, but they need to remain as values of
    851       // type i1 for consistency of the argument lowering.
    852       if (VA.getValVT() == MVT::i1) {
    853         assert(RegVT.getSizeInBits() <= 32);
    854         SDValue T = DAG.getNode(ISD::AND, dl, RegVT,
    855                                 Copy, DAG.getConstant(1, dl, RegVT));
    856         Copy = DAG.getSetCC(dl, MVT::i1, T, DAG.getConstant(0, dl, RegVT),
    857                             ISD::SETNE);
    858       } else {
    859 #ifndef NDEBUG
    860         unsigned RegSize = RegVT.getSizeInBits();
    861         assert(RegSize == 32 || RegSize == 64 ||
    862                Subtarget.isHVXVectorType(RegVT));
    863 #endif
    864       }
    865       InVals.push_back(Copy);
    866       MRI.addLiveIn(VA.getLocReg(), VReg);
    867       HFL.FirstVarArgSavedReg = NextSingleReg(*RC, VA.getLocReg());
    868     } else {
    869       assert(VA.isMemLoc() && "Argument should be passed in memory");
    870 
    871       // If it's a byval parameter, then we need to compute the
    872       // "real" size, not the size of the pointer.
    873       unsigned ObjSize = Flags.isByVal()
    874                             ? Flags.getByValSize()
    875                             : VA.getLocVT().getStoreSizeInBits() / 8;
    876 
    877       // Create the frame index object for this incoming parameter.
    878       int Offset = HEXAGON_LRFP_SIZE + VA.getLocMemOffset();
    879       int FI = MFI.CreateFixedObject(ObjSize, Offset, true);
    880       SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
    881 
    882       if (Flags.isByVal()) {
    883         // If it's a pass-by-value aggregate, then do not dereference the stack
    884         // location. Instead, we should generate a reference to the stack
    885         // location.
    886         InVals.push_back(FIN);
    887       } else {
    888         SDValue L = DAG.getLoad(VA.getValVT(), dl, Chain, FIN,
    889                                 MachinePointerInfo::getFixedStack(MF, FI, 0));
    890         InVals.push_back(L);
    891       }
    892     }
    893   }
    894 
    895   if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    896     for (int i = HFL.FirstVarArgSavedReg; i < 6; i++)
    897       MRI.addLiveIn(Hexagon::R0+i);
    898   }
    899 
    900   if (IsVarArg && Subtarget.isEnvironmentMusl()) {
    901     HMFI.setFirstNamedArgFrameIndex(HMFI.getFirstNamedArgFrameIndex() - 1);
    902     HMFI.setLastNamedArgFrameIndex(-int(MFI.getNumFixedObjects()));
    903 
    904     // Create Frame index for the start of register saved area.
    905     int NumVarArgRegs = 6 - HFL.FirstVarArgSavedReg;
    906     bool RequiresPadding = (NumVarArgRegs & 1);
    907     int RegSaveAreaSizePlusPadding = RequiresPadding
    908                                         ? (NumVarArgRegs + 1) * 4
    909                                         : NumVarArgRegs * 4;
    910 
    911     if (RegSaveAreaSizePlusPadding > 0) {
    912       // The offset to saved register area should be 8 byte aligned.
    913       int RegAreaStart = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    914       if (!(RegAreaStart % 8))
    915         RegAreaStart = (RegAreaStart + 7) & -8;
    916 
    917       int RegSaveAreaFrameIndex =
    918         MFI.CreateFixedObject(RegSaveAreaSizePlusPadding, RegAreaStart, true);
    919       HMFI.setRegSavedAreaStartFrameIndex(RegSaveAreaFrameIndex);
    920 
    921       // This will point to the next argument passed via stack.
    922       int Offset = RegAreaStart + RegSaveAreaSizePlusPadding;
    923       int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    924       HMFI.setVarArgsFrameIndex(FI);
    925     } else {
    926       // This will point to the next argument passed via stack, when
    927       // there is no saved register area.
    928       int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    929       int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    930       HMFI.setRegSavedAreaStartFrameIndex(FI);
    931       HMFI.setVarArgsFrameIndex(FI);
    932     }
    933   }
    934 
    935 
    936   if (IsVarArg && !Subtarget.isEnvironmentMusl()) {
    937     // This will point to the next argument passed via stack.
    938     int Offset = HEXAGON_LRFP_SIZE + CCInfo.getNextStackOffset();
    939     int FI = MFI.CreateFixedObject(Hexagon_PointerSize, Offset, true);
    940     HMFI.setVarArgsFrameIndex(FI);
    941   }
    942 
    943   return Chain;
    944 }
    945 
        /// Lowers ISD::VASTART.
        ///
        /// Operand 0 is the chain, operand 1 the address of the va_list object and
        /// operand 2 its source value. Outside the musl environment a single store
        /// writes the address of the VarArgsFrameIndex slot into the va_list. In the
        /// musl environment three pointer-sized fields are written at offsets 0, 4
        /// and 8 of the va_list and the stores are joined by a TokenFactor.
    946 SDValue
    947 HexagonTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const {
    948   // VASTART stores the address of the VarArgsFrameIndex slot into the
    949   // memory location argument.
    950   MachineFunction &MF = DAG.getMachineFunction();
    951   HexagonMachineFunctionInfo *QFI = MF.getInfo<HexagonMachineFunctionInfo>();
    952   SDValue Addr = DAG.getFrameIndex(QFI->getVarArgsFrameIndex(), MVT::i32);
    953   const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
    954
    955   if (!Subtarget.isEnvironmentMusl()) {
    956     return DAG.getStore(Op.getOperand(0), SDLoc(Op), Addr, Op.getOperand(1),
    957                         MachinePointerInfo(SV));
    958   }
          // FuncInfo refers to the same HexagonMachineFunctionInfo object as QFI.
    959   auto &FuncInfo = *MF.getInfo<HexagonMachineFunctionInfo>();
    960   auto &HFL = *Subtarget.getFrameLowering();
    961   SDLoc DL(Op);
    962   SmallVector<SDValue, 8> MemOps;
    963
    964   // Get frame index of va_list.
    965   SDValue FIN = Op.getOperand(1);
    966
    967   // If first Vararg register is odd, add 4 bytes to start of
    968   // saved register area to point to the first register location.
    969   // This is because the saved register area has to be 8 byte aligned.
    970   // In case of an odd start register, there will be 4 bytes of padding in
    971   // the beginning of saved register area. If all registers are used up,
    972   // the following condition will handle it correctly.
    973   SDValue SavedRegAreaStartFrameIndex =
    974     DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(), MVT::i32);
    975
    976   auto PtrVT = getPointerTy(DAG.getDataLayout());
    977
    978   if (HFL.FirstVarArgSavedReg & 1)
    979     SavedRegAreaStartFrameIndex =
    980       DAG.getNode(ISD::ADD, DL, PtrVT,
    981                   DAG.getFrameIndex(FuncInfo.getRegSavedAreaStartFrameIndex(),
    982                                     MVT::i32),
    983                   DAG.getIntPtrConstant(4, DL));
    984
    985   // Store the saved register area start pointer.
    986   SDValue Store =
    987     DAG.getStore(Op.getOperand(0), DL,
    988                  SavedRegAreaStartFrameIndex,
    989                  FIN, MachinePointerInfo(SV));
    990   MemOps.push_back(Store);
    991
    992   // Store saved register area end pointer.
          // (va_list + 4; the value stored is the VarArgsFrameIndex address.)
    993   FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
    994                     FIN, DAG.getIntPtrConstant(4, DL));
    995   Store = DAG.getStore(Op.getOperand(0), DL,
    996                        DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
    997                                          PtrVT),
    998                        FIN, MachinePointerInfo(SV, 4));
    999   MemOps.push_back(Store);
   1000
   1001   // Store overflow area pointer.
          // (va_list + 8; the same VarArgsFrameIndex address is stored again.)
   1002   FIN = DAG.getNode(ISD::ADD, DL, PtrVT,
   1003                     FIN, DAG.getIntPtrConstant(4, DL));
   1004   Store = DAG.getStore(Op.getOperand(0), DL,
   1005                        DAG.getFrameIndex(FuncInfo.getVarArgsFrameIndex(),
   1006                                          PtrVT),
   1007                        FIN, MachinePointerInfo(SV, 8));
   1008   MemOps.push_back(Store);
   1009
          // All three stores hang off the incoming chain; merge them.
   1010   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
   1011 }
   1012 
        /// Lowers ISD::VACOPY to a 12-byte, 4-byte-aligned, non-volatile memcpy from
        /// the source va_list (operand 2) to the destination va_list (operand 1).
        /// Operands 3 and 4 supply the source values used for the destination and
        /// source pointer info. Only expected to be reached in the musl environment.
   1013 SDValue
   1014 HexagonTargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const {
   1015   // Assert that the linux ABI is enabled for the current compilation.
   1016   assert(Subtarget.isEnvironmentMusl() && "Linux ABI should be enabled");
   1017   SDValue Chain = Op.getOperand(0);
   1018   SDValue DestPtr = Op.getOperand(1);
   1019   SDValue SrcPtr = Op.getOperand(2);
   1020   const Value *DestSV = cast<SrcValueSDNode>(Op.getOperand(3))->getValue();
   1021   const Value *SrcSV = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
   1022   SDLoc DL(Op);
   1023   // Size of the va_list is 12 bytes as it has 3 pointers. Therefore,
   1024   // we need to memcopy 12 bytes from va_list to another similar list.
   1025   return DAG.getMemcpy(Chain, DL, DestPtr, SrcPtr,
   1026                        DAG.getIntPtrConstant(12, DL), Align(4),
   1027                        /*isVolatile*/ false, false, false,
   1028                        MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV));
   1029 }
   1030 
        /// Custom lowering for ISD::SETCC.
        ///
        ///  - v2i16 / v4i8 operands are sign-extended to a vector with elements twice
        ///    as wide and compared there.
        ///  - Any other comparison with a vector result is returned unchanged.
        ///  - i8 / i16 scalar operands are sign-extended to i32 when the right-hand
        ///    side is a negative constant or when a sign-extension of either operand
        ///    is considered free.
        /// In all remaining cases an empty SDValue is returned.
   1031 SDValue HexagonTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
   1032   const SDLoc &dl(Op);
   1033   SDValue LHS = Op.getOperand(0);
   1034   SDValue RHS = Op.getOperand(1);
   1035   ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
   1036   MVT ResTy = ty(Op);
   1037   MVT OpTy = ty(LHS);
   1038
          // Short vectors: widen each element to twice its size (sign-extending) and
          // compare the widened vectors.
   1039   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
   1040     MVT ElemTy = OpTy.getVectorElementType();
   1041     assert(ElemTy.isScalarInteger());
   1042     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
   1043                                   OpTy.getVectorNumElements());
   1044     return DAG.getSetCC(dl, ResTy,
   1045                         DAG.getSExtOrTrunc(LHS, SDLoc(LHS), WideTy),
   1046                         DAG.getSExtOrTrunc(RHS, SDLoc(RHS), WideTy), CC);
   1047   }
   1048
   1049   // Treat all other vector types as legal.
   1050   if (ResTy.isVector())
   1051     return Op;
   1052
   1053   // Comparisons of short integers should use sign-extend, not zero-extend,
   1054   // since we can represent small negative values in the compare instructions.
   1055   // The LLVM default is to use zero-extend arbitrarily in these cases.
   1056   auto isSExtFree = [this](SDValue N) {
   1057     switch (N.getOpcode()) {
   1058       case ISD::TRUNCATE: {
   1059         // A sign-extend of a truncate of a sign-extend is free.
   1060         SDValue Op = N.getOperand(0);
   1061         if (Op.getOpcode() != ISD::AssertSext)
   1062           return false;
   1063         EVT OrigTy = cast<VTSDNode>(Op.getOperand(1))->getVT();
   1064         unsigned ThisBW = ty(N).getSizeInBits();
   1065         unsigned OrigBW = OrigTy.getSizeInBits();
   1066         // The type that was sign-extended to get the AssertSext must be
   1067         // no wider than the type of N (so that N has still the same value
   1068         // as the original).
   1069         return ThisBW >= OrigBW;
   1070       }
   1071       case ISD::LOAD:
   1072         // We have sign-extended loads.
   1073         return true;
   1074     }
   1075     return false;
   1076   };
   1077
   1078   if (OpTy == MVT::i8 || OpTy == MVT::i16) {
   1079     ConstantSDNode *C = dyn_cast<ConstantSDNode>(RHS);
   1080     bool IsNegative = C && C->getAPIntValue().isNegative();
   1081     if (IsNegative || isSExtFree(LHS) || isSExtFree(RHS))
   1082       return DAG.getSetCC(dl, ResTy,
   1083                           DAG.getSExtOrTrunc(LHS, SDLoc(LHS), MVT::i32),
   1084                           DAG.getSExtOrTrunc(RHS, SDLoc(RHS), MVT::i32), CC);
   1085   }
   1086
   1087   return SDValue();
   1088 }
   1089 
        /// Custom lowering for ISD::VSELECT.
        ///
        /// For v2i16 / v4i8 value operands the select is performed on sign-extended
        /// vectors whose elements are twice as wide, and the result is truncated
        /// back to the original type. For all other types an empty SDValue is
        /// returned.
   1090 SDValue
   1091 HexagonTargetLowering::LowerVSELECT(SDValue Op, SelectionDAG &DAG) const {
   1092   SDValue PredOp = Op.getOperand(0);
   1093   SDValue Op1 = Op.getOperand(1), Op2 = Op.getOperand(2);
   1094   MVT OpTy = ty(Op1);
   1095   const SDLoc &dl(Op);
   1096
   1097   if (OpTy == MVT::v2i16 || OpTy == MVT::v4i8) {
   1098     MVT ElemTy = OpTy.getVectorElementType();
   1099     assert(ElemTy.isScalarInteger());
            // Same element count, element width doubled.
   1100     MVT WideTy = MVT::getVectorVT(MVT::getIntegerVT(2*ElemTy.getSizeInBits()),
   1101                                   OpTy.getVectorNumElements());
   1102     // Generate (trunc (select (_, sext, sext))).
   1103     return DAG.getSExtOrTrunc(
   1104               DAG.getSelect(dl, WideTy, PredOp,
   1105                             DAG.getSExtOrTrunc(Op1, dl, WideTy),
   1106                             DAG.getSExtOrTrunc(Op2, dl, WideTy)),
   1107               dl, OpTy);
   1108   }
   1109
   1110   return SDValue();
   1111 }
   1112 
        /// Lowers a constant-pool reference to a target constant-pool node wrapped
        /// in HexagonISD::AT_PCREL (position-independent code, flagged MO_PCREL) or
        /// HexagonISD::CP (otherwise).
        ///
        /// A ConstantVector with i1 elements is first replaced by a vector of i8
        /// constants with the same number of elements; machine constant-pool entries
        /// and all other constants are passed through as they are.
   1113 SDValue
   1114 HexagonTargetLowering::LowerConstantPool(SDValue Op, SelectionDAG &DAG) const {
   1115   EVT ValTy = Op.getValueType();
   1116   ConstantPoolSDNode *CPN = cast<ConstantPoolSDNode>(Op);
   1117   Constant *CVal = nullptr;
   1118   bool isVTi1Type = false;
          // NOTE(review): getConstVal() is called here before the
          // isMachineConstantPoolEntry() test further down -- confirm that this is
          // valid for machine constant-pool entries.
   1119   if (auto *CV = dyn_cast<ConstantVector>(CPN->getConstVal())) {
   1120     if (cast<VectorType>(CV->getType())->getElementType()->isIntegerTy(1)) {
   1121       IRBuilder<> IRB(CV->getContext());
   1122       SmallVector<Constant*, 128> NewConst;
   1123       unsigned VecLen = CV->getNumOperands();
   1124       assert(isPowerOf2_32(VecLen) &&
   1125              "conversion only supported for pow2 VectorSize");
              // Each i1 element becomes an i8 whose value is isZeroValue() of the
              // original element, i.e. 1 for a zero element and 0 otherwise.
              // NOTE(review): this looks inverted -- confirm it is intended.
   1126       for (unsigned i = 0; i < VecLen; ++i)
   1127         NewConst.push_back(IRB.getInt8(CV->getOperand(i)->isZeroValue()));
   1128
   1129       CVal = ConstantVector::get(NewConst);
   1130       isVTi1Type = true;
   1131     }
   1132   }
   1133   Align Alignment = CPN->getAlign();
   1134   bool IsPositionIndependent = isPositionIndependent();
   1135   unsigned char TF = IsPositionIndependent ? HexagonII::MO_PCREL : 0;
   1136
          // The offset is always zero; the node's own offset is not propagated.
   1137   unsigned Offset = 0;
   1138   SDValue T;
   1139   if (CPN->isMachineConstantPoolEntry())
   1140     T = DAG.getTargetConstantPool(CPN->getMachineCPVal(), ValTy, Alignment,
   1141                                   Offset, TF);
   1142   else if (isVTi1Type)
   1143     T = DAG.getTargetConstantPool(CVal, ValTy, Alignment, Offset, TF);
   1144   else
   1145     T = DAG.getTargetConstantPool(CPN->getConstVal(), ValTy, Alignment, Offset,
   1146                                   TF);
   1147
   1148   assert(cast<ConstantPoolSDNode>(T)->getTargetFlags() == TF &&
   1149          "Inconsistent target flag encountered");
   1150
   1151   if (IsPositionIndependent)
   1152     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), ValTy, T);
   1153   return DAG.getNode(HexagonISD::CP, SDLoc(Op), ValTy, T);
   1154 }
   1155 
        /// Lowers a jump-table reference. In position-independent code the target
        /// jump-table node carries the MO_PCREL flag and is wrapped in
        /// HexagonISD::AT_PCREL; otherwise it is wrapped in HexagonISD::JT.
   1156 SDValue
   1157 HexagonTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
   1158   EVT VT = Op.getValueType();
   1159   int Idx = cast<JumpTableSDNode>(Op)->getIndex();
   1160   if (isPositionIndependent()) {
   1161     SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
   1162     return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), VT, T);
   1163   }
   1164
   1165   SDValue T = DAG.getTargetJumpTable(Idx, VT);
   1166   return DAG.getNode(HexagonISD::JT, SDLoc(Op), VT, T);
   1167 }
   1168 
        /// Lowers ISD::RETURNADDR.
        ///
        /// Operand 0 is the (constant) frame depth. For depth 0 the return-address
        /// register is added as a function live-in and copied out. For a non-zero
        /// depth the frame address for that depth is computed with LowerFRAMEADDR
        /// and the value at offset 4 from it is loaded. The function is always
        /// marked as having its return address taken. An empty SDValue is returned
        /// when verifyReturnAddressArgumentIsConstant() returns true.
   1169 SDValue
   1170 HexagonTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const {
   1171   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
   1172   MachineFunction &MF = DAG.getMachineFunction();
   1173   MachineFrameInfo &MFI = MF.getFrameInfo();
   1174   MFI.setReturnAddressIsTaken(true);
   1175
   1176   if (verifyReturnAddressArgumentIsConstant(Op, DAG))
   1177     return SDValue();
   1178
   1179   EVT VT = Op.getValueType();
   1180   SDLoc dl(Op);
   1181   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   1182   if (Depth) {
            // Op's operand 0 is the depth, which is exactly what LowerFRAMEADDR
            // reads, so Op can be passed to it directly.
   1183     SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
   1184     SDValue Offset = DAG.getConstant(4, dl, MVT::i32);
   1185     return DAG.getLoad(VT, dl, DAG.getEntryNode(),
   1186                        DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset),
   1187                        MachinePointerInfo());
   1188   }
   1189
   1190   // Return LR, which contains the return address. Mark it an implicit live-in.
   1191   unsigned Reg = MF.addLiveIn(HRI.getRARegister(), getRegClassFor(MVT::i32));
   1192   return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, VT);
   1193 }
   1194 
        /// Lowers ISD::FRAMEADDR.
        ///
        /// Starts from a copy of the frame register and, for a constant depth N in
        /// operand 0, performs N successive loads, each using the previously
        /// obtained value as the address. The function is marked as having its
        /// frame address taken.
   1195 SDValue
   1196 HexagonTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const {
   1197   const HexagonRegisterInfo &HRI = *Subtarget.getRegisterInfo();
   1198   MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
   1199   MFI.setFrameAddressIsTaken(true);
   1200
   1201   EVT VT = Op.getValueType();
   1202   SDLoc dl(Op);
   1203   unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
   1204   SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
   1205                                          HRI.getFrameRegister(), VT);
          // Follow the chain of saved frame pointers, one load per level.
   1206   while (Depth--)
   1207     FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr,
   1208                             MachinePointerInfo());
   1209   return FrameAddr;
   1210 }
   1211 
        /// Lowers ISD::ATOMIC_FENCE to a HexagonISD::BARRIER node on the incoming
        /// chain (operand 0). The remaining operands of the fence are not inspected,
        /// so every fence is lowered the same way.
   1212 SDValue
   1213 HexagonTargetLowering::LowerATOMIC_FENCE(SDValue Op, SelectionDAG& DAG) const {
   1214   SDLoc dl(Op);
   1215   return DAG.getNode(HexagonISD::BARRIER, dl, MVT::Other, Op.getOperand(0));
   1216 }
   1217 
        /// Lowers a global address according to the relocation model:
        ///  - Static: HexagonISD::CONST32_GP when small data is in use and the
        ///    global's base object is placed in the small-data section, otherwise
        ///    HexagonISD::CONST32.
        ///  - Non-static, global assumed DSO-local: PC-relative address
        ///    (HexagonISD::AT_PCREL with MO_PCREL).
        ///  - Otherwise: GOT-based address (HexagonISD::AT_GOT) with the offset
        ///    supplied as a separate constant operand.
   1218 SDValue
   1219 HexagonTargetLowering::LowerGLOBALADDRESS(SDValue Op, SelectionDAG &DAG) const {
   1220   SDLoc dl(Op);
   1221   auto *GAN = cast<GlobalAddressSDNode>(Op);
   1222   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1223   auto *GV = GAN->getGlobal();
   1224   int64_t Offset = GAN->getOffset();
   1225
   1226   auto &HLOF = *HTM.getObjFileLowering();
   1227   Reloc::Model RM = HTM.getRelocationModel();
   1228
   1229   if (RM == Reloc::Static) {
   1230     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset);
   1231     const GlobalObject *GO = GV->getBaseObject();
   1232     if (GO && Subtarget.useSmallData() && HLOF.isGlobalInSmallSection(GO, HTM))
   1233       return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, GA);
   1234     return DAG.getNode(HexagonISD::CONST32, dl, PtrVT, GA);
   1235   }
   1236
   1237   bool UsePCRel = getTargetMachine().shouldAssumeDSOLocal(*GV->getParent(), GV);
   1238   if (UsePCRel) {
   1239     SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, Offset,
   1240                                             HexagonII::MO_PCREL);
   1241     return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, GA);
   1242   }
   1243
   1244   // Use GOT index.
          // The global itself is referenced with offset 0; the original offset is
          // passed to AT_GOT as its third operand.
   1245   SDValue GOT = DAG.getGLOBAL_OFFSET_TABLE(PtrVT);
   1246   SDValue GA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, HexagonII::MO_GOT);
   1247   SDValue Off = DAG.getConstant(Offset, dl, MVT::i32);
   1248   return DAG.getNode(HexagonISD::AT_GOT, dl, PtrVT, GOT, GA, Off);
   1249 }
   1250 
   1250 // Lowers a block address. With the static relocation model the target block
        // address is wrapped in HexagonISD::CONST32_GP; otherwise it is flagged
        // MO_PCREL and wrapped in HexagonISD::AT_PCREL.
   1251 SDValue
   1252 HexagonTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const {
   1253   const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
   1254   SDLoc dl(Op);
   1255   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   1256
   1257   Reloc::Model RM = HTM.getRelocationModel();
   1258   if (RM == Reloc::Static) {
   1259     SDValue A = DAG.getTargetBlockAddress(BA, PtrVT);
   1260     return DAG.getNode(HexagonISD::CONST32_GP, dl, PtrVT, A);
   1261   }
   1262
   1263   SDValue A = DAG.getTargetBlockAddress(BA, PtrVT, 0, HexagonII::MO_PCREL);
   1264   return DAG.getNode(HexagonISD::AT_PCREL, dl, PtrVT, A);
   1265 }
   1267 
        /// Produces the address of the global offset table: a PC-relative
        /// (MO_PCREL) reference to the external symbol HEXAGON_GOT_SYM_NAME wrapped
        /// in HexagonISD::AT_PCREL. \p Op is only used for its debug location.
   1267 SDValue
   1268 HexagonTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG)
   1269       const {
   1270   EVT PtrVT = getPointerTy(DAG.getDataLayout());
   1271   SDValue GOTSym = DAG.getTargetExternalSymbol(HEXAGON_GOT_SYM_NAME, PtrVT,
   1272                                                HexagonII::MO_PCREL);
   1273   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Op), PtrVT, GOTSym);
   1274 }
   1276 
        /// Emits a HexagonISD::CALL whose callee is the target global address of
        /// \p GA (tagged with \p OperandFlags) and returns a copy of \p ReturnReg
        /// taken after the call.
        ///
        /// \p Chain and \p Glue are the incoming chain and glue; R0 is listed as a
        /// register operand of the call, and the C calling convention's
        /// call-preserved mask is attached. The frame info is marked as adjusting
        /// the stack. The returned value is the CopyFromReg of \p ReturnReg with
        /// type \p PtrVT.
   1276 SDValue
   1277 HexagonTargetLowering::GetDynamicTLSAddr(SelectionDAG &DAG, SDValue Chain,
   1278       GlobalAddressSDNode *GA, SDValue Glue, EVT PtrVT, unsigned ReturnReg,
   1279       unsigned char OperandFlags) const {
   1280   MachineFunction &MF = DAG.getMachineFunction();
   1281   MachineFrameInfo &MFI = MF.getFrameInfo();
   1282   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
   1283   SDLoc dl(GA);
   1284   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
   1285                                            GA->getValueType(0),
   1286                                            GA->getOffset(),
   1287                                            OperandFlags);
   1288   // Create Operands for the call. The Operands should have the following:
   1289   // 1. Chain SDValue
   1290   // 2. Callee which in this case is the Global address value.
   1291   // 3. Registers live into the call. In this case it's R0, as we
   1292   //    have just one argument to be passed.
   1293   // 4. Glue.
   1294   // Note: The order is important.
   1295
   1296   const auto &HRI = *Subtarget.getRegisterInfo();
   1297   const uint32_t *Mask = HRI.getCallPreservedMask(MF, CallingConv::C);
   1298   assert(Mask && "Missing call preserved mask for calling convention");
   1299   SDValue Ops[] = { Chain, TGA, DAG.getRegister(Hexagon::R0, PtrVT),
   1300                     DAG.getRegisterMask(Mask), Glue };
   1301   Chain = DAG.getNode(HexagonISD::CALL, dl, NodeTys, Ops);
   1302
   1303   // Inform MFI that function has calls.
   1304   MFI.setAdjustsStack(true);
   1305
          // Read the result register, glued to the call.
   1306   Glue = Chain.getValue(1);
   1307   return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Glue);
   1308 }
   1310 
   1311 //
   1312 // Lower using the intial executable model for TLS addresses
   1313 //
   1314 SDValue
   1315 HexagonTargetLowering::LowerToTLSInitialExecModel(GlobalAddressSDNode *GA,
   1316       SelectionDAG &DAG) const {
   1317   SDLoc dl(GA);
   1318   int64_t Offset = GA->getOffset();
   1319   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1320 
   1321   // Get the thread pointer.
   1322   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
   1323 
   1324   bool IsPositionIndependent = isPositionIndependent();
   1325   unsigned char TF =
   1326       IsPositionIndependent ? HexagonII::MO_IEGOT : HexagonII::MO_IE;
   1327 
   1328   // First generate the TLS symbol address
   1329   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT,
   1330                                            Offset, TF);
   1331 
   1332   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
   1333 
   1334   if (IsPositionIndependent) {
   1335     // Generate the GOT pointer in case of position independent code
   1336     SDValue GOT = LowerGLOBAL_OFFSET_TABLE(Sym, DAG);
   1337 
   1338     // Add the TLS Symbol address to GOT pointer.This gives
   1339     // GOT relative relocation for the symbol.
   1340     Sym = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
   1341   }
   1342 
   1343   // Load the offset value for TLS symbol.This offset is relative to
   1344   // thread pointer.
   1345   SDValue LoadOffset =
   1346       DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), Sym, MachinePointerInfo());
   1347 
   1348   // Address of the thread local variable is the add of thread
   1349   // pointer and the offset of the variable.
   1350   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, LoadOffset);
   1351 }
   1352 
   1353 //
   1354 // Lower using the local executable model for TLS addresses
        //
        // The result is UGP + CONST32(symbol tagged MO_TPREL). Unlike the initial
        // executable lowering, no load and no GOT access is emitted here.
   1355 //
   1356 SDValue
   1357 HexagonTargetLowering::LowerToTLSLocalExecModel(GlobalAddressSDNode *GA,
   1358       SelectionDAG &DAG) const {
   1359   SDLoc dl(GA);
   1360   int64_t Offset = GA->getOffset();
   1361   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1362 
   1363   // Get the thread pointer.
   1364   SDValue TP = DAG.getCopyFromReg(DAG.getEntryNode(), dl, Hexagon::UGP, PtrVT);
   1365   // Generate the TLS symbol address
   1366   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
   1367                                            HexagonII::MO_TPREL);
   1368   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
   1369 
   1370   // Address of the thread local variable is the add of thread
   1371   // pointer and the offset of the variable.
   1372   return DAG.getNode(ISD::ADD, dl, PtrVT, TP, Sym);
   1373 }
   1374 
   1375 //
   1376 // Lower using the general dynamic model for TLS addresses
        //
        // Computes GOT + CONST32(symbol tagged MO_GDGOT), copies that value into
        // R0, and hands off to GetDynamicTLSAddr with MO_GDPLT as the operand
        // flag (or'd with HMOTF_ConstExtended when the subtarget uses long calls).
   1377 //
   1378 SDValue
   1379 HexagonTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
   1380       SelectionDAG &DAG) const {
   1381   SDLoc dl(GA);
   1382   int64_t Offset = GA->getOffset();
   1383   auto PtrVT = getPointerTy(DAG.getDataLayout());
   1384 
   1385   // First generate the TLS symbol address
   1386   SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl, PtrVT, Offset,
   1387                                            HexagonII::MO_GDGOT);
   1388 
   1389   // Then, generate the GOT pointer
   1390   SDValue GOT = LowerGLOBAL_OFFSET_TABLE(TGA, DAG);
   1391 
   1392   // Add the TLS symbol and the GOT pointer
          // Note: despite its name, Chain first holds the address value
          // (GOT + Sym); it is reassigned to the CopyToReg result just below.
   1393   SDValue Sym = DAG.getNode(HexagonISD::CONST32, dl, PtrVT, TGA);
   1394   SDValue Chain = DAG.getNode(ISD::ADD, dl, PtrVT, GOT, Sym);
   1395 
   1396   // Copy over the argument to R0
          // InFlag starts out empty and is then set to result #1 of the CopyToReg.
   1397   SDValue InFlag;
   1398   Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, Hexagon::R0, Chain, InFlag);
   1399   InFlag = Chain.getValue(1);
   1400 
          // Long calls additionally need the constant-extended flag.
   1401   unsigned Flags =
   1402       static_cast<const HexagonSubtarget &>(DAG.getSubtarget()).useLongCalls()
   1403           ? HexagonII::MO_GDPLT | HexagonII::HMOTF_ConstExtended
   1404           : HexagonII::MO_GDPLT;
   1405 
          // NOTE(review): Hexagon::R0 is passed here as well; presumably it is the
          // register the helper reads the result from -- confirm against
          // GetDynamicTLSAddr's signature.
   1406   return GetDynamicTLSAddr(DAG, Chain, GA, InFlag, PtrVT,
   1407                            Hexagon::R0, Flags);
   1408 }
   1409 
   1410 //
   1411 // Lower TLS addresses.
   1412 //
   1413 // For now for dynamic models, we only support the general dynamic model.
        // Consequently LocalDynamic is routed to the general dynamic lowering.
        // The model is obtained from the target machine for the node's global.
   1414 //
   1415 SDValue
   1416 HexagonTargetLowering::LowerGlobalTLSAddress(SDValue Op,
   1417       SelectionDAG &DAG) const {
          // cast<> (not dyn_cast<>): Op is required to be a GlobalAddressSDNode.
   1418   GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(Op);
   1419 
   1420   switch (HTM.getTLSModel(GA->getGlobal())) {
   1421     case TLSModel::GeneralDynamic:
   1422     case TLSModel::LocalDynamic:
   1423       return LowerToTLSGeneralDynamicModel(GA, DAG);
   1424     case TLSModel::InitialExec:
   1425       return LowerToTLSInitialExecModel(GA, DAG);
   1426     case TLSModel::LocalExec:
   1427       return LowerToTLSLocalExecModel(GA, DAG);
   1428   }
          // Every case above returns, so this is only reached for a model value
          // that matches none of them.
   1429   llvm_unreachable("Bogus TLS model");
   1430 }
   1431 
   1432 //===----------------------------------------------------------------------===//
   1433 // TargetLowering Implementation
   1434 //===----------------------------------------------------------------------===//
   1435 
        // Constructs the Hexagon lowering object and records, in order:
        //  - alignment, boolean-content, atomic-size and scheduling preferences,
        //  - inline memcpy/memmove/memset store limits,
        //  - register classes for the scalar, small-vector and predicate types,
        //  - operation / load-ext / trunc-store / cond-code actions per type,
        //  - subtarget-gated overrides (V60/V66/V67, HVX),
        //  - runtime library call names.
        // Several actions are set broadly first and then overridden for specific
        // types, so the order of the calls below is significant.
   1436 HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM,
   1437                                              const HexagonSubtarget &ST)
   1438     : TargetLowering(TM), HTM(static_cast<const HexagonTargetMachine&>(TM)),
   1439       Subtarget(ST) {
   1440   auto &HRI = *Subtarget.getRegisterInfo();
   1441 
   1442   setPrefLoopAlignment(Align(16));
   1443   setMinFunctionAlignment(Align(4));
   1444   setPrefFunctionAlignment(Align(16));
   1445   setStackPointerRegisterToSaveRestore(HRI.getStackRegister());
   1446   setBooleanContents(TargetLoweringBase::UndefinedBooleanContent);
   1447   setBooleanVectorContents(TargetLoweringBase::UndefinedBooleanContent);
   1448 
   1449   setMaxAtomicSizeInBitsSupported(64);
   1450   setMinCmpXchgSizeInBits(32);
   1451 
          // Scheduling preference is selected by EnableHexSDNodeSched.
   1452   if (EnableHexSDNodeSched)
   1453     setSchedulingPreference(Sched::VLIW);
   1454   else
   1455     setSchedulingPreference(Sched::Source);
   1456 
   1457   // Limits for inline expansion of memcpy/memmove
          // (the memset limits are taken from their *CL counterparts as well).
   1458   MaxStoresPerMemcpy = MaxStoresPerMemcpyCL;
   1459   MaxStoresPerMemcpyOptSize = MaxStoresPerMemcpyOptSizeCL;
   1460   MaxStoresPerMemmove = MaxStoresPerMemmoveCL;
   1461   MaxStoresPerMemmoveOptSize = MaxStoresPerMemmoveOptSizeCL;
   1462   MaxStoresPerMemset = MaxStoresPerMemsetCL;
   1463   MaxStoresPerMemsetOptSize = MaxStoresPerMemsetOptSizeCL;
   1464 
   1465   //
   1466   // Set up register classes.
   1467   //
   1468 
   1469   addRegisterClass(MVT::i1,    &Hexagon::PredRegsRegClass);
   1470   addRegisterClass(MVT::v2i1,  &Hexagon::PredRegsRegClass);  // bbbbaaaa
   1471   addRegisterClass(MVT::v4i1,  &Hexagon::PredRegsRegClass);  // ddccbbaa
   1472   addRegisterClass(MVT::v8i1,  &Hexagon::PredRegsRegClass);  // hgfedcba
   1473   addRegisterClass(MVT::i32,   &Hexagon::IntRegsRegClass);
   1474   addRegisterClass(MVT::v2i16, &Hexagon::IntRegsRegClass);
   1475   addRegisterClass(MVT::v4i8,  &Hexagon::IntRegsRegClass);
   1476   addRegisterClass(MVT::i64,   &Hexagon::DoubleRegsRegClass);
   1477   addRegisterClass(MVT::v8i8,  &Hexagon::DoubleRegsRegClass);
   1478   addRegisterClass(MVT::v4i16, &Hexagon::DoubleRegsRegClass);
   1479   addRegisterClass(MVT::v2i32, &Hexagon::DoubleRegsRegClass);
   1480 
          // Floating-point values share the integer register classes.
   1481   addRegisterClass(MVT::f32, &Hexagon::IntRegsRegClass);
   1482   addRegisterClass(MVT::f64, &Hexagon::DoubleRegsRegClass);
   1483 
   1484   //
   1485   // Handling of scalar operations.
   1486   //
   1487   // All operations default to "legal", except:
   1488   // - indexed loads and stores (pre-/post-incremented),
   1489   // - ANY_EXTEND_VECTOR_INREG, ATOMIC_CMP_SWAP_WITH_SUCCESS, CONCAT_VECTORS,
   1490   //   ConstantFP, DEBUGTRAP, FCEIL, FCOPYSIGN, FEXP, FEXP2, FFLOOR, FGETSIGN,
   1491   //   FLOG, FLOG2, FLOG10, FMAXNUM, FMINNUM, FNEARBYINT, FRINT, FROUND, TRAP,
   1492   //   FTRUNC, PREFETCH, SIGN_EXTEND_VECTOR_INREG, ZERO_EXTEND_VECTOR_INREG,
   1493   // which default to "expand" for at least one type.
   1494 
   1495   // Misc operations.
   1496   setOperationAction(ISD::ConstantFP,           MVT::f32,   Legal);
   1497   setOperationAction(ISD::ConstantFP,           MVT::f64,   Legal);
   1498   setOperationAction(ISD::TRAP,                 MVT::Other, Legal);
   1499   setOperationAction(ISD::ConstantPool,         MVT::i32,   Custom);
   1500   setOperationAction(ISD::JumpTable,            MVT::i32,   Custom);
   1501   setOperationAction(ISD::BUILD_PAIR,           MVT::i64,   Expand);
   1502   setOperationAction(ISD::SIGN_EXTEND_INREG,    MVT::i1,    Expand);
   1503   setOperationAction(ISD::INLINEASM,            MVT::Other, Custom);
   1504   setOperationAction(ISD::INLINEASM_BR,         MVT::Other, Custom);
   1505   setOperationAction(ISD::PREFETCH,             MVT::Other, Custom);
   1506   setOperationAction(ISD::READCYCLECOUNTER,     MVT::i64,   Custom);
   1507   setOperationAction(ISD::INTRINSIC_VOID,       MVT::Other, Custom);
   1508   setOperationAction(ISD::EH_RETURN,            MVT::Other, Custom);
   1509   setOperationAction(ISD::GLOBAL_OFFSET_TABLE,  MVT::i32,   Custom);
   1510   setOperationAction(ISD::GlobalTLSAddress,     MVT::i32,   Custom);
   1511   setOperationAction(ISD::ATOMIC_FENCE,         MVT::Other, Custom);
   1512 
   1513   // Custom legalize GlobalAddress nodes into CONST32.
   1514   setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
   1515   setOperationAction(ISD::GlobalAddress, MVT::i8,  Custom);
   1516   setOperationAction(ISD::BlockAddress,  MVT::i32, Custom);
   1517 
   1518   // Hexagon needs to optimize cases with negative constants.
   1519   setOperationAction(ISD::SETCC, MVT::i8,    Custom);
   1520   setOperationAction(ISD::SETCC, MVT::i16,   Custom);
   1521   setOperationAction(ISD::SETCC, MVT::v4i8,  Custom);
   1522   setOperationAction(ISD::SETCC, MVT::v2i16, Custom);
   1523 
   1524   // VASTART needs to be custom lowered to use the VarArgsFrameIndex.
   1525   setOperationAction(ISD::VASTART, MVT::Other, Custom);
   1526   setOperationAction(ISD::VAEND,   MVT::Other, Expand);
   1527   setOperationAction(ISD::VAARG,   MVT::Other, Expand);
          // VACOPY is custom-lowered only for the musl environment.
   1528   if (Subtarget.isEnvironmentMusl())
   1529     setOperationAction(ISD::VACOPY, MVT::Other, Custom);
   1530   else
   1531     setOperationAction(ISD::VACOPY,  MVT::Other, Expand);
   1532 
   1533   setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
   1534   setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
   1535   setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
   1536 
          // When jump tables are not to be emitted, the minimum entry count is set
          // to the largest unsigned value.
   1537   if (EmitJumpTables)
   1538     setMinimumJumpTableEntries(MinimumJumpTables);
   1539   else
   1540     setMinimumJumpTableEntries(std::numeric_limits<unsigned>::max());
   1541   setOperationAction(ISD::BR_JT, MVT::Other, Expand);
   1542 
   1543   for (unsigned LegalIntOp :
   1544        {ISD::ABS, ISD::SMIN, ISD::SMAX, ISD::UMIN, ISD::UMAX}) {
   1545     setOperationAction(LegalIntOp, MVT::i32, Legal);
   1546     setOperationAction(LegalIntOp, MVT::i64, Legal);
   1547   }
   1548 
   1549   // Hexagon has A4_addp_c and A4_subp_c that take and generate a carry bit,
   1550   // but they only operate on i64.
          // ADDCARRY/SUBCARRY are therefore Expand for every integer type here and
          // switched to Custom for i64 right after the loop.
   1551   for (MVT VT : MVT::integer_valuetypes()) {
   1552     setOperationAction(ISD::UADDO,    VT, Custom);
   1553     setOperationAction(ISD::USUBO,    VT, Custom);
   1554     setOperationAction(ISD::SADDO,    VT, Expand);
   1555     setOperationAction(ISD::SSUBO,    VT, Expand);
   1556     setOperationAction(ISD::ADDCARRY, VT, Expand);
   1557     setOperationAction(ISD::SUBCARRY, VT, Expand);
   1558   }
   1559   setOperationAction(ISD::ADDCARRY, MVT::i64, Custom);
   1560   setOperationAction(ISD::SUBCARRY, MVT::i64, Custom);
   1561 
   1562   setOperationAction(ISD::CTLZ, MVT::i8,  Promote);
   1563   setOperationAction(ISD::CTLZ, MVT::i16, Promote);
   1564   setOperationAction(ISD::CTTZ, MVT::i8,  Promote);
   1565   setOperationAction(ISD::CTTZ, MVT::i16, Promote);
   1566 
   1567   // Popcount can count # of 1s in i64 but returns i32.
   1568   setOperationAction(ISD::CTPOP, MVT::i8,  Promote);
   1569   setOperationAction(ISD::CTPOP, MVT::i16, Promote);
   1570   setOperationAction(ISD::CTPOP, MVT::i32, Promote);
   1571   setOperationAction(ISD::CTPOP, MVT::i64, Legal);
   1572 
   1573   setOperationAction(ISD::BITREVERSE, MVT::i32, Legal);
   1574   setOperationAction(ISD::BITREVERSE, MVT::i64, Legal);
   1575   setOperationAction(ISD::BSWAP, MVT::i32, Legal);
   1576   setOperationAction(ISD::BSWAP, MVT::i64, Legal);
   1577 
   1578   setOperationAction(ISD::FSHL, MVT::i32, Legal);
   1579   setOperationAction(ISD::FSHL, MVT::i64, Legal);
   1580   setOperationAction(ISD::FSHR, MVT::i32, Legal);
   1581   setOperationAction(ISD::FSHR, MVT::i64, Legal);
   1582 
          // Integer operations expanded for every integer type. ROTL/ROTR on
          // i32/i64 are made Legal again further down when V60 ops are available.
   1583   for (unsigned IntExpOp :
   1584        {ISD::SDIV,      ISD::UDIV,      ISD::SREM,      ISD::UREM,
   1585         ISD::SDIVREM,   ISD::UDIVREM,   ISD::ROTL,      ISD::ROTR,
   1586         ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS,
   1587         ISD::SMUL_LOHI, ISD::UMUL_LOHI}) {
   1588     for (MVT VT : MVT::integer_valuetypes())
   1589       setOperationAction(IntExpOp, VT, Expand);
   1590   }
   1591 
          // Floating-point operations expanded for every FP type.
   1592   for (unsigned FPExpOp :
   1593        {ISD::FDIV, ISD::FREM, ISD::FSQRT, ISD::FSIN, ISD::FCOS, ISD::FSINCOS,
   1594         ISD::FPOW, ISD::FCOPYSIGN}) {
   1595     for (MVT VT : MVT::fp_valuetypes())
   1596       setOperationAction(FPExpOp, VT, Expand);
   1597   }
   1598 
   1599   // No extending loads from i32.
   1600   for (MVT VT : MVT::integer_valuetypes()) {
   1601     setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i32, Expand);
   1602     setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
   1603     setLoadExtAction(ISD::EXTLOAD,  VT, MVT::i32, Expand);
   1604   }
   1605   // Turn FP truncstore into trunc + store.
   1606   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   1607   // Turn FP extload into load/fpextend.
   1608   for (MVT VT : MVT::fp_valuetypes())
   1609     setLoadExtAction(ISD::EXTLOAD, VT, MVT::f32, Expand);
   1610 
   1611   // Expand BR_CC and SELECT_CC for all integer and fp types.
   1612   for (MVT VT : MVT::integer_valuetypes()) {
   1613     setOperationAction(ISD::BR_CC,     VT, Expand);
   1614     setOperationAction(ISD::SELECT_CC, VT, Expand);
   1615   }
   1616   for (MVT VT : MVT::fp_valuetypes()) {
   1617     setOperationAction(ISD::BR_CC,     VT, Expand);
   1618     setOperationAction(ISD::SELECT_CC, VT, Expand);
   1619   }
   1620   setOperationAction(ISD::BR_CC, MVT::Other, Expand);
   1621 
   1622   //
   1623   // Handling of vector operations.
   1624   //
   1625 
   1626   // Set the action for vector operations to "expand", then override it with
   1627   // either "custom" or "legal" for specific cases.
   1628   static const unsigned VectExpOps[] = {
   1629     // Integer arithmetic:
   1630     ISD::ADD,     ISD::SUB,     ISD::MUL,     ISD::SDIV,      ISD::UDIV,
   1631     ISD::SREM,    ISD::UREM,    ISD::SDIVREM, ISD::UDIVREM,   ISD::SADDO,
   1632     ISD::UADDO,   ISD::SSUBO,   ISD::USUBO,   ISD::SMUL_LOHI, ISD::UMUL_LOHI,
   1633     // Logical/bit:
   1634     ISD::AND,     ISD::OR,      ISD::XOR,     ISD::ROTL,    ISD::ROTR,
   1635     ISD::CTPOP,   ISD::CTLZ,    ISD::CTTZ,
   1636     // Floating point arithmetic/math functions:
   1637     ISD::FADD,    ISD::FSUB,    ISD::FMUL,    ISD::FMA,     ISD::FDIV,
   1638     ISD::FREM,    ISD::FNEG,    ISD::FABS,    ISD::FSQRT,   ISD::FSIN,
   1639     ISD::FCOS,    ISD::FPOW,    ISD::FLOG,    ISD::FLOG2,
   1640     ISD::FLOG10,  ISD::FEXP,    ISD::FEXP2,   ISD::FCEIL,   ISD::FTRUNC,
   1641     ISD::FRINT,   ISD::FNEARBYINT,            ISD::FROUND,  ISD::FFLOOR,
   1642     ISD::FMINNUM, ISD::FMAXNUM, ISD::FSINCOS,
   1643     // Misc:
   1644     ISD::BR_CC,   ISD::SELECT_CC,             ISD::ConstantPool,
   1645     // Vector:
   1646     ISD::BUILD_VECTOR,          ISD::SCALAR_TO_VECTOR,
   1647     ISD::EXTRACT_VECTOR_ELT,    ISD::INSERT_VECTOR_ELT,
   1648     ISD::EXTRACT_SUBVECTOR,     ISD::INSERT_SUBVECTOR,
   1649     ISD::CONCAT_VECTORS,        ISD::VECTOR_SHUFFLE,
   1650     ISD::SPLAT_VECTOR,
   1651   };
   1652 
   1653   for (MVT VT : MVT::fixedlen_vector_valuetypes()) {
   1654     for (unsigned VectExpOp : VectExpOps)
   1655       setOperationAction(VectExpOp, VT, Expand);
   1656 
   1657     // Expand all extending loads and truncating stores:
   1658     for (MVT TargetVT : MVT::fixedlen_vector_valuetypes()) {
   1659       if (TargetVT == VT)
   1660         continue;
   1661       setLoadExtAction(ISD::EXTLOAD, TargetVT, VT, Expand);
   1662       setLoadExtAction(ISD::ZEXTLOAD, TargetVT, VT, Expand);
   1663       setLoadExtAction(ISD::SEXTLOAD, TargetVT, VT, Expand);
   1664       setTruncStoreAction(VT, TargetVT, Expand);
   1665     }
   1666 
   1667     // Normalize all inputs to SELECT to be vectors of i32.
            // The promoted type has the same total bit width as VT.
   1668     if (VT.getVectorElementType() != MVT::i32) {
   1669       MVT VT32 = MVT::getVectorVT(MVT::i32, VT.getSizeInBits()/32);
   1670       setOperationAction(ISD::SELECT, VT, Promote);
   1671       AddPromotedToType(ISD::SELECT, VT, VT32);
   1672     }
   1673     setOperationAction(ISD::SRA, VT, Custom);
   1674     setOperationAction(ISD::SHL, VT, Custom);
   1675     setOperationAction(ISD::SRL, VT, Custom);
   1676   }
   1677 
   1678   // Extending loads from (native) vectors of i8 into (native) vectors of i16
   1679   // are legal.
   1680   setLoadExtAction(ISD::EXTLOAD,  MVT::v2i16, MVT::v2i8, Legal);
   1681   setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
   1682   setLoadExtAction(ISD::SEXTLOAD, MVT::v2i16, MVT::v2i8, Legal);
   1683   setLoadExtAction(ISD::EXTLOAD,  MVT::v4i16, MVT::v4i8, Legal);
   1684   setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
   1685   setLoadExtAction(ISD::SEXTLOAD, MVT::v4i16, MVT::v4i8, Legal);
   1686 
   1687   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8,  Legal);
   1688   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Legal);
   1689   setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Legal);
   1690 
   1691   // Types natively supported:
          // These calls override the blanket Expand set in the loop above.
   1692   for (MVT NativeVT : {MVT::v8i1, MVT::v4i1, MVT::v2i1, MVT::v4i8,
   1693                        MVT::v8i8, MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
   1694     setOperationAction(ISD::BUILD_VECTOR,       NativeVT, Custom);
   1695     setOperationAction(ISD::EXTRACT_VECTOR_ELT, NativeVT, Custom);
   1696     setOperationAction(ISD::INSERT_VECTOR_ELT,  NativeVT, Custom);
   1697     setOperationAction(ISD::EXTRACT_SUBVECTOR,  NativeVT, Custom);
   1698     setOperationAction(ISD::INSERT_SUBVECTOR,   NativeVT, Custom);
   1699     setOperationAction(ISD::CONCAT_VECTORS,     NativeVT, Custom);
   1700 
   1701     setOperationAction(ISD::ADD, NativeVT, Legal);
   1702     setOperationAction(ISD::SUB, NativeVT, Legal);
   1703     setOperationAction(ISD::MUL, NativeVT, Legal);
   1704     setOperationAction(ISD::AND, NativeVT, Legal);
   1705     setOperationAction(ISD::OR,  NativeVT, Legal);
   1706     setOperationAction(ISD::XOR, NativeVT, Legal);
   1707 
            // SPLAT_VECTOR stays Expand for the i1-element (predicate) vectors.
   1708     if (NativeVT.getVectorElementType() != MVT::i1)
   1709       setOperationAction(ISD::SPLAT_VECTOR, NativeVT, Legal);
   1710   }
   1711 
   1712   for (MVT VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32}) {
   1713     setOperationAction(ISD::SMIN, VT, Legal);
   1714     setOperationAction(ISD::SMAX, VT, Legal);
   1715     setOperationAction(ISD::UMIN, VT, Legal);
   1716     setOperationAction(ISD::UMAX, VT, Legal);
   1717   }
   1718 
   1719   // Custom lower unaligned loads.
   1720   // Also, for both loads and stores, verify the alignment of the address
   1721   // in case it is a compile-time constant. This is a usability feature to
   1722   // provide a meaningful error message to users.
   1723   for (MVT VT : {MVT::i16, MVT::i32, MVT::v4i8, MVT::i64, MVT::v8i8,
   1724                  MVT::v2i16, MVT::v4i16, MVT::v2i32}) {
   1725     setOperationAction(ISD::LOAD,  VT, Custom);
   1726     setOperationAction(ISD::STORE, VT, Custom);
   1727   }
   1728 
   1729   // Custom-lower load/stores of boolean vectors.
   1730   for (MVT VT : {MVT::v2i1, MVT::v4i1, MVT::v8i1}) {
   1731     setOperationAction(ISD::LOAD,  VT, Custom);
   1732     setOperationAction(ISD::STORE, VT, Custom);
   1733   }
   1734 
          // Expand these setcc condition codes for the listed vector types.
          // NOTE(review): MVT::v2i32 is listed twice below; the repeat looks
          // redundant -- confirm and drop.
   1735   for (MVT VT : {MVT::v2i16, MVT::v4i8, MVT::v8i8, MVT::v2i32, MVT::v4i16,
   1736                  MVT::v2i32}) {
   1737     setCondCodeAction(ISD::SETNE,  VT, Expand);
   1738     setCondCodeAction(ISD::SETLE,  VT, Expand);
   1739     setCondCodeAction(ISD::SETGE,  VT, Expand);
   1740     setCondCodeAction(ISD::SETLT,  VT, Expand);
   1741     setCondCodeAction(ISD::SETULE, VT, Expand);
   1742     setCondCodeAction(ISD::SETUGE, VT, Expand);
   1743     setCondCodeAction(ISD::SETULT, VT, Expand);
   1744   }
   1745 
   1746   // Custom-lower bitcasts from i8 to v8i1.
   1747   setOperationAction(ISD::BITCAST,        MVT::i8,    Custom);
          // NOTE(review): SETCC on v2i16 was already set to Custom earlier in this
          // constructor; this repeat looks redundant -- confirm.
   1748   setOperationAction(ISD::SETCC,          MVT::v2i16, Custom);
   1749   setOperationAction(ISD::VSELECT,        MVT::v4i8,  Custom);
   1750   setOperationAction(ISD::VSELECT,        MVT::v2i16, Custom);
   1751   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i8,  Custom);
   1752   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom);
   1753   setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8,  Custom);
   1754 
   1755   // V5+.
          // f64 FMA/FADD/FSUB/FMUL start out as Expand; FADD/FSUB and FMUL are
          // switched to Legal further down for V66 and V67 respectively.
   1756   setOperationAction(ISD::FMA,  MVT::f64, Expand);
   1757   setOperationAction(ISD::FADD, MVT::f64, Expand);
   1758   setOperationAction(ISD::FSUB, MVT::f64, Expand);
   1759   setOperationAction(ISD::FMUL, MVT::f64, Expand);
   1760 
   1761   setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
   1762   setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
   1763 
          // FP<->integer conversions on i1/i8/i16 are promoted.
   1764   setOperationAction(ISD::FP_TO_UINT, MVT::i1,  Promote);
   1765   setOperationAction(ISD::FP_TO_UINT, MVT::i8,  Promote);
   1766   setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
   1767   setOperationAction(ISD::FP_TO_SINT, MVT::i1,  Promote);
   1768   setOperationAction(ISD::FP_TO_SINT, MVT::i8,  Promote);
   1769   setOperationAction(ISD::FP_TO_SINT, MVT::i16, Promote);
   1770   setOperationAction(ISD::UINT_TO_FP, MVT::i1,  Promote);
   1771   setOperationAction(ISD::UINT_TO_FP, MVT::i8,  Promote);
   1772   setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
   1773   setOperationAction(ISD::SINT_TO_FP, MVT::i1,  Promote);
   1774   setOperationAction(ISD::SINT_TO_FP, MVT::i8,  Promote);
   1775   setOperationAction(ISD::SINT_TO_FP, MVT::i16, Promote);
   1776 
   1777   // Handling of indexed loads/stores: default is "expand".
   1778   //
          // Only post-increment is made Legal, and only for the types listed.
   1779   for (MVT VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64, MVT::f32, MVT::f64,
   1780                  MVT::v2i16, MVT::v2i32, MVT::v4i8, MVT::v4i16, MVT::v8i8}) {
   1781     setIndexedLoadAction(ISD::POST_INC, VT, Legal);
   1782     setIndexedStoreAction(ISD::POST_INC, VT, Legal);
   1783   }
   1784 
   1785   // Subtarget-specific operation actions.
   1786   //
   1787   if (Subtarget.hasV60Ops()) {
   1788     setOperationAction(ISD::ROTL, MVT::i32, Legal);
   1789     setOperationAction(ISD::ROTL, MVT::i64, Legal);
   1790     setOperationAction(ISD::ROTR, MVT::i32, Legal);
   1791     setOperationAction(ISD::ROTR, MVT::i64, Legal);
   1792   }
   1793   if (Subtarget.hasV66Ops()) {
   1794     setOperationAction(ISD::FADD, MVT::f64, Legal);
   1795     setOperationAction(ISD::FSUB, MVT::f64, Legal);
   1796   }
   1797   if (Subtarget.hasV67Ops()) {
   1798     setOperationAction(ISD::FMINNUM, MVT::f64, Legal);
   1799     setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
   1800     setOperationAction(ISD::FMUL,    MVT::f64, Legal);
   1801   }
   1802 
   1803   setTargetDAGCombine(ISD::VSELECT);
   1804 
          // HVX-specific setup runs before register properties are computed.
   1805   if (Subtarget.useHVXOps())
   1806     initializeHVXLowering();
   1807 
   1808   computeRegisterProperties(&HRI);
   1809 
   1810   //
   1811   // Library calls for unsupported operations
   1812   //
          // Local copy of EnableFastMath (declared outside this view); it selects
          // between the fast and regular libcall names below.
   1813   bool FastMath  = EnableFastMath;
   1814 
   1815   setLibcallName(RTLIB::SDIV_I32, "__hexagon_divsi3");
   1816   setLibcallName(RTLIB::SDIV_I64, "__hexagon_divdi3");
   1817   setLibcallName(RTLIB::UDIV_I32, "__hexagon_udivsi3");
   1818   setLibcallName(RTLIB::UDIV_I64, "__hexagon_udivdi3");
   1819   setLibcallName(RTLIB::SREM_I32, "__hexagon_modsi3");
   1820   setLibcallName(RTLIB::SREM_I64, "__hexagon_moddi3");
   1821   setLibcallName(RTLIB::UREM_I32, "__hexagon_umodsi3");
   1822   setLibcallName(RTLIB::UREM_I64, "__hexagon_umoddi3");
   1823 
   1824   setLibcallName(RTLIB::SINTTOFP_I128_F64, "__hexagon_floattidf");
   1825   setLibcallName(RTLIB::SINTTOFP_I128_F32, "__hexagon_floattisf");
   1826   setLibcallName(RTLIB::FPTOUINT_F32_I128, "__hexagon_fixunssfti");
   1827   setLibcallName(RTLIB::FPTOUINT_F64_I128, "__hexagon_fixunsdfti");
   1828   setLibcallName(RTLIB::FPTOSINT_F32_I128, "__hexagon_fixsfti");
   1829   setLibcallName(RTLIB::FPTOSINT_F64_I128, "__hexagon_fixdfti");
   1830 
   1831   // This is the only fast library function for sqrtd.
          // Without FastMath, SQRT_F64 is not renamed here.
   1832   if (FastMath)
   1833     setLibcallName(RTLIB::SQRT_F64, "__hexagon_fast2_sqrtdf2");
   1834 
   1835   // Prefix is: nothing  for "slow-math",
   1836   //            "fast2_" for V5+ fast-math double-precision
   1837   // (actually, keep fast-math and fast-math2 separate for now)
   1838   if (FastMath) {
   1839     setLibcallName(RTLIB::ADD_F64, "__hexagon_fast_adddf3");
   1840     setLibcallName(RTLIB::SUB_F64, "__hexagon_fast_subdf3");
   1841     setLibcallName(RTLIB::MUL_F64, "__hexagon_fast_muldf3");
   1842     setLibcallName(RTLIB::DIV_F64, "__hexagon_fast_divdf3");
   1843     setLibcallName(RTLIB::DIV_F32, "__hexagon_fast_divsf3");
   1844   } else {
   1845     setLibcallName(RTLIB::ADD_F64, "__hexagon_adddf3");
   1846     setLibcallName(RTLIB::SUB_F64, "__hexagon_subdf3");
   1847     setLibcallName(RTLIB::MUL_F64, "__hexagon_muldf3");
   1848     setLibcallName(RTLIB::DIV_F64, "__hexagon_divdf3");
   1849     setLibcallName(RTLIB::DIV_F32, "__hexagon_divsf3");
   1850   }
   1851 
   1852   if (FastMath)
   1853     setLibcallName(RTLIB::SQRT_F32, "__hexagon_fast2_sqrtf");
   1854   else
   1855     setLibcallName(RTLIB::SQRT_F32, "__hexagon_sqrtf");
   1856 
   1857   // These cause problems when the shift amount is non-constant.
          // Their libcall names are cleared by passing nullptr.
   1858   setLibcallName(RTLIB::SHL_I128, nullptr);
   1859   setLibcallName(RTLIB::SRL_I128, nullptr);
   1860   setLibcallName(RTLIB::SRA_I128, nullptr);
   1861 }
   1862 
        // Maps a HexagonISD opcode to its printable name.
        //
        // Returns the string literal for each opcode listed in the switch. OP_END
        // breaks out of the switch, and the function then returns nullptr; the
        // same nullptr is returned for any value that matches no case. The switch
        // deliberately has no default label.
   1863 const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const {
   1864   switch ((HexagonISD::NodeType)Opcode) {
   1865   case HexagonISD::ADDC:          return "HexagonISD::ADDC";
   1866   case HexagonISD::SUBC:          return "HexagonISD::SUBC";
   1867   case HexagonISD::ALLOCA:        return "HexagonISD::ALLOCA";
   1868   case HexagonISD::AT_GOT:        return "HexagonISD::AT_GOT";
   1869   case HexagonISD::AT_PCREL:      return "HexagonISD::AT_PCREL";
   1870   case HexagonISD::BARRIER:       return "HexagonISD::BARRIER";
   1871   case HexagonISD::CALL:          return "HexagonISD::CALL";
   1872   case HexagonISD::CALLnr:        return "HexagonISD::CALLnr";
   1873   case HexagonISD::CALLR:         return "HexagonISD::CALLR";
   1874   case HexagonISD::COMBINE:       return "HexagonISD::COMBINE";
   1875   case HexagonISD::CONST32_GP:    return "HexagonISD::CONST32_GP";
   1876   case HexagonISD::CONST32:       return "HexagonISD::CONST32";
   1877   case HexagonISD::CP:            return "HexagonISD::CP";
   1878   case HexagonISD::DCFETCH:       return "HexagonISD::DCFETCH";
   1879   case HexagonISD::EH_RETURN:     return "HexagonISD::EH_RETURN";
   1880   case HexagonISD::TSTBIT:        return "HexagonISD::TSTBIT";
   1881   case HexagonISD::EXTRACTU:      return "HexagonISD::EXTRACTU";
   1882   case HexagonISD::INSERT:        return "HexagonISD::INSERT";
   1883   case HexagonISD::JT:            return "HexagonISD::JT";
   1884   case HexagonISD::RET_FLAG:      return "HexagonISD::RET_FLAG";
   1885   case HexagonISD::TC_RETURN:     return "HexagonISD::TC_RETURN";
   1886   case HexagonISD::VASL:          return "HexagonISD::VASL";
   1887   case HexagonISD::VASR:          return "HexagonISD::VASR";
   1888   case HexagonISD::VLSR:          return "HexagonISD::VLSR";
   1889   case HexagonISD::VEXTRACTW:     return "HexagonISD::VEXTRACTW";
   1890   case HexagonISD::VINSERTW0:     return "HexagonISD::VINSERTW0";
   1891   case HexagonISD::VROR:          return "HexagonISD::VROR";
   1892   case HexagonISD::READCYCLE:     return "HexagonISD::READCYCLE";
   1893   case HexagonISD::PTRUE:         return "HexagonISD::PTRUE";
   1894   case HexagonISD::PFALSE:        return "HexagonISD::PFALSE";
   1895   case HexagonISD::D2P:           return "HexagonISD::D2P";
   1896   case HexagonISD::P2D:           return "HexagonISD::P2D";
   1897   case HexagonISD::V2Q:           return "HexagonISD::V2Q";
   1898   case HexagonISD::Q2V:           return "HexagonISD::Q2V";
   1899   case HexagonISD::QCAT:          return "HexagonISD::QCAT";
   1900   case HexagonISD::QTRUE:         return "HexagonISD::QTRUE";
   1901   case HexagonISD::QFALSE:        return "HexagonISD::QFALSE";
   1902   case HexagonISD::TYPECAST:      return "HexagonISD::TYPECAST";
   1903   case HexagonISD::VALIGN:        return "HexagonISD::VALIGN";
   1904   case HexagonISD::VALIGNADDR:    return "HexagonISD::VALIGNADDR";
   1905   case HexagonISD::VPACKL:        return "HexagonISD::VPACKL";
   1906   case HexagonISD::VUNPACK:       return "HexagonISD::VUNPACK";
   1907   case HexagonISD::VUNPACKU:      return "HexagonISD::VUNPACKU";
   1908   case HexagonISD::ISEL:          return "HexagonISD::ISEL";
   1909   case HexagonISD::OP_END:        break;
   1910   }
   1911   return nullptr;
   1912 }
   1913 
        // Checks the alignment of a compile-time constant address.
        //
        // Does nothing unless Ptr is a ConstantSDNode. Otherwise the alignment of
        // the constant is taken from its lowest set bit; if that is smaller than
        // NeedAlign, a message is built (including the debug location from dl when
        // one is present) and reported through report_fatal_error.
   1914 void
   1915 HexagonTargetLowering::validateConstPtrAlignment(SDValue Ptr, const SDLoc &dl,
   1916       unsigned NeedAlign) const {
   1917   auto *CA = dyn_cast<ConstantSDNode>(Ptr);
   1918   if (!CA)
   1919     return;
          // NOTE(review): getZExtValue() is narrowed to unsigned here; presumably
          // fine because addresses are 32 bits wide -- confirm.
   1920   unsigned Addr = CA->getZExtValue();
          // A zero address is treated as having exactly the required alignment, so
          // it is never reported.
   1921   unsigned HaveAlign = Addr != 0 ? 1u << countTrailingZeros(Addr) : NeedAlign;
   1922   if (HaveAlign < NeedAlign) {
   1923     std::string ErrMsg;
   1924     raw_string_ostream O(ErrMsg);
   1925     O << "Misaligned constant address: " << format_hex(Addr, 10)
   1926       << " has alignment " << HaveAlign
   1927       << ", but the memory access requires " << NeedAlign;
            // Append the source location only when dl carries one.
   1928     if (DebugLoc DL = dl.getDebugLoc())
   1929       DL.print(O << ", at ");
   1930     report_fatal_error(O.str());
   1931   }
   1932 }
   1933 
   1934 // Bit-reverse Load Intrinsic: Check if the instruction is a bit reverse load
   1935 // intrinsic.
        // Inst is converted with cast<>, so it must already be an IntrinsicInst.
        // Returns true for the six hexagon_L2_load*_pbr intrinsic IDs listed below.
   1936 static bool isBrevLdIntrinsic(const Value *Inst) {
   1937   unsigned ID = cast<IntrinsicInst>(Inst)->getIntrinsicID();
   1938   return (ID == Intrinsic::hexagon_L2_loadrd_pbr ||
   1939           ID == Intrinsic::hexagon_L2_loadri_pbr ||
   1940           ID == Intrinsic::hexagon_L2_loadrh_pbr ||
   1941           ID == Intrinsic::hexagon_L2_loadruh_pbr ||
   1942           ID == Intrinsic::hexagon_L2_loadrb_pbr ||
   1943           ID == Intrinsic::hexagon_L2_loadrub_pbr);
   1944 }
   1945 
   1946 // Bit-reverse Load Intrinsic: Crawl up and figure out the object from previous
   1947 // instruction. So far we only handle bitcast, extract value and bit reverse
   1948 // load intrinsic instructions. Should we handle CGEP ?
        // Performs a single step: returns operand 0 of V when V is one of the
        // handled kinds, and V itself otherwise.
   1949 static Value *getBrevLdObject(Value *V) {
   1950   if (Operator::getOpcode(V) == Instruction::ExtractValue ||
   1951       Operator::getOpcode(V) == Instruction::BitCast)
   1952     V = cast<Operator>(V)->getOperand(0);
          // The isa<> check guards the cast<> inside isBrevLdIntrinsic.
   1953   else if (isa<IntrinsicInst>(V) && isBrevLdIntrinsic(V))
   1954     V = cast<Instruction>(V)->getOperand(0);
   1955   return V;
   1956 }
   1957 
   1958 // Bit-reverse Load Intrinsic: For a PHI Node return either an incoming edge or
   1959 // a back edge. If the back edge comes from the intrinsic itself, the incoming
   1960 // edge is returned.
        // A "back edge" here is an incoming value whose block is the PHI's own
        // parent block. IntrBaseVal is the value the caller started its walk from.
   1961 static Value *returnEdge(const PHINode *PN, Value *IntrBaseVal) {
   1962   const BasicBlock *Parent = PN->getParent();
          // Index of the chosen incoming value; -1 until one is selected.
   1963   int Idx = -1;
   1964   for (unsigned i = 0, e = PN->getNumIncomingValues(); i < e; ++i) {
   1965     BasicBlock *Blk = PN->getIncomingBlock(i);
   1966     // Determine if the back edge is originated from intrinsic.
   1967     if (Blk == Parent) {
   1968       Value *BackEdgeVal = PN->getIncomingValue(i);
   1969       Value *BaseVal;
   1970       // Loop over till we return the same Value or we hit the IntrBaseVal.
   1971       do {
   1972         BaseVal = BackEdgeVal;
   1973         BackEdgeVal = getBrevLdObject(BackEdgeVal);
   1974       } while ((BaseVal != BackEdgeVal) && (IntrBaseVal != BackEdgeVal));
   1975       // If the getBrevLdObject returns IntrBaseVal, we should return the
   1976       // incoming edge.
   1977       if (IntrBaseVal == BackEdgeVal)
   1978         continue;
              // A back edge that does not lead to IntrBaseVal is chosen and ends
              // the search.
   1979       Idx = i;
   1980       break;
   1981     } else // Set the node to incoming edge.
              // Later non-back-edge entries overwrite earlier ones.
   1982       Idx = i;
   1983   }
   1984   assert(Idx >= 0 && "Unexpected index to incoming argument in PHI");
   1985   return PN->getIncomingValue(Idx);
   1986 }
   1987 
   1988 // Bit-reverse Load Intrinsic: Figure out the underlying object the base
   1989 // pointer points to, for the bit-reverse load intrinsic. Setting this to
   1990 // memoperand might help alias analysis to figure out the dependencies.
   1991 static Value *getUnderLyingObjectForBrevLdIntr(Value *V) {
   1992   Value *IntrBaseVal = V;
   1993   Value *BaseVal;
   1994   // Loop over till we return the same Value, implies we either figure out
   1995   // the object or we hit a PHI
   1996   do {
   1997     BaseVal = V;
   1998     V = getBrevLdObject(V);
   1999   } while (BaseVal != V);
   2000 
   2001   // Identify the object from PHINode.
   2002   if (const PHINode *PN = dyn_cast<PHINode>(V))
   2003     return returnEdge(PN, IntrBaseVal);
   2004   // For non PHI nodes, the object is the last value returned by getBrevLdObject
   2005   else
   2006     return V;
   2007 }
   2008 
   2009 /// Given an intrinsic, checks if on the target the intrinsic will need to map
   2010 /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
   2011 /// true and store the intrinsic information into the IntrinsicInfo that was
   2012 /// passed to the function.
   2013 bool HexagonTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info,
   2014                                                const CallInst &I,
   2015                                                MachineFunction &MF,
   2016                                                unsigned Intrinsic) const {
   2017   switch (Intrinsic) {
   2018   case Intrinsic::hexagon_L2_loadrd_pbr:
   2019   case Intrinsic::hexagon_L2_loadri_pbr:
   2020   case Intrinsic::hexagon_L2_loadrh_pbr:
   2021   case Intrinsic::hexagon_L2_loadruh_pbr:
   2022   case Intrinsic::hexagon_L2_loadrb_pbr:
   2023   case Intrinsic::hexagon_L2_loadrub_pbr: {
   2024     Info.opc = ISD::INTRINSIC_W_CHAIN;
   2025     auto &DL = I.getCalledFunction()->getParent()->getDataLayout();
   2026     auto &Cont = I.getCalledFunction()->getParent()->getContext();
   2027     // The intrinsic function call is of the form { ElTy, i8* }
   2028     // @llvm.hexagon.L2.loadXX.pbr(i8*, i32). The pointer and memory access type
   2029     // should be derived from ElTy.
   2030     Type *ElTy = I.getCalledFunction()->getReturnType()->getStructElementType(0);
   2031     Info.memVT = MVT::getVT(ElTy);
   2032     llvm::Value *BasePtrVal = I.getOperand(0);
   2033     Info.ptrVal = getUnderLyingObjectForBrevLdIntr(BasePtrVal);
   2034     // The offset value comes through Modifier register. For now, assume the
   2035     // offset is 0.
   2036     Info.offset = 0;
   2037     Info.align = DL.getABITypeAlign(Info.memVT.getTypeForEVT(Cont));
   2038     Info.flags = MachineMemOperand::MOLoad;
   2039     return true;
   2040   }
   2041   case Intrinsic::hexagon_V6_vgathermw:
   2042   case Intrinsic::hexagon_V6_vgathermw_128B:
   2043   case Intrinsic::hexagon_V6_vgathermh:
   2044   case Intrinsic::hexagon_V6_vgathermh_128B:
   2045   case Intrinsic::hexagon_V6_vgathermhw:
   2046   case Intrinsic::hexagon_V6_vgathermhw_128B:
   2047   case Intrinsic::hexagon_V6_vgathermwq:
   2048   case Intrinsic::hexagon_V6_vgathermwq_128B:
   2049   case Intrinsic::hexagon_V6_vgathermhq:
   2050   case Intrinsic::hexagon_V6_vgathermhq_128B:
   2051   case Intrinsic::hexagon_V6_vgathermhwq:
   2052   case Intrinsic::hexagon_V6_vgathermhwq_128B: {
   2053     const Module &M = *I.getParent()->getParent()->getParent();
   2054     Info.opc = ISD::INTRINSIC_W_CHAIN;
   2055     Type *VecTy = I.getArgOperand(1)->getType();
   2056     Info.memVT = MVT::getVT(VecTy);
   2057     Info.ptrVal = I.getArgOperand(0);
   2058     Info.offset = 0;
   2059     Info.align =
   2060         MaybeAlign(M.getDataLayout().getTypeAllocSizeInBits(VecTy) / 8);
   2061     Info.flags = MachineMemOperand::MOLoad |
   2062                  MachineMemOperand::MOStore |
   2063                  MachineMemOperand::MOVolatile;
   2064     return true;
   2065   }
   2066   default:
   2067     break;
   2068   }
   2069   return false;
   2070 }
   2071 
   2072 bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const {
   2073   return X.getValueType().isScalarInteger(); // 'tstbit'
   2074 }
   2075 
   2076 bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
   2077   return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2));
   2078 }
   2079 
   2080 bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
   2081   if (!VT1.isSimple() || !VT2.isSimple())
   2082     return false;
   2083   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
   2084 }
   2085 
   2086 bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
   2087     const MachineFunction &MF, EVT VT) const {
   2088   return isOperationLegalOrCustom(ISD::FMA, VT);
   2089 }
   2090 
   2091 // Should we expand the build vector with shuffles?
   2092 bool HexagonTargetLowering::shouldExpandBuildVectorWithShuffles(EVT VT,
   2093       unsigned DefinedValues) const {
   2094   return false;
   2095 }
   2096 
   2097 bool HexagonTargetLowering::isShuffleMaskLegal(ArrayRef<int> Mask,
   2098                                                EVT VT) const {
   2099   return true;
   2100 }
   2101 
   2102 TargetLoweringBase::LegalizeTypeAction
   2103 HexagonTargetLowering::getPreferredVectorAction(MVT VT) const {
   2104   unsigned VecLen = VT.getVectorMinNumElements();
   2105   MVT ElemTy = VT.getVectorElementType();
   2106 
   2107   if (VecLen == 1 || VT.isScalableVector())
   2108     return TargetLoweringBase::TypeScalarizeVector;
   2109 
   2110   if (Subtarget.useHVXOps()) {
   2111     unsigned Action = getPreferredHvxVectorAction(VT);
   2112     if (Action != ~0u)
   2113       return static_cast<TargetLoweringBase::LegalizeTypeAction>(Action);
   2114   }
   2115 
   2116   // Always widen (remaining) vectors of i1.
   2117   if (ElemTy == MVT::i1)
   2118     return TargetLoweringBase::TypeWidenVector;
   2119 
   2120   return TargetLoweringBase::TypeSplitVector;
   2121 }
   2122 
   2123 std::pair<SDValue, int>
   2124 HexagonTargetLowering::getBaseAndOffset(SDValue Addr) const {
   2125   if (Addr.getOpcode() == ISD::ADD) {
   2126     SDValue Op1 = Addr.getOperand(1);
   2127     if (auto *CN = dyn_cast<const ConstantSDNode>(Op1.getNode()))
   2128       return { Addr.getOperand(0), CN->getSExtValue() };
   2129   }
   2130   return { Addr, 0 };
   2131 }
   2132 
   2133 // Lower a vector shuffle (V1, V2, V3).  V1 and V2 are the two vectors
   2134 // to select data from, V3 is the permutation.
        //
        // Only non-HVX vectors of at most 64 bits are expected here (asserted
        // below). The mask is normalized, rewritten as a per-byte mask, and then
        // compared against a fixed list of byte patterns that have a dedicated
        // lowering. The function returns:
        // - UNDEF when every mask index is negative,
        // - the lowered value when one of the patterns matches,
        // - an empty SDValue otherwise (including when an operand's type differs
        //   from the result type), so the default expansion is used.
   2135 SDValue
   2136 HexagonTargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG)
   2137       const {
   2138   const auto *SVN = cast<ShuffleVectorSDNode>(Op);
   2139   ArrayRef<int> AM = SVN->getMask();
   2140   assert(AM.size() <= 8 && "Unexpected shuffle mask");
   2141   unsigned VecLen = AM.size();
   2142 
   2143   MVT VecTy = ty(Op);
   2144   assert(!Subtarget.isHVXVectorType(VecTy, true) &&
   2145          "HVX shuffles should be legal");
   2146   assert(VecTy.getSizeInBits() <= 64 && "Unexpected vector length");
   2147 
   2148   SDValue Op0 = Op.getOperand(0);
   2149   SDValue Op1 = Op.getOperand(1);
   2150   const SDLoc &dl(Op);
   2151 
   2152   // If the inputs are not the same as the output, bail. This is not an
   2153   // error situation, but complicates the handling and the default expansion
   2154   // (into BUILD_VECTOR) should be adequate.
   2155   if (ty(Op0) != VecTy || ty(Op1) != VecTy)
   2156     return SDValue();
   2157 
   2158   // Normalize the mask so that the first non-negative index comes from
   2159   // the first operand.
   2160   SmallVector<int,8> Mask(AM.begin(), AM.end());
          // F is the position of the first non-negative entry in the original
          // mask, or AM.size() if there is none.
   2161   unsigned F = llvm::find_if(AM, [](int M) { return M >= 0; }) - AM.data();
   2162   if (F == AM.size())
   2163     return DAG.getUNDEF(VecTy);
          // An index >= VecLen selects from the second operand: commute the mask
          // copy and swap the operands so that it selects from the first.
   2164   if (AM[F] >= int(VecLen)) {
   2165     ShuffleVectorSDNode::commuteMask(Mask);
   2166     std::swap(Op0, Op1);
   2167   }
   2168 
   2169   // Express the shuffle mask in terms of bytes.
   2170   SmallVector<int,8> ByteMask;
   2171   unsigned ElemBytes = VecTy.getVectorElementType().getSizeInBits() / 8;
   2172   for (unsigned i = 0, e = Mask.size(); i != e; ++i) {
   2173     int M = Mask[i];
   2174     if (M < 0) {
              // An undefined element makes all of its bytes undefined.
   2175       for (unsigned j = 0; j != ElemBytes; ++j)
   2176         ByteMask.push_back(-1);
   2177     } else {
              // Element M covers bytes [M*ElemBytes, M*ElemBytes + ElemBytes).
   2178       for (unsigned j = 0; j != ElemBytes; ++j)
   2179         ByteMask.push_back(M*ElemBytes + j);
   2180     }
   2181   }
   2182   assert(ByteMask.size() <= 8);
   2183 
   2184   // All non-undef (non-negative) indexes are well within [0..127], so they
   2185   // fit in a single byte. Build two 64-bit words:
   2186   // - MaskIdx where each byte is the corresponding index (for non-negative
   2187   //   indexes), and 0xFF for negative indexes, and
   2188   // - MaskUnd that has 0xFF for each negative index.
          // Byte i of the mask is placed in bits [8*i, 8*i+7], so the least
          // significant byte of each word describes result byte 0.
   2189   uint64_t MaskIdx = 0;
   2190   uint64_t MaskUnd = 0;
   2191   for (unsigned i = 0, e = ByteMask.size(); i != e; ++i) {
   2192     unsigned S = 8*i;
   2193     uint64_t M = ByteMask[i] & 0xFF;
   2194     if (M == 0xFF)
   2195       MaskUnd |= M << S;
   2196     MaskIdx |= M << S;
   2197   }
   2198 
          // In every comparison below the pattern is or-ed with MaskUnd. That
          // sets the undefined bytes to 0xFF on both sides, so an undefined byte
          // matches any pattern byte.

          // 4-byte results.
   2199   if (ByteMask.size() == 4) {
   2200     // Identity.
   2201     if (MaskIdx == (0x03020100 | MaskUnd))
   2202       return Op0;
   2203     // Byte swap.
   2204     if (MaskIdx == (0x00010203 | MaskUnd)) {
   2205       SDValue T0 = DAG.getBitcast(MVT::i32, Op0);
   2206       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i32, T0);
   2207       return DAG.getBitcast(VecTy, T1);
   2208     }
   2209 
   2210     // Byte packs.
            // Each COMBINE node below is created before its patterns are tested,
            // i.e. also when neither of them ends up matching.
   2211     SDValue Concat10 = DAG.getNode(HexagonISD::COMBINE, dl,
   2212                                    typeJoin({ty(Op1), ty(Op0)}), {Op1, Op0});
   2213     if (MaskIdx == (0x06040200 | MaskUnd))
   2214       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat10}, DAG);
   2215     if (MaskIdx == (0x07050301 | MaskUnd))
   2216       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat10}, DAG);
   2217 
            // Same two instructions with the operands concatenated in the
            // opposite order.
   2218     SDValue Concat01 = DAG.getNode(HexagonISD::COMBINE, dl,
   2219                                    typeJoin({ty(Op0), ty(Op1)}), {Op0, Op1});
   2220     if (MaskIdx == (0x02000604 | MaskUnd))
   2221       return getInstr(Hexagon::S2_vtrunehb, dl, VecTy, {Concat01}, DAG);
   2222     if (MaskIdx == (0x03010705 | MaskUnd))
   2223       return getInstr(Hexagon::S2_vtrunohb, dl, VecTy, {Concat01}, DAG);
   2224   }
   2225 
          // 8-byte results.
   2226   if (ByteMask.size() == 8) {
   2227     // Identity.
   2228     if (MaskIdx == (0x0706050403020100ull | MaskUnd))
   2229       return Op0;
   2230     // Byte swap.
   2231     if (MaskIdx == (0x0001020304050607ull | MaskUnd)) {
   2232       SDValue T0 = DAG.getBitcast(MVT::i64, Op0);
   2233       SDValue T1 = DAG.getNode(ISD::BSWAP, dl, MVT::i64, T0);
   2234       return DAG.getBitcast(VecTy, T1);
   2235     }
   2236 
   2237     // Halfword picks.
   2238     if (MaskIdx == (0x0d0c050409080100ull | MaskUnd))
   2239       return getInstr(Hexagon::S2_shuffeh, dl, VecTy, {Op1, Op0}, DAG);
   2240     if (MaskIdx == (0x0f0e07060b0a0302ull | MaskUnd))
   2241       return getInstr(Hexagon::S2_shuffoh, dl, VecTy, {Op1, Op0}, DAG);
   2242     if (MaskIdx == (0x0d0c090805040100ull | MaskUnd))
   2243       return getInstr(Hexagon::S2_vtrunewh, dl, VecTy, {Op1, Op0}, DAG);
   2244     if (MaskIdx == (0x0f0e0b0a07060302ull | MaskUnd))
   2245       return getInstr(Hexagon::S2_vtrunowh, dl, VecTy, {Op1, Op0}, DAG);
            // This pattern only references bytes of Op0, which is split into
            // two halves that are passed as (second, first).
   2246     if (MaskIdx == (0x0706030205040100ull | MaskUnd)) {
   2247       VectorPair P = opSplit(Op0, dl, DAG);
   2248       return getInstr(Hexagon::S2_packhl, dl, VecTy, {P.second, P.first}, DAG);
   2249     }
   2250 
   2251     // Byte packs.
   2252     if (MaskIdx == (0x0e060c040a020800ull | MaskUnd))
   2253       return getInstr(Hexagon::S2_shuffeb, dl, VecTy, {Op1, Op0}, DAG);
   2254     if (MaskIdx == (0x0f070d050b030901ull | MaskUnd))
   2255       return getInstr(Hexagon::S2_shuffob, dl, VecTy, {Op1, Op0}, DAG);
   2256   }
   2257 
          // No special pattern matched.
   2258   return SDValue();
   2259 }
   2260 
   2261 // Create a Hexagon-specific node for shifting a vector by an integer.
   2262 SDValue
   2263 HexagonTargetLowering::getVectorShiftByInt(SDValue Op, SelectionDAG &DAG)
   2264       const {
   2265   unsigned NewOpc;
   2266   switch (Op.getOpcode()) {
   2267     case ISD::SHL:
   2268       NewOpc = HexagonISD::VASL;
   2269       break;
   2270     case ISD::SRA:
   2271       NewOpc = HexagonISD::VASR;
   2272       break;
   2273     case ISD::SRL:
   2274       NewOpc = HexagonISD::VLSR;
   2275       break;
   2276     default:
   2277       llvm_unreachable("Unexpected shift opcode");
   2278   }
   2279 
   2280   SDValue Op0 = Op.getOperand(0);
   2281   SDValue Op1 = Op.getOperand(1);
   2282   const SDLoc &dl(Op);
   2283 
   2284   switch (Op1.getOpcode()) {
   2285     case ISD::BUILD_VECTOR:
   2286       if (SDValue S = cast<BuildVectorSDNode>(Op1)->getSplatValue())
   2287         return DAG.getNode(NewOpc, dl, ty(Op), Op0, S);
   2288       break;
   2289     case ISD::SPLAT_VECTOR:
   2290       return DAG.getNode(NewOpc, dl, ty(Op), Op0, Op1.getOperand(0));
   2291   }
   2292   return SDValue();
   2293 }
   2294 
   2295 SDValue
   2296 HexagonTargetLowering::LowerVECTOR_SHIFT(SDValue Op, SelectionDAG &DAG) const {
   2297   return getVectorShiftByInt(Op, DAG);
   2298 }
   2299 
   2300 SDValue
   2301 HexagonTargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
   2302   if (isa<ConstantSDNode>(Op.getOperand(1).getNode()))
   2303     return Op;
   2304   return SDValue();
   2305 }
   2306 
   2307 SDValue
   2308 HexagonTargetLowering::LowerBITCAST(SDValue Op, SelectionDAG &DAG) const {
   2309   MVT ResTy = ty(Op);
   2310   SDValue InpV = Op.getOperand(0);
   2311   MVT InpTy = ty(InpV);
   2312   assert(ResTy.getSizeInBits() == InpTy.getSizeInBits());
   2313   const SDLoc &dl(Op);
   2314 
   2315   // Handle conversion from i8 to v8i1.
   2316   if (InpTy == MVT::i8) {
   2317     if (ResTy == MVT::v8i1) {
   2318       SDValue Sc = DAG.getBitcast(tyScalar(InpTy), InpV);
   2319       SDValue Ext = DAG.getZExtOrTrunc(Sc, dl, MVT::i32);
   2320       return getInstr(Hexagon::C2_tfrrp, dl, ResTy, Ext, DAG);
   2321     }
   2322     return SDValue();
   2323   }
   2324 
   2325   return Op;
   2326 }
   2327 
   2328 bool
   2329 HexagonTargetLowering::getBuildVectorConstInts(ArrayRef<SDValue> Values,
   2330       MVT VecTy, SelectionDAG &DAG,
   2331       MutableArrayRef<ConstantInt*> Consts) const {
   2332   MVT ElemTy = VecTy.getVectorElementType();
   2333   unsigned ElemWidth = ElemTy.getSizeInBits();
   2334   IntegerType *IntTy = IntegerType::get(*DAG.getContext(), ElemWidth);
   2335   bool AllConst = true;
   2336 
   2337   for (unsigned i = 0, e = Values.size(); i != e; ++i) {
   2338     SDValue V = Values[i];
   2339     if (V.isUndef()) {
   2340       Consts[i] = ConstantInt::get(IntTy, 0);
   2341       continue;
   2342     }
   2343     // Make sure to always cast to IntTy.
   2344     if (auto *CN = dyn_cast<ConstantSDNode>(V.getNode())) {
   2345       const ConstantInt *CI = CN->getConstantIntValue();
   2346       Consts[i] = ConstantInt::get(IntTy, CI->getValue().getSExtValue());
   2347     } else if (auto *CN = dyn_cast<ConstantFPSDNode>(V.getNode())) {
   2348       const ConstantFP *CF = CN->getConstantFPValue();
   2349       APInt A = CF->getValueAPF().bitcastToAPInt();
   2350       Consts[i] = ConstantInt::get(IntTy, A.getZExtValue());
   2351     } else {
   2352       AllConst = false;
   2353     }
   2354   }
   2355   return AllConst;
   2356 }
   2357 
   2358 SDValue
   2359 HexagonTargetLowering::buildVector32(ArrayRef<SDValue> Elem, const SDLoc &dl,
   2360                                      MVT VecTy, SelectionDAG &DAG) const {
   2361   MVT ElemTy = VecTy.getVectorElementType();
   2362   assert(VecTy.getVectorNumElements() == Elem.size());
   2363 
   2364   SmallVector<ConstantInt*,4> Consts(Elem.size());
   2365   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
   2366 
   2367   unsigned First, Num = Elem.size();
   2368   for (First = 0; First != Num; ++First) {
   2369     if (!isUndef(Elem[First]))
   2370       break;
   2371   }
   2372   if (First == Num)
   2373     return DAG.getUNDEF(VecTy);
   2374 
   2375   if (AllConst &&
   2376       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
   2377     return getZero(dl, VecTy, DAG);
   2378 
   2379   if (ElemTy == MVT::i16) {
   2380     assert(Elem.size() == 2);
   2381     if (AllConst) {
   2382       uint32_t V = (Consts[0]->getZExtValue() & 0xFFFF) |
   2383                    Consts[1]->getZExtValue() << 16;
   2384       return DAG.getBitcast(MVT::v2i16, DAG.getConstant(V, dl, MVT::i32));
   2385     }
   2386     SDValue N = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32,
   2387                          {Elem[1], Elem[0]}, DAG);
   2388     return DAG.getBitcast(MVT::v2i16, N);
   2389   }
   2390 
   2391   if (ElemTy == MVT::i8) {
   2392     // First try generating a constant.
   2393     if (AllConst) {
   2394       int32_t V = (Consts[0]->getZExtValue() & 0xFF) |
   2395                   (Consts[1]->getZExtValue() & 0xFF) << 8 |
   2396                   (Consts[1]->getZExtValue() & 0xFF) << 16 |
   2397                   Consts[2]->getZExtValue() << 24;
   2398       return DAG.getBitcast(MVT::v4i8, DAG.getConstant(V, dl, MVT::i32));
   2399     }
   2400 
   2401     // Then try splat.
   2402     bool IsSplat = true;
   2403     for (unsigned i = First+1; i != Num; ++i) {
   2404       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
   2405         continue;
   2406       IsSplat = false;
   2407       break;
   2408     }
   2409     if (IsSplat) {
   2410       // Legalize the operand of SPLAT_VECTOR.
   2411       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
   2412       return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
   2413     }
   2414 
   2415     // Generate
   2416     //   (zxtb(Elem[0]) | (zxtb(Elem[1]) << 8)) |
   2417     //   (zxtb(Elem[2]) | (zxtb(Elem[3]) << 8)) << 16
   2418     assert(Elem.size() == 4);
   2419     SDValue Vs[4];
   2420     for (unsigned i = 0; i != 4; ++i) {
   2421       Vs[i] = DAG.getZExtOrTrunc(Elem[i], dl, MVT::i32);
   2422       Vs[i] = DAG.getZeroExtendInReg(Vs[i], dl, MVT::i8);
   2423     }
   2424     SDValue S8 = DAG.getConstant(8, dl, MVT::i32);
   2425     SDValue T0 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[1], S8});
   2426     SDValue T1 = DAG.getNode(ISD::SHL, dl, MVT::i32, {Vs[3], S8});
   2427     SDValue B0 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[0], T0});
   2428     SDValue B1 = DAG.getNode(ISD::OR, dl, MVT::i32, {Vs[2], T1});
   2429 
   2430     SDValue R = getInstr(Hexagon::A2_combine_ll, dl, MVT::i32, {B1, B0}, DAG);
   2431     return DAG.getBitcast(MVT::v4i8, R);
   2432   }
   2433 
   2434 #ifndef NDEBUG
   2435   dbgs() << "VecTy: " << EVT(VecTy).getEVTString() << '\n';
   2436 #endif
   2437   llvm_unreachable("Unexpected vector element type");
   2438 }
   2439 
   2440 SDValue
   2441 HexagonTargetLowering::buildVector64(ArrayRef<SDValue> Elem, const SDLoc &dl,
   2442                                      MVT VecTy, SelectionDAG &DAG) const {
   2443   MVT ElemTy = VecTy.getVectorElementType();
   2444   assert(VecTy.getVectorNumElements() == Elem.size());
   2445 
   2446   SmallVector<ConstantInt*,8> Consts(Elem.size());
   2447   bool AllConst = getBuildVectorConstInts(Elem, VecTy, DAG, Consts);
   2448 
   2449   unsigned First, Num = Elem.size();
   2450   for (First = 0; First != Num; ++First) {
   2451     if (!isUndef(Elem[First]))
   2452       break;
   2453   }
   2454   if (First == Num)
   2455     return DAG.getUNDEF(VecTy);
   2456 
   2457   if (AllConst &&
   2458       llvm::all_of(Consts, [](ConstantInt *CI) { return CI->isZero(); }))
   2459     return getZero(dl, VecTy, DAG);
   2460 
   2461   // First try splat if possible.
   2462   if (ElemTy == MVT::i16) {
   2463     bool IsSplat = true;
   2464     for (unsigned i = First+1; i != Num; ++i) {
   2465       if (Elem[i] == Elem[First] || isUndef(Elem[i]))
   2466         continue;
   2467       IsSplat = false;
   2468       break;
   2469     }
   2470     if (IsSplat) {
   2471       // Legalize the operand of SPLAT_VECTOR
   2472       SDValue Ext = DAG.getZExtOrTrunc(Elem[First], dl, MVT::i32);
   2473       return DAG.getNode(ISD::SPLAT_VECTOR, dl, VecTy, Ext);
   2474     }
   2475   }
   2476 
   2477   // Then try constant.
   2478   if (AllConst) {
   2479     uint64_t Val = 0;
   2480     unsigned W = ElemTy.getSizeInBits();
   2481     uint64_t Mask = (ElemTy == MVT::i8)  ? 0xFFull
   2482                   : (ElemTy == MVT::i16) ? 0xFFFFull : 0xFFFFFFFFull;
   2483     for (unsigned i = 0; i != Num; ++i)
   2484       Val = (Val << W) | (Consts[Num-1-i]->getZExtValue() & Mask);
   2485     SDValue V0 = DAG.getConstant(Val, dl, MVT::i64);
   2486     return DAG.getBitcast(VecTy, V0);
   2487   }
   2488 
   2489   // Build two 32-bit vectors and concatenate.
   2490   MVT HalfTy = MVT::getVectorVT(ElemTy, Num/2);
   2491   SDValue L = (ElemTy == MVT::i32)
   2492                 ? Elem[0]
   2493                 : buildVector32(Elem.take_front(Num/2), dl, HalfTy, DAG);
   2494   SDValue H = (ElemTy == MVT::i32)
   2495                 ? Elem[1]
   2496                 : buildVector32(Elem.drop_front(Num/2), dl, HalfTy, DAG);
   2497   return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, {H, L});
   2498 }
   2499 
        // Extract a value of type ValTy from the vector VecV, starting at
        // element index IdxV, and return it with type ResTy.
        // - VecV:  source vector: either a boolean vector with 2, 4 or 8
        //          elements, or a 32- or 64-bit vector (both asserted below).
        // - IdxV:  index in units of VecV's elements; may be a constant or a
        //          run-time value.
        // - ValTy: type of the extracted value. If it is a vector, its element
        //          type must be the element type of VecV.
        // - ResTy: type of the returned value.
   2500 SDValue
   2501 HexagonTargetLowering::extractVector(SDValue VecV, SDValue IdxV,
   2502                                      const SDLoc &dl, MVT ValTy, MVT ResTy,
   2503                                      SelectionDAG &DAG) const {
   2504   MVT VecTy = ty(VecV);
   2505   assert(!ValTy.isVector() ||
   2506          VecTy.getVectorElementType() == ValTy.getVectorElementType());
   2507   unsigned VecWidth = VecTy.getSizeInBits();
   2508   unsigned ValWidth = ValTy.getSizeInBits();
   2509   unsigned ElemWidth = VecTy.getVectorElementType().getSizeInBits();
   2510   assert((VecWidth % ElemWidth) == 0);
          // IdxN is null when the index is not a compile-time constant.
   2511   auto *IdxN = dyn_cast<ConstantSDNode>(IdxV);
   2512 
   2513   // Special case for v{8,4,2}i1 (the only boolean vectors legal in Hexagon
   2514   // without any coprocessors).
   2515   if (ElemWidth == 1) {
   2516     assert(VecWidth == VecTy.getVectorNumElements() && "Sanity failure");
   2517     assert(VecWidth == 8 || VecWidth == 4 || VecWidth == 2);
   2518     // Check if this is an extract of the lowest bit.
   2519     if (IdxN) {
   2520       // Extracting the lowest bit is a no-op, but it changes the type,
   2521       // so it must be kept as an operation to avoid errors related to
   2522       // type mismatches.
   2523       if (IdxN->isNullValue() && ValTy.getSizeInBits() == 1)
   2524         return DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, VecV);
   2525     }
   2526 
   2527     // If the value extracted is a single bit, use tstbit.
            // The predicate is copied to a 32-bit register and the tested bit
            // position is Idx * (8 / number of elements).
   2528     if (ValWidth == 1) {
   2529       SDValue A0 = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32, {VecV}, DAG);
   2530       SDValue M0 = DAG.getConstant(8 / VecWidth, dl, MVT::i32);
   2531       SDValue I0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, M0);
   2532       return DAG.getNode(HexagonISD::TSTBIT, dl, MVT::i1, A0, I0);
   2533     }
   2534 
   2535     // Each bool vector (v2i1, v4i1, v8i1) always occupies 8 bits in
   2536     // a predicate register. The elements of the vector are repeated
   2537     // in the register (if necessary) so that the total number is 8.
   2538     // The extracted subvector will need to be expanded in such a way.
   2539     unsigned Scale = VecWidth / ValWidth;
   2540 
   2541     // Generate (p2d VecV) >> 8*Idx to move the interesting bytes to
   2542     // position 0.
   2543     assert(ty(IdxV) == MVT::i32);
   2544     unsigned VecRep = 8 / VecWidth;
   2545     SDValue S0 = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
   2546                              DAG.getConstant(8*VecRep, dl, MVT::i32));
   2547     SDValue T0 = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
   2548     SDValue T1 = DAG.getNode(ISD::SRL, dl, MVT::i64, T0, S0);
            // One expansion step is done for every halving of Scale.
   2549     while (Scale > 1) {
   2550       // The longest possible subvector is at most 32 bits, so it is always
   2551       // contained in the low subregister.
   2552       T1 = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, T1);
   2553       T1 = expandPredicate(T1, dl, DAG);
   2554       Scale /= 2;
   2555     }
   2556 
   2557     return DAG.getNode(HexagonISD::D2P, dl, ResTy, T1);
   2558   }
   2559 
   2560   assert(VecWidth == 32 || VecWidth == 64);
   2561 
   2562   // Cast everything to scalar integer types.
   2563   MVT ScalarTy = tyScalar(VecTy);
   2564   VecV = DAG.getBitcast(ScalarTy, VecV);
   2565 
   2566   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
   2567   SDValue ExtV;
   2568 
   2569   if (IdxN) {
            // Constant index: Off is the bit offset of the extracted value.
   2570     unsigned Off = IdxN->getZExtValue() * ElemWidth;
   2571     if (VecWidth == 64 && ValWidth == 32) {
              // A 32-bit half of a 64-bit vector is a subregister.
   2572       assert(Off == 0 || Off == 32);
   2573       unsigned SubIdx = Off == 0 ? Hexagon::isub_lo : Hexagon::isub_hi;
   2574       ExtV = DAG.getTargetExtractSubreg(SubIdx, dl, MVT::i32, VecV);
   2575     } else if (Off == 0 && (ValWidth % 8) == 0) {
              // A byte-multiple value at offset 0 only needs the bits above it
              // cleared.
   2576       ExtV = DAG.getZeroExtendInReg(VecV, dl, tyScalar(ValTy));
   2577     } else {
   2578       SDValue OffV = DAG.getConstant(Off, dl, MVT::i32);
   2579       // The return type of EXTRACTU must be the same as the type of the
   2580       // input vector.
   2581       ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
   2582                          {VecV, WidthV, OffV});
   2583     }
   2584   } else {
            // Run-time index: compute the bit offset as Idx * ElemWidth.
   2585     if (ty(IdxV) != MVT::i32)
   2586       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
   2587     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
   2588                                DAG.getConstant(ElemWidth, dl, MVT::i32));
   2589     ExtV = DAG.getNode(HexagonISD::EXTRACTU, dl, ScalarTy,
   2590                        {VecV, WidthV, OffV});
   2591   }
   2592 
   2593   // Cast ExtV to the requested result type.
   2594   ExtV = DAG.getZExtOrTrunc(ExtV, dl, tyScalar(ResTy));
   2595   ExtV = DAG.getBitcast(ResTy, ExtV);
   2596   return ExtV;
   2597 }
   2598 
        // Insert the value ValV into the vector VecV at index IdxV and return
        // the resulting vector, which has the type of VecV.
        // - VecV:  vector to insert into: either a vector of i1, or a 32- or
        //          64-bit vector (asserted below).
        // - ValV:  value to insert; its actual type may differ from ValTy.
        // - IdxV:  index; may be a constant or a run-time value. For non-boolean
        //          vectors the bit offset of the insertion is Idx * width of
        //          ValTy.
        // - ValTy: type of the inserted value in terms of the vector type. It is
        //          not used for vectors of i1 (see the note in that branch).
   2599 SDValue
   2600 HexagonTargetLowering::insertVector(SDValue VecV, SDValue ValV, SDValue IdxV,
   2601                                     const SDLoc &dl, MVT ValTy,
   2602                                     SelectionDAG &DAG) const {
   2603   MVT VecTy = ty(VecV);
   2604   if (VecTy.getVectorElementType() == MVT::i1) {
            // NOTE: this local shadows the ValTy parameter; within this branch
            // ValTy is the actual type of ValV.
   2605     MVT ValTy = ty(ValV);
   2606     assert(ValTy.getVectorElementType() == MVT::i1);
   2607     SDValue ValR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, ValV);
   2608     unsigned VecLen = VecTy.getVectorNumElements();
   2609     unsigned Scale = VecLen / ValTy.getVectorNumElements();
   2610     assert(Scale > 1);
   2611 
            // One contraction step is done for every halving of Scale. After
            // each step the 32-bit result is widened back to 64 bits with an
            // undefined upper half.
   2612     for (unsigned R = Scale; R > 1; R /= 2) {
   2613       ValR = contractPredicate(ValR, dl, DAG);
   2614       ValR = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
   2615                          DAG.getUNDEF(MVT::i32), ValR);
   2616     }
   2617     // The longest possible subvector is at most 32 bits, so it is always
   2618     // contained in the low subregister.
   2619     ValR = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, ValR);
   2620 
   2621     unsigned ValBytes = 64 / Scale;
   2622     SDValue Width = DAG.getConstant(ValBytes*8, dl, MVT::i32);
   2623     SDValue Idx = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV,
   2624                               DAG.getConstant(8, dl, MVT::i32));
   2625     SDValue VecR = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, VecV);
   2626     SDValue Ins = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
   2627                               {VecR, ValR, Width, Idx});
   2628     return DAG.getNode(HexagonISD::D2P, dl, VecTy, Ins);
   2629   }
   2630 
   2631   unsigned VecWidth = VecTy.getSizeInBits();
   2632   unsigned ValWidth = ValTy.getSizeInBits();
   2633   assert(VecWidth == 32 || VecWidth == 64);
   2634   assert((VecWidth % ValWidth) == 0);
   2635 
   2636   // Cast everything to scalar integer types.
   2637   MVT ScalarTy = MVT::getIntegerVT(VecWidth);
   2638   // The actual type of ValV may be different than ValTy (which is related
   2639   // to the vector type).
   2640   unsigned VW = ty(ValV).getSizeInBits();
   2641   ValV = DAG.getBitcast(MVT::getIntegerVT(VW), ValV);
   2642   VecV = DAG.getBitcast(ScalarTy, VecV);
          // Bring the value to the width of the vector; the extra bits are
          // any-extended, only WidthV bits are passed to INSERT as the width.
   2643   if (VW != VecWidth)
   2644     ValV = DAG.getAnyExtOrTrunc(ValV, dl, ScalarTy);
   2645 
   2646   SDValue WidthV = DAG.getConstant(ValWidth, dl, MVT::i32);
   2647   SDValue InsV;
   2648 
   2649   if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(IdxV)) {
            // Constant index: the bit offset is known at compile time.
   2650     unsigned W = C->getZExtValue() * ValWidth;
   2651     SDValue OffV = DAG.getConstant(W, dl, MVT::i32);
   2652     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
   2653                        {VecV, ValV, WidthV, OffV});
   2654   } else {
            // Run-time index: compute the bit offset as Idx * ValWidth.
   2655     if (ty(IdxV) != MVT::i32)
   2656       IdxV = DAG.getZExtOrTrunc(IdxV, dl, MVT::i32);
   2657     SDValue OffV = DAG.getNode(ISD::MUL, dl, MVT::i32, IdxV, WidthV);
   2658     InsV = DAG.getNode(HexagonISD::INSERT, dl, ScalarTy,
   2659                        {VecV, ValV, WidthV, OffV});
   2660   }
   2661 
   2662   return DAG.getNode(ISD::BITCAST, dl, VecTy, InsV);
   2663 }
   2664 
   2665 SDValue
   2666 HexagonTargetLowering::expandPredicate(SDValue Vec32, const SDLoc &dl,
   2667                                        SelectionDAG &DAG) const {
   2668   assert(ty(Vec32).getSizeInBits() == 32);
   2669   if (isUndef(Vec32))
   2670     return DAG.getUNDEF(MVT::i64);
   2671   return getInstr(Hexagon::S2_vsxtbh, dl, MVT::i64, {Vec32}, DAG);
   2672 }
   2673 
   2674 SDValue
   2675 HexagonTargetLowering::contractPredicate(SDValue Vec64, const SDLoc &dl,
   2676                                          SelectionDAG &DAG) const {
   2677   assert(ty(Vec64).getSizeInBits() == 64);
   2678   if (isUndef(Vec64))
   2679     return DAG.getUNDEF(MVT::i32);
   2680   return getInstr(Hexagon::S2_vtrunehb, dl, MVT::i32, {Vec64}, DAG);
   2681 }
   2682 
   2683 SDValue
   2684 HexagonTargetLowering::getZero(const SDLoc &dl, MVT Ty, SelectionDAG &DAG)
   2685       const {
   2686   if (Ty.isVector()) {
   2687     assert(Ty.isInteger() && "Only integer vectors are supported here");
   2688     unsigned W = Ty.getSizeInBits();
   2689     if (W <= 64)
   2690       return DAG.getBitcast(Ty, DAG.getConstant(0, dl, MVT::getIntegerVT(W)));
   2691     return DAG.getNode(ISD::SPLAT_VECTOR, dl, Ty, getZero(dl, MVT::i32, DAG));
   2692   }
   2693 
   2694   if (Ty.isInteger())
   2695     return DAG.getConstant(0, dl, Ty);
   2696   if (Ty.isFloatingPoint())
   2697     return DAG.getConstantFP(0.0, dl, Ty);
   2698   llvm_unreachable("Invalid type for zero");
   2699 }
   2700 
   2701 SDValue
   2702 HexagonTargetLowering::appendUndef(SDValue Val, MVT ResTy, SelectionDAG &DAG)
   2703       const {
   2704   MVT ValTy = ty(Val);
   2705   assert(ValTy.getVectorElementType() == ResTy.getVectorElementType());
   2706 
   2707   unsigned ValLen = ValTy.getVectorNumElements();
   2708   unsigned ResLen = ResTy.getVectorNumElements();
   2709   if (ValLen == ResLen)
   2710     return Val;
   2711 
   2712   const SDLoc &dl(Val);
   2713   assert(ValLen < ResLen);
   2714   assert(ResLen % ValLen == 0);
   2715 
   2716   SmallVector<SDValue, 4> Concats = {Val};
   2717   for (unsigned i = 1, e = ResLen / ValLen; i < e; ++i)
   2718     Concats.push_back(DAG.getUNDEF(ValTy));
   2719 
   2720   return DAG.getNode(ISD::CONCAT_VECTORS, dl, ResTy, Concats);
   2721 }
   2722 
   2723 SDValue
   2724 HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const {
   2725   MVT VecTy = ty(Op);
   2726   unsigned BW = VecTy.getSizeInBits();
   2727   const SDLoc &dl(Op);
   2728   SmallVector<SDValue,8> Ops;
   2729   for (unsigned i = 0, e = Op.getNumOperands(); i != e; ++i)
   2730     Ops.push_back(Op.getOperand(i));
   2731 
   2732   if (BW == 32)
   2733     return buildVector32(Ops, dl, VecTy, DAG);
   2734   if (BW == 64)
   2735     return buildVector64(Ops, dl, VecTy, DAG);
   2736 
   2737   if (VecTy == MVT::v8i1 || VecTy == MVT::v4i1 || VecTy == MVT::v2i1) {
   2738     // Check if this is a special case or all-0 or all-1.
   2739     bool All0 = true, All1 = true;
   2740     for (SDValue P : Ops) {
   2741       auto *CN = dyn_cast<ConstantSDNode>(P.getNode());
   2742       if (CN == nullptr) {
   2743         All0 = All1 = false;
   2744         break;
   2745       }
   2746       uint32_t C = CN->getZExtValue();
   2747       All0 &= (C == 0);
   2748       All1 &= (C == 1);
   2749     }
   2750     if (All0)
   2751       return DAG.getNode(HexagonISD::PFALSE, dl, VecTy);
   2752     if (All1)
   2753       return DAG.getNode(HexagonISD::PTRUE, dl, VecTy);
   2754 
   2755     // For each i1 element in the resulting predicate register, put 1
   2756     // shifted by the index of the element into a general-purpose register,
   2757     // then or them together and transfer it back into a predicate register.
   2758     SDValue Rs[8];
   2759     SDValue Z = getZero(dl, MVT::i32, DAG);
   2760     // Always produce 8 bits, repeat inputs if necessary.
   2761     unsigned Rep = 8 / VecTy.getVectorNumElements();
   2762     for (unsigned i = 0; i != 8; ++i) {
   2763       SDValue S = DAG.getConstant(1ull << i, dl, MVT::i32);
   2764       Rs[i] = DAG.getSelect(dl, MVT::i32, Ops[i/Rep], S, Z);
   2765     }
   2766     for (ArrayRef<SDValue> A(Rs); A.size() != 1; A = A.drop_back(A.size()/2)) {
   2767       for (unsigned i = 0, e = A.size()/2; i != e; ++i)
   2768         Rs[i] = DAG.getNode(ISD::OR, dl, MVT::i32, Rs[2*i], Rs[2*i+1]);
   2769     }
   2770     // Move the value directly to a predicate register.
   2771     return getInstr(Hexagon::C2_tfrrp, dl, VecTy, {Rs[0]}, DAG);
   2772   }
   2773 
   2774   return SDValue();
   2775 }
   2776 
   2777 SDValue
   2778 HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op,
   2779                                            SelectionDAG &DAG) const {
   2780   MVT VecTy = ty(Op);
   2781   const SDLoc &dl(Op);
   2782   if (VecTy.getSizeInBits() == 64) {
   2783     assert(Op.getNumOperands() == 2);
   2784     return DAG.getNode(HexagonISD::COMBINE, dl, VecTy, Op.getOperand(1),
   2785                        Op.getOperand(0));
   2786   }
   2787 
   2788   MVT ElemTy = VecTy.getVectorElementType();
   2789   if (ElemTy == MVT::i1) {
   2790     assert(VecTy == MVT::v2i1 || VecTy == MVT::v4i1 || VecTy == MVT::v8i1);
   2791     MVT OpTy = ty(Op.getOperand(0));
   2792     // Scale is how many times the operands need to be contracted to match
   2793     // the representation in the target register.
   2794     unsigned Scale = VecTy.getVectorNumElements() / OpTy.getVectorNumElements();
   2795     assert(Scale == Op.getNumOperands() && Scale > 1);
   2796 
   2797     // First, convert all bool vectors to integers, then generate pairwise
   2798     // inserts to form values of doubled length. Up until there are only
   2799     // two values left to concatenate, all of these values will fit in a
   2800     // 32-bit integer, so keep them as i32 to use 32-bit inserts.
   2801     SmallVector<SDValue,4> Words[2];
   2802     unsigned IdxW = 0;
   2803 
   2804     for (SDValue P : Op.getNode()->op_values()) {
   2805       SDValue W = DAG.getNode(HexagonISD::P2D, dl, MVT::i64, P);
   2806       for (unsigned R = Scale; R > 1; R /= 2) {
   2807         W = contractPredicate(W, dl, DAG);
   2808         W = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
   2809                         DAG.getUNDEF(MVT::i32), W);
   2810       }
   2811       W = DAG.getTargetExtractSubreg(Hexagon::isub_lo, dl, MVT::i32, W);
   2812       Words[IdxW].push_back(W);
   2813     }
   2814 
   2815     while (Scale > 2) {
   2816       SDValue WidthV = DAG.getConstant(64 / Scale, dl, MVT::i32);
   2817       Words[IdxW ^ 1].clear();
   2818 
   2819       for (unsigned i = 0, e = Words[IdxW].size(); i != e; i += 2) {
   2820         SDValue W0 = Words[IdxW][i], W1 = Words[IdxW][i+1];
   2821         // Insert W1 into W0 right next to the significant bits of W0.
   2822         SDValue T = DAG.getNode(HexagonISD::INSERT, dl, MVT::i32,
   2823                                 {W0, W1, WidthV, WidthV});
   2824         Words[IdxW ^ 1].push_back(T);
   2825       }
   2826       IdxW ^= 1;
   2827       Scale /= 2;
   2828     }
   2829 
   2830     // Another sanity check. At this point there should only be two words
   2831     // left, and Scale should be 2.
   2832     assert(Scale == 2 && Words[IdxW].size() == 2);
   2833 
   2834     SDValue WW = DAG.getNode(HexagonISD::COMBINE, dl, MVT::i64,
   2835                              Words[IdxW][1], Words[IdxW][0]);
   2836     return DAG.getNode(HexagonISD::D2P, dl, VecTy, WW);
   2837   }
   2838 
   2839   return SDValue();
   2840 }
   2841 
   2842 SDValue
   2843 HexagonTargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
   2844                                                SelectionDAG &DAG) const {
   2845   SDValue Vec = Op.getOperand(0);
   2846   MVT ElemTy = ty(Vec).getVectorElementType();
   2847   return extractVector(Vec, Op.getOperand(1), SDLoc(Op), ElemTy, ty(Op), DAG);
   2848 }
   2849 
   2850 SDValue
   2851 HexagonTargetLowering::LowerEXTRACT_SUBVECTOR(SDValue Op,
   2852                                               SelectionDAG &DAG) const {
   2853   return extractVector(Op.getOperand(0), Op.getOperand(1), SDLoc(Op),
   2854                        ty(Op), ty(Op), DAG);
   2855 }
   2856 
   2857 SDValue
   2858 HexagonTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
   2859                                               SelectionDAG &DAG) const {
   2860   return insertVector(Op.getOperand(0), Op.getOperand(1), Op.getOperand(2),
   2861                       SDLoc(Op), ty(Op).getVectorElementType(), DAG);
   2862 }
   2863 
   2864 SDValue
   2865 HexagonTargetLowering::LowerINSERT_SUBVECTOR(SDValue Op,
   2866                                              SelectionDAG &DAG) const {
   2867   SDValue ValV = Op.getOperand(1);
   2868   return insertVector(Op.getOperand(0), ValV, Op.getOperand(2),
   2869                       SDLoc(Op), ty(ValV), DAG);
   2870 }
   2871 
   2872 bool
   2873 HexagonTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const {
   2874   // Assuming the caller does not have either a signext or zeroext modifier, and
   2875   // only one value is accepted, any reasonable truncation is allowed.
   2876   if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
   2877     return false;
   2878 
   2879   // FIXME: in principle up to 64-bit could be made safe, but it would be very
   2880   // fragile at the moment: any support for multiple value returns would be
   2881   // liable to disallow tail calls involving i64 -> iN truncation in many cases.
   2882   return Ty1->getPrimitiveSizeInBits() <= 32;
   2883 }
   2884 
   2885 SDValue
   2886 HexagonTargetLowering::LowerLoad(SDValue Op, SelectionDAG &DAG) const {
   2887   MVT Ty = ty(Op);
   2888   const SDLoc &dl(Op);
   2889   // Lower loads of scalar predicate vectors (v2i1, v4i1, v8i1) to loads of i1
   2890   // followed by a TYPECAST.
   2891   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
   2892   bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
   2893   if (DoCast) {
   2894     SDValue NL = DAG.getLoad(
   2895         LN->getAddressingMode(), LN->getExtensionType(), MVT::i1, dl,
   2896         LN->getChain(), LN->getBasePtr(), LN->getOffset(), LN->getPointerInfo(),
   2897         /*MemoryVT*/ MVT::i1, LN->getAlign(), LN->getMemOperand()->getFlags(),
   2898         LN->getAAInfo(), LN->getRanges());
   2899     LN = cast<LoadSDNode>(NL.getNode());
   2900   }
   2901 
   2902   unsigned ClaimAlign = LN->getAlignment();
   2903   validateConstPtrAlignment(LN->getBasePtr(), dl, ClaimAlign);
   2904   // Call LowerUnalignedLoad for all loads, it recognizes loads that
   2905   // don't need extra aligning.
   2906   SDValue LU = LowerUnalignedLoad(SDValue(LN, 0), DAG);
   2907   if (DoCast) {
   2908     SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, Ty, LU);
   2909     SDValue Ch = cast<LoadSDNode>(LU.getNode())->getChain();
   2910     return DAG.getMergeValues({TC, Ch}, dl);
   2911   }
   2912   return LU;
   2913 }
   2914 
   2915 SDValue
   2916 HexagonTargetLowering::LowerStore(SDValue Op, SelectionDAG &DAG) const {
   2917   const SDLoc &dl(Op);
   2918   StoreSDNode *SN = cast<StoreSDNode>(Op.getNode());
   2919   SDValue Val = SN->getValue();
   2920   MVT Ty = ty(Val);
   2921 
   2922   bool DoCast = (Ty == MVT::v2i1 || Ty == MVT::v4i1 || Ty == MVT::v8i1);
   2923   if (DoCast) {
   2924     SDValue TC = DAG.getNode(HexagonISD::TYPECAST, dl, MVT::i1, Val);
   2925     SDValue NS = DAG.getStore(SN->getChain(), dl, TC, SN->getBasePtr(),
   2926                               SN->getMemOperand());
   2927     if (SN->isIndexed()) {
   2928       NS = DAG.getIndexedStore(NS, dl, SN->getBasePtr(), SN->getOffset(),
   2929                                SN->getAddressingMode());
   2930     }
   2931     SN = cast<StoreSDNode>(NS.getNode());
   2932   }
   2933 
   2934   unsigned ClaimAlign = SN->getAlignment();
   2935   SDValue Ptr = SN->getBasePtr();
   2936   validateConstPtrAlignment(Ptr, dl, ClaimAlign);
   2937 
   2938   MVT StoreTy = SN->getMemoryVT().getSimpleVT();
   2939   unsigned NeedAlign = Subtarget.getTypeAlignment(StoreTy);
   2940   if (ClaimAlign < NeedAlign)
   2941     return expandUnalignedStore(SN, DAG);
   2942   return SDValue(SN, 0);
   2943 }
   2944 
   2945 SDValue
   2946 HexagonTargetLowering::LowerUnalignedLoad(SDValue Op, SelectionDAG &DAG)
   2947       const {
   2948   LoadSDNode *LN = cast<LoadSDNode>(Op.getNode());
   2949   MVT LoadTy = ty(Op);
   2950   unsigned NeedAlign = Subtarget.getTypeAlignment(LoadTy);
   2951   unsigned HaveAlign = LN->getAlignment();
   2952   if (HaveAlign >= NeedAlign)
   2953     return Op;
   2954 
   2955   const SDLoc &dl(Op);
   2956   const DataLayout &DL = DAG.getDataLayout();
   2957   LLVMContext &Ctx = *DAG.getContext();
   2958 
   2959   // If the load aligning is disabled or the load can be broken up into two
   2960   // smaller legal loads, do the default (target-independent) expansion.
   2961   bool DoDefault = false;
   2962   // Handle it in the default way if this is an indexed load.
   2963   if (!LN->isUnindexed())
   2964     DoDefault = true;
   2965 
   2966   if (!AlignLoads) {
   2967     if (allowsMemoryAccessForAlignment(Ctx, DL, LN->getMemoryVT(),
   2968                                        *LN->getMemOperand()))
   2969       return Op;
   2970     DoDefault = true;
   2971   }
   2972   if (!DoDefault && (2 * HaveAlign) == NeedAlign) {
   2973     // The PartTy is the equivalent of "getLoadableTypeOfSize(HaveAlign)".
   2974     MVT PartTy = HaveAlign <= 8 ? MVT::getIntegerVT(8 * HaveAlign)
   2975                                 : MVT::getVectorVT(MVT::i8, HaveAlign);
   2976     DoDefault =
   2977         allowsMemoryAccessForAlignment(Ctx, DL, PartTy, *LN->getMemOperand());
   2978   }
   2979   if (DoDefault) {
   2980     std::pair<SDValue, SDValue> P = expandUnalignedLoad(LN, DAG);
   2981     return DAG.getMergeValues({P.first, P.second}, dl);
   2982   }
   2983 
   2984   // The code below generates two loads, both aligned as NeedAlign, and
   2985   // with the distance of NeedAlign between them. For that to cover the
   2986   // bits that need to be loaded (and without overlapping), the size of
   2987   // the loads should be equal to NeedAlign. This is true for all loadable
   2988   // types, but add an assertion in case something changes in the future.
   2989   assert(LoadTy.getSizeInBits() == 8*NeedAlign);
   2990 
   2991   unsigned LoadLen = NeedAlign;
   2992   SDValue Base = LN->getBasePtr();
   2993   SDValue Chain = LN->getChain();
   2994   auto BO = getBaseAndOffset(Base);
   2995   unsigned BaseOpc = BO.first.getOpcode();
   2996   if (BaseOpc == HexagonISD::VALIGNADDR && BO.second % LoadLen == 0)
   2997     return Op;
   2998 
   2999   if (BO.second % LoadLen != 0) {
   3000     BO.first = DAG.getNode(ISD::ADD, dl, MVT::i32, BO.first,
   3001                            DAG.getConstant(BO.second % LoadLen, dl, MVT::i32));
   3002     BO.second -= BO.second % LoadLen;
   3003   }
   3004   SDValue BaseNoOff = (BaseOpc != HexagonISD::VALIGNADDR)
   3005       ? DAG.getNode(HexagonISD::VALIGNADDR, dl, MVT::i32, BO.first,
   3006                     DAG.getConstant(NeedAlign, dl, MVT::i32))
   3007       : BO.first;
   3008   SDValue Base0 =
   3009       DAG.getMemBasePlusOffset(BaseNoOff, TypeSize::Fixed(BO.second), dl);
   3010   SDValue Base1 = DAG.getMemBasePlusOffset(
   3011       BaseNoOff, TypeSize::Fixed(BO.second + LoadLen), dl);
   3012 
   3013   MachineMemOperand *WideMMO = nullptr;
   3014   if (MachineMemOperand *MMO = LN->getMemOperand()) {
   3015     MachineFunction &MF = DAG.getMachineFunction();
   3016     WideMMO = MF.getMachineMemOperand(
   3017         MMO->getPointerInfo(), MMO->getFlags(), 2 * LoadLen, Align(LoadLen),
   3018         MMO->getAAInfo(), MMO->getRanges(), MMO->getSyncScopeID(),
   3019         MMO->getOrdering(), MMO->getFailureOrdering());
   3020   }
   3021 
   3022   SDValue Load0 = DAG.getLoad(LoadTy, dl, Chain, Base0, WideMMO);
   3023   SDValue Load1 = DAG.getLoad(LoadTy, dl, Chain, Base1, WideMMO);
   3024 
   3025   SDValue Aligned = DAG.getNode(HexagonISD::VALIGN, dl, LoadTy,
   3026                                 {Load1, Load0, BaseNoOff.getOperand(0)});
   3027   SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
   3028                                  Load0.getValue(1), Load1.getValue(1));
   3029   SDValue M = DAG.getMergeValues({Aligned, NewChain}, dl);
   3030   return M;
   3031 }
   3032 
   3033 SDValue
   3034 HexagonTargetLowering::LowerUAddSubO(SDValue Op, SelectionDAG &DAG) const {
   3035   SDValue X = Op.getOperand(0), Y = Op.getOperand(1);
   3036   auto *CY = dyn_cast<ConstantSDNode>(Y);
   3037   if (!CY)
   3038     return SDValue();
   3039 
   3040   const SDLoc &dl(Op);
   3041   SDVTList VTs = Op.getNode()->getVTList();
   3042   assert(VTs.NumVTs == 2);
   3043   assert(VTs.VTs[1] == MVT::i1);
   3044   unsigned Opc = Op.getOpcode();
   3045 
   3046   if (CY) {
   3047     uint32_t VY = CY->getZExtValue();
   3048     assert(VY != 0 && "This should have been folded");
   3049     // X +/- 1
   3050     if (VY != 1)
   3051       return SDValue();
   3052 
   3053     if (Opc == ISD::UADDO) {
   3054       SDValue Op = DAG.getNode(ISD::ADD, dl, VTs.VTs[0], {X, Y});
   3055       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op, getZero(dl, ty(Op), DAG),
   3056                                 ISD::SETEQ);
   3057       return DAG.getMergeValues({Op, Ov}, dl);
   3058     }
   3059     if (Opc == ISD::USUBO) {
   3060       SDValue Op = DAG.getNode(ISD::SUB, dl, VTs.VTs[0], {X, Y});
   3061       SDValue Ov = DAG.getSetCC(dl, MVT::i1, Op,
   3062                                 DAG.getConstant(-1, dl, ty(Op)), ISD::SETEQ);
   3063       return DAG.getMergeValues({Op, Ov}, dl);
   3064     }
   3065   }
   3066 
   3067   return SDValue();
   3068 }
   3069 
   3070 SDValue
   3071 HexagonTargetLowering::LowerAddSubCarry(SDValue Op, SelectionDAG &DAG) const {
   3072   const SDLoc &dl(Op);
   3073   unsigned Opc = Op.getOpcode();
   3074   SDValue X = Op.getOperand(0), Y = Op.getOperand(1), C = Op.getOperand(2);
   3075 
   3076   if (Opc == ISD::ADDCARRY)
   3077     return DAG.getNode(HexagonISD::ADDC, dl, Op.getNode()->getVTList(),
   3078                        { X, Y, C });
   3079 
   3080   EVT CarryTy = C.getValueType();
   3081   SDValue SubC = DAG.getNode(HexagonISD::SUBC, dl, Op.getNode()->getVTList(),
   3082                              { X, Y, DAG.getLogicalNOT(dl, C, CarryTy) });
   3083   SDValue Out[] = { SubC.getValue(0),
   3084                     DAG.getLogicalNOT(dl, SubC.getValue(1), CarryTy) };
   3085   return DAG.getMergeValues(Out, dl);
   3086 }
   3087 
   3088 SDValue
   3089 HexagonTargetLowering::LowerEH_RETURN(SDValue Op, SelectionDAG &DAG) const {
   3090   SDValue Chain     = Op.getOperand(0);
   3091   SDValue Offset    = Op.getOperand(1);
   3092   SDValue Handler   = Op.getOperand(2);
   3093   SDLoc dl(Op);
   3094   auto PtrVT = getPointerTy(DAG.getDataLayout());
   3095 
   3096   // Mark function as containing a call to EH_RETURN.
   3097   HexagonMachineFunctionInfo *FuncInfo =
   3098     DAG.getMachineFunction().getInfo<HexagonMachineFunctionInfo>();
   3099   FuncInfo->setHasEHReturn();
   3100 
   3101   unsigned OffsetReg = Hexagon::R28;
   3102 
   3103   SDValue StoreAddr =
   3104       DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getRegister(Hexagon::R30, PtrVT),
   3105                   DAG.getIntPtrConstant(4, dl));
   3106   Chain = DAG.getStore(Chain, dl, Handler, StoreAddr, MachinePointerInfo());
   3107   Chain = DAG.getCopyToReg(Chain, dl, OffsetReg, Offset);
   3108 
   3109   // Not needed we already use it as explict input to EH_RETURN.
   3110   // MF.getRegInfo().addLiveOut(OffsetReg);
   3111 
   3112   return DAG.getNode(HexagonISD::EH_RETURN, dl, MVT::Other, Chain);
   3113 }
   3114 
   3115 SDValue
   3116 HexagonTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   3117   unsigned Opc = Op.getOpcode();
   3118 
   3119   // Handle INLINEASM first.
   3120   if (Opc == ISD::INLINEASM || Opc == ISD::INLINEASM_BR)
   3121     return LowerINLINEASM(Op, DAG);
   3122 
   3123   if (isHvxOperation(Op.getNode(), DAG)) {
   3124     // If HVX lowering returns nothing, try the default lowering.
   3125     if (SDValue V = LowerHvxOperation(Op, DAG))
   3126       return V;
   3127   }
   3128 
   3129   switch (Opc) {
   3130     default:
   3131 #ifndef NDEBUG
   3132       Op.getNode()->dumpr(&DAG);
   3133       if (Opc > HexagonISD::OP_BEGIN && Opc < HexagonISD::OP_END)
   3134         errs() << "Error: check for a non-legal type in this operation\n";
   3135 #endif
   3136       llvm_unreachable("Should not custom lower this!");
   3137     case ISD::CONCAT_VECTORS:       return LowerCONCAT_VECTORS(Op, DAG);
   3138     case ISD::INSERT_SUBVECTOR:     return LowerINSERT_SUBVECTOR(Op, DAG);
   3139     case ISD::INSERT_VECTOR_ELT:    return LowerINSERT_VECTOR_ELT(Op, DAG);
   3140     case ISD::EXTRACT_SUBVECTOR:    return LowerEXTRACT_SUBVECTOR(Op, DAG);
   3141     case ISD::EXTRACT_VECTOR_ELT:   return LowerEXTRACT_VECTOR_ELT(Op, DAG);
   3142     case ISD::BUILD_VECTOR:         return LowerBUILD_VECTOR(Op, DAG);
   3143     case ISD::VECTOR_SHUFFLE:       return LowerVECTOR_SHUFFLE(Op, DAG);
   3144     case ISD::BITCAST:              return LowerBITCAST(Op, DAG);
   3145     case ISD::LOAD:                 return LowerLoad(Op, DAG);
   3146     case ISD::STORE:                return LowerStore(Op, DAG);
   3147     case ISD::UADDO:
   3148     case ISD::USUBO:                return LowerUAddSubO(Op, DAG);
   3149     case ISD::ADDCARRY:
   3150     case ISD::SUBCARRY:             return LowerAddSubCarry(Op, DAG);
   3151     case ISD::SRA:
   3152     case ISD::SHL:
   3153     case ISD::SRL:                  return LowerVECTOR_SHIFT(Op, DAG);
   3154     case ISD::ROTL:                 return LowerROTL(Op, DAG);
   3155     case ISD::ConstantPool:         return LowerConstantPool(Op, DAG);
   3156     case ISD::JumpTable:            return LowerJumpTable(Op, DAG);
   3157     case ISD::EH_RETURN:            return LowerEH_RETURN(Op, DAG);
   3158     case ISD::RETURNADDR:           return LowerRETURNADDR(Op, DAG);
   3159     case ISD::FRAMEADDR:            return LowerFRAMEADDR(Op, DAG);
   3160     case ISD::GlobalTLSAddress:     return LowerGlobalTLSAddress(Op, DAG);
   3161     case ISD::ATOMIC_FENCE:         return LowerATOMIC_FENCE(Op, DAG);
   3162     case ISD::GlobalAddress:        return LowerGLOBALADDRESS(Op, DAG);
   3163     case ISD::BlockAddress:         return LowerBlockAddress(Op, DAG);
   3164     case ISD::GLOBAL_OFFSET_TABLE:  return LowerGLOBAL_OFFSET_TABLE(Op, DAG);
   3165     case ISD::VACOPY:               return LowerVACOPY(Op, DAG);
   3166     case ISD::VASTART:              return LowerVASTART(Op, DAG);
   3167     case ISD::DYNAMIC_STACKALLOC:   return LowerDYNAMIC_STACKALLOC(Op, DAG);
   3168     case ISD::SETCC:                return LowerSETCC(Op, DAG);
   3169     case ISD::VSELECT:              return LowerVSELECT(Op, DAG);
   3170     case ISD::INTRINSIC_WO_CHAIN:   return LowerINTRINSIC_WO_CHAIN(Op, DAG);
   3171     case ISD::INTRINSIC_VOID:       return LowerINTRINSIC_VOID(Op, DAG);
   3172     case ISD::PREFETCH:             return LowerPREFETCH(Op, DAG);
   3173     case ISD::READCYCLECOUNTER:     return LowerREADCYCLECOUNTER(Op, DAG);
   3174       break;
   3175   }
   3176 
   3177   return SDValue();
   3178 }
   3179 
   3180 void
   3181 HexagonTargetLowering::LowerOperationWrapper(SDNode *N,
   3182                                              SmallVectorImpl<SDValue> &Results,
   3183                                              SelectionDAG &DAG) const {
   3184   if (isHvxOperation(N, DAG)) {
   3185     LowerHvxOperationWrapper(N, Results, DAG);
   3186     if (!Results.empty())
   3187       return;
   3188   }
   3189 
   3190   // We are only custom-lowering stores to verify the alignment of the
   3191   // address if it is a compile-time constant. Since a store can be modified
   3192   // during type-legalization (the value being stored may need legalization),
   3193   // return empty Results here to indicate that we don't really make any
   3194   // changes in the custom lowering.
   3195   if (N->getOpcode() != ISD::STORE)
   3196     return TargetLowering::LowerOperationWrapper(N, Results, DAG);
   3197 }
   3198 
   3199 void
   3200 HexagonTargetLowering::ReplaceNodeResults(SDNode *N,
   3201                                           SmallVectorImpl<SDValue> &Results,
   3202                                           SelectionDAG &DAG) const {
   3203   if (isHvxOperation(N, DAG)) {
   3204     ReplaceHvxNodeResults(N, Results, DAG);
   3205     if (!Results.empty())
   3206       return;
   3207   }
   3208 
   3209   const SDLoc &dl(N);
   3210   switch (N->getOpcode()) {
   3211     case ISD::SRL:
   3212     case ISD::SRA:
   3213     case ISD::SHL:
   3214       return;
   3215     case ISD::BITCAST:
   3216       // Handle a bitcast from v8i1 to i8.
   3217       if (N->getValueType(0) == MVT::i8) {
   3218         if (N->getOperand(0).getValueType() == MVT::v8i1) {
   3219           SDValue P = getInstr(Hexagon::C2_tfrpr, dl, MVT::i32,
   3220                                N->getOperand(0), DAG);
   3221           SDValue T = DAG.getAnyExtOrTrunc(P, dl, MVT::i8);
   3222           Results.push_back(T);
   3223         }
   3224       }
   3225       break;
   3226   }
   3227 }
   3228 
   3229 SDValue
   3230 HexagonTargetLowering::PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI)
   3231       const {
   3232   if (isHvxOperation(N, DCI.DAG)) {
   3233     if (SDValue V = PerformHvxDAGCombine(N, DCI))
   3234       return V;
   3235     return SDValue();
   3236   }
   3237 
   3238   if (DCI.isBeforeLegalizeOps())
   3239     return SDValue();
   3240 
   3241   SDValue Op(N, 0);
   3242   const SDLoc &dl(Op);
   3243   unsigned Opc = Op.getOpcode();
   3244 
   3245   if (Opc == HexagonISD::P2D) {
   3246     SDValue P = Op.getOperand(0);
   3247     switch (P.getOpcode()) {
   3248       case HexagonISD::PTRUE:
   3249         return DCI.DAG.getConstant(-1, dl, ty(Op));
   3250       case HexagonISD::PFALSE:
   3251         return getZero(dl, ty(Op), DCI.DAG);
   3252       default:
   3253         break;
   3254     }
   3255   } else if (Opc == ISD::VSELECT) {
   3256     // This is pretty much duplicated in HexagonISelLoweringHVX...
   3257     //
   3258     // (vselect (xor x, ptrue), v0, v1) -> (vselect x, v1, v0)
   3259     SDValue Cond = Op.getOperand(0);
   3260     if (Cond->getOpcode() == ISD::XOR) {
   3261       SDValue C0 = Cond.getOperand(0), C1 = Cond.getOperand(1);
   3262       if (C1->getOpcode() == HexagonISD::PTRUE) {
   3263         SDValue VSel = DCI.DAG.getNode(ISD::VSELECT, dl, ty(Op), C0,
   3264                                        Op.getOperand(2), Op.getOperand(1));
   3265         return VSel;
   3266       }
   3267     }
   3268   }
   3269 
   3270   return SDValue();
   3271 }
   3272 
   3273 /// Returns relocation base for the given PIC jumptable.
   3274 SDValue
   3275 HexagonTargetLowering::getPICJumpTableRelocBase(SDValue Table,
   3276                                                 SelectionDAG &DAG) const {
   3277   int Idx = cast<JumpTableSDNode>(Table)->getIndex();
   3278   EVT VT = Table.getValueType();
   3279   SDValue T = DAG.getTargetJumpTable(Idx, VT, HexagonII::MO_PCREL);
   3280   return DAG.getNode(HexagonISD::AT_PCREL, SDLoc(Table), VT, T);
   3281 }
   3282 
   3283 //===----------------------------------------------------------------------===//
   3284 // Inline Assembly Support
   3285 //===----------------------------------------------------------------------===//
   3286 
   3287 TargetLowering::ConstraintType
   3288 HexagonTargetLowering::getConstraintType(StringRef Constraint) const {
   3289   if (Constraint.size() == 1) {
   3290     switch (Constraint[0]) {
   3291       case 'q':
   3292       case 'v':
   3293         if (Subtarget.useHVXOps())
   3294           return C_RegisterClass;
   3295         break;
   3296       case 'a':
   3297         return C_RegisterClass;
   3298       default:
   3299         break;
   3300     }
   3301   }
   3302   return TargetLowering::getConstraintType(Constraint);
   3303 }
   3304 
   3305 std::pair<unsigned, const TargetRegisterClass*>
   3306 HexagonTargetLowering::getRegForInlineAsmConstraint(
   3307     const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const {
   3308 
   3309   if (Constraint.size() == 1) {
   3310     switch (Constraint[0]) {
   3311     case 'r':   // R0-R31
   3312       switch (VT.SimpleTy) {
   3313       default:
   3314         return {0u, nullptr};
   3315       case MVT::i1:
   3316       case MVT::i8:
   3317       case MVT::i16:
   3318       case MVT::i32:
   3319       case MVT::f32:
   3320         return {0u, &Hexagon::IntRegsRegClass};
   3321       case MVT::i64:
   3322       case MVT::f64:
   3323         return {0u, &Hexagon::DoubleRegsRegClass};
   3324       }
   3325       break;
   3326     case 'a': // M0-M1
   3327       if (VT != MVT::i32)
   3328         return {0u, nullptr};
   3329       return {0u, &Hexagon::ModRegsRegClass};
   3330     case 'q': // q0-q3
   3331       switch (VT.getSizeInBits()) {
   3332       default:
   3333         return {0u, nullptr};
   3334       case 64:
   3335       case 128:
   3336         return {0u, &Hexagon::HvxQRRegClass};
   3337       }
   3338       break;
   3339     case 'v': // V0-V31
   3340       switch (VT.getSizeInBits()) {
   3341       default:
   3342         return {0u, nullptr};
   3343       case 512:
   3344         return {0u, &Hexagon::HvxVRRegClass};
   3345       case 1024:
   3346         if (Subtarget.hasV60Ops() && Subtarget.useHVX128BOps())
   3347           return {0u, &Hexagon::HvxVRRegClass};
   3348         return {0u, &Hexagon::HvxWRRegClass};
   3349       case 2048:
   3350         return {0u, &Hexagon::HvxWRRegClass};
   3351       }
   3352       break;
   3353     default:
   3354       return {0u, nullptr};
   3355     }
   3356   }
   3357 
   3358   return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
   3359 }
   3360 
   3361 /// isFPImmLegal - Returns true if the target can instruction select the
   3362 /// specified FP immediate natively. If false, the legalizer will
   3363 /// materialize the FP immediate as a load from a constant pool.
   3364 bool HexagonTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
   3365                                          bool ForCodeSize) const {
   3366   return true;
   3367 }
   3368 
   3369 /// isLegalAddressingMode - Return true if the addressing mode represented by
   3370 /// AM is legal for this target, for a load/store of the specified type.
   3371 bool HexagonTargetLowering::isLegalAddressingMode(const DataLayout &DL,
   3372                                                   const AddrMode &AM, Type *Ty,
   3373                                                   unsigned AS, Instruction *I) const {
   3374   if (Ty->isSized()) {
   3375     // When LSR detects uses of the same base address to access different
   3376     // types (e.g. unions), it will assume a conservative type for these
   3377     // uses:
   3378     //   LSR Use: Kind=Address of void in addrspace(4294967295), ...
   3379     // The type Ty passed here would then be "void". Skip the alignment
   3380     // checks, but do not return false right away, since that confuses
   3381     // LSR into crashing.
   3382     Align A = DL.getABITypeAlign(Ty);
   3383     // The base offset must be a multiple of the alignment.
   3384     if (!isAligned(A, AM.BaseOffs))
   3385       return false;
   3386     // The shifted offset must fit in 11 bits.
   3387     if (!isInt<11>(AM.BaseOffs >> Log2(A)))
   3388       return false;
   3389   }
   3390 
   3391   // No global is ever allowed as a base.
   3392   if (AM.BaseGV)
   3393     return false;
   3394 
   3395   int Scale = AM.Scale;
   3396   if (Scale < 0)
   3397     Scale = -Scale;
   3398   switch (Scale) {
   3399   case 0:  // No scale reg, "r+i", "r", or just "i".
   3400     break;
   3401   default: // No scaled addressing mode.
   3402     return false;
   3403   }
   3404   return true;
   3405 }
   3406 
   3407 /// Return true if folding a constant offset with the given GlobalAddress is
   3408 /// legal.  It is frequently not legal in PIC relocation models.
   3409 bool HexagonTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA)
   3410       const {
   3411   return HTM.getRelocationModel() == Reloc::Static;
   3412 }
   3413 
   3414 /// isLegalICmpImmediate - Return true if the specified immediate is legal
   3415 /// icmp immediate, that is the target has icmp instructions which can compare
   3416 /// a register against the immediate without having to materialize the
   3417 /// immediate into a register.
   3418 bool HexagonTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
   3419   return Imm >= -512 && Imm <= 511;
   3420 }
   3421 
   3422 /// IsEligibleForTailCallOptimization - Check whether the call is eligible
   3423 /// for tail call optimization. Targets which want to do tail call
   3424 /// optimization should implement this function.
   3425 bool HexagonTargetLowering::IsEligibleForTailCallOptimization(
   3426                                  SDValue Callee,
   3427                                  CallingConv::ID CalleeCC,
   3428                                  bool IsVarArg,
   3429                                  bool IsCalleeStructRet,
   3430                                  bool IsCallerStructRet,
   3431                                  const SmallVectorImpl<ISD::OutputArg> &Outs,
   3432                                  const SmallVectorImpl<SDValue> &OutVals,
   3433                                  const SmallVectorImpl<ISD::InputArg> &Ins,
   3434                                  SelectionDAG& DAG) const {
   3435   const Function &CallerF = DAG.getMachineFunction().getFunction();
   3436   CallingConv::ID CallerCC = CallerF.getCallingConv();
   3437   bool CCMatch = CallerCC == CalleeCC;
   3438 
   3439   // ***************************************************************************
   3440   //  Look for obvious safe cases to perform tail call optimization that do not
   3441   //  require ABI changes.
   3442   // ***************************************************************************
   3443 
   3444   // If this is a tail call via a function pointer, then don't do it!
   3445   if (!isa<GlobalAddressSDNode>(Callee) &&
   3446       !isa<ExternalSymbolSDNode>(Callee)) {
   3447     return false;
   3448   }
   3449 
   3450   // Do not optimize if the calling conventions do not match and the conventions
   3451   // used are not C or Fast.
   3452   if (!CCMatch) {
   3453     bool R = (CallerCC == CallingConv::C || CallerCC == CallingConv::Fast);
   3454     bool E = (CalleeCC == CallingConv::C || CalleeCC == CallingConv::Fast);
   3455     // If R & E, then ok.
   3456     if (!R || !E)
   3457       return false;
   3458   }
   3459 
   3460   // Do not tail call optimize vararg calls.
   3461   if (IsVarArg)
   3462     return false;
   3463 
   3464   // Also avoid tail call optimization if either caller or callee uses struct
   3465   // return semantics.
   3466   if (IsCalleeStructRet || IsCallerStructRet)
   3467     return false;
   3468 
   3469   // In addition to the cases above, we also disable Tail Call Optimization if
   3470   // the calling convention code that at least one outgoing argument needs to
   3471   // go on the stack. We cannot check that here because at this point that
   3472   // information is not available.
   3473   return true;
   3474 }
   3475 
   3476 /// Returns the target specific optimal type for load and store operations as
   3477 /// a result of memset, memcpy, and memmove lowering.
   3478 ///
   3479 /// If DstAlign is zero that means it's safe to destination alignment can
   3480 /// satisfy any constraint. Similarly if SrcAlign is zero it means there isn't
   3481 /// a need to check it against alignment requirement, probably because the
   3482 /// source does not need to be loaded. If 'IsMemset' is true, that means it's
   3483 /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of
   3484 /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it
   3485 /// does not need to be loaded.  It returns EVT::Other if the type should be
   3486 /// determined using generic target-independent logic.
   3487 EVT HexagonTargetLowering::getOptimalMemOpType(
   3488     const MemOp &Op, const AttributeList &FuncAttributes) const {
   3489   if (Op.size() >= 8 && Op.isAligned(Align(8)))
   3490     return MVT::i64;
   3491   if (Op.size() >= 4 && Op.isAligned(Align(4)))
   3492     return MVT::i32;
   3493   if (Op.size() >= 2 && Op.isAligned(Align(2)))
   3494     return MVT::i16;
   3495   return MVT::Other;
   3496 }
   3497 
   3498 bool HexagonTargetLowering::allowsMemoryAccess(
   3499     LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace,
   3500     Align Alignment, MachineMemOperand::Flags Flags, bool *Fast) const {
   3501   MVT SVT = VT.getSimpleVT();
   3502   if (Subtarget.isHVXVectorType(SVT, true))
   3503     return allowsHvxMemoryAccess(SVT, Flags, Fast);
   3504   return TargetLoweringBase::allowsMemoryAccess(
   3505               Context, DL, VT, AddrSpace, Alignment, Flags, Fast);
   3506 }
   3507 
   3508 bool HexagonTargetLowering::allowsMisalignedMemoryAccesses(
   3509     EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
   3510     bool *Fast) const {
   3511   MVT SVT = VT.getSimpleVT();
   3512   if (Subtarget.isHVXVectorType(SVT, true))
   3513     return allowsHvxMisalignedMemoryAccesses(SVT, Flags, Fast);
   3514   if (Fast)
   3515     *Fast = false;
   3516   return false;
   3517 }
   3518 
   3519 std::pair<const TargetRegisterClass*, uint8_t>
   3520 HexagonTargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
   3521       MVT VT) const {
   3522   if (Subtarget.isHVXVectorType(VT, true)) {
   3523     unsigned BitWidth = VT.getSizeInBits();
   3524     unsigned VecWidth = Subtarget.getVectorLength() * 8;
   3525 
   3526     if (VT.getVectorElementType() == MVT::i1)
   3527       return std::make_pair(&Hexagon::HvxQRRegClass, 1);
   3528     if (BitWidth == VecWidth)
   3529       return std::make_pair(&Hexagon::HvxVRRegClass, 1);
   3530     assert(BitWidth == 2 * VecWidth);
   3531     return std::make_pair(&Hexagon::HvxWRRegClass, 1);
   3532   }
   3533 
   3534   return TargetLowering::findRepresentativeClass(TRI, VT);
   3535 }
   3536 
   3537 bool HexagonTargetLowering::shouldReduceLoadWidth(SDNode *Load,
   3538       ISD::LoadExtType ExtTy, EVT NewVT) const {
   3539   // TODO: This may be worth removing. Check regression tests for diffs.
   3540   if (!TargetLoweringBase::shouldReduceLoadWidth(Load, ExtTy, NewVT))
   3541     return false;
   3542 
   3543   auto *L = cast<LoadSDNode>(Load);
   3544   std::pair<SDValue,int> BO = getBaseAndOffset(L->getBasePtr());
   3545   // Small-data object, do not shrink.
   3546   if (BO.first.getOpcode() == HexagonISD::CONST32_GP)
   3547     return false;
   3548   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(BO.first)) {
   3549     auto &HTM = static_cast<const HexagonTargetMachine&>(getTargetMachine());
   3550     const auto *GO = dyn_cast_or_null<const GlobalObject>(GA->getGlobal());
   3551     return !GO || !HTM.getObjFileLowering()->isGlobalInSmallSection(GO, HTM);
   3552   }
   3553   return true;
   3554 }
   3555 
   3556 Value *HexagonTargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
   3557       AtomicOrdering Ord) const {
   3558   BasicBlock *BB = Builder.GetInsertBlock();
   3559   Module *M = BB->getParent()->getParent();
   3560   auto PT = cast<PointerType>(Addr->getType());
   3561   Type *Ty = PT->getElementType();
   3562   unsigned SZ = Ty->getPrimitiveSizeInBits();
   3563   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic loads supported");
   3564   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_L2_loadw_locked
   3565                                    : Intrinsic::hexagon_L4_loadd_locked;
   3566   Function *Fn = Intrinsic::getDeclaration(M, IntID);
   3567 
   3568   PointerType *NewPtrTy
   3569     = Builder.getIntNTy(SZ)->getPointerTo(PT->getAddressSpace());
   3570   Addr = Builder.CreateBitCast(Addr, NewPtrTy);
   3571 
   3572   Value *Call = Builder.CreateCall(Fn, Addr, "larx");
   3573 
   3574   return Builder.CreateBitCast(Call, Ty);
   3575 }
   3576 
   3577 /// Perform a store-conditional operation to Addr. Return the status of the
   3578 /// store. This should be 0 if the store succeeded, non-zero otherwise.
   3579 Value *HexagonTargetLowering::emitStoreConditional(IRBuilder<> &Builder,
   3580       Value *Val, Value *Addr, AtomicOrdering Ord) const {
   3581   BasicBlock *BB = Builder.GetInsertBlock();
   3582   Module *M = BB->getParent()->getParent();
   3583   Type *Ty = Val->getType();
   3584   unsigned SZ = Ty->getPrimitiveSizeInBits();
   3585 
   3586   Type *CastTy = Builder.getIntNTy(SZ);
   3587   assert((SZ == 32 || SZ == 64) && "Only 32/64-bit atomic stores supported");
   3588   Intrinsic::ID IntID = (SZ == 32) ? Intrinsic::hexagon_S2_storew_locked
   3589                                    : Intrinsic::hexagon_S4_stored_locked;
   3590   Function *Fn = Intrinsic::getDeclaration(M, IntID);
   3591 
   3592   unsigned AS = Addr->getType()->getPointerAddressSpace();
   3593   Addr = Builder.CreateBitCast(Addr, CastTy->getPointerTo(AS));
   3594   Val = Builder.CreateBitCast(Val, CastTy);
   3595 
   3596   Value *Call = Builder.CreateCall(Fn, {Addr, Val}, "stcx");
   3597   Value *Cmp = Builder.CreateICmpEQ(Call, Builder.getInt32(0), "");
   3598   Value *Ext = Builder.CreateZExt(Cmp, Type::getInt32Ty(M->getContext()));
   3599   return Ext;
   3600 }
   3601 
   3602 TargetLowering::AtomicExpansionKind
   3603 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   3604   // Do not expand loads and stores that don't exceed 64 bits.
   3605   return LI->getType()->getPrimitiveSizeInBits() > 64
   3606              ? AtomicExpansionKind::LLOnly
   3607              : AtomicExpansionKind::None;
   3608 }
   3609 
   3610 bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   3611   // Do not expand loads and stores that don't exceed 64 bits.
   3612   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
   3613 }
   3614 
   3615 TargetLowering::AtomicExpansionKind
   3616 HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
   3617     AtomicCmpXchgInst *AI) const {
   3618   return AtomicExpansionKind::LLSC;
   3619 }
   3620