Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===- AMDGPUInstCombineIntrinsic.cpp - AMDGPU specific InstCombine pass --===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 //
      9 // \file
     10 // This file implements a TargetTransformInfo analysis pass specific to the
     11 // AMDGPU target machine. It uses the target's detailed information to provide
     12 // more precise answers to certain TTI queries, while letting the target
     13 // independent and default TTI implementations handle the rest.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "AMDGPUInstrInfo.h"
     18 #include "AMDGPUTargetTransformInfo.h"
     19 #include "GCNSubtarget.h"
     20 #include "R600Subtarget.h"
     21 #include "llvm/IR/IntrinsicsAMDGPU.h"
     22 #include "llvm/Transforms/InstCombine/InstCombiner.h"
     23 
     24 using namespace llvm;
     25 
     26 #define DEBUG_TYPE "AMDGPUtti"
     27 
     28 namespace {
     29 
// Record type with a single unsigned field.
// NOTE(review): presumably this is the row type of the table whose
// implementation is selected by GET_AMDGPUImageDMaskIntrinsicTable_IMPL and
// pulled in from InstCombineTables.inc, with Intr holding an intrinsic ID --
// confirm against the generated table before changing the layout.
struct AMDGPUImageDMaskIntrinsic {
  unsigned Intr;
};
     33 
     34 #define GET_AMDGPUImageDMaskIntrinsicTable_IMPL
     35 #include "InstCombineTables.inc"
     36 
     37 } // end anonymous namespace
     38 
     39 // Constant fold llvm.amdgcn.fmed3 intrinsics for standard inputs.
     40 //
     41 // A single NaN input is folded to minnum, so we rely on that folding for
     42 // handling NaNs.
     43 static APFloat fmed3AMDGCN(const APFloat &Src0, const APFloat &Src1,
     44                            const APFloat &Src2) {
     45   APFloat Max3 = maxnum(maxnum(Src0, Src1), Src2);
     46 
     47   APFloat::cmpResult Cmp0 = Max3.compare(Src0);
     48   assert(Cmp0 != APFloat::cmpUnordered && "nans handled separately");
     49   if (Cmp0 == APFloat::cmpEqual)
     50     return maxnum(Src1, Src2);
     51 
     52   APFloat::cmpResult Cmp1 = Max3.compare(Src1);
     53   assert(Cmp1 != APFloat::cmpUnordered && "nans handled separately");
     54   if (Cmp1 == APFloat::cmpEqual)
     55     return maxnum(Src0, Src2);
     56 
     57   return maxnum(Src0, Src1);
     58 }
     59 
     60 // Check if a value can be converted to a 16-bit value without losing
     61 // precision.
     62 static bool canSafelyConvertTo16Bit(Value &V) {
     63   Type *VTy = V.getType();
     64   if (VTy->isHalfTy() || VTy->isIntegerTy(16)) {
     65     // The value is already 16-bit, so we don't want to convert to 16-bit again!
     66     return false;
     67   }
     68   if (ConstantFP *ConstFloat = dyn_cast<ConstantFP>(&V)) {
     69     // We need to check that if we cast the index down to a half, we do not lose
     70     // precision.
     71     APFloat FloatValue(ConstFloat->getValueAPF());
     72     bool LosesInfo = true;
     73     FloatValue.convert(APFloat::IEEEhalf(), APFloat::rmTowardZero, &LosesInfo);
     74     return !LosesInfo;
     75   }
     76   Value *CastSrc;
     77   if (match(&V, m_FPExt(PatternMatch::m_Value(CastSrc))) ||
     78       match(&V, m_SExt(PatternMatch::m_Value(CastSrc))) ||
     79       match(&V, m_ZExt(PatternMatch::m_Value(CastSrc)))) {
     80     Type *CastSrcTy = CastSrc->getType();
     81     if (CastSrcTy->isHalfTy() || CastSrcTy->isIntegerTy(16))
     82       return true;
     83   }
     84 
     85   return false;
     86 }
     87 
     88 // Convert a value to 16-bit.
     89 static Value *convertTo16Bit(Value &V, InstCombiner::BuilderTy &Builder) {
     90   Type *VTy = V.getType();
     91   if (isa<FPExtInst>(&V) || isa<SExtInst>(&V) || isa<ZExtInst>(&V))
     92     return cast<Instruction>(&V)->getOperand(0);
     93   if (VTy->isIntegerTy())
     94     return Builder.CreateIntCast(&V, Type::getInt16Ty(V.getContext()), false);
     95   if (VTy->isFloatingPointTy())
     96     return Builder.CreateFPCast(&V, Type::getHalfTy(V.getContext()));
     97 
     98   llvm_unreachable("Should never be called!");
     99 }
    100 
    101 static Optional<Instruction *>
    102 simplifyAMDGCNImageIntrinsic(const GCNSubtarget *ST,
    103                              const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr,
    104                              IntrinsicInst &II, InstCombiner &IC) {
    105   if (!ST->hasA16() && !ST->hasG16())
    106     return None;
    107 
    108   bool FloatCoord = false;
    109   // true means derivatives can be converted to 16 bit, coordinates not
    110   bool OnlyDerivatives = false;
    111 
    112   for (unsigned OperandIndex = ImageDimIntr->GradientStart;
    113        OperandIndex < ImageDimIntr->VAddrEnd; OperandIndex++) {
    114     Value *Coord = II.getOperand(OperandIndex);
    115     // If the values are not derived from 16-bit values, we cannot optimize.
    116     if (!canSafelyConvertTo16Bit(*Coord)) {
    117       if (OperandIndex < ImageDimIntr->CoordStart ||
    118           ImageDimIntr->GradientStart == ImageDimIntr->CoordStart) {
    119         return None;
    120       }
    121       // All gradients can be converted, so convert only them
    122       OnlyDerivatives = true;
    123       break;
    124     }
    125 
    126     assert(OperandIndex == ImageDimIntr->GradientStart ||
    127            FloatCoord == Coord->getType()->isFloatingPointTy());
    128     FloatCoord = Coord->getType()->isFloatingPointTy();
    129   }
    130 
    131   if (OnlyDerivatives) {
    132     if (!ST->hasG16())
    133       return None;
    134   } else {
    135     if (!ST->hasA16())
    136       OnlyDerivatives = true; // Only supports G16
    137   }
    138 
    139   Type *CoordType = FloatCoord ? Type::getHalfTy(II.getContext())
    140                                : Type::getInt16Ty(II.getContext());
    141 
    142   SmallVector<Type *, 4> ArgTys;
    143   if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), ArgTys))
    144     return None;
    145 
    146   ArgTys[ImageDimIntr->GradientTyArg] = CoordType;
    147   if (!OnlyDerivatives)
    148     ArgTys[ImageDimIntr->CoordTyArg] = CoordType;
    149   Function *I =
    150       Intrinsic::getDeclaration(II.getModule(), II.getIntrinsicID(), ArgTys);
    151 
    152   SmallVector<Value *, 8> Args(II.arg_operands());
    153 
    154   unsigned EndIndex =
    155       OnlyDerivatives ? ImageDimIntr->CoordStart : ImageDimIntr->VAddrEnd;
    156   for (unsigned OperandIndex = ImageDimIntr->GradientStart;
    157        OperandIndex < EndIndex; OperandIndex++) {
    158     Args[OperandIndex] =
    159         convertTo16Bit(*II.getOperand(OperandIndex), IC.Builder);
    160   }
    161 
    162   CallInst *NewCall = IC.Builder.CreateCall(I, Args);
    163   NewCall->takeName(&II);
    164   NewCall->copyMetadata(II);
    165   if (isa<FPMathOperator>(NewCall))
    166     NewCall->copyFastMathFlags(&II);
    167   return IC.replaceInstUsesWith(II, NewCall);
    168 }
    169 
    170 bool GCNTTIImpl::canSimplifyLegacyMulToMul(const Value *Op0, const Value *Op1,
    171                                            InstCombiner &IC) const {
    172   // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
    173   // infinity, gives +0.0. If we can prove we don't have one of the special
    174   // cases then we can use a normal multiply instead.
    175   // TODO: Create and use isKnownFiniteNonZero instead of just matching
    176   // constants here.
    177   if (match(Op0, PatternMatch::m_FiniteNonZero()) ||
    178       match(Op1, PatternMatch::m_FiniteNonZero())) {
    179     // One operand is not zero or infinity or NaN.
    180     return true;
    181   }
    182   auto *TLI = &IC.getTargetLibraryInfo();
    183   if (isKnownNeverInfinity(Op0, TLI) && isKnownNeverNaN(Op0, TLI) &&
    184       isKnownNeverInfinity(Op1, TLI) && isKnownNeverNaN(Op1, TLI)) {
    185     // Neither operand is infinity or NaN.
    186     return true;
    187   }
    188   return false;
    189 }
    190 
    191 Optional<Instruction *>
    192 GCNTTIImpl::instCombineIntrinsic(InstCombiner &IC, IntrinsicInst &II) const {
    193   Intrinsic::ID IID = II.getIntrinsicID();
    194   switch (IID) {
    195   case Intrinsic::amdgcn_rcp: {
    196     Value *Src = II.getArgOperand(0);
    197 
    198     // TODO: Move to ConstantFolding/InstSimplify?
    199     if (isa<UndefValue>(Src)) {
    200       Type *Ty = II.getType();
    201       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
    202       return IC.replaceInstUsesWith(II, QNaN);
    203     }
    204 
    205     if (II.isStrictFP())
    206       break;
    207 
    208     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
    209       const APFloat &ArgVal = C->getValueAPF();
    210       APFloat Val(ArgVal.getSemantics(), 1);
    211       Val.divide(ArgVal, APFloat::rmNearestTiesToEven);
    212 
    213       // This is more precise than the instruction may give.
    214       //
    215       // TODO: The instruction always flushes denormal results (except for f16),
    216       // should this also?
    217       return IC.replaceInstUsesWith(II, ConstantFP::get(II.getContext(), Val));
    218     }
    219 
    220     break;
    221   }
    222   case Intrinsic::amdgcn_rsq: {
    223     Value *Src = II.getArgOperand(0);
    224 
    225     // TODO: Move to ConstantFolding/InstSimplify?
    226     if (isa<UndefValue>(Src)) {
    227       Type *Ty = II.getType();
    228       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
    229       return IC.replaceInstUsesWith(II, QNaN);
    230     }
    231 
    232     break;
    233   }
    234   case Intrinsic::amdgcn_frexp_mant:
    235   case Intrinsic::amdgcn_frexp_exp: {
    236     Value *Src = II.getArgOperand(0);
    237     if (const ConstantFP *C = dyn_cast<ConstantFP>(Src)) {
    238       int Exp;
    239       APFloat Significand =
    240           frexp(C->getValueAPF(), Exp, APFloat::rmNearestTiesToEven);
    241 
    242       if (IID == Intrinsic::amdgcn_frexp_mant) {
    243         return IC.replaceInstUsesWith(
    244             II, ConstantFP::get(II.getContext(), Significand));
    245       }
    246 
    247       // Match instruction special case behavior.
    248       if (Exp == APFloat::IEK_NaN || Exp == APFloat::IEK_Inf)
    249         Exp = 0;
    250 
    251       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Exp));
    252     }
    253 
    254     if (isa<UndefValue>(Src)) {
    255       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    256     }
    257 
    258     break;
    259   }
    260   case Intrinsic::amdgcn_class: {
    261     enum {
    262       S_NAN = 1 << 0,       // Signaling NaN
    263       Q_NAN = 1 << 1,       // Quiet NaN
    264       N_INFINITY = 1 << 2,  // Negative infinity
    265       N_NORMAL = 1 << 3,    // Negative normal
    266       N_SUBNORMAL = 1 << 4, // Negative subnormal
    267       N_ZERO = 1 << 5,      // Negative zero
    268       P_ZERO = 1 << 6,      // Positive zero
    269       P_SUBNORMAL = 1 << 7, // Positive subnormal
    270       P_NORMAL = 1 << 8,    // Positive normal
    271       P_INFINITY = 1 << 9   // Positive infinity
    272     };
    273 
    274     const uint32_t FullMask = S_NAN | Q_NAN | N_INFINITY | N_NORMAL |
    275                               N_SUBNORMAL | N_ZERO | P_ZERO | P_SUBNORMAL |
    276                               P_NORMAL | P_INFINITY;
    277 
    278     Value *Src0 = II.getArgOperand(0);
    279     Value *Src1 = II.getArgOperand(1);
    280     const ConstantInt *CMask = dyn_cast<ConstantInt>(Src1);
    281     if (!CMask) {
    282       if (isa<UndefValue>(Src0)) {
    283         return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    284       }
    285 
    286       if (isa<UndefValue>(Src1)) {
    287         return IC.replaceInstUsesWith(II,
    288                                       ConstantInt::get(II.getType(), false));
    289       }
    290       break;
    291     }
    292 
    293     uint32_t Mask = CMask->getZExtValue();
    294 
    295     // If all tests are made, it doesn't matter what the value is.
    296     if ((Mask & FullMask) == FullMask) {
    297       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), true));
    298     }
    299 
    300     if ((Mask & FullMask) == 0) {
    301       return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), false));
    302     }
    303 
    304     if (Mask == (S_NAN | Q_NAN)) {
    305       // Equivalent of isnan. Replace with standard fcmp.
    306       Value *FCmp = IC.Builder.CreateFCmpUNO(Src0, Src0);
    307       FCmp->takeName(&II);
    308       return IC.replaceInstUsesWith(II, FCmp);
    309     }
    310 
    311     if (Mask == (N_ZERO | P_ZERO)) {
    312       // Equivalent of == 0.
    313       Value *FCmp =
    314           IC.Builder.CreateFCmpOEQ(Src0, ConstantFP::get(Src0->getType(), 0.0));
    315 
    316       FCmp->takeName(&II);
    317       return IC.replaceInstUsesWith(II, FCmp);
    318     }
    319 
    320     // fp_class (nnan x), qnan|snan|other -> fp_class (nnan x), other
    321     if (((Mask & S_NAN) || (Mask & Q_NAN)) &&
    322         isKnownNeverNaN(Src0, &IC.getTargetLibraryInfo())) {
    323       return IC.replaceOperand(
    324           II, 1, ConstantInt::get(Src1->getType(), Mask & ~(S_NAN | Q_NAN)));
    325     }
    326 
    327     const ConstantFP *CVal = dyn_cast<ConstantFP>(Src0);
    328     if (!CVal) {
    329       if (isa<UndefValue>(Src0)) {
    330         return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    331       }
    332 
    333       // Clamp mask to used bits
    334       if ((Mask & FullMask) != Mask) {
    335         CallInst *NewCall = IC.Builder.CreateCall(
    336             II.getCalledFunction(),
    337             {Src0, ConstantInt::get(Src1->getType(), Mask & FullMask)});
    338 
    339         NewCall->takeName(&II);
    340         return IC.replaceInstUsesWith(II, NewCall);
    341       }
    342 
    343       break;
    344     }
    345 
    346     const APFloat &Val = CVal->getValueAPF();
    347 
    348     bool Result =
    349         ((Mask & S_NAN) && Val.isNaN() && Val.isSignaling()) ||
    350         ((Mask & Q_NAN) && Val.isNaN() && !Val.isSignaling()) ||
    351         ((Mask & N_INFINITY) && Val.isInfinity() && Val.isNegative()) ||
    352         ((Mask & N_NORMAL) && Val.isNormal() && Val.isNegative()) ||
    353         ((Mask & N_SUBNORMAL) && Val.isDenormal() && Val.isNegative()) ||
    354         ((Mask & N_ZERO) && Val.isZero() && Val.isNegative()) ||
    355         ((Mask & P_ZERO) && Val.isZero() && !Val.isNegative()) ||
    356         ((Mask & P_SUBNORMAL) && Val.isDenormal() && !Val.isNegative()) ||
    357         ((Mask & P_NORMAL) && Val.isNormal() && !Val.isNegative()) ||
    358         ((Mask & P_INFINITY) && Val.isInfinity() && !Val.isNegative());
    359 
    360     return IC.replaceInstUsesWith(II, ConstantInt::get(II.getType(), Result));
    361   }
    362   case Intrinsic::amdgcn_cvt_pkrtz: {
    363     Value *Src0 = II.getArgOperand(0);
    364     Value *Src1 = II.getArgOperand(1);
    365     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
    366       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
    367         const fltSemantics &HalfSem =
    368             II.getType()->getScalarType()->getFltSemantics();
    369         bool LosesInfo;
    370         APFloat Val0 = C0->getValueAPF();
    371         APFloat Val1 = C1->getValueAPF();
    372         Val0.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
    373         Val1.convert(HalfSem, APFloat::rmTowardZero, &LosesInfo);
    374 
    375         Constant *Folded =
    376             ConstantVector::get({ConstantFP::get(II.getContext(), Val0),
    377                                  ConstantFP::get(II.getContext(), Val1)});
    378         return IC.replaceInstUsesWith(II, Folded);
    379       }
    380     }
    381 
    382     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
    383       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    384     }
    385 
    386     break;
    387   }
    388   case Intrinsic::amdgcn_cvt_pknorm_i16:
    389   case Intrinsic::amdgcn_cvt_pknorm_u16:
    390   case Intrinsic::amdgcn_cvt_pk_i16:
    391   case Intrinsic::amdgcn_cvt_pk_u16: {
    392     Value *Src0 = II.getArgOperand(0);
    393     Value *Src1 = II.getArgOperand(1);
    394 
    395     if (isa<UndefValue>(Src0) && isa<UndefValue>(Src1)) {
    396       return IC.replaceInstUsesWith(II, UndefValue::get(II.getType()));
    397     }
    398 
    399     break;
    400   }
    401   case Intrinsic::amdgcn_ubfe:
    402   case Intrinsic::amdgcn_sbfe: {
    403     // Decompose simple cases into standard shifts.
    404     Value *Src = II.getArgOperand(0);
    405     if (isa<UndefValue>(Src)) {
    406       return IC.replaceInstUsesWith(II, Src);
    407     }
    408 
    409     unsigned Width;
    410     Type *Ty = II.getType();
    411     unsigned IntSize = Ty->getIntegerBitWidth();
    412 
    413     ConstantInt *CWidth = dyn_cast<ConstantInt>(II.getArgOperand(2));
    414     if (CWidth) {
    415       Width = CWidth->getZExtValue();
    416       if ((Width & (IntSize - 1)) == 0) {
    417         return IC.replaceInstUsesWith(II, ConstantInt::getNullValue(Ty));
    418       }
    419 
    420       // Hardware ignores high bits, so remove those.
    421       if (Width >= IntSize) {
    422         return IC.replaceOperand(
    423             II, 2, ConstantInt::get(CWidth->getType(), Width & (IntSize - 1)));
    424       }
    425     }
    426 
    427     unsigned Offset;
    428     ConstantInt *COffset = dyn_cast<ConstantInt>(II.getArgOperand(1));
    429     if (COffset) {
    430       Offset = COffset->getZExtValue();
    431       if (Offset >= IntSize) {
    432         return IC.replaceOperand(
    433             II, 1,
    434             ConstantInt::get(COffset->getType(), Offset & (IntSize - 1)));
    435       }
    436     }
    437 
    438     bool Signed = IID == Intrinsic::amdgcn_sbfe;
    439 
    440     if (!CWidth || !COffset)
    441       break;
    442 
    443     // The case of Width == 0 is handled above, which makes this tranformation
    444     // safe.  If Width == 0, then the ashr and lshr instructions become poison
    445     // value since the shift amount would be equal to the bit size.
    446     assert(Width != 0);
    447 
    448     // TODO: This allows folding to undef when the hardware has specific
    449     // behavior?
    450     if (Offset + Width < IntSize) {
    451       Value *Shl = IC.Builder.CreateShl(Src, IntSize - Offset - Width);
    452       Value *RightShift = Signed ? IC.Builder.CreateAShr(Shl, IntSize - Width)
    453                                  : IC.Builder.CreateLShr(Shl, IntSize - Width);
    454       RightShift->takeName(&II);
    455       return IC.replaceInstUsesWith(II, RightShift);
    456     }
    457 
    458     Value *RightShift = Signed ? IC.Builder.CreateAShr(Src, Offset)
    459                                : IC.Builder.CreateLShr(Src, Offset);
    460 
    461     RightShift->takeName(&II);
    462     return IC.replaceInstUsesWith(II, RightShift);
    463   }
    464   case Intrinsic::amdgcn_exp:
    465   case Intrinsic::amdgcn_exp_compr: {
    466     ConstantInt *En = cast<ConstantInt>(II.getArgOperand(1));
    467     unsigned EnBits = En->getZExtValue();
    468     if (EnBits == 0xf)
    469       break; // All inputs enabled.
    470 
    471     bool IsCompr = IID == Intrinsic::amdgcn_exp_compr;
    472     bool Changed = false;
    473     for (int I = 0; I < (IsCompr ? 2 : 4); ++I) {
    474       if ((!IsCompr && (EnBits & (1 << I)) == 0) ||
    475           (IsCompr && ((EnBits & (0x3 << (2 * I))) == 0))) {
    476         Value *Src = II.getArgOperand(I + 2);
    477         if (!isa<UndefValue>(Src)) {
    478           IC.replaceOperand(II, I + 2, UndefValue::get(Src->getType()));
    479           Changed = true;
    480         }
    481       }
    482     }
    483 
    484     if (Changed) {
    485       return &II;
    486     }
    487 
    488     break;
    489   }
    490   case Intrinsic::amdgcn_fmed3: {
    491     // Note this does not preserve proper sNaN behavior if IEEE-mode is enabled
    492     // for the shader.
    493 
    494     Value *Src0 = II.getArgOperand(0);
    495     Value *Src1 = II.getArgOperand(1);
    496     Value *Src2 = II.getArgOperand(2);
    497 
    498     // Checking for NaN before canonicalization provides better fidelity when
    499     // mapping other operations onto fmed3 since the order of operands is
    500     // unchanged.
    501     CallInst *NewCall = nullptr;
    502     if (match(Src0, PatternMatch::m_NaN()) || isa<UndefValue>(Src0)) {
    503       NewCall = IC.Builder.CreateMinNum(Src1, Src2);
    504     } else if (match(Src1, PatternMatch::m_NaN()) || isa<UndefValue>(Src1)) {
    505       NewCall = IC.Builder.CreateMinNum(Src0, Src2);
    506     } else if (match(Src2, PatternMatch::m_NaN()) || isa<UndefValue>(Src2)) {
    507       NewCall = IC.Builder.CreateMaxNum(Src0, Src1);
    508     }
    509 
    510     if (NewCall) {
    511       NewCall->copyFastMathFlags(&II);
    512       NewCall->takeName(&II);
    513       return IC.replaceInstUsesWith(II, NewCall);
    514     }
    515 
    516     bool Swap = false;
    517     // Canonicalize constants to RHS operands.
    518     //
    519     // fmed3(c0, x, c1) -> fmed3(x, c0, c1)
    520     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
    521       std::swap(Src0, Src1);
    522       Swap = true;
    523     }
    524 
    525     if (isa<Constant>(Src1) && !isa<Constant>(Src2)) {
    526       std::swap(Src1, Src2);
    527       Swap = true;
    528     }
    529 
    530     if (isa<Constant>(Src0) && !isa<Constant>(Src1)) {
    531       std::swap(Src0, Src1);
    532       Swap = true;
    533     }
    534 
    535     if (Swap) {
    536       II.setArgOperand(0, Src0);
    537       II.setArgOperand(1, Src1);
    538       II.setArgOperand(2, Src2);
    539       return &II;
    540     }
    541 
    542     if (const ConstantFP *C0 = dyn_cast<ConstantFP>(Src0)) {
    543       if (const ConstantFP *C1 = dyn_cast<ConstantFP>(Src1)) {
    544         if (const ConstantFP *C2 = dyn_cast<ConstantFP>(Src2)) {
    545           APFloat Result = fmed3AMDGCN(C0->getValueAPF(), C1->getValueAPF(),
    546                                        C2->getValueAPF());
    547           return IC.replaceInstUsesWith(
    548               II, ConstantFP::get(IC.Builder.getContext(), Result));
    549         }
    550       }
    551     }
    552 
    553     break;
    554   }
    555   case Intrinsic::amdgcn_icmp:
    556   case Intrinsic::amdgcn_fcmp: {
    557     const ConstantInt *CC = cast<ConstantInt>(II.getArgOperand(2));
    558     // Guard against invalid arguments.
    559     int64_t CCVal = CC->getZExtValue();
    560     bool IsInteger = IID == Intrinsic::amdgcn_icmp;
    561     if ((IsInteger && (CCVal < CmpInst::FIRST_ICMP_PREDICATE ||
    562                        CCVal > CmpInst::LAST_ICMP_PREDICATE)) ||
    563         (!IsInteger && (CCVal < CmpInst::FIRST_FCMP_PREDICATE ||
    564                         CCVal > CmpInst::LAST_FCMP_PREDICATE)))
    565       break;
    566 
    567     Value *Src0 = II.getArgOperand(0);
    568     Value *Src1 = II.getArgOperand(1);
    569 
    570     if (auto *CSrc0 = dyn_cast<Constant>(Src0)) {
    571       if (auto *CSrc1 = dyn_cast<Constant>(Src1)) {
    572         Constant *CCmp = ConstantExpr::getCompare(CCVal, CSrc0, CSrc1);
    573         if (CCmp->isNullValue()) {
    574           return IC.replaceInstUsesWith(
    575               II, ConstantExpr::getSExt(CCmp, II.getType()));
    576         }
    577 
    578         // The result of V_ICMP/V_FCMP assembly instructions (which this
    579         // intrinsic exposes) is one bit per thread, masked with the EXEC
    580         // register (which contains the bitmask of live threads). So a
    581         // comparison that always returns true is the same as a read of the
    582         // EXEC register.
    583         Function *NewF = Intrinsic::getDeclaration(
    584             II.getModule(), Intrinsic::read_register, II.getType());
    585         Metadata *MDArgs[] = {MDString::get(II.getContext(), "exec")};
    586         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
    587         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
    588         CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
    589         NewCall->addAttribute(AttributeList::FunctionIndex,
    590                               Attribute::Convergent);
    591         NewCall->takeName(&II);
    592         return IC.replaceInstUsesWith(II, NewCall);
    593       }
    594 
    595       // Canonicalize constants to RHS.
    596       CmpInst::Predicate SwapPred =
    597           CmpInst::getSwappedPredicate(static_cast<CmpInst::Predicate>(CCVal));
    598       II.setArgOperand(0, Src1);
    599       II.setArgOperand(1, Src0);
    600       II.setArgOperand(
    601           2, ConstantInt::get(CC->getType(), static_cast<int>(SwapPred)));
    602       return &II;
    603     }
    604 
    605     if (CCVal != CmpInst::ICMP_EQ && CCVal != CmpInst::ICMP_NE)
    606       break;
    607 
    608     // Canonicalize compare eq with true value to compare != 0
    609     // llvm.amdgcn.icmp(zext (i1 x), 1, eq)
    610     //   -> llvm.amdgcn.icmp(zext (i1 x), 0, ne)
    611     // llvm.amdgcn.icmp(sext (i1 x), -1, eq)
    612     //   -> llvm.amdgcn.icmp(sext (i1 x), 0, ne)
    613     Value *ExtSrc;
    614     if (CCVal == CmpInst::ICMP_EQ &&
    615         ((match(Src1, PatternMatch::m_One()) &&
    616           match(Src0, m_ZExt(PatternMatch::m_Value(ExtSrc)))) ||
    617          (match(Src1, PatternMatch::m_AllOnes()) &&
    618           match(Src0, m_SExt(PatternMatch::m_Value(ExtSrc))))) &&
    619         ExtSrc->getType()->isIntegerTy(1)) {
    620       IC.replaceOperand(II, 1, ConstantInt::getNullValue(Src1->getType()));
    621       IC.replaceOperand(II, 2,
    622                         ConstantInt::get(CC->getType(), CmpInst::ICMP_NE));
    623       return &II;
    624     }
    625 
    626     CmpInst::Predicate SrcPred;
    627     Value *SrcLHS;
    628     Value *SrcRHS;
    629 
    630     // Fold compare eq/ne with 0 from a compare result as the predicate to the
    631     // intrinsic. The typical use is a wave vote function in the library, which
    632     // will be fed from a user code condition compared with 0. Fold in the
    633     // redundant compare.
    634 
    635     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, ne)
    636     //   -> llvm.amdgcn.[if]cmp(a, b, pred)
    637     //
    638     // llvm.amdgcn.icmp([sz]ext ([if]cmp pred a, b), 0, eq)
    639     //   -> llvm.amdgcn.[if]cmp(a, b, inv pred)
    640     if (match(Src1, PatternMatch::m_Zero()) &&
    641         match(Src0, PatternMatch::m_ZExtOrSExt(
    642                         m_Cmp(SrcPred, PatternMatch::m_Value(SrcLHS),
    643                               PatternMatch::m_Value(SrcRHS))))) {
    644       if (CCVal == CmpInst::ICMP_EQ)
    645         SrcPred = CmpInst::getInversePredicate(SrcPred);
    646 
    647       Intrinsic::ID NewIID = CmpInst::isFPPredicate(SrcPred)
    648                                  ? Intrinsic::amdgcn_fcmp
    649                                  : Intrinsic::amdgcn_icmp;
    650 
    651       Type *Ty = SrcLHS->getType();
    652       if (auto *CmpType = dyn_cast<IntegerType>(Ty)) {
    653         // Promote to next legal integer type.
    654         unsigned Width = CmpType->getBitWidth();
    655         unsigned NewWidth = Width;
    656 
    657         // Don't do anything for i1 comparisons.
    658         if (Width == 1)
    659           break;
    660 
    661         if (Width <= 16)
    662           NewWidth = 16;
    663         else if (Width <= 32)
    664           NewWidth = 32;
    665         else if (Width <= 64)
    666           NewWidth = 64;
    667         else if (Width > 64)
    668           break; // Can't handle this.
    669 
    670         if (Width != NewWidth) {
    671           IntegerType *CmpTy = IC.Builder.getIntNTy(NewWidth);
    672           if (CmpInst::isSigned(SrcPred)) {
    673             SrcLHS = IC.Builder.CreateSExt(SrcLHS, CmpTy);
    674             SrcRHS = IC.Builder.CreateSExt(SrcRHS, CmpTy);
    675           } else {
    676             SrcLHS = IC.Builder.CreateZExt(SrcLHS, CmpTy);
    677             SrcRHS = IC.Builder.CreateZExt(SrcRHS, CmpTy);
    678           }
    679         }
    680       } else if (!Ty->isFloatTy() && !Ty->isDoubleTy() && !Ty->isHalfTy())
    681         break;
    682 
    683       Function *NewF = Intrinsic::getDeclaration(
    684           II.getModule(), NewIID, {II.getType(), SrcLHS->getType()});
    685       Value *Args[] = {SrcLHS, SrcRHS,
    686                        ConstantInt::get(CC->getType(), SrcPred)};
    687       CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
    688       NewCall->takeName(&II);
    689       return IC.replaceInstUsesWith(II, NewCall);
    690     }
    691 
    692     break;
    693   }
    694   case Intrinsic::amdgcn_ballot: {
    695     if (auto *Src = dyn_cast<ConstantInt>(II.getArgOperand(0))) {
    696       if (Src->isZero()) {
    697         // amdgcn.ballot(i1 0) is zero.
    698         return IC.replaceInstUsesWith(II, Constant::getNullValue(II.getType()));
    699       }
    700 
    701       if (Src->isOne()) {
    702         // amdgcn.ballot(i1 1) is exec.
    703         const char *RegName = "exec";
    704         if (II.getType()->isIntegerTy(32))
    705           RegName = "exec_lo";
    706         else if (!II.getType()->isIntegerTy(64))
    707           break;
    708 
    709         Function *NewF = Intrinsic::getDeclaration(
    710             II.getModule(), Intrinsic::read_register, II.getType());
    711         Metadata *MDArgs[] = {MDString::get(II.getContext(), RegName)};
    712         MDNode *MD = MDNode::get(II.getContext(), MDArgs);
    713         Value *Args[] = {MetadataAsValue::get(II.getContext(), MD)};
    714         CallInst *NewCall = IC.Builder.CreateCall(NewF, Args);
    715         NewCall->addAttribute(AttributeList::FunctionIndex,
    716                               Attribute::Convergent);
    717         NewCall->takeName(&II);
    718         return IC.replaceInstUsesWith(II, NewCall);
    719       }
    720     }
    721     break;
    722   }
    723   case Intrinsic::amdgcn_wqm_vote: {
    724     // wqm_vote is identity when the argument is constant.
    725     if (!isa<Constant>(II.getArgOperand(0)))
    726       break;
    727 
    728     return IC.replaceInstUsesWith(II, II.getArgOperand(0));
    729   }
    730   case Intrinsic::amdgcn_kill: {
    731     const ConstantInt *C = dyn_cast<ConstantInt>(II.getArgOperand(0));
    732     if (!C || !C->getZExtValue())
    733       break;
    734 
    735     // amdgcn.kill(i1 1) is a no-op
    736     return IC.eraseInstFromFunction(II);
    737   }
    738   case Intrinsic::amdgcn_update_dpp: {
    739     Value *Old = II.getArgOperand(0);
    740 
    741     auto *BC = cast<ConstantInt>(II.getArgOperand(5));
    742     auto *RM = cast<ConstantInt>(II.getArgOperand(3));
    743     auto *BM = cast<ConstantInt>(II.getArgOperand(4));
    744     if (BC->isZeroValue() || RM->getZExtValue() != 0xF ||
    745         BM->getZExtValue() != 0xF || isa<UndefValue>(Old))
    746       break;
    747 
    748     // If bound_ctrl = 1, row mask = bank mask = 0xf we can omit old value.
    749     return IC.replaceOperand(II, 0, UndefValue::get(Old->getType()));
    750   }
    751   case Intrinsic::amdgcn_permlane16:
    752   case Intrinsic::amdgcn_permlanex16: {
    753     // Discard vdst_in if it's not going to be read.
    754     Value *VDstIn = II.getArgOperand(0);
    755     if (isa<UndefValue>(VDstIn))
    756       break;
    757 
    758     ConstantInt *FetchInvalid = cast<ConstantInt>(II.getArgOperand(4));
    759     ConstantInt *BoundCtrl = cast<ConstantInt>(II.getArgOperand(5));
    760     if (!FetchInvalid->getZExtValue() && !BoundCtrl->getZExtValue())
    761       break;
    762 
    763     return IC.replaceOperand(II, 0, UndefValue::get(VDstIn->getType()));
    764   }
    765   case Intrinsic::amdgcn_readfirstlane:
    766   case Intrinsic::amdgcn_readlane: {
    767     // A constant value is trivially uniform.
    768     if (Constant *C = dyn_cast<Constant>(II.getArgOperand(0))) {
    769       return IC.replaceInstUsesWith(II, C);
    770     }
    771 
    772     // The rest of these may not be safe if the exec may not be the same between
    773     // the def and use.
    774     Value *Src = II.getArgOperand(0);
    775     Instruction *SrcInst = dyn_cast<Instruction>(Src);
    776     if (SrcInst && SrcInst->getParent() != II.getParent())
    777       break;
    778 
    779     // readfirstlane (readfirstlane x) -> readfirstlane x
    780     // readlane (readfirstlane x), y -> readfirstlane x
    781     if (match(Src,
    782               PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readfirstlane>())) {
    783       return IC.replaceInstUsesWith(II, Src);
    784     }
    785 
    786     if (IID == Intrinsic::amdgcn_readfirstlane) {
    787       // readfirstlane (readlane x, y) -> readlane x, y
    788       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>())) {
    789         return IC.replaceInstUsesWith(II, Src);
    790       }
    791     } else {
    792       // readlane (readlane x, y), y -> readlane x, y
    793       if (match(Src, PatternMatch::m_Intrinsic<Intrinsic::amdgcn_readlane>(
    794                          PatternMatch::m_Value(),
    795                          PatternMatch::m_Specific(II.getArgOperand(1))))) {
    796         return IC.replaceInstUsesWith(II, Src);
    797       }
    798     }
    799 
    800     break;
    801   }
    802   case Intrinsic::amdgcn_ldexp: {
    803     // FIXME: This doesn't introduce new instructions and belongs in
    804     // InstructionSimplify.
    805     Type *Ty = II.getType();
    806     Value *Op0 = II.getArgOperand(0);
    807     Value *Op1 = II.getArgOperand(1);
    808 
    809     // Folding undef to qnan is safe regardless of the FP mode.
    810     if (isa<UndefValue>(Op0)) {
    811       auto *QNaN = ConstantFP::get(Ty, APFloat::getQNaN(Ty->getFltSemantics()));
    812       return IC.replaceInstUsesWith(II, QNaN);
    813     }
    814 
    815     const APFloat *C = nullptr;
    816     match(Op0, PatternMatch::m_APFloat(C));
    817 
    818     // FIXME: Should flush denorms depending on FP mode, but that's ignored
    819     // everywhere else.
    820     //
    821     // These cases should be safe, even with strictfp.
    822     // ldexp(0.0, x) -> 0.0
    823     // ldexp(-0.0, x) -> -0.0
    824     // ldexp(inf, x) -> inf
    825     // ldexp(-inf, x) -> -inf
    826     if (C && (C->isZero() || C->isInfinity())) {
    827       return IC.replaceInstUsesWith(II, Op0);
    828     }
    829 
    830     // With strictfp, be more careful about possibly needing to flush denormals
    831     // or not, and snan behavior depends on ieee_mode.
    832     if (II.isStrictFP())
    833       break;
    834 
    835     if (C && C->isNaN()) {
    836       // FIXME: We just need to make the nan quiet here, but that's unavailable
    837       // on APFloat, only IEEEfloat
    838       auto *Quieted =
    839           ConstantFP::get(Ty, scalbn(*C, 0, APFloat::rmNearestTiesToEven));
    840       return IC.replaceInstUsesWith(II, Quieted);
    841     }
    842 
    843     // ldexp(x, 0) -> x
    844     // ldexp(x, undef) -> x
    845     if (isa<UndefValue>(Op1) || match(Op1, PatternMatch::m_ZeroInt())) {
    846       return IC.replaceInstUsesWith(II, Op0);
    847     }
    848 
    849     break;
    850   }
    851   case Intrinsic::amdgcn_fmul_legacy: {
    852     Value *Op0 = II.getArgOperand(0);
    853     Value *Op1 = II.getArgOperand(1);
    854 
    855     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
    856     // infinity, gives +0.0.
    857     // TODO: Move to InstSimplify?
    858     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
    859         match(Op1, PatternMatch::m_AnyZeroFP()))
    860       return IC.replaceInstUsesWith(II, ConstantFP::getNullValue(II.getType()));
    861 
    862     // If we can prove we don't have one of the special cases then we can use a
    863     // normal fmul instruction instead.
    864     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
    865       auto *FMul = IC.Builder.CreateFMulFMF(Op0, Op1, &II);
    866       FMul->takeName(&II);
    867       return IC.replaceInstUsesWith(II, FMul);
    868     }
    869     break;
    870   }
    871   case Intrinsic::amdgcn_fma_legacy: {
    872     Value *Op0 = II.getArgOperand(0);
    873     Value *Op1 = II.getArgOperand(1);
    874     Value *Op2 = II.getArgOperand(2);
    875 
    876     // The legacy behaviour is that multiplying +/-0.0 by anything, even NaN or
    877     // infinity, gives +0.0.
    878     // TODO: Move to InstSimplify?
    879     if (match(Op0, PatternMatch::m_AnyZeroFP()) ||
    880         match(Op1, PatternMatch::m_AnyZeroFP())) {
    881       // It's tempting to just return Op2 here, but that would give the wrong
    882       // result if Op2 was -0.0.
    883       auto *Zero = ConstantFP::getNullValue(II.getType());
    884       auto *FAdd = IC.Builder.CreateFAddFMF(Zero, Op2, &II);
    885       FAdd->takeName(&II);
    886       return IC.replaceInstUsesWith(II, FAdd);
    887     }
    888 
    889     // If we can prove we don't have one of the special cases then we can use a
    890     // normal fma instead.
    891     if (canSimplifyLegacyMulToMul(Op0, Op1, IC)) {
    892       II.setCalledOperand(Intrinsic::getDeclaration(
    893           II.getModule(), Intrinsic::fma, II.getType()));
    894       return &II;
    895     }
    896     break;
    897   }
    898   default: {
    899     if (const AMDGPU::ImageDimIntrinsicInfo *ImageDimIntr =
    900             AMDGPU::getImageDimIntrinsicInfo(II.getIntrinsicID())) {
    901       return simplifyAMDGCNImageIntrinsic(ST, ImageDimIntr, II, IC);
    902     }
    903   }
    904   }
    905   return None;
    906 }
    907 
    908 /// Implement SimplifyDemandedVectorElts for amdgcn buffer and image intrinsics.
    909 ///
    910 /// Note: This only supports non-TFE/LWE image intrinsic calls; those have
    911 ///       struct returns.
    912 static Value *simplifyAMDGCNMemoryIntrinsicDemanded(InstCombiner &IC,
    913                                                     IntrinsicInst &II,
    914                                                     APInt DemandedElts,
    915                                                     int DMaskIdx = -1) {
    916 
    917   auto *IIVTy = cast<FixedVectorType>(II.getType());
    918   unsigned VWidth = IIVTy->getNumElements();
    919   if (VWidth == 1)
    920     return nullptr;
    921 
    922   IRBuilderBase::InsertPointGuard Guard(IC.Builder);
    923   IC.Builder.SetInsertPoint(&II);
    924 
    925   // Assume the arguments are unchanged and later override them, if needed.
    926   SmallVector<Value *, 16> Args(II.args());
    927 
    928   if (DMaskIdx < 0) {
    929     // Buffer case.
    930 
    931     const unsigned ActiveBits = DemandedElts.getActiveBits();
    932     const unsigned UnusedComponentsAtFront = DemandedElts.countTrailingZeros();
    933 
    934     // Start assuming the prefix of elements is demanded, but possibly clear
    935     // some other bits if there are trailing zeros (unused components at front)
    936     // and update offset.
    937     DemandedElts = (1 << ActiveBits) - 1;
    938 
    939     if (UnusedComponentsAtFront > 0) {
    940       static const unsigned InvalidOffsetIdx = 0xf;
    941 
    942       unsigned OffsetIdx;
    943       switch (II.getIntrinsicID()) {
    944       case Intrinsic::amdgcn_raw_buffer_load:
    945         OffsetIdx = 1;
    946         break;
    947       case Intrinsic::amdgcn_s_buffer_load:
    948         // If resulting type is vec3, there is no point in trimming the
    949         // load with updated offset, as the vec3 would most likely be widened to
    950         // vec4 anyway during lowering.
    951         if (ActiveBits == 4 && UnusedComponentsAtFront == 1)
    952           OffsetIdx = InvalidOffsetIdx;
    953         else
    954           OffsetIdx = 1;
    955         break;
    956       case Intrinsic::amdgcn_struct_buffer_load:
    957         OffsetIdx = 2;
    958         break;
    959       default:
    960         // TODO: handle tbuffer* intrinsics.
    961         OffsetIdx = InvalidOffsetIdx;
    962         break;
    963       }
    964 
    965       if (OffsetIdx != InvalidOffsetIdx) {
    966         // Clear demanded bits and update the offset.
    967         DemandedElts &= ~((1 << UnusedComponentsAtFront) - 1);
    968         auto *Offset = II.getArgOperand(OffsetIdx);
    969         unsigned SingleComponentSizeInBits =
    970             IC.getDataLayout().getTypeSizeInBits(II.getType()->getScalarType());
    971         unsigned OffsetAdd =
    972             UnusedComponentsAtFront * SingleComponentSizeInBits / 8;
    973         auto *OffsetAddVal = ConstantInt::get(Offset->getType(), OffsetAdd);
    974         Args[OffsetIdx] = IC.Builder.CreateAdd(Offset, OffsetAddVal);
    975       }
    976     }
    977   } else {
    978     // Image case.
    979 
    980     ConstantInt *DMask = cast<ConstantInt>(II.getArgOperand(DMaskIdx));
    981     unsigned DMaskVal = DMask->getZExtValue() & 0xf;
    982 
    983     // Mask off values that are undefined because the dmask doesn't cover them
    984     DemandedElts &= (1 << countPopulation(DMaskVal)) - 1;
    985 
    986     unsigned NewDMaskVal = 0;
    987     unsigned OrigLoadIdx = 0;
    988     for (unsigned SrcIdx = 0; SrcIdx < 4; ++SrcIdx) {
    989       const unsigned Bit = 1 << SrcIdx;
    990       if (!!(DMaskVal & Bit)) {
    991         if (!!DemandedElts[OrigLoadIdx])
    992           NewDMaskVal |= Bit;
    993         OrigLoadIdx++;
    994       }
    995     }
    996 
    997     if (DMaskVal != NewDMaskVal)
    998       Args[DMaskIdx] = ConstantInt::get(DMask->getType(), NewDMaskVal);
    999   }
   1000 
   1001   unsigned NewNumElts = DemandedElts.countPopulation();
   1002   if (!NewNumElts)
   1003     return UndefValue::get(II.getType());
   1004 
   1005   if (NewNumElts >= VWidth && DemandedElts.isMask()) {
   1006     if (DMaskIdx >= 0)
   1007       II.setArgOperand(DMaskIdx, Args[DMaskIdx]);
   1008     return nullptr;
   1009   }
   1010 
   1011   // Validate function argument and return types, extracting overloaded types
   1012   // along the way.
   1013   SmallVector<Type *, 6> OverloadTys;
   1014   if (!Intrinsic::getIntrinsicSignature(II.getCalledFunction(), OverloadTys))
   1015     return nullptr;
   1016 
   1017   Module *M = II.getParent()->getParent()->getParent();
   1018   Type *EltTy = IIVTy->getElementType();
   1019   Type *NewTy =
   1020       (NewNumElts == 1) ? EltTy : FixedVectorType::get(EltTy, NewNumElts);
   1021 
   1022   OverloadTys[0] = NewTy;
   1023   Function *NewIntrin =
   1024       Intrinsic::getDeclaration(M, II.getIntrinsicID(), OverloadTys);
   1025 
   1026   CallInst *NewCall = IC.Builder.CreateCall(NewIntrin, Args);
   1027   NewCall->takeName(&II);
   1028   NewCall->copyMetadata(II);
   1029 
   1030   if (NewNumElts == 1) {
   1031     return IC.Builder.CreateInsertElement(UndefValue::get(II.getType()),
   1032                                           NewCall,
   1033                                           DemandedElts.countTrailingZeros());
   1034   }
   1035 
   1036   SmallVector<int, 8> EltMask;
   1037   unsigned NewLoadIdx = 0;
   1038   for (unsigned OrigLoadIdx = 0; OrigLoadIdx < VWidth; ++OrigLoadIdx) {
   1039     if (!!DemandedElts[OrigLoadIdx])
   1040       EltMask.push_back(NewLoadIdx++);
   1041     else
   1042       EltMask.push_back(NewNumElts);
   1043   }
   1044 
   1045   Value *Shuffle = IC.Builder.CreateShuffleVector(NewCall, EltMask);
   1046 
   1047   return Shuffle;
   1048 }
   1049 
   1050 Optional<Value *> GCNTTIImpl::simplifyDemandedVectorEltsIntrinsic(
   1051     InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
   1052     APInt &UndefElts2, APInt &UndefElts3,
   1053     std::function<void(Instruction *, unsigned, APInt, APInt &)>
   1054         SimplifyAndSetOp) const {
   1055   switch (II.getIntrinsicID()) {
   1056   case Intrinsic::amdgcn_buffer_load:
   1057   case Intrinsic::amdgcn_buffer_load_format:
   1058   case Intrinsic::amdgcn_raw_buffer_load:
   1059   case Intrinsic::amdgcn_raw_buffer_load_format:
   1060   case Intrinsic::amdgcn_raw_tbuffer_load:
   1061   case Intrinsic::amdgcn_s_buffer_load:
   1062   case Intrinsic::amdgcn_struct_buffer_load:
   1063   case Intrinsic::amdgcn_struct_buffer_load_format:
   1064   case Intrinsic::amdgcn_struct_tbuffer_load:
   1065   case Intrinsic::amdgcn_tbuffer_load:
   1066     return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts);
   1067   default: {
   1068     if (getAMDGPUImageDMaskIntrinsic(II.getIntrinsicID())) {
   1069       return simplifyAMDGCNMemoryIntrinsicDemanded(IC, II, DemandedElts, 0);
   1070     }
   1071     break;
   1072   }
   1073   }
   1074   return None;
   1075 }
   1076