// Excerpt of llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp
      1 //===- AMDGPUAsmParser.cpp - Parse SI asm to MCInst instructions ----------===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 //===----------------------------------------------------------------------===//
      8 
      9 #include "AMDKernelCodeT.h"
     10 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     11 #include "MCTargetDesc/AMDGPUTargetStreamer.h"
     12 #include "SIDefines.h"
     13 #include "SIInstrInfo.h"
     14 #include "SIRegisterInfo.h"
     15 #include "TargetInfo/AMDGPUTargetInfo.h"
     16 #include "Utils/AMDGPUAsmUtils.h"
     17 #include "Utils/AMDGPUBaseInfo.h"
     18 #include "Utils/AMDKernelCodeTUtils.h"
     19 #include "llvm/ADT/APFloat.h"
     20 #include "llvm/ADT/SmallBitVector.h"
     21 #include "llvm/ADT/StringSet.h"
     22 #include "llvm/ADT/Twine.h"
     23 #include "llvm/MC/MCAsmInfo.h"
     24 #include "llvm/MC/MCContext.h"
     25 #include "llvm/MC/MCExpr.h"
     26 #include "llvm/MC/MCInst.h"
     27 #include "llvm/MC/MCParser/MCAsmParser.h"
     28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
     29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
     30 #include "llvm/MC/MCSymbol.h"
     31 #include "llvm/Support/AMDGPUMetadata.h"
     32 #include "llvm/Support/AMDHSAKernelDescriptor.h"
     33 #include "llvm/Support/Casting.h"
     34 #include "llvm/Support/MachineValueType.h"
     35 #include "llvm/Support/TargetParser.h"
     36 #include "llvm/Support/TargetRegistry.h"
     37 
     38 using namespace llvm;
     39 using namespace llvm::AMDGPU;
     40 using namespace llvm::amdhsa;
     41 
     42 namespace {
     43 
     44 class AMDGPUAsmParser;
     45 
// Broad classification of a parsed register: vector, scalar, accumulator,
// trap-temp, or one of the special registers; IS_UNKNOWN until resolved.
enum RegisterKind { IS_UNKNOWN, IS_VGPR, IS_SGPR, IS_AGPR, IS_TTMP, IS_SPECIAL };
     47 
     48 //===----------------------------------------------------------------------===//
     49 // Operand
     50 //===----------------------------------------------------------------------===//
     51 
     52 class AMDGPUOperand : public MCParsedAsmOperand {
     53   enum KindTy {
     54     Token,
     55     Immediate,
     56     Register,
     57     Expression
     58   } Kind;
     59 
     60   SMLoc StartLoc, EndLoc;
     61   const AMDGPUAsmParser *AsmParser;
     62 
     63 public:
     64   AMDGPUOperand(KindTy Kind_, const AMDGPUAsmParser *AsmParser_)
     65     : MCParsedAsmOperand(), Kind(Kind_), AsmParser(AsmParser_) {}
     66 
     67   using Ptr = std::unique_ptr<AMDGPUOperand>;
     68 
     69   struct Modifiers {
     70     bool Abs = false;
     71     bool Neg = false;
     72     bool Sext = false;
     73 
     74     bool hasFPModifiers() const { return Abs || Neg; }
     75     bool hasIntModifiers() const { return Sext; }
     76     bool hasModifiers() const { return hasFPModifiers() || hasIntModifiers(); }
     77 
     78     int64_t getFPModifiersOperand() const {
     79       int64_t Operand = 0;
     80       Operand |= Abs ? SISrcMods::ABS : 0u;
     81       Operand |= Neg ? SISrcMods::NEG : 0u;
     82       return Operand;
     83     }
     84 
     85     int64_t getIntModifiersOperand() const {
     86       int64_t Operand = 0;
     87       Operand |= Sext ? SISrcMods::SEXT : 0u;
     88       return Operand;
     89     }
     90 
     91     int64_t getModifiersOperand() const {
     92       assert(!(hasFPModifiers() && hasIntModifiers())
     93            && "fp and int modifiers should not be used simultaneously");
     94       if (hasFPModifiers()) {
     95         return getFPModifiersOperand();
     96       } else if (hasIntModifiers()) {
     97         return getIntModifiersOperand();
     98       } else {
     99         return 0;
    100       }
    101     }
    102 
    103     friend raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods);
    104   };
    105 
  // Tags an immediate operand with the named operand slot it was parsed for
  // (offsets, enable bits, DPP/SDWA controls, etc.). ImmTyNone marks a plain
  // immediate value with no special role.
  enum ImmTy {
    ImmTyNone,
    ImmTyGDS,
    ImmTyLDS,
    ImmTyOffen,
    ImmTyIdxen,
    ImmTyAddr64,
    ImmTyOffset,
    ImmTyInstOffset,
    ImmTyOffset0,
    ImmTyOffset1,
    ImmTyCPol,
    ImmTySWZ,
    ImmTyTFE,
    ImmTyD16,
    ImmTyClampSI,
    ImmTyOModSI,
    // DPP controls.
    ImmTyDPP8,
    ImmTyDppCtrl,
    ImmTyDppRowMask,
    ImmTyDppBankMask,
    ImmTyDppBoundCtrl,
    ImmTyDppFi,
    // SDWA selects.
    ImmTySdwaDstSel,
    ImmTySdwaSrc0Sel,
    ImmTySdwaSrc1Sel,
    ImmTySdwaDstUnused,
    ImmTyDMask,
    ImmTyDim,
    ImmTyUNorm,
    ImmTyDA,
    ImmTyR128A16,
    ImmTyA16,
    ImmTyLWE,
    // Export (EXP) instruction fields.
    ImmTyExpTgt,
    ImmTyExpCompr,
    ImmTyExpVM,
    ImmTyFORMAT,
    ImmTyHwreg,
    ImmTyOff,
    ImmTySendMsg,
    // Interpolation operands.
    ImmTyInterpSlot,
    ImmTyInterpAttr,
    ImmTyAttrChan,
    ImmTyOpSel,
    ImmTyOpSelHi,
    ImmTyNegLo,
    ImmTyNegHi,
    ImmTySwizzle,
    ImmTyGprIdxMode,
    ImmTyHigh,
    ImmTyBLGP,
    ImmTyCBSZ,
    ImmTyABID,
    ImmTyEndpgm,
  };
    162 
  // Classification of an immediate, recorded after parsing via the
  // setImmKind* helpers below; ImmKindTyNone until one of them runs.
  enum ImmKindTy {
    ImmKindTyNone,
    ImmKindTyLiteral,
    ImmKindTyConst,
  };
    168 
    169 private:
  // Payload for Kind == Token: non-owning view into the parsed buffer.
  struct TokOp {
    const char *Data;
    unsigned Length;
  };

  // Payload for Kind == Immediate.
  struct ImmOp {
    int64_t Val;
    ImmTy Type;             // which named operand slot this immediate fills
    bool IsFPImm;           // written as a floating-point literal
    mutable ImmKindTy Kind; // mutable so const setImmKind* helpers can set it
    Modifiers Mods;
  };

  // Payload for Kind == Register.
  struct RegOp {
    unsigned RegNo;
    Modifiers Mods;
  };

  // Exactly one member is active, selected by Kind.
  union {
    TokOp Tok;
    ImmOp Imm;
    RegOp Reg;
    const MCExpr *Expr;     // payload for Kind == Expression
  };
    194 
    195 public:
  bool isToken() const override {
    if (Kind == Token)
      return true;

    // When parsing operands, we can't always tell if something was meant to be
    // a token, like 'gds', or an expression that references a global variable.
    // In this case, we assume the string is an expression, and if we need to
    // interpret it as a token, then we treat the symbol name as the token.
    return isSymbolRefExpr();
  }

  // True when the operand is an expression consisting of a bare symbol
  // reference (the case isToken() above falls back on).
  bool isSymbolRefExpr() const {
    return isExpr() && Expr && isa<MCSymbolRefExpr>(Expr);
  }

  bool isImm() const override {
    return Kind == Immediate;
  }

  // The ImmKind tag is mutable, so these setters may be called on const
  // operands while an instruction is being assembled.
  void setImmKindNone() const {
    assert(isImm());
    Imm.Kind = ImmKindTyNone;
  }

  void setImmKindLiteral() const {
    assert(isImm());
    Imm.Kind = ImmKindTyLiteral;
  }

  void setImmKindConst() const {
    assert(isImm());
    Imm.Kind = ImmKindTyConst;
  }

  // NOTE(review): capitalization is inconsistent with isImmKindConst() below;
  // renaming would touch callers outside this view, so only flagged here.
  bool IsImmKindLiteral() const {
    return isImm() && Imm.Kind == ImmKindTyLiteral;
  }

  bool isImmKindConst() const {
    return isImm() && Imm.Kind == ImmKindTyConst;
  }

  // Defined out of line: whether the immediate can be encoded inline / as a
  // literal when interpreted with the given machine value type.
  bool isInlinableImm(MVT type) const;
  bool isLiteralImm(MVT type) const;
    240 
  bool isRegKind() const {
    return Kind == Register;
  }

  // A "plain" register for the matcher: register kind with no abs/neg/sext
  // modifiers attached.
  bool isReg() const override {
    return isRegKind() && !hasModifiers();
  }

  // Register of class RCID, or an immediate usable with the given type
  // (inline or literal); used by the *InputMods predicates below.
  bool isRegOrImmWithInputMods(unsigned RCID, MVT type) const {
    return isRegClass(RCID) || isInlinableImm(type) || isLiteralImm(type);
  }

  bool isRegOrImmWithInt16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isRegOrImmWithInt32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isRegOrImmWithInt64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isRegOrImmWithFP16InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isRegOrImmWithFP32InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isRegOrImmWithFP64InputMods() const {
    return isRegOrImmWithInputMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  // Any VGPR register class, from 32-bit single up to 1024-bit tuples.
  bool isVReg() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID) ||
           isRegClass(AMDGPU::VReg_64RegClassID) ||
           isRegClass(AMDGPU::VReg_96RegClassID) ||
           isRegClass(AMDGPU::VReg_128RegClassID) ||
           isRegClass(AMDGPU::VReg_160RegClassID) ||
           isRegClass(AMDGPU::VReg_192RegClassID) ||
           isRegClass(AMDGPU::VReg_256RegClassID) ||
           isRegClass(AMDGPU::VReg_512RegClassID) ||
           isRegClass(AMDGPU::VReg_1024RegClassID);
  }

  bool isVReg32() const {
    return isRegClass(AMDGPU::VGPR_32RegClassID);
  }

  // 32-bit VGPR, or the literal 'off' token (see isOff()).
  bool isVReg32OrOff() const {
    return isOff() || isVReg32();
  }

  bool isNull() const {
    return isRegKind() && getReg() == AMDGPU::SGPR_NULL;
  }

  // Defined out of line below.
  bool isVRegWithInputMods() const;

  bool isSDWAOperand(MVT type) const;
  bool isSDWAFP16Operand() const;
  bool isSDWAFP32Operand() const;
  bool isSDWAInt16Operand() const;
  bool isSDWAInt32Operand() const;
    308 
  // Immediate carrying the given named-slot tag.
  bool isImmTy(ImmTy ImmT) const {
    return isImm() && Imm.Type == ImmT;
  }

  // Immediate that plays a named-modifier role (anything but a plain value).
  bool isImmModifier() const {
    return isImm() && Imm.Type != ImmTyNone;
  }

  // One-liner matcher predicates: each tests for the correspondingly tagged
  // immediate; a few additionally range-check the value.
  bool isClampSI() const { return isImmTy(ImmTyClampSI); }
  bool isOModSI() const { return isImmTy(ImmTyOModSI); }
  bool isDMask() const { return isImmTy(ImmTyDMask); }
  bool isDim() const { return isImmTy(ImmTyDim); }
  bool isUNorm() const { return isImmTy(ImmTyUNorm); }
  bool isDA() const { return isImmTy(ImmTyDA); }
  bool isR128A16() const { return isImmTy(ImmTyR128A16); }
  bool isGFX10A16() const { return isImmTy(ImmTyA16); }
  bool isLWE() const { return isImmTy(ImmTyLWE); }
  bool isOff() const { return isImmTy(ImmTyOff); }
  bool isExpTgt() const { return isImmTy(ImmTyExpTgt); }
  bool isExpVM() const { return isImmTy(ImmTyExpVM); }
  bool isExpCompr() const { return isImmTy(ImmTyExpCompr); }
  bool isOffen() const { return isImmTy(ImmTyOffen); }
  bool isIdxen() const { return isImmTy(ImmTyIdxen); }
  bool isAddr64() const { return isImmTy(ImmTyAddr64); }
  // Offsets are additionally checked to fit their encoded field widths.
  bool isOffset() const { return isImmTy(ImmTyOffset) && isUInt<16>(getImm()); }
  bool isOffset0() const { return isImmTy(ImmTyOffset0) && isUInt<8>(getImm()); }
  bool isOffset1() const { return isImmTy(ImmTyOffset1) && isUInt<8>(getImm()); }

  bool isFlatOffset() const { return isImmTy(ImmTyOffset) || isImmTy(ImmTyInstOffset); }
  bool isGDS() const { return isImmTy(ImmTyGDS); }
  bool isLDS() const { return isImmTy(ImmTyLDS); }
  bool isCPol() const { return isImmTy(ImmTyCPol); }
  bool isSWZ() const { return isImmTy(ImmTySWZ); }
  bool isTFE() const { return isImmTy(ImmTyTFE); }
  bool isD16() const { return isImmTy(ImmTyD16); }
  bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<7>(getImm()); }
  bool isBankMask() const { return isImmTy(ImmTyDppBankMask); }
  bool isRowMask() const { return isImmTy(ImmTyDppRowMask); }
  bool isBoundCtrl() const { return isImmTy(ImmTyDppBoundCtrl); }
  bool isFI() const { return isImmTy(ImmTyDppFi); }
  bool isSDWADstSel() const { return isImmTy(ImmTySdwaDstSel); }
  bool isSDWASrc0Sel() const { return isImmTy(ImmTySdwaSrc0Sel); }
  bool isSDWASrc1Sel() const { return isImmTy(ImmTySdwaSrc1Sel); }
  bool isSDWADstUnused() const { return isImmTy(ImmTySdwaDstUnused); }
  bool isInterpSlot() const { return isImmTy(ImmTyInterpSlot); }
  bool isInterpAttr() const { return isImmTy(ImmTyInterpAttr); }
  bool isAttrChan() const { return isImmTy(ImmTyAttrChan); }
  bool isOpSel() const { return isImmTy(ImmTyOpSel); }
  bool isOpSelHi() const { return isImmTy(ImmTyOpSelHi); }
  bool isNegLo() const { return isImmTy(ImmTyNegLo); }
  bool isNegHi() const { return isImmTy(ImmTyNegHi); }
  bool isHigh() const { return isImmTy(ImmTyHigh); }

  bool isMod() const {
    return isClampSI() || isOModSI();
  }

  bool isRegOrImm() const {
    return isReg() || isImm();
  }

  bool isRegClass(unsigned RCID) const;

  bool isInlineValue() const;

  // Register of class RCID, or an inline-encodable immediate of the given
  // type, in either case without abs/neg/sext modifiers.
  bool isRegOrInlineNoMods(unsigned RCID, MVT type) const {
    return (isRegClass(RCID) || isInlinableImm(type)) && !hasModifiers();
  }
    377 
  // SCSrc*: scalar-register or inline-constant sources; SSrc*: additionally
  // allow literal immediates (and expressions for 32-bit forms).
  bool isSCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i16);
  }

  bool isSCSrcV2B16() const {
    return isSCSrcB16();
  }

  bool isSCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::i32);
  }

  bool isSCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::i64);
  }

  bool isBoolReg() const;

  bool isSCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f16);
  }

  bool isSCSrcV2F16() const {
    return isSCSrcF16();
  }

  bool isSCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_32RegClassID, MVT::f32);
  }

  bool isSCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::SReg_64RegClassID, MVT::f64);
  }

  bool isSSrcB32() const {
    return isSCSrcB32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isSSrcB16() const {
    return isSCSrcB16() || isLiteralImm(MVT::i16);
  }

  // The "cannot happen" predicates below are declared for the generated
  // matcher but are never expected to be queried; the return statement after
  // llvm_unreachable only silences no-return warnings.
  bool isSSrcV2B16() const {
    llvm_unreachable("cannot happen");
    return isSSrcB16();
  }

  bool isSSrcB64() const {
    // TODO: Find out how SALU supports extension of 32-bit literals to 64 bits.
    // See isVSrc64().
    return isSCSrcB64() || isLiteralImm(MVT::i64);
  }

  bool isSSrcF32() const {
    return isSCSrcB32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isSSrcF64() const {
    return isSCSrcB64() || isLiteralImm(MVT::f64);
  }

  bool isSSrcF16() const {
    return isSCSrcB16() || isLiteralImm(MVT::f16);
  }

  bool isSSrcV2F16() const {
    llvm_unreachable("cannot happen");
    return isSSrcF16();
  }

  bool isSSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSSrcF32();
  }

  bool isSCSrcV2FP32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcF32();
  }

  bool isSSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSSrcB32();
  }

  bool isSCSrcV2INT32() const {
    llvm_unreachable("cannot happen");
    return isSCSrcB32();
  }

  bool isSSrcOrLdsB32() const {
    return isRegOrInlineNoMods(AMDGPU::SRegOrLds_32RegClassID, MVT::i32) ||
           isLiteralImm(MVT::i32) || isExpr();
  }
    472 
  // VCSrc*: VS_* (vector-or-scalar) register or inline constant; VSrc*:
  // additionally allow literal immediates (and expressions for 32-bit forms).
  bool isVCSrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i32);
  }

  bool isVCSrcB64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::i64);
  }

  bool isVCSrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::i16);
  }

  bool isVCSrcV2B16() const {
    return isVCSrcB16();
  }

  bool isVCSrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f32);
  }

  bool isVCSrcF64() const {
    return isRegOrInlineNoMods(AMDGPU::VS_64RegClassID, MVT::f64);
  }

  bool isVCSrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VS_32RegClassID, MVT::f16);
  }

  bool isVCSrcV2F16() const {
    return isVCSrcF16();
  }

  bool isVSrcB32() const {
    return isVCSrcF32() || isLiteralImm(MVT::i32) || isExpr();
  }

  bool isVSrcB64() const {
    return isVCSrcF64() || isLiteralImm(MVT::i64);
  }

  bool isVSrcB16() const {
    return isVCSrcB16() || isLiteralImm(MVT::i16);
  }

  bool isVSrcV2B16() const {
    return isVSrcB16() || isLiteralImm(MVT::v2i16);
  }

  bool isVCSrcV2FP32() const {
    return isVCSrcF64();
  }

  bool isVSrcV2FP32() const {
    return isVSrcF64() || isLiteralImm(MVT::v2f32);
  }

  bool isVCSrcV2INT32() const {
    return isVCSrcB64();
  }

  bool isVSrcV2INT32() const {
    return isVSrcB64() || isLiteralImm(MVT::v2i32);
  }

  bool isVSrcF32() const {
    return isVCSrcF32() || isLiteralImm(MVT::f32) || isExpr();
  }

  bool isVSrcF64() const {
    return isVCSrcF64() || isLiteralImm(MVT::f64);
  }

  bool isVSrcF16() const {
    return isVCSrcF16() || isLiteralImm(MVT::f16);
  }

  bool isVSrcV2F16() const {
    return isVSrcF16() || isLiteralImm(MVT::v2f16);
  }
    552 
  // VISrc*: VGPR register (of the width encoded in the name) or inline
  // constant of the given element type; V2* variants reuse the scalar checks.
  bool isVISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i32);
  }

  bool isVISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::i16);
  }

  bool isVISrcV2B16() const {
    return isVISrcB16();
  }

  bool isVISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f32);
  }

  bool isVISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::VGPR_32RegClassID, MVT::f16);
  }

  bool isVISrcV2F16() const {
    return isVISrcF16() || isVISrcB32();
  }

  bool isVISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i64);
  }

  bool isVISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f64);
  }

  bool isVISrc_64V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::f32);
  }

  bool isVISrc_64V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_64RegClassID, MVT::i32);
  }

  bool isVISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i64);
  }

  bool isVISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f64);
  }

  bool isVISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i16);
  }

  bool isVISrc_128V2B16() const {
    return isVISrc_128B16();
  }

  bool isVISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::i32);
  }

  bool isVISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f32);
  }

  bool isVISrc_256V2FP32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::f32);
  }

  bool isVISrc_256V2INT32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_256RegClassID, MVT::i32);
  }

  bool isVISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i32);
  }

  bool isVISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::i16);
  }

  bool isVISrc_512V2B16() const {
    return isVISrc_512B16();
  }

  bool isVISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f32);
  }

  bool isVISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_512RegClassID, MVT::f16);
  }

  bool isVISrc_512V2F16() const {
    return isVISrc_512F16() || isVISrc_512B32();
  }

  bool isVISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i32);
  }

  bool isVISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::i16);
  }

  bool isVISrc_1024V2B16() const {
    return isVISrc_1024B16();
  }

  bool isVISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f32);
  }

  bool isVISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_1024RegClassID, MVT::f16);
  }

  bool isVISrc_1024V2F16() const {
    return isVISrc_1024F16() || isVISrc_1024B32();
  }
    672 
  // AISrc*: AGPR (accumulator) register of the width encoded in the name, or
  // inline constant of the given element type.
  bool isAISrcB32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i32);
  }

  bool isAISrcB16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::i16);
  }

  bool isAISrcV2B16() const {
    return isAISrcB16();
  }

  bool isAISrcF32() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f32);
  }

  bool isAISrcF16() const {
    return isRegOrInlineNoMods(AMDGPU::AGPR_32RegClassID, MVT::f16);
  }

  bool isAISrcV2F16() const {
    return isAISrcF16() || isAISrcB32();
  }

  bool isAISrc_64B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::i64);
  }

  bool isAISrc_64F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_64RegClassID, MVT::f64);
  }

  bool isAISrc_128B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i32);
  }

  bool isAISrc_128B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::i16);
  }

  bool isAISrc_128V2B16() const {
    return isAISrc_128B16();
  }

  bool isAISrc_128F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f32);
  }

  bool isAISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_128RegClassID, MVT::f16);
  }

  bool isAISrc_128V2F16() const {
    return isAISrc_128F16() || isAISrc_128B32();
  }

  // NOTE(review): these two VISrc_128 predicates sit inside the AISrc group;
  // consider regrouping them with the other isVISrc_128* methods above.
  bool isVISrc_128F16() const {
    return isRegOrInlineNoMods(AMDGPU::VReg_128RegClassID, MVT::f16);
  }

  bool isVISrc_128V2F16() const {
    return isVISrc_128F16() || isVISrc_128B32();
  }

  bool isAISrc_256B64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::i64);
  }

  bool isAISrc_256F64() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_256RegClassID, MVT::f64);
  }

  bool isAISrc_512B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i32);
  }

  bool isAISrc_512B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::i16);
  }

  bool isAISrc_512V2B16() const {
    return isAISrc_512B16();
  }

  bool isAISrc_512F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f32);
  }

  bool isAISrc_512F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_512RegClassID, MVT::f16);
  }

  bool isAISrc_512V2F16() const {
    return isAISrc_512F16() || isAISrc_512B32();
  }

  bool isAISrc_1024B32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i32);
  }

  bool isAISrc_1024B16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::i16);
  }

  bool isAISrc_1024V2B16() const {
    return isAISrc_1024B16();
  }

  bool isAISrc_1024F32() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f32);
  }

  bool isAISrc_1024F16() const {
    return isRegOrInlineNoMods(AMDGPU::AReg_1024RegClassID, MVT::f16);
  }

  bool isAISrc_1024V2F16() const {
    return isAISrc_1024F16() || isAISrc_1024B32();
  }
    792 
  // KImm operands: literal immediates embedded in the instruction.
  bool isKImmFP32() const {
    return isLiteralImm(MVT::f32);
  }

  bool isKImmFP16() const {
    return isLiteralImm(MVT::f16);
  }

  // AMDGPU assembly has no memory-style operands.
  bool isMem() const override {
    return false;
  }

  bool isExpr() const {
    return Kind == Expression;
  }

  // Branch target of SOPP instructions: a symbolic expression or an
  // immediate offset.
  bool isSoppBrTarget() const {
    return isExpr() || isImm();
  }

  // Defined out of line below.
  bool isSWaitCnt() const;
  bool isHwreg() const;
  bool isSendMsg() const;
  bool isSwizzle() const;
  bool isSMRDOffset8() const;
  bool isSMEMOffset() const;
  bool isSMRDLiteralOffset() const;
  bool isDPP8() const;
  bool isDPPCtrl() const;
  bool isBLGP() const;
  bool isCBSZ() const;
  bool isABID() const;
  bool isGPRIdxMode() const;
  bool isS16Imm() const;
  bool isU16Imm() const;
  bool isEndpgm() const;
    829 
  // Name of the symbol referenced by an expression operand; only valid when
  // the expression is a bare symbol reference (cast asserts otherwise).
  StringRef getExpressionAsToken() const {
    assert(isExpr());
    const MCSymbolRefExpr *S = cast<MCSymbolRefExpr>(Expr);
    return S->getSymbol().getName();
  }
    835 
    836   StringRef getToken() const {
    837     assert(isToken());
    838 
    839     if (Kind == Expression)
    840       return getExpressionAsToken();
    841 
    842     return StringRef(Tok.Data, Tok.Length);
    843   }
    844 
  int64_t getImm() const {
    assert(isImm());
    return Imm.Val;
  }

  void setImm(int64_t Val) {
    assert(isImm());
    Imm.Val = Val;
  }

  ImmTy getImmTy() const {
    assert(isImm());
    return Imm.Type;
  }

  unsigned getReg() const override {
    assert(isRegKind());
    return Reg.RegNo;
  }

  SMLoc getStartLoc() const override {
    return StartLoc;
  }

  SMLoc getEndLoc() const override {
    return EndLoc;
  }

  SMRange getLocRange() const {
    return SMRange(StartLoc, EndLoc);
  }

  // Modifiers live in the register or immediate payload; only registers and
  // plain (untagged) immediates may carry them.
  Modifiers getModifiers() const {
    assert(isRegKind() || isImmTy(ImmTyNone));
    return isRegKind() ? Reg.Mods : Imm.Mods;
  }

  void setModifiers(Modifiers Mods) {
    assert(isRegKind() || isImmTy(ImmTyNone));
    if (isRegKind())
      Reg.Mods = Mods;
    else
      Imm.Mods = Mods;
  }

  bool hasModifiers() const {
    return getModifiers().hasModifiers();
  }

  bool hasFPModifiers() const {
    return getModifiers().hasFPModifiers();
  }

  bool hasIntModifiers() const {
    return getModifiers().hasIntModifiers();
  }
    901 
  // Defined out of line below.
  uint64_t applyInputFPModifiers(uint64_t Val, unsigned Size) const;

  void addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers = true) const;

  void addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const;

  template <unsigned Bitwidth>
  void addKImmFPOperands(MCInst &Inst, unsigned N) const;

  void addKImmFP16Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<16>(Inst, N);
  }

  void addKImmFP32Operands(MCInst &Inst, unsigned N) const {
    addKImmFPOperands<32>(Inst, N);
  }

  void addRegOperands(MCInst &Inst, unsigned N) const;

  void addBoolRegOperands(MCInst &Inst, unsigned N) const {
    addRegOperands(Inst, N);
  }

  // Append this operand to Inst as a register, expression, or immediate,
  // depending on its kind.
  void addRegOrImmOperands(MCInst &Inst, unsigned N) const {
    if (isRegKind())
      addRegOperands(Inst, N);
    else if (isExpr())
      Inst.addOperand(MCOperand::createExpr(Expr));
    else
      addImmOperands(Inst, N);
  }
    933 
    934   void addRegOrImmWithInputModsOperands(MCInst &Inst, unsigned N) const {
    935     Modifiers Mods = getModifiers();
    936     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    937     if (isRegKind()) {
    938       addRegOperands(Inst, N);
    939     } else {
    940       addImmOperands(Inst, N, false);
    941     }
    942   }
    943 
    944   void addRegOrImmWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    945     assert(!hasIntModifiers());
    946     addRegOrImmWithInputModsOperands(Inst, N);
    947   }
    948 
    949   void addRegOrImmWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    950     assert(!hasFPModifiers());
    951     addRegOrImmWithInputModsOperands(Inst, N);
    952   }
    953 
    954   void addRegWithInputModsOperands(MCInst &Inst, unsigned N) const {
    955     Modifiers Mods = getModifiers();
    956     Inst.addOperand(MCOperand::createImm(Mods.getModifiersOperand()));
    957     assert(isRegKind());
    958     addRegOperands(Inst, N);
    959   }
    960 
  // FP-modifier, register-only variant; integer modifiers must be absent.
  void addRegWithFPInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasIntModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
    965 
  // Int-modifier, register-only variant; FP modifiers must be absent.
  void addRegWithIntInputModsOperands(MCInst &Inst, unsigned N) const {
    assert(!hasFPModifiers());
    addRegWithInputModsOperands(Inst, N);
  }
    970 
    971   void addSoppBrTargetOperands(MCInst &Inst, unsigned N) const {
    972     if (isImm())
    973       addImmOperands(Inst, N);
    974     else {
    975       assert(isExpr());
    976       Inst.addOperand(MCOperand::createExpr(Expr));
    977     }
    978   }
    979 
  // Map an ImmTy enumerator to a human-readable name for debug output
  // (used by print() below). Deliberately no default case: a new ImmTy
  // enumerator without an entry here produces a -Wswitch warning.
  static void printImmTy(raw_ostream& OS, ImmTy Type) {
    switch (Type) {
    case ImmTyNone: OS << "None"; break;
    case ImmTyGDS: OS << "GDS"; break;
    case ImmTyLDS: OS << "LDS"; break;
    case ImmTyOffen: OS << "Offen"; break;
    case ImmTyIdxen: OS << "Idxen"; break;
    case ImmTyAddr64: OS << "Addr64"; break;
    case ImmTyOffset: OS << "Offset"; break;
    case ImmTyInstOffset: OS << "InstOffset"; break;
    case ImmTyOffset0: OS << "Offset0"; break;
    case ImmTyOffset1: OS << "Offset1"; break;
    case ImmTyCPol: OS << "CPol"; break;
    case ImmTySWZ: OS << "SWZ"; break;
    case ImmTyTFE: OS << "TFE"; break;
    case ImmTyD16: OS << "D16"; break;
    case ImmTyFORMAT: OS << "FORMAT"; break;
    case ImmTyClampSI: OS << "ClampSI"; break;
    case ImmTyOModSI: OS << "OModSI"; break;
    case ImmTyDPP8: OS << "DPP8"; break;
    case ImmTyDppCtrl: OS << "DppCtrl"; break;
    case ImmTyDppRowMask: OS << "DppRowMask"; break;
    case ImmTyDppBankMask: OS << "DppBankMask"; break;
    case ImmTyDppBoundCtrl: OS << "DppBoundCtrl"; break;
    // Note: prints "FI", not "DppFi" — matches the assembly operand name.
    case ImmTyDppFi: OS << "FI"; break;
    case ImmTySdwaDstSel: OS << "SdwaDstSel"; break;
    case ImmTySdwaSrc0Sel: OS << "SdwaSrc0Sel"; break;
    case ImmTySdwaSrc1Sel: OS << "SdwaSrc1Sel"; break;
    case ImmTySdwaDstUnused: OS << "SdwaDstUnused"; break;
    case ImmTyDMask: OS << "DMask"; break;
    case ImmTyDim: OS << "Dim"; break;
    case ImmTyUNorm: OS << "UNorm"; break;
    case ImmTyDA: OS << "DA"; break;
    case ImmTyR128A16: OS << "R128A16"; break;
    case ImmTyA16: OS << "A16"; break;
    case ImmTyLWE: OS << "LWE"; break;
    case ImmTyOff: OS << "Off"; break;
    case ImmTyExpTgt: OS << "ExpTgt"; break;
    case ImmTyExpCompr: OS << "ExpCompr"; break;
    case ImmTyExpVM: OS << "ExpVM"; break;
    case ImmTyHwreg: OS << "Hwreg"; break;
    case ImmTySendMsg: OS << "SendMsg"; break;
    case ImmTyInterpSlot: OS << "InterpSlot"; break;
    case ImmTyInterpAttr: OS << "InterpAttr"; break;
    case ImmTyAttrChan: OS << "AttrChan"; break;
    case ImmTyOpSel: OS << "OpSel"; break;
    case ImmTyOpSelHi: OS << "OpSelHi"; break;
    case ImmTyNegLo: OS << "NegLo"; break;
    case ImmTyNegHi: OS << "NegHi"; break;
    case ImmTySwizzle: OS << "Swizzle"; break;
    case ImmTyGprIdxMode: OS << "GprIdxMode"; break;
    case ImmTyHigh: OS << "High"; break;
    case ImmTyBLGP: OS << "BLGP"; break;
    case ImmTyCBSZ: OS << "CBSZ"; break;
    case ImmTyABID: OS << "ABID"; break;
    case ImmTyEndpgm: OS << "Endpgm"; break;
    }
  }
   1038 
  // Debug-dump this operand to \p OS; the format depends on the operand kind.
  void print(raw_ostream &OS) const override {
    switch (Kind) {
    case Register:
      OS << "<register " << getReg() << " mods: " << Reg.Mods << '>';
      break;
    case Immediate:
      OS << '<' << getImm();
      // The type tag is printed only when it carries information.
      if (getImmTy() != ImmTyNone) {
        OS << " type: "; printImmTy(OS, getImmTy());
      }
      OS << " mods: " << Imm.Mods << '>';
      break;
    case Token:
      OS << '\'' << getToken() << '\'';
      break;
    case Expression:
      OS << "<expr " << *Expr << '>';
      break;
    }
  }
   1059 
   1060   static AMDGPUOperand::Ptr CreateImm(const AMDGPUAsmParser *AsmParser,
   1061                                       int64_t Val, SMLoc Loc,
   1062                                       ImmTy Type = ImmTyNone,
   1063                                       bool IsFPImm = false) {
   1064     auto Op = std::make_unique<AMDGPUOperand>(Immediate, AsmParser);
   1065     Op->Imm.Val = Val;
   1066     Op->Imm.IsFPImm = IsFPImm;
   1067     Op->Imm.Kind = ImmKindTyNone;
   1068     Op->Imm.Type = Type;
   1069     Op->Imm.Mods = Modifiers();
   1070     Op->StartLoc = Loc;
   1071     Op->EndLoc = Loc;
   1072     return Op;
   1073   }
   1074 
   1075   static AMDGPUOperand::Ptr CreateToken(const AMDGPUAsmParser *AsmParser,
   1076                                         StringRef Str, SMLoc Loc,
   1077                                         bool HasExplicitEncodingSize = true) {
   1078     auto Res = std::make_unique<AMDGPUOperand>(Token, AsmParser);
   1079     Res->Tok.Data = Str.data();
   1080     Res->Tok.Length = Str.size();
   1081     Res->StartLoc = Loc;
   1082     Res->EndLoc = Loc;
   1083     return Res;
   1084   }
   1085 
   1086   static AMDGPUOperand::Ptr CreateReg(const AMDGPUAsmParser *AsmParser,
   1087                                       unsigned RegNo, SMLoc S,
   1088                                       SMLoc E) {
   1089     auto Op = std::make_unique<AMDGPUOperand>(Register, AsmParser);
   1090     Op->Reg.RegNo = RegNo;
   1091     Op->Reg.Mods = Modifiers();
   1092     Op->StartLoc = S;
   1093     Op->EndLoc = E;
   1094     return Op;
   1095   }
   1096 
   1097   static AMDGPUOperand::Ptr CreateExpr(const AMDGPUAsmParser *AsmParser,
   1098                                        const class MCExpr *Expr, SMLoc S) {
   1099     auto Op = std::make_unique<AMDGPUOperand>(Expression, AsmParser);
   1100     Op->Expr = Expr;
   1101     Op->StartLoc = S;
   1102     Op->EndLoc = S;
   1103     return Op;
   1104   }
   1105 };
   1106 
   1107 raw_ostream &operator <<(raw_ostream &OS, AMDGPUOperand::Modifiers Mods) {
   1108   OS << "abs:" << Mods.Abs << " neg: " << Mods.Neg << " sext:" << Mods.Sext;
   1109   return OS;
   1110 }
   1111 
   1112 //===----------------------------------------------------------------------===//
   1113 // AsmParser
   1114 //===----------------------------------------------------------------------===//
   1115 
   1116 // Holds info related to the current kernel, e.g. count of SGPRs used.
   1117 // Kernel scope begins at .amdgpu_hsa_kernel directive, ends at next
   1118 // .amdgpu_hsa_kernel or at EOF.
   1119 class KernelScopeInfo {
   1120   int SgprIndexUnusedMin = -1;
   1121   int VgprIndexUnusedMin = -1;
   1122   MCContext *Ctx = nullptr;
   1123 
   1124   void usesSgprAt(int i) {
   1125     if (i >= SgprIndexUnusedMin) {
   1126       SgprIndexUnusedMin = ++i;
   1127       if (Ctx) {
   1128         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.sgpr_count"));
   1129         Sym->setVariableValue(MCConstantExpr::create(SgprIndexUnusedMin, *Ctx));
   1130       }
   1131     }
   1132   }
   1133 
   1134   void usesVgprAt(int i) {
   1135     if (i >= VgprIndexUnusedMin) {
   1136       VgprIndexUnusedMin = ++i;
   1137       if (Ctx) {
   1138         MCSymbol * const Sym = Ctx->getOrCreateSymbol(Twine(".kernel.vgpr_count"));
   1139         Sym->setVariableValue(MCConstantExpr::create(VgprIndexUnusedMin, *Ctx));
   1140       }
   1141     }
   1142   }
   1143 
   1144 public:
   1145   KernelScopeInfo() = default;
   1146 
   1147   void initialize(MCContext &Context) {
   1148     Ctx = &Context;
   1149     usesSgprAt(SgprIndexUnusedMin = -1);
   1150     usesVgprAt(VgprIndexUnusedMin = -1);
   1151   }
   1152 
   1153   void usesRegister(RegisterKind RegKind, unsigned DwordRegIndex, unsigned RegWidth) {
   1154     switch (RegKind) {
   1155       case IS_SGPR: usesSgprAt(DwordRegIndex + RegWidth - 1); break;
   1156       case IS_AGPR: // fall through
   1157       case IS_VGPR: usesVgprAt(DwordRegIndex + RegWidth - 1); break;
   1158       default: break;
   1159     }
   1160   }
   1161 };
   1162 
   1163 class AMDGPUAsmParser : public MCTargetAsmParser {
   1164   MCAsmParser &Parser;
   1165 
   1166   // Number of extra operands parsed after the first optional operand.
   1167   // This may be necessary to skip hardcoded mandatory operands.
   1168   static const unsigned MAX_OPR_LOOKAHEAD = 8;
   1169 
   1170   unsigned ForcedEncodingSize = 0;
   1171   bool ForcedDPP = false;
   1172   bool ForcedSDWA = false;
   1173   KernelScopeInfo KernelScope;
   1174   unsigned CPolSeen;
   1175 
   1176   /// @name Auto-generated Match Functions
   1177   /// {
   1178 
   1179 #define GET_ASSEMBLER_HEADER
   1180 #include "AMDGPUGenAsmMatcher.inc"
   1181 
   1182   /// }
   1183 
   1184 private:
   1185   bool ParseAsAbsoluteExpression(uint32_t &Ret);
   1186   bool OutOfRangeError(SMRange Range);
   1187   /// Calculate VGPR/SGPR blocks required for given target, reserved
   1188   /// registers, and user-specified NextFreeXGPR values.
   1189   ///
   1190   /// \param Features [in] Target features, used for bug corrections.
   1191   /// \param VCCUsed [in] Whether VCC special SGPR is reserved.
   1192   /// \param FlatScrUsed [in] Whether FLAT_SCRATCH special SGPR is reserved.
   1193   /// \param XNACKUsed [in] Whether XNACK_MASK special SGPR is reserved.
   1194   /// \param EnableWavefrontSize32 [in] Value of ENABLE_WAVEFRONT_SIZE32 kernel
   1195   /// descriptor field, if valid.
   1196   /// \param NextFreeVGPR [in] Max VGPR number referenced, plus one.
   1197   /// \param VGPRRange [in] Token range, used for VGPR diagnostics.
   1198   /// \param NextFreeSGPR [in] Max SGPR number referenced, plus one.
   1199   /// \param SGPRRange [in] Token range, used for SGPR diagnostics.
   1200   /// \param VGPRBlocks [out] Result VGPR block count.
   1201   /// \param SGPRBlocks [out] Result SGPR block count.
   1202   bool calculateGPRBlocks(const FeatureBitset &Features, bool VCCUsed,
   1203                           bool FlatScrUsed, bool XNACKUsed,
   1204                           Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
   1205                           SMRange VGPRRange, unsigned NextFreeSGPR,
   1206                           SMRange SGPRRange, unsigned &VGPRBlocks,
   1207                           unsigned &SGPRBlocks);
   1208   bool ParseDirectiveAMDGCNTarget();
   1209   bool ParseDirectiveAMDHSAKernel();
   1210   bool ParseDirectiveMajorMinor(uint32_t &Major, uint32_t &Minor);
   1211   bool ParseDirectiveHSACodeObjectVersion();
   1212   bool ParseDirectiveHSACodeObjectISA();
   1213   bool ParseAMDKernelCodeTValue(StringRef ID, amd_kernel_code_t &Header);
   1214   bool ParseDirectiveAMDKernelCodeT();
   1215   // TODO: Possibly make subtargetHasRegister const.
   1216   bool subtargetHasRegister(const MCRegisterInfo &MRI, unsigned RegNo);
   1217   bool ParseDirectiveAMDGPUHsaKernel();
   1218 
   1219   bool ParseDirectiveISAVersion();
   1220   bool ParseDirectiveHSAMetadata();
   1221   bool ParseDirectivePALMetadataBegin();
   1222   bool ParseDirectivePALMetadata();
   1223   bool ParseDirectiveAMDGPULDS();
   1224 
   1225   /// Common code to parse out a block of text (typically YAML) between start and
   1226   /// end directives.
   1227   bool ParseToEndDirective(const char *AssemblerDirectiveBegin,
   1228                            const char *AssemblerDirectiveEnd,
   1229                            std::string &CollectString);
   1230 
   1231   bool AddNextRegisterToList(unsigned& Reg, unsigned& RegWidth,
   1232                              RegisterKind RegKind, unsigned Reg1, SMLoc Loc);
   1233   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
   1234                            unsigned &RegNum, unsigned &RegWidth,
   1235                            bool RestoreOnFailure = false);
   1236   bool ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
   1237                            unsigned &RegNum, unsigned &RegWidth,
   1238                            SmallVectorImpl<AsmToken> &Tokens);
   1239   unsigned ParseRegularReg(RegisterKind &RegKind, unsigned &RegNum,
   1240                            unsigned &RegWidth,
   1241                            SmallVectorImpl<AsmToken> &Tokens);
   1242   unsigned ParseSpecialReg(RegisterKind &RegKind, unsigned &RegNum,
   1243                            unsigned &RegWidth,
   1244                            SmallVectorImpl<AsmToken> &Tokens);
   1245   unsigned ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
   1246                         unsigned &RegWidth, SmallVectorImpl<AsmToken> &Tokens);
   1247   bool ParseRegRange(unsigned& Num, unsigned& Width);
   1248   unsigned getRegularReg(RegisterKind RegKind,
   1249                          unsigned RegNum,
   1250                          unsigned RegWidth,
   1251                          SMLoc Loc);
   1252 
   1253   bool isRegister();
   1254   bool isRegister(const AsmToken &Token, const AsmToken &NextToken) const;
   1255   Optional<StringRef> getGprCountSymbolName(RegisterKind RegKind);
   1256   void initializeGprCountSymbol(RegisterKind RegKind);
   1257   bool updateGprCountSymbols(RegisterKind RegKind, unsigned DwordRegIndex,
   1258                              unsigned RegWidth);
   1259   void cvtMubufImpl(MCInst &Inst, const OperandVector &Operands,
   1260                     bool IsAtomic, bool IsLds = false);
   1261   void cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
   1262                  bool IsGdsHardcoded);
   1263 
   1264 public:
  // Target-specific match result codes returned by the generated matcher.
  enum AMDGPUMatchResultTy {
    Match_PreferE32 = FIRST_TARGET_MATCH_RESULT_TY
  };
  // Operand-parsing mode; NSA presumably selects the MIMG non-sequential
  // address operand form — confirm against parseOperand's uses.
  enum OperandMode {
    OperandMode_Default,
    OperandMode_NSA,
  };

  // Maps an optional-immediate kind to its index in the parsed operand list.
  using OptionalImmIndexMap = std::map<AMDGPUOperand::ImmTy, unsigned>;
   1274 
   1275   AMDGPUAsmParser(const MCSubtargetInfo &STI, MCAsmParser &_Parser,
   1276                const MCInstrInfo &MII,
   1277                const MCTargetOptions &Options)
   1278       : MCTargetAsmParser(Options, STI, MII), Parser(_Parser) {
   1279     MCAsmParserExtension::Initialize(Parser);
   1280 
   1281     if (getFeatureBits().none()) {
   1282       // Set default features.
   1283       copySTI().ToggleFeature("southern-islands");
   1284     }
   1285 
   1286     setAvailableFeatures(ComputeAvailableFeatures(getFeatureBits()));
   1287 
   1288     {
   1289       // TODO: make those pre-defined variables read-only.
   1290       // Currently there is none suitable machinery in the core llvm-mc for this.
   1291       // MCSymbol::isRedefinable is intended for another purpose, and
   1292       // AsmParser::parseDirectiveSet() cannot be specialized for specific target.
   1293       AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
   1294       MCContext &Ctx = getContext();
   1295       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
   1296         MCSymbol *Sym =
   1297             Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_number"));
   1298         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
   1299         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_minor"));
   1300         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
   1301         Sym = Ctx.getOrCreateSymbol(Twine(".amdgcn.gfx_generation_stepping"));
   1302         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
   1303       } else {
   1304         MCSymbol *Sym =
   1305             Ctx.getOrCreateSymbol(Twine(".option.machine_version_major"));
   1306         Sym->setVariableValue(MCConstantExpr::create(ISA.Major, Ctx));
   1307         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_minor"));
   1308         Sym->setVariableValue(MCConstantExpr::create(ISA.Minor, Ctx));
   1309         Sym = Ctx.getOrCreateSymbol(Twine(".option.machine_version_stepping"));
   1310         Sym->setVariableValue(MCConstantExpr::create(ISA.Stepping, Ctx));
   1311       }
   1312       if (ISA.Major >= 6 && isHsaAbiVersion3Or4(&getSTI())) {
   1313         initializeGprCountSymbol(IS_VGPR);
   1314         initializeGprCountSymbol(IS_SGPR);
   1315       } else
   1316         KernelScope.initialize(getContext());
   1317     }
   1318   }
   1319 
  // Subtarget capability queries — thin wrappers over AMDGPUBaseInfo.

  bool hasMIMG_R128() const {
    return AMDGPU::hasMIMG_R128(getSTI());
  }

  bool hasPackedD16() const {
    return AMDGPU::hasPackedD16(getSTI());
  }

  bool hasGFX10A16() const {
    return AMDGPU::hasGFX10A16(getSTI());
  }

  bool hasG16() const { return AMDGPU::hasG16(getSTI()); }
   1333 
  // GPU generation predicates — thin wrappers over AMDGPUBaseInfo.

  bool isSI() const {
    return AMDGPU::isSI(getSTI());
  }

  bool isCI() const {
    return AMDGPU::isCI(getSTI());
  }

  bool isVI() const {
    return AMDGPU::isVI(getSTI());
  }

  bool isGFX9() const {
    return AMDGPU::isGFX9(getSTI());
  }

  bool isGFX90A() const {
    return AMDGPU::isGFX90A(getSTI());
  }

  bool isGFX9Plus() const {
    return AMDGPU::isGFX9Plus(getSTI());
  }

  bool isGFX10() const {
    return AMDGPU::isGFX10(getSTI());
  }

  bool isGFX10Plus() const { return AMDGPU::isGFX10Plus(getSTI()); }

  bool isGFX10_BEncoding() const {
    return AMDGPU::isGFX10_BEncoding(getSTI());
  }
   1367 
  // Feature-bit queries read directly from the subtarget feature set.

  bool hasInv2PiInlineImm() const {
    return getFeatureBits()[AMDGPU::FeatureInv2PiInlineImm];
  }

  bool hasFlatOffsets() const {
    return getFeatureBits()[AMDGPU::FeatureFlatInstOffsets];
  }

  bool hasArchitectedFlatScratch() const {
    return getFeatureBits()[AMDGPU::FeatureArchitectedFlatScratch];
  }

  // SGPR102/103 are unavailable on VI and GFX9; derived, not a feature bit.
  bool hasSGPR102_SGPR103() const {
    return !isVI() && !isGFX9();
  }

  // SGPR104/105 exist starting with GFX10.
  bool hasSGPR104_SGPR105() const { return isGFX10Plus(); }

  bool hasIntClamp() const {
    return getFeatureBits()[AMDGPU::FeatureIntClamp];
  }
   1389 
  // Accessors for collaborating MC-layer objects.

  AMDGPUTargetStreamer &getTargetStreamer() {
    // The streamer's target streamer is always the AMDGPU one here.
    MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
    return static_cast<AMDGPUTargetStreamer &>(TS);
  }

  const MCRegisterInfo *getMRI() const {
    // We need this const_cast because for some reason getContext() is not const
    // in MCAsmParser.
    return const_cast<AMDGPUAsmParser*>(this)->getContext().getRegisterInfo();
  }

  const MCInstrInfo *getMII() const {
    return &MII;
  }

  const FeatureBitset &getFeatureBits() const {
    return getSTI().getFeatureBits();
  }
   1408 
  // Forced-encoding state: set from mnemonic suffixes (e.g. _e64, _dpp,
  // _sdwa) to constrain which encoding the matcher may pick.
  void setForcedEncodingSize(unsigned Size) { ForcedEncodingSize = Size; }
  void setForcedDPP(bool ForceDPP_) { ForcedDPP = ForceDPP_; }
  void setForcedSDWA(bool ForceSDWA_) { ForcedSDWA = ForceSDWA_; }

  unsigned getForcedEncodingSize() const { return ForcedEncodingSize; }
  // A forced 64-bit encoding size means the VOP3 form was requested.
  bool isForcedVOP3() const { return ForcedEncodingSize == 64; }
  bool isForcedDPP() const { return ForcedDPP; }
  bool isForcedSDWA() const { return ForcedSDWA; }
  ArrayRef<unsigned> getMatchedVariants() const;
  StringRef getMatchedVariantName() const;
   1419 
   1420   std::unique_ptr<AMDGPUOperand> parseRegister(bool RestoreOnFailure = false);
   1421   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
   1422                      bool RestoreOnFailure);
   1423   bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
   1424   OperandMatchResultTy tryParseRegister(unsigned &RegNo, SMLoc &StartLoc,
   1425                                         SMLoc &EndLoc) override;
   1426   unsigned checkTargetMatchPredicate(MCInst &Inst) override;
   1427   unsigned validateTargetOperandClass(MCParsedAsmOperand &Op,
   1428                                       unsigned Kind) override;
   1429   bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
   1430                                OperandVector &Operands, MCStreamer &Out,
   1431                                uint64_t &ErrorInfo,
   1432                                bool MatchingInlineAsm) override;
   1433   bool ParseDirective(AsmToken DirectiveID) override;
   1434   OperandMatchResultTy parseOperand(OperandVector &Operands, StringRef Mnemonic,
   1435                                     OperandMode Mode = OperandMode_Default);
   1436   StringRef parseMnemonicSuffix(StringRef Name);
   1437   bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
   1438                         SMLoc NameLoc, OperandVector &Operands) override;
   1439   //bool ProcessInstruction(MCInst &Inst);
   1440 
   1441   OperandMatchResultTy parseIntWithPrefix(const char *Prefix, int64_t &Int);
   1442 
   1443   OperandMatchResultTy
   1444   parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
   1445                      AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
   1446                      bool (*ConvertResult)(int64_t &) = nullptr);
   1447 
   1448   OperandMatchResultTy
   1449   parseOperandArrayWithPrefix(const char *Prefix,
   1450                               OperandVector &Operands,
   1451                               AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone,
   1452                               bool (*ConvertResult)(int64_t&) = nullptr);
   1453 
   1454   OperandMatchResultTy
   1455   parseNamedBit(StringRef Name, OperandVector &Operands,
   1456                 AMDGPUOperand::ImmTy ImmTy = AMDGPUOperand::ImmTyNone);
   1457   OperandMatchResultTy parseCPol(OperandVector &Operands);
   1458   OperandMatchResultTy parseStringWithPrefix(StringRef Prefix,
   1459                                              StringRef &Value,
   1460                                              SMLoc &StringLoc);
   1461 
   1462   bool isModifier();
   1463   bool isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
   1464   bool isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
   1465   bool isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const;
   1466   bool isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const;
   1467   bool parseSP3NegModifier();
   1468   OperandMatchResultTy parseImm(OperandVector &Operands, bool HasSP3AbsModifier = false);
   1469   OperandMatchResultTy parseReg(OperandVector &Operands);
   1470   OperandMatchResultTy parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod = false);
   1471   OperandMatchResultTy parseRegOrImmWithFPInputMods(OperandVector &Operands, bool AllowImm = true);
   1472   OperandMatchResultTy parseRegOrImmWithIntInputMods(OperandVector &Operands, bool AllowImm = true);
   1473   OperandMatchResultTy parseRegWithFPInputMods(OperandVector &Operands);
   1474   OperandMatchResultTy parseRegWithIntInputMods(OperandVector &Operands);
   1475   OperandMatchResultTy parseVReg32OrOff(OperandVector &Operands);
   1476   OperandMatchResultTy parseDfmtNfmt(int64_t &Format);
   1477   OperandMatchResultTy parseUfmt(int64_t &Format);
   1478   OperandMatchResultTy parseSymbolicSplitFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
   1479   OperandMatchResultTy parseSymbolicUnifiedFormat(StringRef FormatStr, SMLoc Loc, int64_t &Format);
   1480   OperandMatchResultTy parseFORMAT(OperandVector &Operands);
   1481   OperandMatchResultTy parseSymbolicOrNumericFormat(int64_t &Format);
   1482   OperandMatchResultTy parseNumericFormat(int64_t &Format);
   1483   bool tryParseFmt(const char *Pref, int64_t MaxVal, int64_t &Val);
   1484   bool matchDfmtNfmt(int64_t &Dfmt, int64_t &Nfmt, StringRef FormatStr, SMLoc Loc);
   1485 
   1486   void cvtDSOffset01(MCInst &Inst, const OperandVector &Operands);
   1487   void cvtDS(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, false); }
   1488   void cvtDSGds(MCInst &Inst, const OperandVector &Operands) { cvtDSImpl(Inst, Operands, true); }
   1489   void cvtExp(MCInst &Inst, const OperandVector &Operands);
   1490 
   1491   bool parseCnt(int64_t &IntVal);
   1492   OperandMatchResultTy parseSWaitCntOps(OperandVector &Operands);
   1493   OperandMatchResultTy parseHwreg(OperandVector &Operands);
   1494 
   1495 private:
  // Scratch record used while parsing structured operands (hwreg, sendmsg).
  struct OperandInfoTy {
    SMLoc Loc;               // Source location of this sub-operand.
    int64_t Id;              // Value; starts as the default given at construction.
    bool IsSymbolic = false; // Presumably set when written as a name, not a number
                             // — confirm against parseHwregBody/parseSendMsgBody.
    bool IsDefined = false;  // Presumably set once the sub-operand was parsed.

    OperandInfoTy(int64_t Id_) : Id(Id_) {}
  };
   1504 
   1505   bool parseSendMsgBody(OperandInfoTy &Msg, OperandInfoTy &Op, OperandInfoTy &Stream);
   1506   bool validateSendMsg(const OperandInfoTy &Msg,
   1507                        const OperandInfoTy &Op,
   1508                        const OperandInfoTy &Stream);
   1509 
   1510   bool parseHwregBody(OperandInfoTy &HwReg,
   1511                       OperandInfoTy &Offset,
   1512                       OperandInfoTy &Width);
   1513   bool validateHwreg(const OperandInfoTy &HwReg,
   1514                      const OperandInfoTy &Offset,
   1515                      const OperandInfoTy &Width);
   1516 
   1517   SMLoc getFlatOffsetLoc(const OperandVector &Operands) const;
   1518   SMLoc getSMEMOffsetLoc(const OperandVector &Operands) const;
   1519 
   1520   SMLoc getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
   1521                       const OperandVector &Operands) const;
   1522   SMLoc getImmLoc(AMDGPUOperand::ImmTy Type, const OperandVector &Operands) const;
   1523   SMLoc getRegLoc(unsigned Reg, const OperandVector &Operands) const;
   1524   SMLoc getLitLoc(const OperandVector &Operands) const;
   1525   SMLoc getConstLoc(const OperandVector &Operands) const;
   1526 
   1527   bool validateInstruction(const MCInst &Inst, const SMLoc &IDLoc, const OperandVector &Operands);
   1528   bool validateFlatOffset(const MCInst &Inst, const OperandVector &Operands);
   1529   bool validateSMEMOffset(const MCInst &Inst, const OperandVector &Operands);
   1530   bool validateSOPLiteral(const MCInst &Inst) const;
   1531   bool validateConstantBusLimitations(const MCInst &Inst, const OperandVector &Operands);
   1532   bool validateEarlyClobberLimitations(const MCInst &Inst, const OperandVector &Operands);
   1533   bool validateIntClampSupported(const MCInst &Inst);
   1534   bool validateMIMGAtomicDMask(const MCInst &Inst);
   1535   bool validateMIMGGatherDMask(const MCInst &Inst);
   1536   bool validateMovrels(const MCInst &Inst, const OperandVector &Operands);
   1537   bool validateMIMGDataSize(const MCInst &Inst);
   1538   bool validateMIMGAddrSize(const MCInst &Inst);
   1539   bool validateMIMGD16(const MCInst &Inst);
   1540   bool validateMIMGDim(const MCInst &Inst);
   1541   bool validateMIMGMSAA(const MCInst &Inst);
   1542   bool validateOpSel(const MCInst &Inst);
   1543   bool validateDPP(const MCInst &Inst, const OperandVector &Operands);
   1544   bool validateVccOperand(unsigned Reg) const;
   1545   bool validateVOP3Literal(const MCInst &Inst, const OperandVector &Operands);
   1546   bool validateMAIAccWrite(const MCInst &Inst, const OperandVector &Operands);
   1547   bool validateAGPRLdSt(const MCInst &Inst) const;
   1548   bool validateVGPRAlign(const MCInst &Inst) const;
   1549   bool validateDivScale(const MCInst &Inst);
   1550   bool validateCoherencyBits(const MCInst &Inst, const OperandVector &Operands,
   1551                              const SMLoc &IDLoc);
   1552   Optional<StringRef> validateLdsDirect(const MCInst &Inst);
   1553   unsigned getConstantBusLimit(unsigned Opcode) const;
   1554   bool usesConstantBus(const MCInst &Inst, unsigned OpIdx);
   1555   bool isInlineConstant(const MCInst &Inst, unsigned OpIdx) const;
   1556   unsigned findImplicitSGPRReadInVOP(const MCInst &Inst) const;
   1557 
   1558   bool isSupportedMnemo(StringRef Mnemo,
   1559                         const FeatureBitset &FBS);
   1560   bool isSupportedMnemo(StringRef Mnemo,
   1561                         const FeatureBitset &FBS,
   1562                         ArrayRef<unsigned> Variants);
   1563   bool checkUnsupportedInstruction(StringRef Name, const SMLoc &IDLoc);
   1564 
   1565   bool isId(const StringRef Id) const;
   1566   bool isId(const AsmToken &Token, const StringRef Id) const;
   1567   bool isToken(const AsmToken::TokenKind Kind) const;
   1568   bool trySkipId(const StringRef Id);
   1569   bool trySkipId(const StringRef Pref, const StringRef Id);
   1570   bool trySkipId(const StringRef Id, const AsmToken::TokenKind Kind);
   1571   bool trySkipToken(const AsmToken::TokenKind Kind);
   1572   bool skipToken(const AsmToken::TokenKind Kind, const StringRef ErrMsg);
   1573   bool parseString(StringRef &Val, const StringRef ErrMsg = "expected a string");
   1574   bool parseId(StringRef &Val, const StringRef ErrMsg = "");
   1575 
   1576   void peekTokens(MutableArrayRef<AsmToken> Tokens);
   1577   AsmToken::TokenKind getTokenKind() const;
   1578   bool parseExpr(int64_t &Imm, StringRef Expected = "");
   1579   bool parseExpr(OperandVector &Operands);
   1580   StringRef getTokenStr() const;
   1581   AsmToken peekToken();
   1582   AsmToken getToken() const;
   1583   SMLoc getLoc() const;
   1584   void lex();
   1585 
   1586 public:
   1587   void onBeginOfFile() override;
   1588 
   1589   OperandMatchResultTy parseOptionalOperand(OperandVector &Operands);
   1590   OperandMatchResultTy parseOptionalOpr(OperandVector &Operands);
   1591 
   1592   OperandMatchResultTy parseExpTgt(OperandVector &Operands);
   1593   OperandMatchResultTy parseSendMsgOp(OperandVector &Operands);
   1594   OperandMatchResultTy parseInterpSlot(OperandVector &Operands);
   1595   OperandMatchResultTy parseInterpAttr(OperandVector &Operands);
   1596   OperandMatchResultTy parseSOppBrTarget(OperandVector &Operands);
   1597   OperandMatchResultTy parseBoolReg(OperandVector &Operands);
   1598 
   1599   bool parseSwizzleOperand(int64_t &Op,
   1600                            const unsigned MinVal,
   1601                            const unsigned MaxVal,
   1602                            const StringRef ErrMsg,
   1603                            SMLoc &Loc);
   1604   bool parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
   1605                             const unsigned MinVal,
   1606                             const unsigned MaxVal,
   1607                             const StringRef ErrMsg);
   1608   OperandMatchResultTy parseSwizzleOp(OperandVector &Operands);
   1609   bool parseSwizzleOffset(int64_t &Imm);
   1610   bool parseSwizzleMacro(int64_t &Imm);
   1611   bool parseSwizzleQuadPerm(int64_t &Imm);
   1612   bool parseSwizzleBitmaskPerm(int64_t &Imm);
   1613   bool parseSwizzleBroadcast(int64_t &Imm);
   1614   bool parseSwizzleSwap(int64_t &Imm);
   1615   bool parseSwizzleReverse(int64_t &Imm);
   1616 
   1617   OperandMatchResultTy parseGPRIdxMode(OperandVector &Operands);
   1618   int64_t parseGPRIdxMacro();
   1619 
   1620   void cvtMubuf(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false); }
   1621   void cvtMubufAtomic(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, true); }
   1622   void cvtMubufLds(MCInst &Inst, const OperandVector &Operands) { cvtMubufImpl(Inst, Operands, false, true); }
   1623   void cvtMtbuf(MCInst &Inst, const OperandVector &Operands);
   1624 
   1625   AMDGPUOperand::Ptr defaultCPol() const;
   1626 
   1627   AMDGPUOperand::Ptr defaultSMRDOffset8() const;
   1628   AMDGPUOperand::Ptr defaultSMEMOffset() const;
   1629   AMDGPUOperand::Ptr defaultSMRDLiteralOffset() const;
   1630   AMDGPUOperand::Ptr defaultFlatOffset() const;
   1631 
   1632   OperandMatchResultTy parseOModOperand(OperandVector &Operands);
   1633 
   1634   void cvtVOP3(MCInst &Inst, const OperandVector &Operands,
   1635                OptionalImmIndexMap &OptionalIdx);
   1636   void cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands);
   1637   void cvtVOP3(MCInst &Inst, const OperandVector &Operands);
   1638   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands);
   1639   void cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
   1640                 OptionalImmIndexMap &OptionalIdx);
   1641 
   1642   void cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands);
   1643 
   1644   void cvtMIMG(MCInst &Inst, const OperandVector &Operands,
   1645                bool IsAtomic = false);
   1646   void cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands);
   1647   void cvtIntersectRay(MCInst &Inst, const OperandVector &Operands);
   1648 
   1649   void cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands);
   1650 
   1651   bool parseDimId(unsigned &Encoding);
   1652   OperandMatchResultTy parseDim(OperandVector &Operands);
   1653   OperandMatchResultTy parseDPP8(OperandVector &Operands);
   1654   OperandMatchResultTy parseDPPCtrl(OperandVector &Operands);
   1655   bool isSupportedDPPCtrl(StringRef Ctrl, const OperandVector &Operands);
   1656   int64_t parseDPPCtrlSel(StringRef Ctrl);
   1657   int64_t parseDPPCtrlPerm();
   1658   AMDGPUOperand::Ptr defaultRowMask() const;
   1659   AMDGPUOperand::Ptr defaultBankMask() const;
   1660   AMDGPUOperand::Ptr defaultBoundCtrl() const;
   1661   AMDGPUOperand::Ptr defaultFI() const;
   1662   void cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8 = false);
   1663   void cvtDPP8(MCInst &Inst, const OperandVector &Operands) { cvtDPP(Inst, Operands, true); }
   1664 
   1665   OperandMatchResultTy parseSDWASel(OperandVector &Operands, StringRef Prefix,
   1666                                     AMDGPUOperand::ImmTy Type);
   1667   OperandMatchResultTy parseSDWADstUnused(OperandVector &Operands);
   1668   void cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands);
   1669   void cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands);
   1670   void cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands);
   1671   void cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands);
   1672   void cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands);
   1673   void cvtSDWA(MCInst &Inst, const OperandVector &Operands,
   1674                uint64_t BasicInstType,
   1675                bool SkipDstVcc = false,
   1676                bool SkipSrcVcc = false);
   1677 
   1678   AMDGPUOperand::Ptr defaultBLGP() const;
   1679   AMDGPUOperand::Ptr defaultCBSZ() const;
   1680   AMDGPUOperand::Ptr defaultABID() const;
   1681 
   1682   OperandMatchResultTy parseEndpgmOp(OperandVector &Operands);
   1683   AMDGPUOperand::Ptr defaultEndpgmImmOperands() const;
   1684 };
   1685 
// Table entry describing an optional instruction operand (e.g. "clamp",
// "omod") that may follow the mandatory operands.
struct OptionalOperand {
  // Assembly-name of the optional operand.
  const char *Name;
  // Immediate kind used to classify the parsed value.
  AMDGPUOperand::ImmTy Type;
  // Presumably true when the operand is a bare flag with no ":value"
  // suffix — confirm against the parse loop that consumes these entries.
  bool IsBit;
  // Optional hook to validate/transform the parsed value; may be null.
  bool (*ConvertResult)(int64_t&);
};
   1692 
   1693 } // end anonymous namespace
   1694 
   1695 // May be called with integer type with equivalent bitwidth.
   1696 static const fltSemantics *getFltSemantics(unsigned Size) {
   1697   switch (Size) {
   1698   case 4:
   1699     return &APFloat::IEEEsingle();
   1700   case 8:
   1701     return &APFloat::IEEEdouble();
   1702   case 2:
   1703     return &APFloat::IEEEhalf();
   1704   default:
   1705     llvm_unreachable("unsupported fp type");
   1706   }
   1707 }
   1708 
// Returns the IEEE semantics matching the bit width of the given MVT.
static const fltSemantics *getFltSemantics(MVT VT) {
  return getFltSemantics(VT.getSizeInBits() / 8);
}
   1712 
// Map an operand-type enum value to the IEEE semantics of the fp literal it
// accepts: 32-bit (and packed 32-bit) operands use single precision, 64-bit
// operands use double, 16-bit (and packed 16-bit) operands use half.
static const fltSemantics *getOpFltSemantics(uint8_t OperandType) {
  switch (OperandType) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
    return &APFloat::IEEEsingle();
  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    return &APFloat::IEEEdouble();
  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
    return &APFloat::IEEEhalf();
  default:
    llvm_unreachable("unsupported fp type");
  }
}
   1749 
   1750 //===----------------------------------------------------------------------===//
   1751 // Operand
   1752 //===----------------------------------------------------------------------===//
   1753 
   1754 static bool canLosslesslyConvertToFPType(APFloat &FPLiteral, MVT VT) {
   1755   bool Lost;
   1756 
   1757   // Convert literal to single precision
   1758   APFloat::opStatus Status = FPLiteral.convert(*getFltSemantics(VT),
   1759                                                APFloat::rmNearestTiesToEven,
   1760                                                &Lost);
   1761   // We allow precision lost but not overflow or underflow
   1762   if (Status != APFloat::opOK &&
   1763       Lost &&
   1764       ((Status & APFloat::opOverflow)  != 0 ||
   1765        (Status & APFloat::opUnderflow) != 0)) {
   1766     return false;
   1767   }
   1768 
   1769   return true;
   1770 }
   1771 
   1772 static bool isSafeTruncation(int64_t Val, unsigned Size) {
   1773   return isUIntN(Size, Val) || isIntN(Size, Val);
   1774 }
   1775 
   1776 static bool isInlineableLiteralOp16(int64_t Val, MVT VT, bool HasInv2Pi) {
   1777   if (VT.getScalarType() == MVT::i16) {
   1778     // FP immediate values are broken.
   1779     return isInlinableIntLiteral(Val);
   1780   }
   1781 
   1782   // f16/v2f16 operands work correctly for all values.
   1783   return AMDGPU::isInlinableLiteral16(Val, HasInv2Pi);
   1784 }
   1785 
// Returns true if this immediate operand can be encoded as an inline
// constant (no extra literal dword) for an operand of the given type.
// Handles both fp-literal and integer-literal tokens.
bool AMDGPUOperand::isInlinableImm(MVT type) const {

  // This is a hack to enable named inline values like
  // shared_base with both 32-bit and 64-bit operands.
  // Note that these values are defined as
  // 32-bit operands only.
  if (isInlineValue()) {
    return true;
  }

  if (!isImmTy(ImmTyNone)) {
    // Only plain immediates are inlinable (e.g. "clamp" attribute is not)
    return false;
  }
  // TODO: We should avoid using host float here. It would be better to
  // check the float bit values which is what a few other places do.
  // We've had bot failures before due to weird NaN support on mips hosts.

  APInt Literal(64, Imm.Val);

  if (Imm.IsFPImm) { // We got fp literal token
    if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
      return AMDGPU::isInlinableLiteral64(Imm.Val,
                                          AsmParser->hasInv2PiInlineImm());
    }

    // Narrow the double-precision token to the operand's format; reject on
    // overflow/underflow (precision loss alone is allowed).
    APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
    if (!canLosslesslyConvertToFPType(FPLiteral, type))
      return false;

    if (type.getScalarSizeInBits() == 16) {
      return isInlineableLiteralOp16(
        static_cast<int16_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
        type, AsmParser->hasInv2PiInlineImm());
    }

    // Check if single precision literal is inlinable
    return AMDGPU::isInlinableLiteral32(
      static_cast<int32_t>(FPLiteral.bitcastToAPInt().getZExtValue()),
      AsmParser->hasInv2PiInlineImm());
  }

  // We got int literal token.
  if (type == MVT::f64 || type == MVT::i64) { // Expected 64-bit operand
    return AMDGPU::isInlinableLiteral64(Imm.Val,
                                        AsmParser->hasInv2PiInlineImm());
  }

  // The integer must fit the operand width as signed or unsigned.
  if (!isSafeTruncation(Imm.Val, type.getScalarSizeInBits())) {
    return false;
  }

  if (type.getScalarSizeInBits() == 16) {
    return isInlineableLiteralOp16(
      static_cast<int16_t>(Literal.getLoBits(16).getSExtValue()),
      type, AsmParser->hasInv2PiInlineImm());
  }

  return AMDGPU::isInlinableLiteral32(
    static_cast<int32_t>(Literal.getLoBits(32).getZExtValue()),
    AsmParser->hasInv2PiInlineImm());
}
   1848 
// Returns true if this immediate can be encoded as a (non-inline) literal
// constant for an operand of the given type.
bool AMDGPUOperand::isLiteralImm(MVT type) const {
  // Check that this immediate can be added as literal
  if (!isImmTy(ImmTyNone)) {
    return false;
  }

  if (!Imm.IsFPImm) {
    // We got int literal token.

    if (type == MVT::f64 && hasFPModifiers()) {
      // Cannot apply fp modifiers to int literals preserving the same semantics
      // for VOP1/2/C and VOP3 because of integer truncation. To avoid ambiguity,
      // disable these cases.
      return false;
    }

    // 64-bit operands still encode only a 32-bit literal dword.
    unsigned Size = type.getSizeInBits();
    if (Size == 64)
      Size = 32;

    // FIXME: 64-bit operands can zero extend, sign extend, or pad zeroes for FP
    // types.
    return isSafeTruncation(Imm.Val, Size);
  }

  // We got fp literal token
  if (type == MVT::f64) { // Expected 64-bit fp operand
    // We would set low 64-bits of literal to zeroes but we accept this literals
    return true;
  }

  if (type == MVT::i64) { // Expected 64-bit int operand
    // We don't allow fp literals in 64-bit integer instructions. It is
    // unclear how we should encode them.
    return false;
  }

  // We allow fp literals with f16x2 operands assuming that the specified
  // literal goes into the lower half and the upper half is zero. We also
  // require that the literal may be losslesly converted to f16.
  MVT ExpectedType = (type == MVT::v2f16)? MVT::f16 :
                     (type == MVT::v2i16)? MVT::i16 :
                     (type == MVT::v2f32)? MVT::f32 : type;

  APFloat FPLiteral(APFloat::IEEEdouble(), APInt(64, Imm.Val));
  return canLosslesslyConvertToFPType(FPLiteral, ExpectedType);
}
   1896 
   1897 bool AMDGPUOperand::isRegClass(unsigned RCID) const {
   1898   return isRegKind() && AsmParser->getMRI()->getRegClass(RCID).contains(getReg());
   1899 }
   1900 
// A VGPR operand that may carry input modifiers: any 32-bit VGPR, or a
// 64-bit VGPR pair on subtargets with the 64-bit DPP feature (GFX90A).
bool AMDGPUOperand::isVRegWithInputMods() const {
  return isRegClass(AMDGPU::VGPR_32RegClassID) ||
         // GFX90A allows DPP on 64-bit operands.
         (isRegClass(AMDGPU::VReg_64RegClassID) &&
          AsmParser->getFeatureBits()[AMDGPU::Feature64BitDPP]);
}
   1907 
   1908 bool AMDGPUOperand::isSDWAOperand(MVT type) const {
   1909   if (AsmParser->isVI())
   1910     return isVReg32();
   1911   else if (AsmParser->isGFX9Plus())
   1912     return isRegClass(AMDGPU::VS_32RegClassID) || isInlinableImm(type);
   1913   else
   1914     return false;
   1915 }
   1916 
// SDWA operand check specialized for f16 sources.
bool AMDGPUOperand::isSDWAFP16Operand() const {
  return isSDWAOperand(MVT::f16);
}
   1920 
// SDWA operand check specialized for f32 sources.
bool AMDGPUOperand::isSDWAFP32Operand() const {
  return isSDWAOperand(MVT::f32);
}
   1924 
// SDWA operand check specialized for i16 sources.
bool AMDGPUOperand::isSDWAInt16Operand() const {
  return isSDWAOperand(MVT::i16);
}
   1928 
// SDWA operand check specialized for i32 sources.
bool AMDGPUOperand::isSDWAInt32Operand() const {
  return isSDWAOperand(MVT::i32);
}
   1932 
// A "bool" register operand: the register class acceptable for a condition
// mask depends on the wavefront size feature (64-bit scalar source in
// wave64 mode, 32-bit in wave32 mode).
bool AMDGPUOperand::isBoolReg() const {
  auto FB = AsmParser->getFeatureBits();
  return isReg() && ((FB[AMDGPU::FeatureWavefrontSize64] && isSCSrcB64()) ||
                     (FB[AMDGPU::FeatureWavefrontSize32] && isSCSrcB32()));
}
   1938 
   1939 uint64_t AMDGPUOperand::applyInputFPModifiers(uint64_t Val, unsigned Size) const
   1940 {
   1941   assert(isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
   1942   assert(Size == 2 || Size == 4 || Size == 8);
   1943 
   1944   const uint64_t FpSignMask = (1ULL << (Size * 8 - 1));
   1945 
   1946   if (Imm.Mods.Abs) {
   1947     Val &= ~FpSignMask;
   1948   }
   1949   if (Imm.Mods.Neg) {
   1950     Val ^= FpSignMask;
   1951   }
   1952 
   1953   return Val;
   1954 }
   1955 
   1956 void AMDGPUOperand::addImmOperands(MCInst &Inst, unsigned N, bool ApplyModifiers) const {
   1957   if (AMDGPU::isSISrcOperand(AsmParser->getMII()->get(Inst.getOpcode()),
   1958                              Inst.getNumOperands())) {
   1959     addLiteralImmOperand(Inst, Imm.Val,
   1960                          ApplyModifiers &
   1961                          isImmTy(ImmTyNone) && Imm.Mods.hasFPModifiers());
   1962   } else {
   1963     assert(!isImmTy(ImmTyNone) || !hasModifiers());
   1964     Inst.addOperand(MCOperand::createImm(Imm.Val));
   1965     setImmKindNone();
   1966   }
   1967 }
   1968 
// Encode Val as the immediate for the next operand of Inst. Inline
// constants are added verbatim; other values are narrowed/converted to the
// literal encoding the operand type requires. ApplyModifiers folds abs/neg
// into the bits first (fp source operands only).
void AMDGPUOperand::addLiteralImmOperand(MCInst &Inst, int64_t Val, bool ApplyModifiers) const {
  const auto& InstDesc = AsmParser->getMII()->get(Inst.getOpcode());
  auto OpNum = Inst.getNumOperands();
  // Check that this operand accepts literals
  assert(AMDGPU::isSISrcOperand(InstDesc, OpNum));

  if (ApplyModifiers) {
    assert(AMDGPU::isSISrcFPOperand(InstDesc, OpNum));
    // fp tokens are kept in double-precision bits until conversion below.
    const unsigned Size = Imm.IsFPImm ? sizeof(double) : getOperandSize(InstDesc, OpNum);
    Val = applyInputFPModifiers(Val, Size);
  }

  APInt Literal(64, Val);
  uint8_t OpTy = InstDesc.OpInfo[OpNum].OperandType;

  if (Imm.IsFPImm) { // We got fp literal token
    switch (OpTy) {
    case AMDGPU::OPERAND_REG_IMM_INT64:
    case AMDGPU::OPERAND_REG_IMM_FP64:
    case AMDGPU::OPERAND_REG_INLINE_C_INT64:
    case AMDGPU::OPERAND_REG_INLINE_C_FP64:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
      if (AMDGPU::isInlinableLiteral64(Literal.getZExtValue(),
                                       AsmParser->hasInv2PiInlineImm())) {
        Inst.addOperand(MCOperand::createImm(Literal.getZExtValue()));
        setImmKindConst();
        return;
      }

      // Non-inlineable
      if (AMDGPU::isSISrcFPOperand(InstDesc, OpNum)) { // Expected 64-bit fp operand
        // For fp operands we check if low 32 bits are zeros
        if (Literal.getLoBits(32) != 0) {
          const_cast<AMDGPUAsmParser *>(AsmParser)->Warning(Inst.getLoc(),
          "Can't encode literal as exact 64-bit floating-point operand. "
          "Low 32-bits will be set to zero");
        }

        // Only the high dword is encoded; hardware pads the low 32 bits.
        Inst.addOperand(MCOperand::createImm(Literal.lshr(32).getZExtValue()));
        setImmKindLiteral();
        return;
      }

      // We don't allow fp literals in 64-bit integer instructions. It is
      // unclear how we should encode them. This case should be checked earlier
      // in predicate methods (isLiteralImm())
      llvm_unreachable("fp literal in 64-bit integer instruction.");

    case AMDGPU::OPERAND_REG_IMM_INT32:
    case AMDGPU::OPERAND_REG_IMM_FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_INT32:
    case AMDGPU::OPERAND_REG_INLINE_C_FP32:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
    case AMDGPU::OPERAND_REG_IMM_INT16:
    case AMDGPU::OPERAND_REG_IMM_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
    case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16:
    case AMDGPU::OPERAND_REG_IMM_V2INT16:
    case AMDGPU::OPERAND_REG_IMM_V2FP16:
    case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
    case AMDGPU::OPERAND_REG_IMM_V2FP32:
    case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    case AMDGPU::OPERAND_REG_IMM_V2INT32: {
      bool lost;
      APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
      // Convert the double-precision token to the operand's fp format
      // (single or half precision, see getOpFltSemantics).
      FPLiteral.convert(*getOpFltSemantics(OpTy),
                        APFloat::rmNearestTiesToEven, &lost);
      // We allow precision lost but not overflow or underflow. This should be
      // checked earlier in isLiteralImm()

      uint64_t ImmVal = FPLiteral.bitcastToAPInt().getZExtValue();
      Inst.addOperand(MCOperand::createImm(ImmVal));
      setImmKindLiteral();
      return;
    }
    default:
      llvm_unreachable("invalid operand size");
    }

    return;
  }

  // We got int literal token.
  // Only sign extend inline immediates.
  switch (OpTy) {
  case AMDGPU::OPERAND_REG_IMM_INT32:
  case AMDGPU::OPERAND_REG_IMM_FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_FP32:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT32:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT16:
  case AMDGPU::OPERAND_REG_IMM_V2FP16:
  case AMDGPU::OPERAND_REG_IMM_V2FP32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP32:
  case AMDGPU::OPERAND_REG_IMM_V2INT32:
  case AMDGPU::OPERAND_REG_INLINE_C_V2INT32:
    if (isSafeTruncation(Val, 32) &&
        AMDGPU::isInlinableLiteral32(static_cast<int32_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not inlinable: encode the low 32 bits as a literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffffffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT64:
  case AMDGPU::OPERAND_REG_IMM_FP64:
  case AMDGPU::OPERAND_REG_INLINE_C_INT64:
  case AMDGPU::OPERAND_REG_INLINE_C_FP64:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP64:
    if (AMDGPU::isInlinableLiteral64(Val, AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not inlinable: only the low dword is encoded as a literal.
    Inst.addOperand(MCOperand::createImm(Lo_32(Val)));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_IMM_INT16:
  case AMDGPU::OPERAND_REG_IMM_FP16:
  case AMDGPU::OPERAND_REG_INLINE_C_INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_FP16:
    if (isSafeTruncation(Val, 16) &&
        AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                     AsmParser->hasInv2PiInlineImm())) {
      Inst.addOperand(MCOperand::createImm(Val));
      setImmKindConst();
      return;
    }

    // Not inlinable: encode the low 16 bits as a literal.
    Inst.addOperand(MCOperand::createImm(Val & 0xffff));
    setImmKindLiteral();
    return;

  case AMDGPU::OPERAND_REG_INLINE_C_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_C_V2FP16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2INT16:
  case AMDGPU::OPERAND_REG_INLINE_AC_V2FP16: {
    // Packed inline-only operand types: value must already be inlinable.
    assert(isSafeTruncation(Val, 16));
    assert(AMDGPU::isInlinableLiteral16(static_cast<int16_t>(Val),
                                        AsmParser->hasInv2PiInlineImm()));

    Inst.addOperand(MCOperand::createImm(Val));
    return;
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
   2134 
// Add a KImm (literal-only) fp operand of the given bit width. Integer
// tokens are truncated to Bitwidth; fp tokens are converted from the
// double-precision parse representation to the Bitwidth-sized IEEE format.
// The N parameter is not used here.
template <unsigned Bitwidth>
void AMDGPUOperand::addKImmFPOperands(MCInst &Inst, unsigned N) const {
  APInt Literal(64, Imm.Val);
  setImmKindNone();

  if (!Imm.IsFPImm) {
    // We got int literal token.
    Inst.addOperand(MCOperand::createImm(Literal.getLoBits(Bitwidth).getZExtValue()));
    return;
  }

  bool Lost;
  APFloat FPLiteral(APFloat::IEEEdouble(), Literal);
  FPLiteral.convert(*getFltSemantics(Bitwidth / 8),
                    APFloat::rmNearestTiesToEven, &Lost);
  Inst.addOperand(MCOperand::createImm(FPLiteral.bitcastToAPInt().getZExtValue()));
}
   2152 
// Add this operand's register to Inst, translated to the MC register number
// for the current subtarget. The N parameter is not used here.
void AMDGPUOperand::addRegOperands(MCInst &Inst, unsigned N) const {
  Inst.addOperand(MCOperand::createReg(AMDGPU::getMCReg(getReg(), AsmParser->getSTI())));
}
   2156 
   2157 static bool isInlineValue(unsigned Reg) {
   2158   switch (Reg) {
   2159   case AMDGPU::SRC_SHARED_BASE:
   2160   case AMDGPU::SRC_SHARED_LIMIT:
   2161   case AMDGPU::SRC_PRIVATE_BASE:
   2162   case AMDGPU::SRC_PRIVATE_LIMIT:
   2163   case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
   2164     return true;
   2165   case AMDGPU::SRC_VCCZ:
   2166   case AMDGPU::SRC_EXECZ:
   2167   case AMDGPU::SRC_SCC:
   2168     return true;
   2169   case AMDGPU::SGPR_NULL:
   2170     return true;
   2171   default:
   2172     return false;
   2173   }
   2174 }
   2175 
// True if this operand is a register with an inline-constant encoding.
bool AMDGPUOperand::isInlineValue() const {
  return isRegKind() && ::isInlineValue(getReg());
}
   2179 
   2180 //===----------------------------------------------------------------------===//
   2181 // AsmParser
   2182 //===----------------------------------------------------------------------===//
   2183 
// Map a register kind and width (in 32-bit units) to the corresponding
// register class ID, or -1 if no class of that width exists for the kind.
static int getRegClass(RegisterKind Is, unsigned RegWidth) {
  if (Is == IS_VGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::VGPR_32RegClassID;
      case 2: return AMDGPU::VReg_64RegClassID;
      case 3: return AMDGPU::VReg_96RegClassID;
      case 4: return AMDGPU::VReg_128RegClassID;
      case 5: return AMDGPU::VReg_160RegClassID;
      case 6: return AMDGPU::VReg_192RegClassID;
      case 8: return AMDGPU::VReg_256RegClassID;
      case 16: return AMDGPU::VReg_512RegClassID;
      case 32: return AMDGPU::VReg_1024RegClassID;
    }
  } else if (Is == IS_TTMP) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::TTMP_32RegClassID;
      case 2: return AMDGPU::TTMP_64RegClassID;
      case 4: return AMDGPU::TTMP_128RegClassID;
      case 8: return AMDGPU::TTMP_256RegClassID;
      case 16: return AMDGPU::TTMP_512RegClassID;
    }
  } else if (Is == IS_SGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::SGPR_32RegClassID;
      case 2: return AMDGPU::SGPR_64RegClassID;
      case 3: return AMDGPU::SGPR_96RegClassID;
      case 4: return AMDGPU::SGPR_128RegClassID;
      case 5: return AMDGPU::SGPR_160RegClassID;
      case 6: return AMDGPU::SGPR_192RegClassID;
      case 8: return AMDGPU::SGPR_256RegClassID;
      case 16: return AMDGPU::SGPR_512RegClassID;
    }
  } else if (Is == IS_AGPR) {
    switch (RegWidth) {
      default: return -1;
      case 1: return AMDGPU::AGPR_32RegClassID;
      case 2: return AMDGPU::AReg_64RegClassID;
      case 3: return AMDGPU::AReg_96RegClassID;
      case 4: return AMDGPU::AReg_128RegClassID;
      case 5: return AMDGPU::AReg_160RegClassID;
      case 6: return AMDGPU::AReg_192RegClassID;
      case 8: return AMDGPU::AReg_256RegClassID;
      case 16: return AMDGPU::AReg_512RegClassID;
      case 32: return AMDGPU::AReg_1024RegClassID;
    }
  }
  // Special registers and unknown kinds have no regular register class.
  return -1;
}
   2235 
// Map a special-register mnemonic (including aliases such as "src_scc" for
// "scc") to its register, or NoRegister if the name is not special.
static unsigned getSpecialRegForName(StringRef RegName) {
  return StringSwitch<unsigned>(RegName)
    .Case("exec", AMDGPU::EXEC)
    .Case("vcc", AMDGPU::VCC)
    .Case("flat_scratch", AMDGPU::FLAT_SCR)
    .Case("xnack_mask", AMDGPU::XNACK_MASK)
    .Case("shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("src_shared_base", AMDGPU::SRC_SHARED_BASE)
    .Case("shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("src_shared_limit", AMDGPU::SRC_SHARED_LIMIT)
    .Case("private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("src_private_base", AMDGPU::SRC_PRIVATE_BASE)
    .Case("private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("src_private_limit", AMDGPU::SRC_PRIVATE_LIMIT)
    .Case("pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("src_pops_exiting_wave_id", AMDGPU::SRC_POPS_EXITING_WAVE_ID)
    .Case("lds_direct", AMDGPU::LDS_DIRECT)
    .Case("src_lds_direct", AMDGPU::LDS_DIRECT)
    .Case("m0", AMDGPU::M0)
    .Case("vccz", AMDGPU::SRC_VCCZ)
    .Case("src_vccz", AMDGPU::SRC_VCCZ)
    .Case("execz", AMDGPU::SRC_EXECZ)
    .Case("src_execz", AMDGPU::SRC_EXECZ)
    .Case("scc", AMDGPU::SRC_SCC)
    .Case("src_scc", AMDGPU::SRC_SCC)
    .Case("tba", AMDGPU::TBA)
    .Case("tma", AMDGPU::TMA)
    .Case("flat_scratch_lo", AMDGPU::FLAT_SCR_LO)
    .Case("flat_scratch_hi", AMDGPU::FLAT_SCR_HI)
    .Case("xnack_mask_lo", AMDGPU::XNACK_MASK_LO)
    .Case("xnack_mask_hi", AMDGPU::XNACK_MASK_HI)
    .Case("vcc_lo", AMDGPU::VCC_LO)
    .Case("vcc_hi", AMDGPU::VCC_HI)
    .Case("exec_lo", AMDGPU::EXEC_LO)
    .Case("exec_hi", AMDGPU::EXEC_HI)
    .Case("tma_lo", AMDGPU::TMA_LO)
    .Case("tma_hi", AMDGPU::TMA_HI)
    .Case("tba_lo", AMDGPU::TBA_LO)
    .Case("tba_hi", AMDGPU::TBA_HI)
    .Case("pc", AMDGPU::PC_REG)
    .Case("null", AMDGPU::SGPR_NULL)
    .Default(AMDGPU::NoRegister);
}
   2279 
// Parse a register operand, reporting its source range via StartLoc/EndLoc.
// Returns true on failure (MCTargetAsmParser convention).
// NOTE(review): RestoreOnFailure is accepted but never consulted in this
// body — confirm callers rely only on tryParseRegister's pending-error
// handling rather than on lexer rewind.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc, bool RestoreOnFailure) {
  auto R = parseRegister();
  if (!R) return true;
  assert(R->isReg());
  RegNo = R->getReg();
  StartLoc = R->getStartLoc();
  EndLoc = R->getEndLoc();
  return false;
}
   2290 
// Three-argument overload required by MCTargetAsmParser; forwards with
// RestoreOnFailure disabled.
bool AMDGPUAsmParser::ParseRegister(unsigned &RegNo, SMLoc &StartLoc,
                                    SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
   2295 
   2296 OperandMatchResultTy AMDGPUAsmParser::tryParseRegister(unsigned &RegNo,
   2297                                                        SMLoc &StartLoc,
   2298                                                        SMLoc &EndLoc) {
   2299   bool Result =
   2300       ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
   2301   bool PendingErrors = getParser().hasPendingError();
   2302   getParser().clearPendingErrors();
   2303   if (PendingErrors)
   2304     return MatchOperand_ParseFail;
   2305   if (Result)
   2306     return MatchOperand_NoMatch;
   2307   return MatchOperand_Success;
   2308 }
   2309 
   2310 bool AMDGPUAsmParser::AddNextRegisterToList(unsigned &Reg, unsigned &RegWidth,
   2311                                             RegisterKind RegKind, unsigned Reg1,
   2312                                             SMLoc Loc) {
   2313   switch (RegKind) {
   2314   case IS_SPECIAL:
   2315     if (Reg == AMDGPU::EXEC_LO && Reg1 == AMDGPU::EXEC_HI) {
   2316       Reg = AMDGPU::EXEC;
   2317       RegWidth = 2;
   2318       return true;
   2319     }
   2320     if (Reg == AMDGPU::FLAT_SCR_LO && Reg1 == AMDGPU::FLAT_SCR_HI) {
   2321       Reg = AMDGPU::FLAT_SCR;
   2322       RegWidth = 2;
   2323       return true;
   2324     }
   2325     if (Reg == AMDGPU::XNACK_MASK_LO && Reg1 == AMDGPU::XNACK_MASK_HI) {
   2326       Reg = AMDGPU::XNACK_MASK;
   2327       RegWidth = 2;
   2328       return true;
   2329     }
   2330     if (Reg == AMDGPU::VCC_LO && Reg1 == AMDGPU::VCC_HI) {
   2331       Reg = AMDGPU::VCC;
   2332       RegWidth = 2;
   2333       return true;
   2334     }
   2335     if (Reg == AMDGPU::TBA_LO && Reg1 == AMDGPU::TBA_HI) {
   2336       Reg = AMDGPU::TBA;
   2337       RegWidth = 2;
   2338       return true;
   2339     }
   2340     if (Reg == AMDGPU::TMA_LO && Reg1 == AMDGPU::TMA_HI) {
   2341       Reg = AMDGPU::TMA;
   2342       RegWidth = 2;
   2343       return true;
   2344     }
   2345     Error(Loc, "register does not fit in the list");
   2346     return false;
   2347   case IS_VGPR:
   2348   case IS_SGPR:
   2349   case IS_AGPR:
   2350   case IS_TTMP:
   2351     if (Reg1 != Reg + RegWidth) {
   2352       Error(Loc, "registers in a list must have consecutive indices");
   2353       return false;
   2354     }
   2355     RegWidth++;
   2356     return true;
   2357   default:
   2358     llvm_unreachable("unexpected register kind");
   2359   }
   2360 }
   2361 
// Name prefix and kind of a regular (indexed) register family.
struct RegInfo {
  StringLiteral Name;
  RegisterKind Kind;
};
   2366 
// Recognized regular-register name prefixes. Lookup uses
// StringRef::startswith (see getRegularRegInfo), so a longer prefix must
// precede any prefix of itself: "acc" must come before "a".
static constexpr RegInfo RegularRegisters[] = {
  {{"v"},    IS_VGPR},
  {{"s"},    IS_SGPR},
  {{"ttmp"}, IS_TTMP},
  {{"acc"},  IS_AGPR},
  {{"a"},    IS_AGPR},
};
   2374 
   2375 static bool isRegularReg(RegisterKind Kind) {
   2376   return Kind == IS_VGPR ||
   2377          Kind == IS_SGPR ||
   2378          Kind == IS_TTMP ||
   2379          Kind == IS_AGPR;
   2380 }
   2381 
   2382 static const RegInfo* getRegularRegInfo(StringRef Str) {
   2383   for (const RegInfo &Reg : RegularRegisters)
   2384     if (Str.startswith(Reg.Name))
   2385       return &Reg;
   2386   return nullptr;
   2387 }
   2388 
   2389 static bool getRegNum(StringRef Str, unsigned& Num) {
   2390   return !Str.getAsInteger(10, Num);
   2391 }
   2392 
   2393 bool
   2394 AMDGPUAsmParser::isRegister(const AsmToken &Token,
   2395                             const AsmToken &NextToken) const {
   2396 
   2397   // A list of consecutive registers: [s0,s1,s2,s3]
   2398   if (Token.is(AsmToken::LBrac))
   2399     return true;
   2400 
   2401   if (!Token.is(AsmToken::Identifier))
   2402     return false;
   2403 
   2404   // A single register like s0 or a range of registers like s[0:1]
   2405 
   2406   StringRef Str = Token.getString();
   2407   const RegInfo *Reg = getRegularRegInfo(Str);
   2408   if (Reg) {
   2409     StringRef RegName = Reg->Name;
   2410     StringRef RegSuffix = Str.substr(RegName.size());
   2411     if (!RegSuffix.empty()) {
   2412       unsigned Num;
   2413       // A single register with an index: rXX
   2414       if (getRegNum(RegSuffix, Num))
   2415         return true;
   2416     } else {
   2417       // A range of registers: r[XX:YY].
   2418       if (NextToken.is(AsmToken::LBrac))
   2419         return true;
   2420     }
   2421   }
   2422 
   2423   return getSpecialRegForName(Str) != AMDGPU::NoRegister;
   2424 }
   2425 
bool
AMDGPUAsmParser::isRegister()
{
  // Convenience overload: test the current token and one-token lookahead.
  return isRegister(getToken(), peekToken());
}
   2431 
   2432 unsigned
   2433 AMDGPUAsmParser::getRegularReg(RegisterKind RegKind,
   2434                                unsigned RegNum,
   2435                                unsigned RegWidth,
   2436                                SMLoc Loc) {
   2437 
   2438   assert(isRegularReg(RegKind));
   2439 
   2440   unsigned AlignSize = 1;
   2441   if (RegKind == IS_SGPR || RegKind == IS_TTMP) {
   2442     // SGPR and TTMP registers must be aligned.
   2443     // Max required alignment is 4 dwords.
   2444     AlignSize = std::min(RegWidth, 4u);
   2445   }
   2446 
   2447   if (RegNum % AlignSize != 0) {
   2448     Error(Loc, "invalid register alignment");
   2449     return AMDGPU::NoRegister;
   2450   }
   2451 
   2452   unsigned RegIdx = RegNum / AlignSize;
   2453   int RCID = getRegClass(RegKind, RegWidth);
   2454   if (RCID == -1) {
   2455     Error(Loc, "invalid or unsupported register size");
   2456     return AMDGPU::NoRegister;
   2457   }
   2458 
   2459   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
   2460   const MCRegisterClass RC = TRI->getRegClass(RCID);
   2461   if (RegIdx >= RC.getNumRegs()) {
   2462     Error(Loc, "register index is out of range");
   2463     return AMDGPU::NoRegister;
   2464   }
   2465 
   2466   return RC.getRegister(RegIdx);
   2467 }
   2468 
// Parse a bracketed register index or range: "[XX]" or "[XX:YY]".
// On success Num holds the first index and Width the number of
// registers covered; on failure a diagnostic is emitted and false
// is returned.
bool
AMDGPUAsmParser::ParseRegRange(unsigned& Num, unsigned& Width) {
  int64_t RegLo, RegHi;
  if (!skipToken(AsmToken::LBrac, "missing register index"))
    return false;

  SMLoc FirstIdxLoc = getLoc();
  SMLoc SecondIdxLoc;

  if (!parseExpr(RegLo))
    return false;

  // The ":YY" part is optional; a single index means a width of 1.
  if (trySkipToken(AsmToken::Colon)) {
    SecondIdxLoc = getLoc();
    if (!parseExpr(RegHi))
      return false;
  } else {
    RegHi = RegLo;
  }

  if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
    return false;

  // Indices are parsed as 64-bit expressions; reject anything that does
  // not fit into 32 bits before narrowing.
  if (!isUInt<32>(RegLo)) {
    Error(FirstIdxLoc, "invalid register index");
    return false;
  }

  if (!isUInt<32>(RegHi)) {
    Error(SecondIdxLoc, "invalid register index");
    return false;
  }

  if (RegLo > RegHi) {
    Error(FirstIdxLoc, "first register index should not exceed second index");
    return false;
  }

  Num = static_cast<unsigned>(RegLo);
  Width = (RegHi - RegLo) + 1;
  return true;
}
   2511 
   2512 unsigned AMDGPUAsmParser::ParseSpecialReg(RegisterKind &RegKind,
   2513                                           unsigned &RegNum, unsigned &RegWidth,
   2514                                           SmallVectorImpl<AsmToken> &Tokens) {
   2515   assert(isToken(AsmToken::Identifier));
   2516   unsigned Reg = getSpecialRegForName(getTokenStr());
   2517   if (Reg) {
   2518     RegNum = 0;
   2519     RegWidth = 1;
   2520     RegKind = IS_SPECIAL;
   2521     Tokens.push_back(getToken());
   2522     lex(); // skip register name
   2523   }
   2524   return Reg;
   2525 }
   2526 
// Parse a regular register reference: a prefixed index like "v0"/"s15"
// or a prefixed range like "v[0:3]". On success returns the MC register
// and fills RegKind/RegNum/RegWidth; otherwise emits a diagnostic and
// returns AMDGPU::NoRegister. Consumed tokens are appended to Tokens.
unsigned AMDGPUAsmParser::ParseRegularReg(RegisterKind &RegKind,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  assert(isToken(AsmToken::Identifier));
  StringRef RegName = getTokenStr();
  auto Loc = getLoc();

  const RegInfo *RI = getRegularRegInfo(RegName);
  if (!RI) {
    Error(Loc, "invalid register name");
    return AMDGPU::NoRegister;
  }

  Tokens.push_back(getToken());
  lex(); // skip register name

  RegKind = RI->Kind;
  StringRef RegSuffix = RegName.substr(RI->Name.size());
  if (!RegSuffix.empty()) {
    // Single 32-bit register: vXX.
    if (!getRegNum(RegSuffix, RegNum)) {
      Error(Loc, "invalid register index");
      return AMDGPU::NoRegister;
    }
    RegWidth = 1;
  } else {
    // Range of registers: v[XX:YY]. ":YY" is optional.
    if (!ParseRegRange(RegNum, RegWidth))
      return AMDGPU::NoRegister;
  }

  return getRegularReg(RegKind, RegNum, RegWidth, Loc);
}
   2560 
   2561 unsigned AMDGPUAsmParser::ParseRegList(RegisterKind &RegKind, unsigned &RegNum,
   2562                                        unsigned &RegWidth,
   2563                                        SmallVectorImpl<AsmToken> &Tokens) {
   2564   unsigned Reg = AMDGPU::NoRegister;
   2565   auto ListLoc = getLoc();
   2566 
   2567   if (!skipToken(AsmToken::LBrac,
   2568                  "expected a register or a list of registers")) {
   2569     return AMDGPU::NoRegister;
   2570   }
   2571 
   2572   // List of consecutive registers, e.g.: [s0,s1,s2,s3]
   2573 
   2574   auto Loc = getLoc();
   2575   if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth))
   2576     return AMDGPU::NoRegister;
   2577   if (RegWidth != 1) {
   2578     Error(Loc, "expected a single 32-bit register");
   2579     return AMDGPU::NoRegister;
   2580   }
   2581 
   2582   for (; trySkipToken(AsmToken::Comma); ) {
   2583     RegisterKind NextRegKind;
   2584     unsigned NextReg, NextRegNum, NextRegWidth;
   2585     Loc = getLoc();
   2586 
   2587     if (!ParseAMDGPURegister(NextRegKind, NextReg,
   2588                              NextRegNum, NextRegWidth,
   2589                              Tokens)) {
   2590       return AMDGPU::NoRegister;
   2591     }
   2592     if (NextRegWidth != 1) {
   2593       Error(Loc, "expected a single 32-bit register");
   2594       return AMDGPU::NoRegister;
   2595     }
   2596     if (NextRegKind != RegKind) {
   2597       Error(Loc, "registers in a list must be of the same kind");
   2598       return AMDGPU::NoRegister;
   2599     }
   2600     if (!AddNextRegisterToList(Reg, RegWidth, RegKind, NextReg, Loc))
   2601       return AMDGPU::NoRegister;
   2602   }
   2603 
   2604   if (!skipToken(AsmToken::RBrac,
   2605                  "expected a comma or a closing square bracket")) {
   2606     return AMDGPU::NoRegister;
   2607   }
   2608 
   2609   if (isRegularReg(RegKind))
   2610     Reg = getRegularReg(RegKind, RegNum, RegWidth, ListLoc);
   2611 
   2612   return Reg;
   2613 }
   2614 
// Top-level register parser: dispatches to special-name, regular, or
// bracketed-list parsing based on the current token, then verifies the
// register exists on the current subtarget. Returns false (with a
// pending diagnostic) on failure.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          SmallVectorImpl<AsmToken> &Tokens) {
  auto Loc = getLoc();
  Reg = AMDGPU::NoRegister;

  if (isToken(AsmToken::Identifier)) {
    Reg = ParseSpecialReg(RegKind, RegNum, RegWidth, Tokens);
    if (Reg == AMDGPU::NoRegister)
      Reg = ParseRegularReg(RegKind, RegNum, RegWidth, Tokens);
  } else {
    Reg = ParseRegList(RegKind, RegNum, RegWidth, Tokens);
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();
  if (Reg == AMDGPU::NoRegister) {
    // Every failure path above is expected to have reported an error.
    assert(Parser.hasPendingError());
    return false;
  }

  if (!subtargetHasRegister(*TRI, Reg)) {
    if (Reg == AMDGPU::SGPR_NULL) {
      Error(Loc, "'null' operand is not supported on this GPU");
    } else {
      Error(Loc, "register not available on this GPU");
    }
    return false;
  }

  return true;
}
   2646 
// Convenience overload that collects consumed tokens locally.
// NOTE(review): when RestoreOnFailure is set, the consumed tokens are
// unlexed on the SUCCESS path — presumably so that probing callers do
// not consume input; confirm against the callers of this overload.
bool AMDGPUAsmParser::ParseAMDGPURegister(RegisterKind &RegKind, unsigned &Reg,
                                          unsigned &RegNum, unsigned &RegWidth,
                                          bool RestoreOnFailure /*=false*/) {
  Reg = AMDGPU::NoRegister;

  SmallVector<AsmToken, 1> Tokens;
  if (ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth, Tokens)) {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        getLexer().UnLex(Tokens.pop_back_val());
      }
    }
    return true;
  }
  return false;
}
   2663 
   2664 Optional<StringRef>
   2665 AMDGPUAsmParser::getGprCountSymbolName(RegisterKind RegKind) {
   2666   switch (RegKind) {
   2667   case IS_VGPR:
   2668     return StringRef(".amdgcn.next_free_vgpr");
   2669   case IS_SGPR:
   2670     return StringRef(".amdgcn.next_free_sgpr");
   2671   default:
   2672     return None;
   2673   }
   2674 }
   2675 
// Define the GPR-count tracking symbol for RegKind and reset it to 0.
void AMDGPUAsmParser::initializeGprCountSymbol(RegisterKind RegKind) {
  auto SymbolName = getGprCountSymbolName(RegKind);
  assert(SymbolName && "initializing invalid register kind");
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);
  Sym->setVariableValue(MCConstantExpr::create(0, getContext()));
}
   2682 
// Raise the .amdgcn.next_free_{v,s}gpr symbol so it stays one past the
// highest dword register index seen so far. Returns false (after
// reporting an error) if the symbol was redefined incompatibly.
bool AMDGPUAsmParser::updateGprCountSymbols(RegisterKind RegKind,
                                            unsigned DwordRegIndex,
                                            unsigned RegWidth) {
  // Symbols are only defined for GCN targets
  if (AMDGPU::getIsaVersion(getSTI().getCPU()).Major < 6)
    return true;

  auto SymbolName = getGprCountSymbolName(RegKind);
  if (!SymbolName)
    return true;
  MCSymbol *Sym = getContext().getOrCreateSymbol(*SymbolName);

  // Highest dword index touched by this register reference.
  int64_t NewMax = DwordRegIndex + RegWidth - 1;
  int64_t OldCount;

  if (!Sym->isVariable())
    return !Error(getLoc(),
                  ".amdgcn.next_free_{v,s}gpr symbols must be variable");
  if (!Sym->getVariableValue(false)->evaluateAsAbsolute(OldCount))
    return !Error(
        getLoc(),
        ".amdgcn.next_free_{v,s}gpr symbols must be absolute expressions");

  // The count only ever grows; smaller values are kept as-is.
  if (OldCount <= NewMax)
    Sym->setVariableValue(MCConstantExpr::create(NewMax + 1, getContext()));

  return true;
}
   2711 
// Parse a register operand and wrap it into an AMDGPUOperand. Also
// feeds register-usage bookkeeping: the HSA v3/v4 GPR-count symbols or
// the legacy kernel-scope tracking.
// NOTE(review): the RestoreOnFailure parameter is not forwarded to
// ParseAMDGPURegister here — verify whether it should be.
std::unique_ptr<AMDGPUOperand>
AMDGPUAsmParser::parseRegister(bool RestoreOnFailure) {
  const auto &Tok = getToken();
  SMLoc StartLoc = Tok.getLoc();
  SMLoc EndLoc = Tok.getEndLoc();
  RegisterKind RegKind;
  unsigned Reg, RegNum, RegWidth;

  if (!ParseAMDGPURegister(RegKind, Reg, RegNum, RegWidth)) {
    return nullptr;
  }
  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (!updateGprCountSymbols(RegKind, RegNum, RegWidth))
      return nullptr;
  } else
    KernelScope.usesRegister(RegKind, RegNum, RegWidth);
  return AMDGPUOperand::CreateReg(this, Reg, StartLoc, EndLoc);
}
   2730 
   2731 OperandMatchResultTy
   2732 AMDGPUAsmParser::parseImm(OperandVector &Operands, bool HasSP3AbsModifier) {
   2733   // TODO: add syntactic sugar for 1/(2*PI)
   2734 
   2735   assert(!isRegister());
   2736   assert(!isModifier());
   2737 
   2738   const auto& Tok = getToken();
   2739   const auto& NextTok = peekToken();
   2740   bool IsReal = Tok.is(AsmToken::Real);
   2741   SMLoc S = getLoc();
   2742   bool Negate = false;
   2743 
   2744   if (!IsReal && Tok.is(AsmToken::Minus) && NextTok.is(AsmToken::Real)) {
   2745     lex();
   2746     IsReal = true;
   2747     Negate = true;
   2748   }
   2749 
   2750   if (IsReal) {
   2751     // Floating-point expressions are not supported.
   2752     // Can only allow floating-point literals with an
   2753     // optional sign.
   2754 
   2755     StringRef Num = getTokenStr();
   2756     lex();
   2757 
   2758     APFloat RealVal(APFloat::IEEEdouble());
   2759     auto roundMode = APFloat::rmNearestTiesToEven;
   2760     if (errorToBool(RealVal.convertFromString(Num, roundMode).takeError())) {
   2761       return MatchOperand_ParseFail;
   2762     }
   2763     if (Negate)
   2764       RealVal.changeSign();
   2765 
   2766     Operands.push_back(
   2767       AMDGPUOperand::CreateImm(this, RealVal.bitcastToAPInt().getZExtValue(), S,
   2768                                AMDGPUOperand::ImmTyNone, true));
   2769 
   2770     return MatchOperand_Success;
   2771 
   2772   } else {
   2773     int64_t IntVal;
   2774     const MCExpr *Expr;
   2775     SMLoc S = getLoc();
   2776 
   2777     if (HasSP3AbsModifier) {
   2778       // This is a workaround for handling expressions
   2779       // as arguments of SP3 'abs' modifier, for example:
   2780       //     |1.0|
   2781       //     |-1|
   2782       //     |1+x|
   2783       // This syntax is not compatible with syntax of standard
   2784       // MC expressions (due to the trailing '|').
   2785       SMLoc EndLoc;
   2786       if (getParser().parsePrimaryExpr(Expr, EndLoc, nullptr))
   2787         return MatchOperand_ParseFail;
   2788     } else {
   2789       if (Parser.parseExpression(Expr))
   2790         return MatchOperand_ParseFail;
   2791     }
   2792 
   2793     if (Expr->evaluateAsAbsolute(IntVal)) {
   2794       Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
   2795     } else {
   2796       Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
   2797     }
   2798 
   2799     return MatchOperand_Success;
   2800   }
   2801 
   2802   return MatchOperand_NoMatch;
   2803 }
   2804 
   2805 OperandMatchResultTy
   2806 AMDGPUAsmParser::parseReg(OperandVector &Operands) {
   2807   if (!isRegister())
   2808     return MatchOperand_NoMatch;
   2809 
   2810   if (auto R = parseRegister()) {
   2811     assert(R->isReg());
   2812     Operands.push_back(std::move(R));
   2813     return MatchOperand_Success;
   2814   }
   2815   return MatchOperand_ParseFail;
   2816 }
   2817 
   2818 OperandMatchResultTy
   2819 AMDGPUAsmParser::parseRegOrImm(OperandVector &Operands, bool HasSP3AbsMod) {
   2820   auto res = parseReg(Operands);
   2821   if (res != MatchOperand_NoMatch) {
   2822     return res;
   2823   } else if (isModifier()) {
   2824     return MatchOperand_NoMatch;
   2825   } else {
   2826     return parseImm(Operands, HasSP3AbsMod);
   2827   }
   2828 }
   2829 
   2830 bool
   2831 AMDGPUAsmParser::isNamedOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
   2832   if (Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::LParen)) {
   2833     const auto &str = Token.getString();
   2834     return str == "abs" || str == "neg" || str == "sext";
   2835   }
   2836   return false;
   2837 }
   2838 
bool
AMDGPUAsmParser::isOpcodeModifierWithVal(const AsmToken &Token, const AsmToken &NextToken) const {
  // An opcode modifier carrying a value has the shape "name:...".
  return Token.is(AsmToken::Identifier) && NextToken.is(AsmToken::Colon);
}
   2843 
bool
AMDGPUAsmParser::isOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  // Either a named modifier like abs(...) or the SP3 '|...|' form.
  return isNamedOperandModifier(Token, NextToken) || Token.is(AsmToken::Pipe);
}
   2848 
bool
AMDGPUAsmParser::isRegOrOperandModifier(const AsmToken &Token, const AsmToken &NextToken) const {
  // Used for lookahead after a '-': a register or a modifier may follow.
  return isRegister(Token, NextToken) || isOperandModifier(Token, NextToken);
}
   2853 
   2854 // Check if this is an operand modifier or an opcode modifier
   2855 // which may look like an expression but it is not. We should
   2856 // avoid parsing these modifiers as expressions. Currently
   2857 // recognized sequences are:
   2858 //   |...|
   2859 //   abs(...)
   2860 //   neg(...)
   2861 //   sext(...)
   2862 //   -reg
   2863 //   -|...|
   2864 //   -abs(...)
   2865 //   name:...
   2866 // Note that simple opcode modifiers like 'gds' may be parsed as
   2867 // expressions; this is a special case. See getExpressionAsToken.
   2868 //
   2869 bool
   2870 AMDGPUAsmParser::isModifier() {
   2871 
   2872   AsmToken Tok = getToken();
   2873   AsmToken NextToken[2];
   2874   peekTokens(NextToken);
   2875 
   2876   return isOperandModifier(Tok, NextToken[0]) ||
   2877          (Tok.is(AsmToken::Minus) && isRegOrOperandModifier(NextToken[0], NextToken[1])) ||
   2878          isOpcodeModifierWithVal(Tok, NextToken[0]);
   2879 }
   2880 
// Check if the current token is an SP3 'neg' modifier.
// Currently this modifier is allowed in the following context:
//
// 1. Before a register, e.g. "-v0", "-v[...]" or "-[v0,v1]".
// 2. Before an 'abs' modifier: -abs(...)
// 3. Before an SP3 'abs' modifier: -|...|
//
// In all other cases "-" is handled as a part
// of an expression that follows the sign.
//
// Note: When "-" is followed by an integer literal,
// this is interpreted as integer negation rather
// than a floating-point NEG modifier applied to N.
// Besides being counter-intuitive, such use of a floating-point
// NEG modifier would have resulted in different meaning
// of integer literals used with VOP1/2/C and VOP3,
// for example:
//    v_exp_f32_e32 v5, -1 // VOP1: src0 = 0xFFFFFFFF
//    v_exp_f32_e64 v5, -1 // VOP3: src0 = 0x80000001
// Negative fp literals with preceding "-" are
// handled likewise for uniformity
//
bool
AMDGPUAsmParser::parseSP3NegModifier() {

  AsmToken NextToken[2];
  peekTokens(NextToken);

  // Consume the '-' only when what follows can legally take a NEG
  // modifier; otherwise leave it for expression parsing.
  if (isToken(AsmToken::Minus) &&
      (isRegister(NextToken[0], NextToken[1]) ||
       NextToken[0].is(AsmToken::Pipe) ||
       isId(NextToken[0], "abs"))) {
    lex();
    return true;
  }

  return false;
}
   2919 
// Parse an operand that may carry floating-point input modifiers:
// 'neg(...)', 'abs(...)', and the SP3 spellings '-' and '|...|'.
// Parsed modifiers are attached to the operand just pushed.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithFPInputMods(OperandVector &Operands,
                                              bool AllowImm) {
  bool Neg, SP3Neg;
  bool Abs, SP3Abs;
  SMLoc Loc;

  // Disable ambiguous constructs like '--1' etc. Should use neg(-1) instead.
  if (isToken(AsmToken::Minus) && peekToken().is(AsmToken::Minus)) {
    Error(getLoc(), "invalid syntax, expected 'neg' modifier");
    return MatchOperand_ParseFail;
  }

  SP3Neg = parseSP3NegModifier();

  Loc = getLoc();
  Neg = trySkipId("neg");
  // The two negation spellings are mutually exclusive.
  if (Neg && SP3Neg) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }
  if (Neg && !skipToken(AsmToken::LParen, "expected left paren after neg"))
    return MatchOperand_ParseFail;

  Abs = trySkipId("abs");
  if (Abs && !skipToken(AsmToken::LParen, "expected left paren after abs"))
    return MatchOperand_ParseFail;

  Loc = getLoc();
  SP3Abs = trySkipToken(AsmToken::Pipe);
  // As are the two abs spellings.
  if (Abs && SP3Abs) {
    Error(Loc, "expected register or immediate");
    return MatchOperand_ParseFail;
  }

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands, SP3Abs);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once any modifier has been consumed, a failed operand parse is a
    // hard error rather than a no-match.
    return (SP3Neg || Neg || SP3Abs || Abs)? MatchOperand_ParseFail : Res;
  }

  if (SP3Abs && !skipToken(AsmToken::Pipe, "expected vertical bar"))
    return MatchOperand_ParseFail;
  if (Abs && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;
  if (Neg && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Abs = Abs || SP3Abs;
  Mods.Neg = Neg || SP3Neg;

  if (Mods.hasFPModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      // Modifiers cannot be applied to an unresolved symbolic expression.
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }
  return MatchOperand_Success;
}
   2986 
// Parse an operand that may carry the integer input modifier 'sext(...)'.
// The parsed modifier is attached to the operand just pushed.
OperandMatchResultTy
AMDGPUAsmParser::parseRegOrImmWithIntInputMods(OperandVector &Operands,
                                               bool AllowImm) {
  bool Sext = trySkipId("sext");
  if (Sext && !skipToken(AsmToken::LParen, "expected left paren after sext"))
    return MatchOperand_ParseFail;

  OperandMatchResultTy Res;
  if (AllowImm) {
    Res = parseRegOrImm(Operands);
  } else {
    Res = parseReg(Operands);
  }
  if (Res != MatchOperand_Success) {
    // Once 'sext(' has been consumed, a failed operand parse is a hard
    // error rather than a no-match.
    return Sext? MatchOperand_ParseFail : Res;
  }

  if (Sext && !skipToken(AsmToken::RParen, "expected closing parentheses"))
    return MatchOperand_ParseFail;

  AMDGPUOperand::Modifiers Mods;
  Mods.Sext = Sext;

  if (Mods.hasIntModifiers()) {
    AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands.back());
    if (Op.isExpr()) {
      // Modifiers cannot be applied to an unresolved symbolic expression.
      Error(Op.getStartLoc(), "expected an absolute expression");
      return MatchOperand_ParseFail;
    }
    Op.setModifiers(Mods);
  }

  return MatchOperand_Success;
}
   3021 
// Same as parseRegOrImmWithFPInputMods, but immediates are rejected.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithFPInputMods(OperandVector &Operands) {
  return parseRegOrImmWithFPInputMods(Operands, false);
}
   3026 
// Same as parseRegOrImmWithIntInputMods, but immediates are rejected.
OperandMatchResultTy
AMDGPUAsmParser::parseRegWithIntInputMods(OperandVector &Operands) {
  return parseRegOrImmWithIntInputMods(Operands, false);
}
   3031 
   3032 OperandMatchResultTy AMDGPUAsmParser::parseVReg32OrOff(OperandVector &Operands) {
   3033   auto Loc = getLoc();
   3034   if (trySkipId("off")) {
   3035     Operands.push_back(AMDGPUOperand::CreateImm(this, 0, Loc,
   3036                                                 AMDGPUOperand::ImmTyOff, false));
   3037     return MatchOperand_Success;
   3038   }
   3039 
   3040   if (!isRegister())
   3041     return MatchOperand_NoMatch;
   3042 
   3043   std::unique_ptr<AMDGPUOperand> Reg = parseRegister();
   3044   if (Reg) {
   3045     Operands.push_back(std::move(Reg));
   3046     return MatchOperand_Success;
   3047   }
   3048 
   3049   return MatchOperand_ParseFail;
   3050 
   3051 }
   3052 
// Reject matches that conflict with a user-forced encoding (_e32/_e64,
// dpp, sdwa) and apply target-specific match preferences.
unsigned AMDGPUAsmParser::checkTargetMatchPredicate(MCInst &Inst) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;

  // The forced variant and the matched instruction's encoding must agree.
  if ((getForcedEncodingSize() == 32 && (TSFlags & SIInstrFlags::VOP3)) ||
      (getForcedEncodingSize() == 64 && !(TSFlags & SIInstrFlags::VOP3)) ||
      (isForcedDPP() && !(TSFlags & SIInstrFlags::DPP)) ||
      (isForcedSDWA() && !(TSFlags & SIInstrFlags::SDWA)) )
    return Match_InvalidOperand;

  // Prefer the 32-bit form when the instruction asks for it and the user
  // did not explicitly force the 64-bit encoding.
  if ((TSFlags & SIInstrFlags::VOP3) &&
      (TSFlags & SIInstrFlags::VOPAsmPrefer32Bit) &&
      getForcedEncodingSize() != 64)
    return Match_PreferE32;

  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi) {
    // v_mac_f32/16 allow only dst_sel == DWORD;
    auto OpNum =
        AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::dst_sel);
    const auto &Op = Inst.getOperand(OpNum);
    if (!Op.isImm() || Op.getImm() != AMDGPU::SDWA::SdwaSel::DWORD) {
      return Match_InvalidOperand;
    }
  }

  return Match_Success;
}
   3080 
// All assembler variants, used when no specific encoding is forced.
// The array is a function-local static so the returned view stays valid.
static ArrayRef<unsigned> getAllVariants() {
  static const unsigned Variants[] = {
    AMDGPUAsmVariants::DEFAULT, AMDGPUAsmVariants::VOP3,
    AMDGPUAsmVariants::SDWA, AMDGPUAsmVariants::SDWA9, AMDGPUAsmVariants::DPP
  };

  return makeArrayRef(Variants);
}
   3089 
// What asm variants we should check
// Each candidate array is a function-local static so the returned
// ArrayRef view remains valid after the function returns.
ArrayRef<unsigned> AMDGPUAsmParser::getMatchedVariants() const {
  if (getForcedEncodingSize() == 32) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DEFAULT};
    return makeArrayRef(Variants);
  }

  if (isForcedVOP3()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::VOP3};
    return makeArrayRef(Variants);
  }

  if (isForcedSDWA()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::SDWA,
                                        AMDGPUAsmVariants::SDWA9};
    return makeArrayRef(Variants);
  }

  if (isForcedDPP()) {
    static const unsigned Variants[] = {AMDGPUAsmVariants::DPP};
    return makeArrayRef(Variants);
  }

  // No encoding forced: try everything.
  return getAllVariants();
}
   3115 
   3116 StringRef AMDGPUAsmParser::getMatchedVariantName() const {
   3117   if (getForcedEncodingSize() == 32)
   3118     return "e32";
   3119 
   3120   if (isForcedVOP3())
   3121     return "e64";
   3122 
   3123   if (isForcedSDWA())
   3124     return "sdwa";
   3125 
   3126   if (isForcedDPP())
   3127     return "dpp";
   3128 
   3129   return "";
   3130 }
   3131 
   3132 unsigned AMDGPUAsmParser::findImplicitSGPRReadInVOP(const MCInst &Inst) const {
   3133   const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
   3134   const unsigned Num = Desc.getNumImplicitUses();
   3135   for (unsigned i = 0; i < Num; ++i) {
   3136     unsigned Reg = Desc.ImplicitUses[i];
   3137     switch (Reg) {
   3138     case AMDGPU::FLAT_SCR:
   3139     case AMDGPU::VCC:
   3140     case AMDGPU::VCC_LO:
   3141     case AMDGPU::VCC_HI:
   3142     case AMDGPU::M0:
   3143       return Reg;
   3144     default:
   3145       break;
   3146     }
   3147   }
   3148   return AMDGPU::NoRegister;
   3149 }
   3150 
// NB: This code is correct only when used to check constant
// bus limitations because GFX7 support no f16 inline constants.
// Note that there are no cases when a GFX7 opcode violates
// constant bus limitations due to the use of an f16 constant.
// NOTE(review): assumes the operand at OpIdx is an immediate —
// callers such as usesConstantBus check MO.isImm() first; confirm
// for any new caller.
bool AMDGPUAsmParser::isInlineConstant(const MCInst &Inst,
                                       unsigned OpIdx) const {
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());

  if (!AMDGPU::isSISrcOperand(Desc, OpIdx)) {
    return false;
  }

  const MCOperand &MO = Inst.getOperand(OpIdx);

  int64_t Val = MO.getImm();
  auto OpSize = AMDGPU::getOperandSize(Desc, OpIdx);

  switch (OpSize) { // expected operand size
  case 8:
    return AMDGPU::isInlinableLiteral64(Val, hasInv2PiInlineImm());
  case 4:
    return AMDGPU::isInlinableLiteral32(Val, hasInv2PiInlineImm());
  case 2: {
    // 16-bit operands: the inline-constant rules differ between plain
    // integer, packed integer and packed floating-point operand types.
    const unsigned OperandType = Desc.OpInfo[OpIdx].OperandType;
    if (OperandType == AMDGPU::OPERAND_REG_IMM_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_C_INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_INT16)
      return AMDGPU::isInlinableIntLiteral(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2INT16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2INT16)
      return AMDGPU::isInlinableIntLiteralV216(Val);

    if (OperandType == AMDGPU::OPERAND_REG_INLINE_C_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_INLINE_AC_V2FP16 ||
        OperandType == AMDGPU::OPERAND_REG_IMM_V2FP16)
      return AMDGPU::isInlinableLiteralV216(Val, hasInv2PiInlineImm());

    return AMDGPU::isInlinableLiteral16(Val, hasInv2PiInlineImm());
  }
  default:
    llvm_unreachable("invalid operand size");
  }
}
   3196 
// Number of scalar (constant-bus) operands an instruction may read:
// one on pre-GFX10 targets; on GFX10+ generally two, except for the
// 64-bit shifts listed below which remain limited to one.
unsigned AMDGPUAsmParser::getConstantBusLimit(unsigned Opcode) const {
  if (!isGFX10Plus())
    return 1;

  switch (Opcode) {
  // 64-bit shift instructions can use only one scalar value input
  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_LSHL_B64_e64:
  case AMDGPU::V_LSHR_B64_e64:
  case AMDGPU::V_ASHR_I64_e64:
    return 1;
  default:
    return 2;
  }
}
   3217 
   3218 bool AMDGPUAsmParser::usesConstantBus(const MCInst &Inst, unsigned OpIdx) {
   3219   const MCOperand &MO = Inst.getOperand(OpIdx);
   3220   if (MO.isImm()) {
   3221     return !isInlineConstant(Inst, OpIdx);
   3222   } else if (MO.isReg()) {
   3223     auto Reg = MO.getReg();
   3224     const MCRegisterInfo *TRI = getContext().getRegisterInfo();
   3225     auto PReg = mc2PseudoReg(Reg);
   3226     return isSGPR(PReg, TRI) && PReg != SGPR_NULL;
   3227   } else {
   3228     return true;
   3229   }
   3230 }
   3231 
// Verify that the instruction does not read more scalar values through the
// constant bus than the target allows (see getConstantBusLimit). SGPRs,
// implicit SGPR reads and literal/expression operands all occupy slots.
bool
AMDGPUAsmParser::validateConstantBusLimitations(const MCInst &Inst,
                                                const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  unsigned LastSGPR = AMDGPU::NoRegister; // Last SGPR seen, for diagnostics.
  unsigned ConstantBusUseCount = 0;
  unsigned NumLiterals = 0;
  unsigned LiteralSize; // Valid only when NumLiterals != 0.

  // Only these VALU-style encodings are subject to the check.
  if (Desc.TSFlags &
      (SIInstrFlags::VOPC |
       SIInstrFlags::VOP1 | SIInstrFlags::VOP2 |
       SIInstrFlags::VOP3 | SIInstrFlags::VOP3P |
       SIInstrFlags::SDWA)) {
    // Check special imm operands (used by madmk, etc)
    if (AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::imm) != -1) {
      ++ConstantBusUseCount;
    }

    // Each distinct SGPR counts once, even if used by several operands.
    SmallDenseSet<unsigned> SGPRsUsed;
    unsigned SGPRUsed = findImplicitSGPRReadInVOP(Inst);
    if (SGPRUsed != AMDGPU::NoRegister) {
      SGPRsUsed.insert(SGPRUsed);
      ++ConstantBusUseCount;
    }

    const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
    const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
    const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

    const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

    for (int OpIdx : OpIndices) {
      if (OpIdx == -1) break;

      const MCOperand &MO = Inst.getOperand(OpIdx);
      if (usesConstantBus(Inst, OpIdx)) {
        if (MO.isReg()) {
          LastSGPR = mc2PseudoReg(MO.getReg());
          // Pairs of registers with a partial intersections like these
          //   s0, s[0:1]
          //   flat_scratch_lo, flat_scratch
          //   flat_scratch_lo, flat_scratch_hi
          // are theoretically valid but they are disabled anyway.
          // Note that this code mimics SIInstrInfo::verifyInstruction
          if (!SGPRsUsed.count(LastSGPR)) {
            SGPRsUsed.insert(LastSGPR);
            ++ConstantBusUseCount;
          }
        } else { // Expression or a literal

          if (Desc.OpInfo[OpIdx].OperandType == MCOI::OPERAND_IMMEDIATE)
            continue; // special operand like VINTERP attr_chan

          // An instruction may use only one literal.
          // This has been validated on the previous step.
          // See validateVOP3Literal.
          // This literal may be used as more than one operand.
          // If all these operands are of the same size,
          // this literal counts as one scalar value.
          // Otherwise it counts as 2 scalar values.
          // See "GFX10 Shader Programming", section 3.6.2.3.

          unsigned Size = AMDGPU::getOperandSize(Desc, OpIdx);
          if (Size < 4) Size = 4;

          if (NumLiterals == 0) {
            NumLiterals = 1;
            LiteralSize = Size;
          } else if (LiteralSize != Size) {
            NumLiterals = 2;
          }
        }
      }
    }
  }
  ConstantBusUseCount += NumLiterals;

  if (ConstantBusUseCount <= getConstantBusLimit(Opcode))
    return true;

  // Point the diagnostic at whichever offending operand (literal or SGPR)
  // appears later in the source statement.
  SMLoc LitLoc = getLitLoc(Operands);
  SMLoc RegLoc = getRegLoc(LastSGPR, Operands);
  SMLoc Loc = (LitLoc.getPointer() < RegLoc.getPointer()) ? RegLoc : LitLoc;
  Error(Loc, "invalid operand (violates constant bus restrictions)");
  return false;
}
   3320 
// For instructions whose vdst operand carries an EARLY_CLOBBER constraint,
// check that the destination register does not overlap any source register.
bool
AMDGPUAsmParser::validateEarlyClobberLimitations(const MCInst &Inst,
                                                 const OperandVector &Operands) {
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // Only instructions with an early-clobber vdst need checking.
  const int DstIdx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::vdst);
  if (DstIdx == -1 ||
      Desc.getOperandConstraint(DstIdx, MCOI::EARLY_CLOBBER) == -1) {
    return true;
  }

  const MCRegisterInfo *TRI = getContext().getRegisterInfo();

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  assert(DstIdx != -1);
  const MCOperand &Dst = Inst.getOperand(DstIdx);
  assert(Dst.isReg());
  const unsigned DstReg = mc2PseudoReg(Dst.getReg());

  const int SrcIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  // Stop at the first missing source operand.
  for (int SrcIdx : SrcIndices) {
    if (SrcIdx == -1) break;
    const MCOperand &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg()) {
      const unsigned SrcReg = mc2PseudoReg(Src.getReg());
      // Any overlap (even partial) between dst and a source is rejected.
      if (isRegIntersect(DstReg, SrcReg, TRI)) {
        Error(getRegLoc(SrcReg, Operands),
          "destination must be different than all sources");
        return false;
      }
    }
  }

  return true;
}
   3361 
   3362 bool AMDGPUAsmParser::validateIntClampSupported(const MCInst &Inst) {
   3363 
   3364   const unsigned Opc = Inst.getOpcode();
   3365   const MCInstrDesc &Desc = MII.get(Opc);
   3366 
   3367   if ((Desc.TSFlags & SIInstrFlags::IntClamp) != 0 && !hasIntClamp()) {
   3368     int ClampIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp);
   3369     assert(ClampIdx != -1);
   3370     return Inst.getOperand(ClampIdx).getImm() == 0;
   3371   }
   3372 
   3373   return true;
   3374 }
   3375 
   3376 bool AMDGPUAsmParser::validateMIMGDataSize(const MCInst &Inst) {
   3377 
   3378   const unsigned Opc = Inst.getOpcode();
   3379   const MCInstrDesc &Desc = MII.get(Opc);
   3380 
   3381   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
   3382     return true;
   3383 
   3384   int VDataIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdata);
   3385   int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
   3386   int TFEIdx   = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::tfe);
   3387 
   3388   assert(VDataIdx != -1);
   3389 
   3390   if (DMaskIdx == -1 || TFEIdx == -1) // intersect_ray
   3391     return true;
   3392 
   3393   unsigned VDataSize = AMDGPU::getRegOperandSize(getMRI(), Desc, VDataIdx);
   3394   unsigned TFESize = (TFEIdx != -1 && Inst.getOperand(TFEIdx).getImm()) ? 1 : 0;
   3395   unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;
   3396   if (DMask == 0)
   3397     DMask = 1;
   3398 
   3399   unsigned DataSize =
   3400     (Desc.TSFlags & SIInstrFlags::Gather4) ? 4 : countPopulation(DMask);
   3401   if (hasPackedD16()) {
   3402     int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
   3403     if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm())
   3404       DataSize = (DataSize + 1) / 2;
   3405   }
   3406 
   3407   return (VDataSize / 4) == DataSize + TFESize;
   3408 }
   3409 
// Check that the number of address registers of a GFX10+ MIMG instruction
// matches the size required by its base opcode, dim and a16/g16 modes.
bool AMDGPUAsmParser::validateMIMGAddrSize(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0 || !isGFX10Plus())
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);

  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);
  int VAddr0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr0);
  int SrsrcIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::srsrc);
  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  int A16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::a16);

  assert(VAddr0Idx != -1);
  assert(SrsrcIdx != -1);
  assert(SrsrcIdx > VAddr0Idx);

  if (DimIdx == -1)
    return true; // intersect_ray

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);
  // In the NSA form each address component is a separate operand between
  // vaddr0 and srsrc; otherwise vaddr0 is a single register tuple whose
  // size (in dwords) is the address count.
  bool IsNSA = SrsrcIdx - VAddr0Idx > 1;
  unsigned VAddrSize =
      IsNSA ? SrsrcIdx - VAddr0Idx
            : AMDGPU::getRegOperandSize(getMRI(), Desc, VAddr0Idx) / 4;
  bool IsA16 = (A16Idx != -1 && Inst.getOperand(A16Idx).getImm());

  unsigned AddrSize =
      AMDGPU::getAddrSizeMIMGOp(BaseOpcode, DimInfo, IsA16, hasG16());

  // Non-NSA encodings round the required address size up to the next
  // available register tuple width (4, 8 or 16 dwords).
  if (!IsNSA) {
    if (AddrSize > 8)
      AddrSize = 16;
    else if (AddrSize > 4)
      AddrSize = 8;
  }

  return VAddrSize == AddrSize;
}
   3453 
// Check the dmask of a MIMG atomic: only 0x1, 0x3 and 0xf are acceptable.
bool AMDGPUAsmParser::validateMIMGAtomicDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;
  // Atomics are the MIMG instructions that both load and store.
  if (!Desc.mayLoad() || !Desc.mayStore())
    return true; // Not atomic

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // This is an incomplete check because image_atomic_cmpswap
  // may only use 0x3 and 0xf while other atomic operations
  // may use 0x1 and 0x3. However these limitations are
  // verified when we check that dmask matches dst size.
  return DMask == 0x1 || DMask == 0x3 || DMask == 0xf;
}
   3473 
// Check that a GATHER4 dmask selects exactly one component.
bool AMDGPUAsmParser::validateMIMGGatherDMask(const MCInst &Inst) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::Gather4) == 0)
    return true;

  int DMaskIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dmask);
  unsigned DMask = Inst.getOperand(DMaskIdx).getImm() & 0xf;

  // GATHER4 instructions use dmask in a different fashion compared to
  // other MIMG instructions. The only useful DMASK values are
  // 1=red, 2=green, 4=blue, 8=alpha. (e.g. 1 returns
  // (red,red,red,red) etc.) The ISA document doesn't mention
  // this.
  return DMask == 0x1 || DMask == 0x2 || DMask == 0x4 || DMask == 0x8;
}
   3492 
// For MIMG base opcodes that require an MSAA surface, check that the dim
// operand names an MSAA dimension.
bool AMDGPUAsmParser::validateMIMGMSAA(const MCInst &Inst) {
  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
    return true;

  const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(Opc);
  const AMDGPU::MIMGBaseOpcodeInfo *BaseOpcode =
      AMDGPU::getMIMGBaseOpcodeInfo(Info->BaseOpcode);

  // Only base opcodes flagged as MSAA are constrained.
  if (!BaseOpcode->MSAA)
    return true;

  int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
  assert(DimIdx != -1);

  unsigned Dim = Inst.getOperand(DimIdx).getImm();
  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByEncoding(Dim);

  return DimInfo->MSAA;
}
   3515 
   3516 static bool IsMovrelsSDWAOpcode(const unsigned Opcode)
   3517 {
   3518   switch (Opcode) {
   3519   case AMDGPU::V_MOVRELS_B32_sdwa_gfx10:
   3520   case AMDGPU::V_MOVRELSD_B32_sdwa_gfx10:
   3521   case AMDGPU::V_MOVRELSD_2_B32_sdwa_gfx10:
   3522     return true;
   3523   default:
   3524     return false;
   3525   }
   3526 }
   3527 
// movrels* opcodes should only allow VGPRS as src0.
// This is specified in .td description for vop1/vop3,
// but sdwa is handled differently. See isSDWAOperand.
bool AMDGPUAsmParser::validateMovrels(const MCInst &Inst,
                                      const OperandVector &Operands) {

  const unsigned Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  // Only the SDWA movrels opcodes need this extra check.
  if ((Desc.TSFlags & SIInstrFlags::SDWA) == 0 || !IsMovrelsSDWAOpcode(Opc))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
  assert(Src0Idx != -1);

  SMLoc ErrLoc;
  const MCOperand &Src0 = Inst.getOperand(Src0Idx);
  if (Src0.isReg()) {
    auto Reg = mc2PseudoReg(Src0.getReg());
    const MCRegisterInfo *TRI = getContext().getRegisterInfo();
    // Non-SGPR registers (i.e. VGPRs) are accepted.
    if (!isSGPR(Reg, TRI))
      return true;
    ErrLoc = getRegLoc(Reg, Operands);
  } else {
    // Immediates and expressions are never valid as src0 here.
    ErrLoc = getConstLoc(Operands);
  }

  Error(ErrLoc, "source operand must be a VGPR");
  return false;
}
   3558 
   3559 bool AMDGPUAsmParser::validateMAIAccWrite(const MCInst &Inst,
   3560                                           const OperandVector &Operands) {
   3561 
   3562   const unsigned Opc = Inst.getOpcode();
   3563 
   3564   if (Opc != AMDGPU::V_ACCVGPR_WRITE_B32_vi)
   3565     return true;
   3566 
   3567   const int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
   3568   assert(Src0Idx != -1);
   3569 
   3570   const MCOperand &Src0 = Inst.getOperand(Src0Idx);
   3571   if (!Src0.isReg())
   3572     return true;
   3573 
   3574   auto Reg = mc2PseudoReg(Src0.getReg());
   3575   const MCRegisterInfo *TRI = getContext().getRegisterInfo();
   3576   if (isSGPR(Reg, TRI)) {
   3577     Error(getRegLoc(Reg, Operands),
   3578           "source operand must be either a VGPR or an inline constant");
   3579     return false;
   3580   }
   3581 
   3582   return true;
   3583 }
   3584 
   3585 bool AMDGPUAsmParser::validateDivScale(const MCInst &Inst) {
   3586   switch (Inst.getOpcode()) {
   3587   default:
   3588     return true;
   3589   case V_DIV_SCALE_F32_gfx6_gfx7:
   3590   case V_DIV_SCALE_F32_vi:
   3591   case V_DIV_SCALE_F32_gfx10:
   3592   case V_DIV_SCALE_F64_gfx6_gfx7:
   3593   case V_DIV_SCALE_F64_vi:
   3594   case V_DIV_SCALE_F64_gfx10:
   3595     break;
   3596   }
   3597 
   3598   // TODO: Check that src0 = src1 or src2.
   3599 
   3600   for (auto Name : {AMDGPU::OpName::src0_modifiers,
   3601                     AMDGPU::OpName::src2_modifiers,
   3602                     AMDGPU::OpName::src2_modifiers}) {
   3603     if (Inst.getOperand(AMDGPU::getNamedOperandIdx(Inst.getOpcode(), Name))
   3604             .getImm() &
   3605         SISrcMods::ABS) {
   3606       return false;
   3607     }
   3608   }
   3609 
   3610   return true;
   3611 }
   3612 
   3613 bool AMDGPUAsmParser::validateMIMGD16(const MCInst &Inst) {
   3614 
   3615   const unsigned Opc = Inst.getOpcode();
   3616   const MCInstrDesc &Desc = MII.get(Opc);
   3617 
   3618   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
   3619     return true;
   3620 
   3621   int D16Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::d16);
   3622   if (D16Idx >= 0 && Inst.getOperand(D16Idx).getImm()) {
   3623     if (isCI() || isSI())
   3624       return false;
   3625   }
   3626 
   3627   return true;
   3628 }
   3629 
   3630 bool AMDGPUAsmParser::validateMIMGDim(const MCInst &Inst) {
   3631   const unsigned Opc = Inst.getOpcode();
   3632   const MCInstrDesc &Desc = MII.get(Opc);
   3633 
   3634   if ((Desc.TSFlags & SIInstrFlags::MIMG) == 0)
   3635     return true;
   3636 
   3637   int DimIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dim);
   3638   if (DimIdx < 0)
   3639     return true;
   3640 
   3641   long Imm = Inst.getOperand(DimIdx).getImm();
   3642   if (Imm < 0 || Imm >= 8)
   3643     return false;
   3644 
   3645   return true;
   3646 }
   3647 
// Return true for "rev" opcodes (v_subrev*, v_lshlrev*, etc.), across all
// encodings and subtargets. Used by validateLdsDirect, which disallows
// lds_direct with these instructions.
static bool IsRevOpcode(const unsigned Opcode)
{
  switch (Opcode) {
  case AMDGPU::V_SUBREV_F32_e32:
  case AMDGPU::V_SUBREV_F32_e64:
  case AMDGPU::V_SUBREV_F32_e32_gfx10:
  case AMDGPU::V_SUBREV_F32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e32_vi:
  case AMDGPU::V_SUBREV_F32_e64_gfx10:
  case AMDGPU::V_SUBREV_F32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBREV_F32_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32:
  case AMDGPU::V_SUBREV_CO_U32_e64:
  case AMDGPU::V_SUBREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBREV_I32_e64_gfx6_gfx7:

  case AMDGPU::V_SUBBREV_U32_e32:
  case AMDGPU::V_SUBBREV_U32_e64:
  case AMDGPU::V_SUBBREV_U32_e32_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e32_vi:
  case AMDGPU::V_SUBBREV_U32_e64_gfx6_gfx7:
  case AMDGPU::V_SUBBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_U32_e32:
  case AMDGPU::V_SUBREV_U32_e64:
  case AMDGPU::V_SUBREV_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_U32_e32_vi:
  case AMDGPU::V_SUBREV_U32_e64_gfx9:
  case AMDGPU::V_SUBREV_U32_e64_vi:

  case AMDGPU::V_SUBREV_F16_e32:
  case AMDGPU::V_SUBREV_F16_e64:
  case AMDGPU::V_SUBREV_F16_e32_gfx10:
  case AMDGPU::V_SUBREV_F16_e32_vi:
  case AMDGPU::V_SUBREV_F16_e64_gfx10:
  case AMDGPU::V_SUBREV_F16_e64_vi:

  case AMDGPU::V_SUBREV_U16_e32:
  case AMDGPU::V_SUBREV_U16_e64:
  case AMDGPU::V_SUBREV_U16_e32_vi:
  case AMDGPU::V_SUBREV_U16_e64_vi:

  case AMDGPU::V_SUBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx10:
  case AMDGPU::V_SUBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBBREV_CO_U32_e32_gfx9:
  case AMDGPU::V_SUBBREV_CO_U32_e64_gfx9:

  case AMDGPU::V_SUBREV_NC_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_NC_U32_e64_gfx10:

  case AMDGPU::V_SUBREV_CO_CI_U32_e32_gfx10:
  case AMDGPU::V_SUBREV_CO_CI_U32_e64_gfx10:

  case AMDGPU::V_LSHRREV_B32_e32:
  case AMDGPU::V_LSHRREV_B32_e64:
  case AMDGPU::V_LSHRREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHRREV_B32_e32_vi:
  case AMDGPU::V_LSHRREV_B32_e64_vi:
  case AMDGPU::V_LSHRREV_B32_e32_gfx10:
  case AMDGPU::V_LSHRREV_B32_e64_gfx10:

  case AMDGPU::V_ASHRREV_I32_e32:
  case AMDGPU::V_ASHRREV_I32_e64:
  case AMDGPU::V_ASHRREV_I32_e32_gfx10:
  case AMDGPU::V_ASHRREV_I32_e32_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e32_vi:
  case AMDGPU::V_ASHRREV_I32_e64_gfx10:
  case AMDGPU::V_ASHRREV_I32_e64_gfx6_gfx7:
  case AMDGPU::V_ASHRREV_I32_e64_vi:

  case AMDGPU::V_LSHLREV_B32_e32:
  case AMDGPU::V_LSHLREV_B32_e64:
  case AMDGPU::V_LSHLREV_B32_e32_gfx10:
  case AMDGPU::V_LSHLREV_B32_e32_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e32_vi:
  case AMDGPU::V_LSHLREV_B32_e64_gfx10:
  case AMDGPU::V_LSHLREV_B32_e64_gfx6_gfx7:
  case AMDGPU::V_LSHLREV_B32_e64_vi:

  case AMDGPU::V_LSHLREV_B16_e32:
  case AMDGPU::V_LSHLREV_B16_e64:
  case AMDGPU::V_LSHLREV_B16_e32_vi:
  case AMDGPU::V_LSHLREV_B16_e64_vi:
  case AMDGPU::V_LSHLREV_B16_gfx10:

  case AMDGPU::V_LSHRREV_B16_e32:
  case AMDGPU::V_LSHRREV_B16_e64:
  case AMDGPU::V_LSHRREV_B16_e32_vi:
  case AMDGPU::V_LSHRREV_B16_e64_vi:
  case AMDGPU::V_LSHRREV_B16_gfx10:

  case AMDGPU::V_ASHRREV_I16_e32:
  case AMDGPU::V_ASHRREV_I16_e64:
  case AMDGPU::V_ASHRREV_I16_e32_vi:
  case AMDGPU::V_ASHRREV_I16_e64_vi:
  case AMDGPU::V_ASHRREV_I16_gfx10:

  case AMDGPU::V_LSHLREV_B64_e64:
  case AMDGPU::V_LSHLREV_B64_gfx10:
  case AMDGPU::V_LSHLREV_B64_vi:

  case AMDGPU::V_LSHRREV_B64_e64:
  case AMDGPU::V_LSHRREV_B64_gfx10:
  case AMDGPU::V_LSHRREV_B64_vi:

  case AMDGPU::V_ASHRREV_I64_e64:
  case AMDGPU::V_ASHRREV_I64_gfx10:
  case AMDGPU::V_ASHRREV_I64_vi:

  case AMDGPU::V_PK_LSHLREV_B16:
  case AMDGPU::V_PK_LSHLREV_B16_gfx10:
  case AMDGPU::V_PK_LSHLREV_B16_vi:

  case AMDGPU::V_PK_LSHRREV_B16:
  case AMDGPU::V_PK_LSHRREV_B16_gfx10:
  case AMDGPU::V_PK_LSHRREV_B16_vi:
  case AMDGPU::V_PK_ASHRREV_I16:
  case AMDGPU::V_PK_ASHRREV_I16_gfx10:
  case AMDGPU::V_PK_ASHRREV_I16_vi:
    return true;
  default:
    return false;
  }
}
   3776 
// Check uses of the lds_direct special register. Returns a diagnostic
// message when a use is invalid, or None when the instruction is fine.
Optional<StringRef> AMDGPUAsmParser::validateLdsDirect(const MCInst &Inst) {

  using namespace SIInstrFlags;
  const unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);

  // lds_direct register is defined so that it can be used
  // with 9-bit operands only. Ignore encodings which do not accept these.
  const auto Enc = VOP1 | VOP2 | VOP3 | VOPC | VOP3P | SIInstrFlags::SDWA;
  if ((Desc.TSFlags & Enc) == 0)
    return None;

  // Scan src0..src2 (stopping at the first missing one) for lds_direct.
  for (auto SrcName : {OpName::src0, OpName::src1, OpName::src2}) {
    auto SrcIdx = getNamedOperandIdx(Opcode, SrcName);
    if (SrcIdx == -1)
      break;
    const auto &Src = Inst.getOperand(SrcIdx);
    if (Src.isReg() && Src.getReg() == LDS_DIRECT) {

      if (isGFX90A())
        return StringRef("lds_direct is not supported on this GPU");

      if (IsRevOpcode(Opcode) || (Desc.TSFlags & SIInstrFlags::SDWA))
        return StringRef("lds_direct cannot be used with this instruction");

      if (SrcName != OpName::src0)
        return StringRef("lds_direct may be used as src0 only");
    }
  }

  return None;
}
   3809 
   3810 SMLoc AMDGPUAsmParser::getFlatOffsetLoc(const OperandVector &Operands) const {
   3811   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
   3812     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
   3813     if (Op.isFlatOffset())
   3814       return Op.getStartLoc();
   3815   }
   3816   return getLoc();
   3817 }
   3818 
// Validate the offset modifier of a FLAT instruction: some targets have no
// flat offsets at all, and the legal bit width and signedness depend on
// whether this is a global/scratch or a plain flat access.
bool AMDGPUAsmParser::validateFlatOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::FLAT) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  assert(OpNum != -1);

  // A nonzero offset requires target support.
  const auto &Op = Inst.getOperand(OpNum);
  if (!hasFlatOffsets() && Op.getImm() != 0) {
    Error(getFlatOffsetLoc(Operands),
          "flat offset modifier is not supported on this GPU");
    return false;
  }

  // For FLAT segment the offset must be positive;
  // MSB is ignored and forced to zero.
  if (TSFlags & (SIInstrFlags::FlatGlobal | SIInstrFlags::FlatScratch)) {
    // Global/scratch: signed offset whose width depends on the target.
    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), true);
    if (!isIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit signed offset");
      return false;
    }
  } else {
    // Plain flat: unsigned offset.
    unsigned OffsetSize = AMDGPU::getNumFlatOffsetBits(getSTI(), false);
    if (!isUIntN(OffsetSize, Op.getImm())) {
      Error(getFlatOffsetLoc(Operands),
            Twine("expected a ") + Twine(OffsetSize) + "-bit unsigned offset");
      return false;
    }
  }

  return true;
}
   3856 
   3857 SMLoc AMDGPUAsmParser::getSMEMOffsetLoc(const OperandVector &Operands) const {
   3858   // Start with second operand because SMEM Offset cannot be dst or src0.
   3859   for (unsigned i = 2, e = Operands.size(); i != e; ++i) {
   3860     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
   3861     if (Op.isSMEMOffset())
   3862       return Op.getStartLoc();
   3863   }
   3864   return getLoc();
   3865 }
   3866 
// Validate the immediate offset of an SMEM instruction against the
// encodable range for the current target.
bool AMDGPUAsmParser::validateSMEMOffset(const MCInst &Inst,
                                         const OperandVector &Operands) {
  // Not checked on SI/CI.
  if (isCI() || isSI())
    return true;

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  if ((TSFlags & SIInstrFlags::SMRD) == 0)
    return true;

  auto Opcode = Inst.getOpcode();
  auto OpNum = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::offset);
  if (OpNum == -1)
    return true;

  // Non-immediate offsets (e.g. expressions) are not range-checked here.
  const auto &Op = Inst.getOperand(OpNum);
  if (!Op.isImm())
    return true;

  // Accept the offset if it fits either the unsigned or the signed
  // encoding for this target.
  uint64_t Offset = Op.getImm();
  bool IsBuffer = AMDGPU::getSMEMIsBuffer(Opcode);
  if (AMDGPU::isLegalSMRDEncodedUnsignedOffset(getSTI(), Offset) ||
      AMDGPU::isLegalSMRDEncodedSignedOffset(getSTI(), Offset, IsBuffer))
    return true;

  Error(getSMEMOffsetLoc(Operands),
        (isVI() || IsBuffer) ? "expected a 20-bit unsigned offset" :
                               "expected a 21-bit signed offset");

  return false;
}
   3897 
// SOP2/SOPC instructions may encode at most one literal constant; check
// that src0 and src1 do not require two distinct literal values.
bool AMDGPUAsmParser::validateSOPLiteral(const MCInst &Inst) const {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::SOP2 | SIInstrFlags::SOPC)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);

  const int OpIndices[] = { Src0Idx, Src1Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue; // Valid only while NumLiterals != 0.

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    // Exclude special imm operands (like that used by s_set_gpr_idx_on)
    if (AMDGPU::isSISrcOperand(Desc, OpIdx)) {
      if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
        // Identical literal values share a single literal slot.
        uint32_t Value = static_cast<uint32_t>(MO.getImm());
        if (NumLiterals == 0 || LiteralValue != Value) {
          LiteralValue = Value;
          ++NumLiterals;
        }
      } else if (MO.isExpr()) {
        // Expressions count as literals: they resolve to one at fixup time.
        ++NumExprs;
      }
    }
  }

  return NumLiterals + NumExprs <= 1;
}
   3933 
   3934 bool AMDGPUAsmParser::validateOpSel(const MCInst &Inst) {
   3935   const unsigned Opc = Inst.getOpcode();
   3936   if (Opc == AMDGPU::V_PERMLANE16_B32_gfx10 ||
   3937       Opc == AMDGPU::V_PERMLANEX16_B32_gfx10) {
   3938     int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
   3939     unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();
   3940 
   3941     if (OpSel & ~3)
   3942       return false;
   3943   }
   3944   return true;
   3945 }
   3946 
// Check the dpp_ctrl operand of DPP instructions: instructions with a
// 64-bit (or wider) src0 accept only the legal 64-bit DPP controls.
bool AMDGPUAsmParser::validateDPP(const MCInst &Inst,
                                  const OperandVector &Operands) {
  const unsigned Opc = Inst.getOpcode();
  int DppCtrlIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::dpp_ctrl);
  if (DppCtrlIdx < 0)
    return true;
  unsigned DppCtrl = Inst.getOperand(DppCtrlIdx).getImm();

  if (!AMDGPU::isLegal64BitDPPControl(DppCtrl)) {
    // DPP64 is supported for row_newbcast only.
    // src0 having a sub1 subregister indicates a multi-dword operand.
    int Src0Idx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0);
    if (Src0Idx >= 0 &&
        getMRI()->getSubReg(Inst.getOperand(Src0Idx).getReg(), AMDGPU::sub1)) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyDppCtrl, Operands);
      Error(S, "64 bit dpp only supports row_newbcast");
      return false;
    }
  }

  return true;
}
   3968 
   3969 // Check if VCC register matches wavefront size
   3970 bool AMDGPUAsmParser::validateVccOperand(unsigned Reg) const {
   3971   auto FB = getFeatureBits();
   3972   return (FB[AMDGPU::FeatureWavefrontSize64] && Reg == AMDGPU::VCC) ||
   3973     (FB[AMDGPU::FeatureWavefrontSize32] && Reg == AMDGPU::VCC_LO);
   3974 }
   3975 
// VOP3 literal is only allowed in GFX10+ and only one can be used
bool AMDGPUAsmParser::validateVOP3Literal(const MCInst &Inst,
                                          const OperandVector &Operands) {
  unsigned Opcode = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opcode);
  if (!(Desc.TSFlags & (SIInstrFlags::VOP3 | SIInstrFlags::VOP3P)))
    return true;

  const int Src0Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src0);
  const int Src1Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src1);
  const int Src2Idx = AMDGPU::getNamedOperandIdx(Opcode, AMDGPU::OpName::src2);

  const int OpIndices[] = { Src0Idx, Src1Idx, Src2Idx };

  unsigned NumExprs = 0;
  unsigned NumLiterals = 0;
  uint32_t LiteralValue; // Valid only while NumLiterals != 0.

  for (int OpIdx : OpIndices) {
    if (OpIdx == -1) break;

    const MCOperand &MO = Inst.getOperand(OpIdx);
    if (!MO.isImm() && !MO.isExpr())
      continue;
    if (!AMDGPU::isSISrcOperand(Desc, OpIdx))
      continue;

    // Targets with the MFMA inline-literal bug cannot take any constant
    // in the src2 operand of MAI instructions.
    if (OpIdx == Src2Idx && (Desc.TSFlags & SIInstrFlags::IsMAI) &&
        getFeatureBits()[AMDGPU::FeatureMFMAInlineLiteralBug]) {
      Error(getConstLoc(Operands),
            "inline constants are not allowed for this operand");
      return false;
    }

    if (MO.isImm() && !isInlineConstant(Inst, OpIdx)) {
      // Repeated uses of the same literal value count as one literal.
      uint32_t Value = static_cast<uint32_t>(MO.getImm());
      if (NumLiterals == 0 || LiteralValue != Value) {
        LiteralValue = Value;
        ++NumLiterals;
      }
    } else if (MO.isExpr()) {
      ++NumExprs;
    }
  }
  // Expressions are counted as literals.
  NumLiterals += NumExprs;

  if (!NumLiterals)
    return true;

  if (!getFeatureBits()[AMDGPU::FeatureVOP3Literal]) {
    Error(getLitLoc(Operands), "literal operands are not supported");
    return false;
  }

  if (NumLiterals > 1) {
    Error(getLitLoc(Operands), "only one literal operand is allowed");
    return false;
  }

  return true;
}
   4037 
   4038 // Returns -1 if not a register, 0 if VGPR and 1 if AGPR.
   4039 static int IsAGPROperand(const MCInst &Inst, uint16_t NameIdx,
   4040                          const MCRegisterInfo *MRI) {
   4041   int OpIdx = AMDGPU::getNamedOperandIdx(Inst.getOpcode(), NameIdx);
   4042   if (OpIdx < 0)
   4043     return -1;
   4044 
   4045   const MCOperand &Op = Inst.getOperand(OpIdx);
   4046   if (!Op.isReg())
   4047     return -1;
   4048 
   4049   unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
   4050   auto Reg = Sub ? Sub : Op.getReg();
   4051   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
   4052   return AGRP32.contains(Reg) ? 1 : 0;
   4053 }
   4054 
   4055 bool AMDGPUAsmParser::validateAGPRLdSt(const MCInst &Inst) const {
   4056   uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
   4057   if ((TSFlags & (SIInstrFlags::FLAT | SIInstrFlags::MUBUF |
   4058                   SIInstrFlags::MTBUF | SIInstrFlags::MIMG |
   4059                   SIInstrFlags::DS)) == 0)
   4060     return true;
   4061 
   4062   uint16_t DataNameIdx = (TSFlags & SIInstrFlags::DS) ? AMDGPU::OpName::data0
   4063                                                       : AMDGPU::OpName::vdata;
   4064 
   4065   const MCRegisterInfo *MRI = getMRI();
   4066   int DstAreg = IsAGPROperand(Inst, AMDGPU::OpName::vdst, MRI);
   4067   int DataAreg = IsAGPROperand(Inst, DataNameIdx, MRI);
   4068 
   4069   if ((TSFlags & SIInstrFlags::DS) && DataAreg >= 0) {
   4070     int Data2Areg = IsAGPROperand(Inst, AMDGPU::OpName::data1, MRI);
   4071     if (Data2Areg >= 0 && Data2Areg != DataAreg)
   4072       return false;
   4073   }
   4074 
   4075   auto FB = getFeatureBits();
   4076   if (FB[AMDGPU::FeatureGFX90AInsts]) {
   4077     if (DataAreg < 0 || DstAreg < 0)
   4078       return true;
   4079     return DstAreg == DataAreg;
   4080   }
   4081 
   4082   return DstAreg < 1 && DataAreg < 1;
   4083 }
   4084 
   4085 bool AMDGPUAsmParser::validateVGPRAlign(const MCInst &Inst) const {
   4086   auto FB = getFeatureBits();
   4087   if (!FB[AMDGPU::FeatureGFX90AInsts])
   4088     return true;
   4089 
   4090   const MCRegisterInfo *MRI = getMRI();
   4091   const MCRegisterClass &VGRP32 = MRI->getRegClass(AMDGPU::VGPR_32RegClassID);
   4092   const MCRegisterClass &AGRP32 = MRI->getRegClass(AMDGPU::AGPR_32RegClassID);
   4093   for (unsigned I = 0, E = Inst.getNumOperands(); I != E; ++I) {
   4094     const MCOperand &Op = Inst.getOperand(I);
   4095     if (!Op.isReg())
   4096       continue;
   4097 
   4098     unsigned Sub = MRI->getSubReg(Op.getReg(), AMDGPU::sub0);
   4099     if (!Sub)
   4100       continue;
   4101 
   4102     if (VGRP32.contains(Sub) && ((Sub - AMDGPU::VGPR0) & 1))
   4103       return false;
   4104     if (AGRP32.contains(Sub) && ((Sub - AMDGPU::AGPR0) & 1))
   4105       return false;
   4106   }
   4107 
   4108   return true;
   4109 }
   4110 
// Validate the cache-policy (cpol) immediate of \p Inst against the
// instruction kind and target. Emits a diagnostic and returns false on a
// violation; returns true if the cpol bits are acceptable (or absent).
bool AMDGPUAsmParser::validateCoherencyBits(const MCInst &Inst,
                                            const OperandVector &Operands,
                                            const SMLoc &IDLoc) {
  int CPolPos = AMDGPU::getNamedOperandIdx(Inst.getOpcode(),
                                           AMDGPU::OpName::cpol);
  // No cpol operand on this instruction -- nothing to check.
  if (CPolPos == -1)
    return true;

  unsigned CPol = Inst.getOperand(CPolPos).getImm();

  uint64_t TSFlags = MII.get(Inst.getOpcode()).TSFlags;
  // SMRD instructions accept only the GLC and DLC policy bits.
  if ((TSFlags & (SIInstrFlags::SMRD)) &&
      (CPol & ~(AMDGPU::CPol::GLC | AMDGPU::CPol::DLC))) {
    Error(IDLoc, "invalid cache policy for SMRD instruction");
    return false;
  }

  if (isGFX90A() && (CPol & CPol::SCC)) {
    // Re-anchor the diagnostic at the literal "scc" token within the cpol
    // operand's source text so the caret points at the offending modifier.
    SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
    StringRef CStr(S.getPointer());
    S = SMLoc::getFromPointer(&CStr.data()[CStr.find("scc")]);
    Error(S, "scc is not supported on this GPU");
    return false;
  }

  // The glc rules below apply only to atomics.
  if (!(TSFlags & (SIInstrFlags::IsAtomicNoRet | SIInstrFlags::IsAtomicRet)))
    return true;

  if (TSFlags & SIInstrFlags::IsAtomicRet) {
    // Returning atomics (other than MIMG) must set glc.
    if (!(TSFlags & SIInstrFlags::MIMG) && !(CPol & CPol::GLC)) {
      Error(IDLoc, "instruction must use glc");
      return false;
    }
  } else {
    // Non-returning atomics must not set glc; point at the "glc" token.
    if (CPol & CPol::GLC) {
      SMLoc S = getImmLoc(AMDGPUOperand::ImmTyCPol, Operands);
      StringRef CStr(S.getPointer());
      S = SMLoc::getFromPointer(&CStr.data()[CStr.find("glc")]);
      Error(S, "instruction must not use glc");
      return false;
    }
  }

  return true;
}
   4156 
   4157 bool AMDGPUAsmParser::validateInstruction(const MCInst &Inst,
   4158                                           const SMLoc &IDLoc,
   4159                                           const OperandVector &Operands) {
   4160   if (auto ErrMsg = validateLdsDirect(Inst)) {
   4161     Error(getRegLoc(LDS_DIRECT, Operands), *ErrMsg);
   4162     return false;
   4163   }
   4164   if (!validateSOPLiteral(Inst)) {
   4165     Error(getLitLoc(Operands),
   4166       "only one literal operand is allowed");
   4167     return false;
   4168   }
   4169   if (!validateVOP3Literal(Inst, Operands)) {
   4170     return false;
   4171   }
   4172   if (!validateConstantBusLimitations(Inst, Operands)) {
   4173     return false;
   4174   }
   4175   if (!validateEarlyClobberLimitations(Inst, Operands)) {
   4176     return false;
   4177   }
   4178   if (!validateIntClampSupported(Inst)) {
   4179     Error(getImmLoc(AMDGPUOperand::ImmTyClampSI, Operands),
   4180       "integer clamping is not supported on this GPU");
   4181     return false;
   4182   }
   4183   if (!validateOpSel(Inst)) {
   4184     Error(getImmLoc(AMDGPUOperand::ImmTyOpSel, Operands),
   4185       "invalid op_sel operand");
   4186     return false;
   4187   }
   4188   if (!validateDPP(Inst, Operands)) {
   4189     return false;
   4190   }
   4191   // For MUBUF/MTBUF d16 is a part of opcode, so there is nothing to validate.
   4192   if (!validateMIMGD16(Inst)) {
   4193     Error(getImmLoc(AMDGPUOperand::ImmTyD16, Operands),
   4194       "d16 modifier is not supported on this GPU");
   4195     return false;
   4196   }
   4197   if (!validateMIMGDim(Inst)) {
   4198     Error(IDLoc, "dim modifier is required on this GPU");
   4199     return false;
   4200   }
   4201   if (!validateMIMGMSAA(Inst)) {
   4202     Error(getImmLoc(AMDGPUOperand::ImmTyDim, Operands),
   4203           "invalid dim; must be MSAA type");
   4204     return false;
   4205   }
   4206   if (!validateMIMGDataSize(Inst)) {
   4207     Error(IDLoc,
   4208       "image data size does not match dmask and tfe");
   4209     return false;
   4210   }
   4211   if (!validateMIMGAddrSize(Inst)) {
   4212     Error(IDLoc,
   4213       "image address size does not match dim and a16");
   4214     return false;
   4215   }
   4216   if (!validateMIMGAtomicDMask(Inst)) {
   4217     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
   4218       "invalid atomic image dmask");
   4219     return false;
   4220   }
   4221   if (!validateMIMGGatherDMask(Inst)) {
   4222     Error(getImmLoc(AMDGPUOperand::ImmTyDMask, Operands),
   4223       "invalid image_gather dmask: only one bit must be set");
   4224     return false;
   4225   }
   4226   if (!validateMovrels(Inst, Operands)) {
   4227     return false;
   4228   }
   4229   if (!validateFlatOffset(Inst, Operands)) {
   4230     return false;
   4231   }
   4232   if (!validateSMEMOffset(Inst, Operands)) {
   4233     return false;
   4234   }
   4235   if (!validateMAIAccWrite(Inst, Operands)) {
   4236     return false;
   4237   }
   4238   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
   4239     return false;
   4240   }
   4241 
   4242   if (!validateAGPRLdSt(Inst)) {
   4243     Error(IDLoc, getFeatureBits()[AMDGPU::FeatureGFX90AInsts]
   4244     ? "invalid register class: data and dst should be all VGPR or AGPR"
   4245     : "invalid register class: agpr loads and stores not supported on this GPU"
   4246     );
   4247     return false;
   4248   }
   4249   if (!validateVGPRAlign(Inst)) {
   4250     Error(IDLoc,
   4251       "invalid register class: vgpr tuples must be 64 bit aligned");
   4252     return false;
   4253   }
   4254 
   4255   if (!validateDivScale(Inst)) {
   4256     Error(IDLoc, "ABS not allowed in VOP3B instructions");
   4257     return false;
   4258   }
   4259   if (!validateCoherencyBits(Inst, Operands, IDLoc)) {
   4260     return false;
   4261   }
   4262 
   4263   return true;
   4264 }
   4265 
// Forward declarations. NOTE(review): these appear to be emitted by the
// generated assembly matcher later in this file -- confirm against the
// corresponding *GenAsmMatcher.inc include.
// Suggests a spelling correction for an unrecognized mnemonic.
static std::string AMDGPUMnemonicSpellCheck(StringRef S,
                                            const FeatureBitset &FBS,
                                            unsigned VariantID = 0);

// Returns true if \p Mnemonic is recognized for the given feature set and
// assembler variant.
static bool AMDGPUCheckMnemonic(StringRef Mnemonic,
                                const FeatureBitset &AvailableFeatures,
                                unsigned VariantID);
   4273 
// Returns true if \p Mnemo is supported for feature set \p FBS in any
// assembler variant (delegates to the ArrayRef overload below).
bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
                                       const FeatureBitset &FBS) {
  return isSupportedMnemo(Mnemo, FBS, getAllVariants());
}
   4278 
   4279 bool AMDGPUAsmParser::isSupportedMnemo(StringRef Mnemo,
   4280                                        const FeatureBitset &FBS,
   4281                                        ArrayRef<unsigned> Variants) {
   4282   for (auto Variant : Variants) {
   4283     if (AMDGPUCheckMnemonic(Mnemo, FBS, Variant))
   4284       return true;
   4285   }
   4286 
   4287   return false;
   4288 }
   4289 
// Diagnose why \p Mnemo failed to match. Returns false if the mnemonic is in
// fact supported in a matched variant (i.e. the failure lies elsewhere);
// otherwise emits the most specific diagnostic available and returns true.
bool AMDGPUAsmParser::checkUnsupportedInstruction(StringRef Mnemo,
                                                  const SMLoc &IDLoc) {
  FeatureBitset FBS = ComputeAvailableFeatures(getSTI().getFeatureBits());

  // Check if requested instruction variant is supported.
  if (isSupportedMnemo(Mnemo, FBS, getMatchedVariants()))
    return false;

  // This instruction is not supported.
  // Clear any other pending errors because they are no longer relevant.
  getParser().clearPendingErrors();

  // Requested instruction variant is not supported.
  // Check if any other variants are supported.
  StringRef VariantName = getMatchedVariantName();
  if (!VariantName.empty() && isSupportedMnemo(Mnemo, FBS)) {
    return Error(IDLoc,
                 Twine(VariantName,
                       " variant of this instruction is not supported"));
  }

  // Finally check if this instruction is supported on any other GPU.
  // FeatureBitset().set() enables every feature, i.e. "any target".
  if (isSupportedMnemo(Mnemo, FeatureBitset().set())) {
    return Error(IDLoc, "instruction not supported on this GPU");
  }

  // Instruction not supported on any GPU. Probably a typo.
  std::string Suggestion = AMDGPUMnemonicSpellCheck(Mnemo, FBS);
  return Error(IDLoc, "invalid instruction" + Suggestion);
}
   4320 
// Try to match the parsed operands against every candidate encoding variant,
// keep the most specific match status, and emit the instruction on success.
// Returns true on error (per MCTargetAsmParser convention).
bool AMDGPUAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  MCInst Inst;
  unsigned Result = Match_Success;
  for (auto Variant : getMatchedVariants()) {
    uint64_t EI;
    auto R = MatchInstructionImpl(Operands, Inst, EI, MatchingInlineAsm,
                                  Variant);
    // We order match statuses from least to most specific. We use most specific
    // status as resulting
    // Match_MnemonicFail < Match_InvalidOperand < Match_MissingFeature < Match_PreferE32
    if ((R == Match_Success) ||
        (R == Match_PreferE32) ||
        (R == Match_MissingFeature && Result != Match_PreferE32) ||
        (R == Match_InvalidOperand && Result != Match_MissingFeature
                                   && Result != Match_PreferE32) ||
        (R == Match_MnemonicFail   && Result != Match_InvalidOperand
                                   && Result != Match_MissingFeature
                                   && Result != Match_PreferE32)) {
      Result = R;
      ErrorInfo = EI;
    }
    if (R == Match_Success)
      break;
  }

  if (Result == Match_Success) {
    // A matched instruction may still fail target-specific semantic checks.
    if (!validateInstruction(Inst, IDLoc, Operands)) {
      return true;
    }
    Inst.setLoc(IDLoc);
    Out.emitInstruction(Inst, getSTI());
    return false;
  }

  // Prefer the "unsupported instruction" diagnostics when they apply; they
  // are more helpful than the generic match-status errors below.
  StringRef Mnemo = ((AMDGPUOperand &)*Operands[0]).getToken();
  if (checkUnsupportedInstruction(Mnemo, IDLoc)) {
    return true;
  }

  switch (Result) {
  default: break;
  case Match_MissingFeature:
    // It has been verified that the specified instruction
    // mnemonic is valid. A match was found but it requires
    // features which are not supported on this GPU.
    return Error(IDLoc, "operands are not valid for this GPU or mode");

  case Match_InvalidOperand: {
    SMLoc ErrorLoc = IDLoc;
    // ErrorInfo indexes the offending operand when the matcher knows it.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size()) {
        return Error(IDLoc, "too few operands for instruction");
      }
      ErrorLoc = ((AMDGPUOperand &)*Operands[ErrorInfo]).getStartLoc();
      if (ErrorLoc == SMLoc())
        ErrorLoc = IDLoc;
    }
    return Error(ErrorLoc, "invalid operand for instruction");
  }

  case Match_PreferE32:
    return Error(IDLoc, "internal error: instruction without _e64 suffix "
                        "should be encoded as e32");
  case Match_MnemonicFail:
    llvm_unreachable("Invalid instructions should have been handled already");
  }
  llvm_unreachable("Implement any new match types added!");
}
   4393 
   4394 bool AMDGPUAsmParser::ParseAsAbsoluteExpression(uint32_t &Ret) {
   4395   int64_t Tmp = -1;
   4396   if (!isToken(AsmToken::Integer) && !isToken(AsmToken::Identifier)) {
   4397     return true;
   4398   }
   4399   if (getParser().parseAbsoluteExpression(Tmp)) {
   4400     return true;
   4401   }
   4402   Ret = static_cast<uint32_t>(Tmp);
   4403   return false;
   4404 }
   4405 
// Parse a "<major>, <minor>" version pair used by version directives.
// Returns true (with a diagnostic) on failure, false on success.
bool AMDGPUAsmParser::ParseDirectiveMajorMinor(uint32_t &Major,
                                               uint32_t &Minor) {
  if (ParseAsAbsoluteExpression(Major))
    return TokError("invalid major version");

  if (!trySkipToken(AsmToken::Comma))
    return TokError("minor version number required, comma expected");

  if (ParseAsAbsoluteExpression(Minor))
    return TokError("invalid minor version");

  return false;
}
   4419 
// Handle the .amdgcn_target directive: the quoted target-id string must
// match the target id the streamer was configured with.
bool AMDGPUAsmParser::ParseDirectiveAMDGCNTarget() {
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
    return TokError("directive only supported for amdgcn architecture");

  std::string TargetIDDirective;
  SMLoc TargetStart = getTok().getLoc();
  if (getParser().parseEscapedString(TargetIDDirective))
    return true;

  // Record the full source range of the string for the diagnostic below.
  SMRange TargetRange = SMRange(TargetStart, getTok().getLoc());
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return getParser().Error(TargetRange.Start,
        (Twine(".amdgcn_target directive's target id ") +
         Twine(TargetIDDirective) +
         Twine(" does not match the specified target id ") +
         Twine(getTargetStreamer().getTargetID()->toString())).str());

  return false;
}
   4439 
// Convenience wrapper: report a "value out of range" error covering \p Range.
bool AMDGPUAsmParser::OutOfRangeError(SMRange Range) {
  return Error(Range.Start, "value out of range", Range);
}
   4443 
// Convert next-free-register counts into the granulated VGPR/SGPR block
// counts encoded in the kernel descriptor. Returns true (via
// OutOfRangeError) if a count exceeds what the target can address.
bool AMDGPUAsmParser::calculateGPRBlocks(
    const FeatureBitset &Features, bool VCCUsed, bool FlatScrUsed,
    bool XNACKUsed, Optional<bool> EnableWavefrontSize32, unsigned NextFreeVGPR,
    SMRange VGPRRange, unsigned NextFreeSGPR, SMRange SGPRRange,
    unsigned &VGPRBlocks, unsigned &SGPRBlocks) {
  // TODO(scott.linder): These calculations are duplicated from
  // AMDGPUAsmPrinter::getSIProgramInfo and could be unified.
  IsaVersion Version = getIsaVersion(getSTI().getCPU());

  unsigned NumVGPRs = NextFreeVGPR;
  unsigned NumSGPRs = NextFreeSGPR;

  // gfx10+ does not encode an SGPR count in the descriptor.
  if (Version.Major >= 10)
    NumSGPRs = 0;
  else {
    unsigned MaxAddressableNumSGPRs =
        IsaInfo::getAddressableNumSGPRs(&getSTI());

    // Without the SGPR-init bug, the user count alone must be addressable;
    // the extra SGPRs (VCC, flat-scratch, XNACK) are validated below.
    if (Version.Major >= 8 && !Features.test(FeatureSGPRInitBug) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    NumSGPRs +=
        IsaInfo::getNumExtraSGPRs(&getSTI(), VCCUsed, FlatScrUsed, XNACKUsed);

    // On gfx7 and earlier (and with the init bug), the total including the
    // extra SGPRs must fit in the addressable range.
    if ((Version.Major <= 7 || Features.test(FeatureSGPRInitBug)) &&
        NumSGPRs > MaxAddressableNumSGPRs)
      return OutOfRangeError(SGPRRange);

    // The SGPR-init bug requires a fixed SGPR allocation.
    if (Features.test(FeatureSGPRInitBug))
      NumSGPRs = IsaInfo::FIXED_NUM_SGPRS_FOR_INIT_BUG;
  }

  VGPRBlocks =
      IsaInfo::getNumVGPRBlocks(&getSTI(), NumVGPRs, EnableWavefrontSize32);
  SGPRBlocks = IsaInfo::getNumSGPRBlocks(&getSTI(), NumSGPRs);

  return false;
}
   4483 
   4484 bool AMDGPUAsmParser::ParseDirectiveAMDHSAKernel() {
   4485   if (getSTI().getTargetTriple().getArch() != Triple::amdgcn)
   4486     return TokError("directive only supported for amdgcn architecture");
   4487 
   4488   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA)
   4489     return TokError("directive only supported for amdhsa OS");
   4490 
   4491   StringRef KernelName;
   4492   if (getParser().parseIdentifier(KernelName))
   4493     return true;
   4494 
   4495   kernel_descriptor_t KD = getDefaultAmdhsaKernelDescriptor(&getSTI());
   4496 
   4497   StringSet<> Seen;
   4498 
   4499   IsaVersion IVersion = getIsaVersion(getSTI().getCPU());
   4500 
   4501   SMRange VGPRRange;
   4502   uint64_t NextFreeVGPR = 0;
   4503   uint64_t AccumOffset = 0;
   4504   SMRange SGPRRange;
   4505   uint64_t NextFreeSGPR = 0;
   4506   unsigned UserSGPRCount = 0;
   4507   bool ReserveVCC = true;
   4508   bool ReserveFlatScr = true;
   4509   Optional<bool> EnableWavefrontSize32;
   4510 
   4511   while (true) {
   4512     while (trySkipToken(AsmToken::EndOfStatement));
   4513 
   4514     StringRef ID;
   4515     SMRange IDRange = getTok().getLocRange();
   4516     if (!parseId(ID, "expected .amdhsa_ directive or .end_amdhsa_kernel"))
   4517       return true;
   4518 
   4519     if (ID == ".end_amdhsa_kernel")
   4520       break;
   4521 
   4522     if (Seen.find(ID) != Seen.end())
   4523       return TokError(".amdhsa_ directives cannot be repeated");
   4524     Seen.insert(ID);
   4525 
   4526     SMLoc ValStart = getLoc();
   4527     int64_t IVal;
   4528     if (getParser().parseAbsoluteExpression(IVal))
   4529       return true;
   4530     SMLoc ValEnd = getLoc();
   4531     SMRange ValRange = SMRange(ValStart, ValEnd);
   4532 
   4533     if (IVal < 0)
   4534       return OutOfRangeError(ValRange);
   4535 
   4536     uint64_t Val = IVal;
   4537 
   4538 #define PARSE_BITS_ENTRY(FIELD, ENTRY, VALUE, RANGE)                           \
   4539   if (!isUInt<ENTRY##_WIDTH>(VALUE))                                           \
   4540     return OutOfRangeError(RANGE);                                             \
   4541   AMDHSA_BITS_SET(FIELD, ENTRY, VALUE);
   4542 
   4543     if (ID == ".amdhsa_group_segment_fixed_size") {
   4544       if (!isUInt<sizeof(KD.group_segment_fixed_size) * CHAR_BIT>(Val))
   4545         return OutOfRangeError(ValRange);
   4546       KD.group_segment_fixed_size = Val;
   4547     } else if (ID == ".amdhsa_private_segment_fixed_size") {
   4548       if (!isUInt<sizeof(KD.private_segment_fixed_size) * CHAR_BIT>(Val))
   4549         return OutOfRangeError(ValRange);
   4550       KD.private_segment_fixed_size = Val;
   4551     } else if (ID == ".amdhsa_kernarg_size") {
   4552       if (!isUInt<sizeof(KD.kernarg_size) * CHAR_BIT>(Val))
   4553         return OutOfRangeError(ValRange);
   4554       KD.kernarg_size = Val;
   4555     } else if (ID == ".amdhsa_user_sgpr_private_segment_buffer") {
   4556       if (hasArchitectedFlatScratch())
   4557         return Error(IDRange.Start,
   4558                      "directive is not supported with architected flat scratch",
   4559                      IDRange);
   4560       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4561                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_BUFFER,
   4562                        Val, ValRange);
   4563       if (Val)
   4564         UserSGPRCount += 4;
   4565     } else if (ID == ".amdhsa_user_sgpr_dispatch_ptr") {
   4566       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4567                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_PTR, Val,
   4568                        ValRange);
   4569       if (Val)
   4570         UserSGPRCount += 2;
   4571     } else if (ID == ".amdhsa_user_sgpr_queue_ptr") {
   4572       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4573                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_QUEUE_PTR, Val,
   4574                        ValRange);
   4575       if (Val)
   4576         UserSGPRCount += 2;
   4577     } else if (ID == ".amdhsa_user_sgpr_kernarg_segment_ptr") {
   4578       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4579                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_KERNARG_SEGMENT_PTR,
   4580                        Val, ValRange);
   4581       if (Val)
   4582         UserSGPRCount += 2;
   4583     } else if (ID == ".amdhsa_user_sgpr_dispatch_id") {
   4584       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4585                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_DISPATCH_ID, Val,
   4586                        ValRange);
   4587       if (Val)
   4588         UserSGPRCount += 2;
   4589     } else if (ID == ".amdhsa_user_sgpr_flat_scratch_init") {
   4590       if (hasArchitectedFlatScratch())
   4591         return Error(IDRange.Start,
   4592                      "directive is not supported with architected flat scratch",
   4593                      IDRange);
   4594       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4595                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_FLAT_SCRATCH_INIT, Val,
   4596                        ValRange);
   4597       if (Val)
   4598         UserSGPRCount += 2;
   4599     } else if (ID == ".amdhsa_user_sgpr_private_segment_size") {
   4600       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4601                        KERNEL_CODE_PROPERTY_ENABLE_SGPR_PRIVATE_SEGMENT_SIZE,
   4602                        Val, ValRange);
   4603       if (Val)
   4604         UserSGPRCount += 1;
   4605     } else if (ID == ".amdhsa_wavefront_size32") {
   4606       if (IVersion.Major < 10)
   4607         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
   4608       EnableWavefrontSize32 = Val;
   4609       PARSE_BITS_ENTRY(KD.kernel_code_properties,
   4610                        KERNEL_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32,
   4611                        Val, ValRange);
   4612     } else if (ID == ".amdhsa_system_sgpr_private_segment_wavefront_offset") {
   4613       if (hasArchitectedFlatScratch())
   4614         return Error(IDRange.Start,
   4615                      "directive is not supported with architected flat scratch",
   4616                      IDRange);
   4617       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4618                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
   4619     } else if (ID == ".amdhsa_enable_private_segment") {
   4620       if (!hasArchitectedFlatScratch())
   4621         return Error(
   4622             IDRange.Start,
   4623             "directive is not supported without architected flat scratch",
   4624             IDRange);
   4625       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4626                        COMPUTE_PGM_RSRC2_ENABLE_PRIVATE_SEGMENT, Val, ValRange);
   4627     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_x") {
   4628       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4629                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_X, Val,
   4630                        ValRange);
   4631     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_y") {
   4632       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4633                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Y, Val,
   4634                        ValRange);
   4635     } else if (ID == ".amdhsa_system_sgpr_workgroup_id_z") {
   4636       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4637                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_ID_Z, Val,
   4638                        ValRange);
   4639     } else if (ID == ".amdhsa_system_sgpr_workgroup_info") {
   4640       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4641                        COMPUTE_PGM_RSRC2_ENABLE_SGPR_WORKGROUP_INFO, Val,
   4642                        ValRange);
   4643     } else if (ID == ".amdhsa_system_vgpr_workitem_id") {
   4644       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4645                        COMPUTE_PGM_RSRC2_ENABLE_VGPR_WORKITEM_ID, Val,
   4646                        ValRange);
   4647     } else if (ID == ".amdhsa_next_free_vgpr") {
   4648       VGPRRange = ValRange;
   4649       NextFreeVGPR = Val;
   4650     } else if (ID == ".amdhsa_next_free_sgpr") {
   4651       SGPRRange = ValRange;
   4652       NextFreeSGPR = Val;
   4653     } else if (ID == ".amdhsa_accum_offset") {
   4654       if (!isGFX90A())
   4655         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
   4656       AccumOffset = Val;
   4657     } else if (ID == ".amdhsa_reserve_vcc") {
   4658       if (!isUInt<1>(Val))
   4659         return OutOfRangeError(ValRange);
   4660       ReserveVCC = Val;
   4661     } else if (ID == ".amdhsa_reserve_flat_scratch") {
   4662       if (IVersion.Major < 7)
   4663         return Error(IDRange.Start, "directive requires gfx7+", IDRange);
   4664       if (hasArchitectedFlatScratch())
   4665         return Error(IDRange.Start,
   4666                      "directive is not supported with architected flat scratch",
   4667                      IDRange);
   4668       if (!isUInt<1>(Val))
   4669         return OutOfRangeError(ValRange);
   4670       ReserveFlatScr = Val;
   4671     } else if (ID == ".amdhsa_reserve_xnack_mask") {
   4672       if (IVersion.Major < 8)
   4673         return Error(IDRange.Start, "directive requires gfx8+", IDRange);
   4674       if (!isUInt<1>(Val))
   4675         return OutOfRangeError(ValRange);
   4676       if (Val != getTargetStreamer().getTargetID()->isXnackOnOrAny())
   4677         return getParser().Error(IDRange.Start, ".amdhsa_reserve_xnack_mask does not match target id",
   4678                                  IDRange);
   4679     } else if (ID == ".amdhsa_float_round_mode_32") {
   4680       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
   4681                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_32, Val, ValRange);
   4682     } else if (ID == ".amdhsa_float_round_mode_16_64") {
   4683       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
   4684                        COMPUTE_PGM_RSRC1_FLOAT_ROUND_MODE_16_64, Val, ValRange);
   4685     } else if (ID == ".amdhsa_float_denorm_mode_32") {
   4686       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
   4687                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_32, Val, ValRange);
   4688     } else if (ID == ".amdhsa_float_denorm_mode_16_64") {
   4689       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
   4690                        COMPUTE_PGM_RSRC1_FLOAT_DENORM_MODE_16_64, Val,
   4691                        ValRange);
   4692     } else if (ID == ".amdhsa_dx10_clamp") {
   4693       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1,
   4694                        COMPUTE_PGM_RSRC1_ENABLE_DX10_CLAMP, Val, ValRange);
   4695     } else if (ID == ".amdhsa_ieee_mode") {
   4696       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_ENABLE_IEEE_MODE,
   4697                        Val, ValRange);
   4698     } else if (ID == ".amdhsa_fp16_overflow") {
   4699       if (IVersion.Major < 9)
   4700         return Error(IDRange.Start, "directive requires gfx9+", IDRange);
   4701       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FP16_OVFL, Val,
   4702                        ValRange);
   4703     } else if (ID == ".amdhsa_tg_split") {
   4704       if (!isGFX90A())
   4705         return Error(IDRange.Start, "directive requires gfx90a+", IDRange);
   4706       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_TG_SPLIT, Val,
   4707                        ValRange);
   4708     } else if (ID == ".amdhsa_workgroup_processor_mode") {
   4709       if (IVersion.Major < 10)
   4710         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
   4711       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_WGP_MODE, Val,
   4712                        ValRange);
   4713     } else if (ID == ".amdhsa_memory_ordered") {
   4714       if (IVersion.Major < 10)
   4715         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
   4716       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_MEM_ORDERED, Val,
   4717                        ValRange);
   4718     } else if (ID == ".amdhsa_forward_progress") {
   4719       if (IVersion.Major < 10)
   4720         return Error(IDRange.Start, "directive requires gfx10+", IDRange);
   4721       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc1, COMPUTE_PGM_RSRC1_FWD_PROGRESS, Val,
   4722                        ValRange);
   4723     } else if (ID == ".amdhsa_exception_fp_ieee_invalid_op") {
   4724       PARSE_BITS_ENTRY(
   4725           KD.compute_pgm_rsrc2,
   4726           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INVALID_OPERATION, Val,
   4727           ValRange);
   4728     } else if (ID == ".amdhsa_exception_fp_denorm_src") {
   4729       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4730                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_FP_DENORMAL_SOURCE,
   4731                        Val, ValRange);
   4732     } else if (ID == ".amdhsa_exception_fp_ieee_div_zero") {
   4733       PARSE_BITS_ENTRY(
   4734           KD.compute_pgm_rsrc2,
   4735           COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_DIVISION_BY_ZERO, Val,
   4736           ValRange);
   4737     } else if (ID == ".amdhsa_exception_fp_ieee_overflow") {
   4738       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4739                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_OVERFLOW,
   4740                        Val, ValRange);
   4741     } else if (ID == ".amdhsa_exception_fp_ieee_underflow") {
   4742       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4743                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_UNDERFLOW,
   4744                        Val, ValRange);
   4745     } else if (ID == ".amdhsa_exception_fp_ieee_inexact") {
   4746       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4747                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_IEEE_754_FP_INEXACT,
   4748                        Val, ValRange);
   4749     } else if (ID == ".amdhsa_exception_int_div_zero") {
   4750       PARSE_BITS_ENTRY(KD.compute_pgm_rsrc2,
   4751                        COMPUTE_PGM_RSRC2_ENABLE_EXCEPTION_INT_DIVIDE_BY_ZERO,
   4752                        Val, ValRange);
   4753     } else {
   4754       return Error(IDRange.Start, "unknown .amdhsa_kernel directive", IDRange);
   4755     }
   4756 
   4757 #undef PARSE_BITS_ENTRY
   4758   }
   4759 
   4760   if (Seen.find(".amdhsa_next_free_vgpr") == Seen.end())
   4761     return TokError(".amdhsa_next_free_vgpr directive is required");
   4762 
   4763   if (Seen.find(".amdhsa_next_free_sgpr") == Seen.end())
   4764     return TokError(".amdhsa_next_free_sgpr directive is required");
   4765 
   4766   unsigned VGPRBlocks;
   4767   unsigned SGPRBlocks;
   4768   if (calculateGPRBlocks(getFeatureBits(), ReserveVCC, ReserveFlatScr,
   4769                          getTargetStreamer().getTargetID()->isXnackOnOrAny(),
   4770                          EnableWavefrontSize32, NextFreeVGPR,
   4771                          VGPRRange, NextFreeSGPR, SGPRRange, VGPRBlocks,
   4772                          SGPRBlocks))
   4773     return true;
   4774 
   4775   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT_WIDTH>(
   4776           VGPRBlocks))
   4777     return OutOfRangeError(VGPRRange);
   4778   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
   4779                   COMPUTE_PGM_RSRC1_GRANULATED_WORKITEM_VGPR_COUNT, VGPRBlocks);
   4780 
   4781   if (!isUInt<COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT_WIDTH>(
   4782           SGPRBlocks))
   4783     return OutOfRangeError(SGPRRange);
   4784   AMDHSA_BITS_SET(KD.compute_pgm_rsrc1,
   4785                   COMPUTE_PGM_RSRC1_GRANULATED_WAVEFRONT_SGPR_COUNT,
   4786                   SGPRBlocks);
   4787 
   4788   if (!isUInt<COMPUTE_PGM_RSRC2_USER_SGPR_COUNT_WIDTH>(UserSGPRCount))
   4789     return TokError("too many user SGPRs enabled");
   4790   AMDHSA_BITS_SET(KD.compute_pgm_rsrc2, COMPUTE_PGM_RSRC2_USER_SGPR_COUNT,
   4791                   UserSGPRCount);
   4792 
   4793   if (isGFX90A()) {
   4794     if (Seen.find(".amdhsa_accum_offset") == Seen.end())
   4795       return TokError(".amdhsa_accum_offset directive is required");
   4796     if (AccumOffset < 4 || AccumOffset > 256 || (AccumOffset & 3))
   4797       return TokError("accum_offset should be in range [4..256] in "
   4798                       "increments of 4");
   4799     if (AccumOffset > alignTo(std::max((uint64_t)1, NextFreeVGPR), 4))
   4800       return TokError("accum_offset exceeds total VGPR allocation");
   4801     AMDHSA_BITS_SET(KD.compute_pgm_rsrc3, COMPUTE_PGM_RSRC3_GFX90A_ACCUM_OFFSET,
   4802                     (AccumOffset / 4 - 1));
   4803   }
   4804 
   4805   getTargetStreamer().EmitAmdhsaKernelDescriptor(
   4806       getSTI(), KernelName, KD, NextFreeVGPR, NextFreeSGPR, ReserveVCC,
   4807       ReserveFlatScr);
   4808   return false;
   4809 }
   4810 
   4811 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectVersion() {
   4812   uint32_t Major;
   4813   uint32_t Minor;
   4814 
   4815   if (ParseDirectiveMajorMinor(Major, Minor))
   4816     return true;
   4817 
   4818   getTargetStreamer().EmitDirectiveHSACodeObjectVersion(Major, Minor);
   4819   return false;
   4820 }
   4821 
   4822 bool AMDGPUAsmParser::ParseDirectiveHSACodeObjectISA() {
   4823   uint32_t Major;
   4824   uint32_t Minor;
   4825   uint32_t Stepping;
   4826   StringRef VendorName;
   4827   StringRef ArchName;
   4828 
   4829   // If this directive has no arguments, then use the ISA version for the
   4830   // targeted GPU.
   4831   if (isToken(AsmToken::EndOfStatement)) {
   4832     AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
   4833     getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(ISA.Major, ISA.Minor,
   4834                                                         ISA.Stepping,
   4835                                                         "AMD", "AMDGPU");
   4836     return false;
   4837   }
   4838 
   4839   if (ParseDirectiveMajorMinor(Major, Minor))
   4840     return true;
   4841 
   4842   if (!trySkipToken(AsmToken::Comma))
   4843     return TokError("stepping version number required, comma expected");
   4844 
   4845   if (ParseAsAbsoluteExpression(Stepping))
   4846     return TokError("invalid stepping version");
   4847 
   4848   if (!trySkipToken(AsmToken::Comma))
   4849     return TokError("vendor name required, comma expected");
   4850 
   4851   if (!parseString(VendorName, "invalid vendor name"))
   4852     return true;
   4853 
   4854   if (!trySkipToken(AsmToken::Comma))
   4855     return TokError("arch name required, comma expected");
   4856 
   4857   if (!parseString(ArchName, "invalid arch name"))
   4858     return true;
   4859 
   4860   getTargetStreamer().EmitDirectiveHSACodeObjectISAV2(Major, Minor, Stepping,
   4861                                                       VendorName, ArchName);
   4862   return false;
   4863 }
   4864 
/// Parse a single "key = value" entry of a .amd_kernel_code_t block into
/// \p Header, then validate subtarget-dependent fields.
///
/// \returns true (with a diagnostic already reported) on failure, false on
/// success.
bool AMDGPUAsmParser::ParseAMDKernelCodeTValue(StringRef ID,
                                               amd_kernel_code_t &Header) {
  // max_scratch_backing_memory_byte_size is deprecated. Ignore it while parsing
  // assembly for backwards compatibility.
  if (ID == "max_scratch_backing_memory_byte_size") {
    Parser.eatToEndOfStatement();
    return false;
  }

  // Generic field parser: fills the matching member of Header or writes a
  // diagnostic into Err on failure.
  SmallString<40> ErrStr;
  raw_svector_ostream Err(ErrStr);
  if (!parseAmdKernelCodeField(ID, getParser(), Header, Err)) {
    return TokError(Err.str());
  }
  Lex();

  // wave32 is only accepted on GFX10+ and must agree with the selected
  // wavefront-size feature; wave64 likewise requires its feature bit.
  if (ID == "enable_wavefront_size32") {
    if (Header.code_properties & AMD_CODE_PROPERTY_ENABLE_WAVEFRONT_SIZE32) {
      if (!isGFX10Plus())
        return TokError("enable_wavefront_size32=1 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("enable_wavefront_size32=1 requires +WavefrontSize32");
    } else {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("enable_wavefront_size32=0 requires +WavefrontSize64");
    }
  }

  // wavefront_size is encoded as log2: 5 -> wave32 (GFX10+ only),
  // 6 -> wave64.
  if (ID == "wavefront_size") {
    if (Header.wavefront_size == 5) {
      if (!isGFX10Plus())
        return TokError("wavefront_size=5 is only allowed on GFX10+");
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize32])
        return TokError("wavefront_size=5 requires +WavefrontSize32");
    } else if (Header.wavefront_size == 6) {
      if (!getFeatureBits()[AMDGPU::FeatureWavefrontSize64])
        return TokError("wavefront_size=6 requires +WavefrontSize64");
    }
  }

  // The following compute_pgm_resource_registers bits are GFX10+ only.
  if (ID == "enable_wgp_mode") {
    if (G_00B848_WGP_MODE(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_wgp_mode=1 is only allowed on GFX10+");
  }

  if (ID == "enable_mem_ordered") {
    if (G_00B848_MEM_ORDERED(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_mem_ordered=1 is only allowed on GFX10+");
  }

  if (ID == "enable_fwd_progress") {
    if (G_00B848_FWD_PROGRESS(Header.compute_pgm_resource_registers) &&
        !isGFX10Plus())
      return TokError("enable_fwd_progress=1 is only allowed on GFX10+");
  }

  return false;
}
   4925 
   4926 bool AMDGPUAsmParser::ParseDirectiveAMDKernelCodeT() {
   4927   amd_kernel_code_t Header;
   4928   AMDGPU::initDefaultAMDKernelCodeT(Header, &getSTI());
   4929 
   4930   while (true) {
   4931     // Lex EndOfStatement.  This is in a while loop, because lexing a comment
   4932     // will set the current token to EndOfStatement.
   4933     while(trySkipToken(AsmToken::EndOfStatement));
   4934 
   4935     StringRef ID;
   4936     if (!parseId(ID, "expected value identifier or .end_amd_kernel_code_t"))
   4937       return true;
   4938 
   4939     if (ID == ".end_amd_kernel_code_t")
   4940       break;
   4941 
   4942     if (ParseAMDKernelCodeTValue(ID, Header))
   4943       return true;
   4944   }
   4945 
   4946   getTargetStreamer().EmitAMDKernelCodeT(Header);
   4947 
   4948   return false;
   4949 }
   4950 
   4951 bool AMDGPUAsmParser::ParseDirectiveAMDGPUHsaKernel() {
   4952   StringRef KernelName;
   4953   if (!parseId(KernelName, "expected symbol name"))
   4954     return true;
   4955 
   4956   getTargetStreamer().EmitAMDGPUSymbolType(KernelName,
   4957                                            ELF::STT_AMDGPU_HSA_KERNEL);
   4958 
   4959   KernelScope.initialize(getContext());
   4960   return false;
   4961 }
   4962 
/// Parse the .amd_amdgpu_isa directive: a quoted target-id string that
/// must match the target id the assembler was configured with.
bool AMDGPUAsmParser::ParseDirectiveISAVersion() {
  // The directive is only meaningful for amdgcn targets.
  if (getSTI().getTargetTriple().getArch() != Triple::amdgcn) {
    return Error(getLoc(),
                 ".amd_amdgpu_isa directive is not available on non-amdgcn "
                 "architectures");
  }

  // The current token holds the quoted target-id string.
  auto TargetIDDirective = getLexer().getTok().getStringContents();
  if (getTargetStreamer().getTargetID()->toString() != TargetIDDirective)
    return Error(getParser().getTok().getLoc(), "target id must match options");

  getTargetStreamer().EmitISAVersion();
  // Consume the string token only after a successful match.
  Lex();

  return false;
}
   4979 
   4980 bool AMDGPUAsmParser::ParseDirectiveHSAMetadata() {
   4981   const char *AssemblerDirectiveBegin;
   4982   const char *AssemblerDirectiveEnd;
   4983   std::tie(AssemblerDirectiveBegin, AssemblerDirectiveEnd) =
   4984       isHsaAbiVersion3Or4(&getSTI())
   4985           ? std::make_tuple(HSAMD::V3::AssemblerDirectiveBegin,
   4986                             HSAMD::V3::AssemblerDirectiveEnd)
   4987           : std::make_tuple(HSAMD::AssemblerDirectiveBegin,
   4988                             HSAMD::AssemblerDirectiveEnd);
   4989 
   4990   if (getSTI().getTargetTriple().getOS() != Triple::AMDHSA) {
   4991     return Error(getLoc(),
   4992                  (Twine(AssemblerDirectiveBegin) + Twine(" directive is "
   4993                  "not available on non-amdhsa OSes")).str());
   4994   }
   4995 
   4996   std::string HSAMetadataString;
   4997   if (ParseToEndDirective(AssemblerDirectiveBegin, AssemblerDirectiveEnd,
   4998                           HSAMetadataString))
   4999     return true;
   5000 
   5001   if (isHsaAbiVersion3Or4(&getSTI())) {
   5002     if (!getTargetStreamer().EmitHSAMetadataV3(HSAMetadataString))
   5003       return Error(getLoc(), "invalid HSA metadata");
   5004   } else {
   5005     if (!getTargetStreamer().EmitHSAMetadataV2(HSAMetadataString))
   5006       return Error(getLoc(), "invalid HSA metadata");
   5007   }
   5008 
   5009   return false;
   5010 }
   5011 
/// Common code to parse out a block of text (typically YAML) between start and
/// end directives.
///
/// \p AssemblerDirectiveBegin has already been consumed by the caller and is
/// not referenced here; only \p AssemblerDirectiveEnd is searched for. The
/// collected text (one separator-terminated line per statement) is appended
/// to \p CollectString. Returns true if EOF is hit before the end directive.
bool AMDGPUAsmParser::ParseToEndDirective(const char *AssemblerDirectiveBegin,
                                          const char *AssemblerDirectiveEnd,
                                          std::string &CollectString) {

  raw_string_ostream CollectStream(CollectString);

  // Collect the text verbatim: do not let the lexer swallow whitespace.
  getLexer().setSkipSpace(false);

  bool FoundEnd = false;
  while (!isToken(AsmToken::Eof)) {
    // Preserve leading spaces of each line.
    while (isToken(AsmToken::Space)) {
      CollectStream << getTokenStr();
      Lex();
    }

    // Stop once the end directive is reached.
    if (trySkipId(AssemblerDirectiveEnd)) {
      FoundEnd = true;
      break;
    }

    // Append the remainder of the line plus a statement separator.
    CollectStream << Parser.parseStringToEndOfStatement()
                  << getContext().getAsmInfo()->getSeparatorString();

    Parser.eatToEndOfStatement();
  }

  // Restore normal whitespace handling before returning.
  getLexer().setSkipSpace(true);

  if (isToken(AsmToken::Eof) && !FoundEnd) {
    return TokError(Twine("expected directive ") +
                    Twine(AssemblerDirectiveEnd) + Twine(" not found"));
  }

  CollectStream.flush();
  return false;
}
   5050 
   5051 /// Parse the assembler directive for new MsgPack-format PAL metadata.
   5052 bool AMDGPUAsmParser::ParseDirectivePALMetadataBegin() {
   5053   std::string String;
   5054   if (ParseToEndDirective(AMDGPU::PALMD::AssemblerDirectiveBegin,
   5055                           AMDGPU::PALMD::AssemblerDirectiveEnd, String))
   5056     return true;
   5057 
   5058   auto PALMetadata = getTargetStreamer().getPALMetadata();
   5059   if (!PALMetadata->setFromString(String))
   5060     return Error(getLoc(), "invalid PAL metadata");
   5061   return false;
   5062 }
   5063 
   5064 /// Parse the assembler directive for old linear-format PAL metadata.
   5065 bool AMDGPUAsmParser::ParseDirectivePALMetadata() {
   5066   if (getSTI().getTargetTriple().getOS() != Triple::AMDPAL) {
   5067     return Error(getLoc(),
   5068                  (Twine(PALMD::AssemblerDirective) + Twine(" directive is "
   5069                  "not available on non-amdpal OSes")).str());
   5070   }
   5071 
   5072   auto PALMetadata = getTargetStreamer().getPALMetadata();
   5073   PALMetadata->setLegacy();
   5074   for (;;) {
   5075     uint32_t Key, Value;
   5076     if (ParseAsAbsoluteExpression(Key)) {
   5077       return TokError(Twine("invalid value in ") +
   5078                       Twine(PALMD::AssemblerDirective));
   5079     }
   5080     if (!trySkipToken(AsmToken::Comma)) {
   5081       return TokError(Twine("expected an even number of values in ") +
   5082                       Twine(PALMD::AssemblerDirective));
   5083     }
   5084     if (ParseAsAbsoluteExpression(Value)) {
   5085       return TokError(Twine("invalid value in ") +
   5086                       Twine(PALMD::AssemblerDirective));
   5087     }
   5088     PALMetadata->setRegister(Key, Value);
   5089     if (!trySkipToken(AsmToken::Comma))
   5090       break;
   5091   }
   5092   return false;
   5093 }
   5094 
   5095 /// ParseDirectiveAMDGPULDS
   5096 ///  ::= .amdgpu_lds identifier ',' size_expression [',' align_expression]
   5097 bool AMDGPUAsmParser::ParseDirectiveAMDGPULDS() {
   5098   if (getParser().checkForValidSection())
   5099     return true;
   5100 
   5101   StringRef Name;
   5102   SMLoc NameLoc = getLoc();
   5103   if (getParser().parseIdentifier(Name))
   5104     return TokError("expected identifier in directive");
   5105 
   5106   MCSymbol *Symbol = getContext().getOrCreateSymbol(Name);
   5107   if (parseToken(AsmToken::Comma, "expected ','"))
   5108     return true;
   5109 
   5110   unsigned LocalMemorySize = AMDGPU::IsaInfo::getLocalMemorySize(&getSTI());
   5111 
   5112   int64_t Size;
   5113   SMLoc SizeLoc = getLoc();
   5114   if (getParser().parseAbsoluteExpression(Size))
   5115     return true;
   5116   if (Size < 0)
   5117     return Error(SizeLoc, "size must be non-negative");
   5118   if (Size > LocalMemorySize)
   5119     return Error(SizeLoc, "size is too large");
   5120 
   5121   int64_t Alignment = 4;
   5122   if (trySkipToken(AsmToken::Comma)) {
   5123     SMLoc AlignLoc = getLoc();
   5124     if (getParser().parseAbsoluteExpression(Alignment))
   5125       return true;
   5126     if (Alignment < 0 || !isPowerOf2_64(Alignment))
   5127       return Error(AlignLoc, "alignment must be a power of two");
   5128 
   5129     // Alignment larger than the size of LDS is possible in theory, as long
   5130     // as the linker manages to place to symbol at address 0, but we do want
   5131     // to make sure the alignment fits nicely into a 32-bit integer.
   5132     if (Alignment >= 1u << 31)
   5133       return Error(AlignLoc, "alignment is too large");
   5134   }
   5135 
   5136   if (parseToken(AsmToken::EndOfStatement,
   5137                  "unexpected token in '.amdgpu_lds' directive"))
   5138     return true;
   5139 
   5140   Symbol->redefineIfPossible();
   5141   if (!Symbol->isUndefined())
   5142     return Error(NameLoc, "invalid symbol redefinition");
   5143 
   5144   getTargetStreamer().emitAMDGPULDS(Symbol, Size, Align(Alignment));
   5145   return false;
   5146 }
   5147 
/// Dispatch a target-specific assembler directive to its handler. The set
/// of recognized directives depends on the active HSA ABI version.
/// Returns true when the directive was not recognized here or its handler
/// reported a failure.
bool AMDGPUAsmParser::ParseDirective(AsmToken DirectiveID) {
  StringRef IDVal = DirectiveID.getString();

  // V3+ ABI directives vs. the older code-object directive set.
  if (isHsaAbiVersion3Or4(&getSTI())) {
    if (IDVal == ".amdhsa_kernel")
     return ParseDirectiveAMDHSAKernel();

    // TODO: Restructure/combine with PAL metadata directive.
    if (IDVal == AMDGPU::HSAMD::V3::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  } else {
    if (IDVal == ".hsa_code_object_version")
      return ParseDirectiveHSACodeObjectVersion();

    if (IDVal == ".hsa_code_object_isa")
      return ParseDirectiveHSACodeObjectISA();

    if (IDVal == ".amd_kernel_code_t")
      return ParseDirectiveAMDKernelCodeT();

    if (IDVal == ".amdgpu_hsa_kernel")
      return ParseDirectiveAMDGPUHsaKernel();

    if (IDVal == ".amd_amdgpu_isa")
      return ParseDirectiveISAVersion();

    if (IDVal == AMDGPU::HSAMD::AssemblerDirectiveBegin)
      return ParseDirectiveHSAMetadata();
  }

  // Directives accepted regardless of ABI version.
  if (IDVal == ".amdgcn_target")
    return ParseDirectiveAMDGCNTarget();

  if (IDVal == ".amdgpu_lds")
    return ParseDirectiveAMDGPULDS();

  if (IDVal == PALMD::AssemblerDirectiveBegin)
    return ParseDirectivePALMetadataBegin();

  if (IDVal == PALMD::AssemblerDirective)
    return ParseDirectivePALMetadata();

  // Not a directive we handle.
  return true;
}
   5192 
/// Return true if \p RegNo names a register that exists on the current
/// subtarget, so references to registers absent on the selected GPU can be
/// rejected.
bool AMDGPUAsmParser::subtargetHasRegister(const MCRegisterInfo &MRI,
                                           unsigned RegNo) {

  // TTMP12..TTMP15 (and anything aliasing them) are only accepted on GFX9+.
  for (MCRegAliasIterator R(AMDGPU::TTMP12_TTMP13_TTMP14_TTMP15, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return isGFX9Plus();
  }

  // GFX10 has 2 more SGPRs 104 and 105.
  for (MCRegAliasIterator R(AMDGPU::SGPR104_SGPR105, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR104_SGPR105();
  }

  switch (RegNo) {
  // Aperture and POPS wave-id registers: GFX9+ only.
  case AMDGPU::SRC_SHARED_BASE:
  case AMDGPU::SRC_SHARED_LIMIT:
  case AMDGPU::SRC_PRIVATE_BASE:
  case AMDGPU::SRC_PRIVATE_LIMIT:
  case AMDGPU::SRC_POPS_EXITING_WAVE_ID:
    return isGFX9Plus();
  // Trap base/memory registers: only accepted before GFX9.
  case AMDGPU::TBA:
  case AMDGPU::TBA_LO:
  case AMDGPU::TBA_HI:
  case AMDGPU::TMA:
  case AMDGPU::TMA_LO:
  case AMDGPU::TMA_HI:
    return !isGFX9Plus();
  // XNACK_MASK: VI/GFX9 only, and only when the target supports XNACK.
  case AMDGPU::XNACK_MASK:
  case AMDGPU::XNACK_MASK_LO:
  case AMDGPU::XNACK_MASK_HI:
    return (isVI() || isGFX9()) && getTargetStreamer().getTargetID()->isXnackSupported();
  case AMDGPU::SGPR_NULL:
    return isGFX10Plus();
  default:
    break;
  }

  // CI accepts everything not already handled above.
  if (isCI())
    return true;

  if (isSI() || isGFX10Plus()) {
    // No flat_scr on SI.
    // On GFX10 flat scratch is not a valid register operand and can only be
    // accessed with s_setreg/s_getreg.
    switch (RegNo) {
    case AMDGPU::FLAT_SCR:
    case AMDGPU::FLAT_SCR_LO:
    case AMDGPU::FLAT_SCR_HI:
      return false;
    default:
      return true;
    }
  }

  // VI only has 102 SGPRs, so make sure we aren't trying to use the 2 more that
  // SI/CI have.
  for (MCRegAliasIterator R(AMDGPU::SGPR102_SGPR103, &MRI, true);
       R.isValid(); ++R) {
    if (*R == RegNo)
      return hasSGPR102_SGPR103();
  }

  return true;
}
   5260 
/// Parse a single instruction operand. In NSA mode a bracketed register
/// list ("[v0, v1, ...]") is also accepted.
OperandMatchResultTy
AMDGPUAsmParser::parseOperand(OperandVector &Operands, StringRef Mnemonic,
                              OperandMode Mode) {
  // Try to parse with a custom parser
  OperandMatchResultTy ResTy = MatchOperandParserImpl(Operands, Mnemonic);

  // If we successfully parsed the operand or if there was an error parsing,
  // we are done.
  //
  // If we are parsing after we reach EndOfStatement then this means we
  // are appending default values to the Operands list.  This is only done
  // by custom parser, so we shouldn't continue on to the generic parsing.
  if (ResTy == MatchOperand_Success || ResTy == MatchOperand_ParseFail ||
      isToken(AsmToken::EndOfStatement))
    return ResTy;

  SMLoc RBraceLoc;
  SMLoc LBraceLoc = getLoc();
  if (Mode == OperandMode_NSA && trySkipToken(AsmToken::LBrac)) {
    unsigned Prefix = Operands.size();

    // Parse registers until the closing bracket.
    for (;;) {
      auto Loc = getLoc();
      ResTy = parseReg(Operands);
      if (ResTy == MatchOperand_NoMatch)
        Error(Loc, "expected a register");
      if (ResTy != MatchOperand_Success)
        return MatchOperand_ParseFail;

      RBraceLoc = getLoc();
      if (trySkipToken(AsmToken::RBrac))
        break;

      if (!skipToken(AsmToken::Comma,
                     "expected a comma or a closing square bracket")) {
        return MatchOperand_ParseFail;
      }
    }

    // Only wrap the parsed registers in bracket tokens when the list holds
    // more than one entry; a single register is kept as a plain operand.
    if (Operands.size() - Prefix > 1) {
      Operands.insert(Operands.begin() + Prefix,
                      AMDGPUOperand::CreateToken(this, "[", LBraceLoc));
      Operands.push_back(AMDGPUOperand::CreateToken(this, "]", RBraceLoc));
    }

    return MatchOperand_Success;
  }

  return parseRegOrImm(Operands);
}
   5311 
   5312 StringRef AMDGPUAsmParser::parseMnemonicSuffix(StringRef Name) {
   5313   // Clear any forced encodings from the previous instruction.
   5314   setForcedEncodingSize(0);
   5315   setForcedDPP(false);
   5316   setForcedSDWA(false);
   5317 
   5318   if (Name.endswith("_e64")) {
   5319     setForcedEncodingSize(64);
   5320     return Name.substr(0, Name.size() - 4);
   5321   } else if (Name.endswith("_e32")) {
   5322     setForcedEncodingSize(32);
   5323     return Name.substr(0, Name.size() - 4);
   5324   } else if (Name.endswith("_dpp")) {
   5325     setForcedDPP(true);
   5326     return Name.substr(0, Name.size() - 4);
   5327   } else if (Name.endswith("_sdwa")) {
   5328     setForcedSDWA(true);
   5329     return Name.substr(0, Name.size() - 5);
   5330   }
   5331   return Name;
   5332 }
   5333 
/// Parse a full instruction: the mnemonic (with optional encoding suffix)
/// followed by a comma/space separated operand list. On a failed operand,
/// reports a diagnostic and consumes the rest of the statement.
bool AMDGPUAsmParser::ParseInstruction(ParseInstructionInfo &Info,
                                       StringRef Name,
                                       SMLoc NameLoc, OperandVector &Operands) {
  // Add the instruction mnemonic
  Name = parseMnemonicSuffix(Name);
  Operands.push_back(AMDGPUOperand::CreateToken(this, Name, NameLoc));

  bool IsMIMG = Name.startswith("image_");

  while (!trySkipToken(AsmToken::EndOfStatement)) {
    OperandMode Mode = OperandMode_Default;
    // On GFX10+ the first MIMG operand may be an NSA register list.
    if (IsMIMG && isGFX10Plus() && Operands.size() == 2)
      Mode = OperandMode_NSA;
    // Reset cache-policy modifier tracking before the next operand
    // (see parseCPol).
    CPolSeen = 0;
    OperandMatchResultTy Res = parseOperand(Operands, Name, Mode);

    if (Res != MatchOperand_Success) {
      checkUnsupportedInstruction(Name, NameLoc);
      if (!Parser.hasPendingError()) {
        // FIXME: use real operand location rather than the current location.
        StringRef Msg =
          (Res == MatchOperand_ParseFail) ? "failed parsing operand." :
                                            "not a valid operand.";
        Error(getLoc(), Msg);
      }
      // Skip the remainder of the malformed statement.
      while (!trySkipToken(AsmToken::EndOfStatement)) {
        lex();
      }
      return true;
    }

    // Eat the comma or space if there is one.
    trySkipToken(AsmToken::Comma);
  }

  return false;
}
   5371 
   5372 //===----------------------------------------------------------------------===//
   5373 // Utility functions
   5374 //===----------------------------------------------------------------------===//
   5375 
   5376 OperandMatchResultTy
   5377 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, int64_t &IntVal) {
   5378 
   5379   if (!trySkipId(Prefix, AsmToken::Colon))
   5380     return MatchOperand_NoMatch;
   5381 
   5382   return parseExpr(IntVal) ? MatchOperand_Success : MatchOperand_ParseFail;
   5383 }
   5384 
   5385 OperandMatchResultTy
   5386 AMDGPUAsmParser::parseIntWithPrefix(const char *Prefix, OperandVector &Operands,
   5387                                     AMDGPUOperand::ImmTy ImmTy,
   5388                                     bool (*ConvertResult)(int64_t&)) {
   5389   SMLoc S = getLoc();
   5390   int64_t Value = 0;
   5391 
   5392   OperandMatchResultTy Res = parseIntWithPrefix(Prefix, Value);
   5393   if (Res != MatchOperand_Success)
   5394     return Res;
   5395 
   5396   if (ConvertResult && !ConvertResult(Value)) {
   5397     Error(S, "invalid " + StringRef(Prefix) + " value.");
   5398   }
   5399 
   5400   Operands.push_back(AMDGPUOperand::CreateImm(this, Value, S, ImmTy));
   5401   return MatchOperand_Success;
   5402 }
   5403 
/// Parse "<Prefix>:[b,b,...]" where each element is 0 or 1 (at most 4
/// elements) and pack element I into bit I of a single immediate operand.
/// Note: \p ConvertResult is accepted for signature parity with
/// parseIntWithPrefix but is not used here.
OperandMatchResultTy
AMDGPUAsmParser::parseOperandArrayWithPrefix(const char *Prefix,
                                             OperandVector &Operands,
                                             AMDGPUOperand::ImmTy ImmTy,
                                             bool (*ConvertResult)(int64_t&)) {
  SMLoc S = getLoc();
  if (!trySkipId(Prefix, AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (!skipToken(AsmToken::LBrac, "expected a left square bracket"))
    return MatchOperand_ParseFail;

  unsigned Val = 0;
  const unsigned MaxSize = 4;

  // FIXME: How to verify the number of elements matches the number of src
  // operands?
  for (int I = 0; ; ++I) {
    int64_t Op;
    SMLoc Loc = getLoc();
    if (!parseExpr(Op))
      return MatchOperand_ParseFail;

    // Elements are individual bits.
    if (Op != 0 && Op != 1) {
      Error(Loc, "invalid " + StringRef(Prefix) + " value.");
      return MatchOperand_ParseFail;
    }

    Val |= (Op << I);

    if (trySkipToken(AsmToken::RBrac))
      break;

    // More than MaxSize elements is an error.
    if (I + 1 == MaxSize) {
      Error(getLoc(), "expected a closing square bracket");
      return MatchOperand_ParseFail;
    }

    if (!skipToken(AsmToken::Comma, "expected a comma"))
      return MatchOperand_ParseFail;
  }

  Operands.push_back(AMDGPUOperand::CreateImm(this, Val, S, ImmTy));
  return MatchOperand_Success;
}
   5449 
   5450 OperandMatchResultTy
   5451 AMDGPUAsmParser::parseNamedBit(StringRef Name, OperandVector &Operands,
   5452                                AMDGPUOperand::ImmTy ImmTy) {
   5453   int64_t Bit;
   5454   SMLoc S = getLoc();
   5455 
   5456   if (trySkipId(Name)) {
   5457     Bit = 1;
   5458   } else if (trySkipId("no", Name)) {
   5459     Bit = 0;
   5460   } else {
   5461     return MatchOperand_NoMatch;
   5462   }
   5463 
   5464   if (Name == "r128" && !hasMIMG_R128()) {
   5465     Error(S, "r128 modifier is not supported on this GPU");
   5466     return MatchOperand_ParseFail;
   5467   }
   5468   if (Name == "a16" && !isGFX9() && !hasGFX10A16()) {
   5469     Error(S, "a16 modifier is not supported on this GPU");
   5470     return MatchOperand_ParseFail;
   5471   }
   5472 
   5473   if (isGFX9() && ImmTy == AMDGPUOperand::ImmTyA16)
   5474     ImmTy = AMDGPUOperand::ImmTyR128A16;
   5475 
   5476   Operands.push_back(AMDGPUOperand::CreateImm(this, Bit, S, ImmTy));
   5477   return MatchOperand_Success;
   5478 }
   5479 
   5480 OperandMatchResultTy
   5481 AMDGPUAsmParser::parseCPol(OperandVector &Operands) {
   5482   unsigned CPolOn = 0;
   5483   unsigned CPolOff = 0;
   5484   SMLoc S = getLoc();
   5485 
   5486   if (trySkipId("glc"))
   5487     CPolOn = AMDGPU::CPol::GLC;
   5488   else if (trySkipId("noglc"))
   5489     CPolOff = AMDGPU::CPol::GLC;
   5490   else if (trySkipId("slc"))
   5491     CPolOn = AMDGPU::CPol::SLC;
   5492   else if (trySkipId("noslc"))
   5493     CPolOff = AMDGPU::CPol::SLC;
   5494   else if (trySkipId("dlc"))
   5495     CPolOn = AMDGPU::CPol::DLC;
   5496   else if (trySkipId("nodlc"))
   5497     CPolOff = AMDGPU::CPol::DLC;
   5498   else if (trySkipId("scc"))
   5499     CPolOn = AMDGPU::CPol::SCC;
   5500   else if (trySkipId("noscc"))
   5501     CPolOff = AMDGPU::CPol::SCC;
   5502   else
   5503     return MatchOperand_NoMatch;
   5504 
   5505   if (!isGFX10Plus() && ((CPolOn | CPolOff) & AMDGPU::CPol::DLC)) {
   5506     Error(S, "dlc modifier is not supported on this GPU");
   5507     return MatchOperand_ParseFail;
   5508   }
   5509 
   5510   if (!isGFX90A() && ((CPolOn | CPolOff) & AMDGPU::CPol::SCC)) {
   5511     Error(S, "scc modifier is not supported on this GPU");
   5512     return MatchOperand_ParseFail;
   5513   }
   5514 
   5515   if (CPolSeen & (CPolOn | CPolOff)) {
   5516     Error(S, "duplicate cache policy modifier");
   5517     return MatchOperand_ParseFail;
   5518   }
   5519 
   5520   CPolSeen |= (CPolOn | CPolOff);
   5521 
   5522   for (unsigned I = 1; I != Operands.size(); ++I) {
   5523     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
   5524     if (Op.isCPol()) {
   5525       Op.setImm((Op.getImm() | CPolOn) & ~CPolOff);
   5526       return MatchOperand_Success;
   5527     }
   5528   }
   5529 
   5530   Operands.push_back(AMDGPUOperand::CreateImm(this, CPolOn, S,
   5531                                               AMDGPUOperand::ImmTyCPol));
   5532 
   5533   return MatchOperand_Success;
   5534 }
   5535 
   5536 static void addOptionalImmOperand(
   5537   MCInst& Inst, const OperandVector& Operands,
   5538   AMDGPUAsmParser::OptionalImmIndexMap& OptionalIdx,
   5539   AMDGPUOperand::ImmTy ImmT,
   5540   int64_t Default = 0) {
   5541   auto i = OptionalIdx.find(ImmT);
   5542   if (i != OptionalIdx.end()) {
   5543     unsigned Idx = i->second;
   5544     ((AMDGPUOperand &)*Operands[Idx]).addImmOperands(Inst, 1);
   5545   } else {
   5546     Inst.addOperand(MCOperand::createImm(Default));
   5547   }
   5548 }
   5549 
   5550 OperandMatchResultTy
   5551 AMDGPUAsmParser::parseStringWithPrefix(StringRef Prefix,
   5552                                        StringRef &Value,
   5553                                        SMLoc &StringLoc) {
   5554   if (!trySkipId(Prefix, AsmToken::Colon))
   5555     return MatchOperand_NoMatch;
   5556 
   5557   StringLoc = getLoc();
   5558   return parseId(Value, "expected an identifier") ? MatchOperand_Success
   5559                                                   : MatchOperand_ParseFail;
   5560 }
   5561 
   5562 //===----------------------------------------------------------------------===//
   5563 // MTBUF format
   5564 //===----------------------------------------------------------------------===//
   5565 
   5566 bool AMDGPUAsmParser::tryParseFmt(const char *Pref,
   5567                                   int64_t MaxVal,
   5568                                   int64_t &Fmt) {
   5569   int64_t Val;
   5570   SMLoc Loc = getLoc();
   5571 
   5572   auto Res = parseIntWithPrefix(Pref, Val);
   5573   if (Res == MatchOperand_ParseFail)
   5574     return false;
   5575   if (Res == MatchOperand_NoMatch)
   5576     return true;
   5577 
   5578   if (Val < 0 || Val > MaxVal) {
   5579     Error(Loc, Twine("out of range ", StringRef(Pref)));
   5580     return false;
   5581   }
   5582 
   5583   Fmt = Val;
   5584   return true;
   5585 }
   5586 
   5587 // dfmt and nfmt (in a tbuffer instruction) are parsed as one to allow their
   5588 // values to live in a joint format operand in the MCInst encoding.
OperandMatchResultTy
AMDGPUAsmParser::parseDfmtNfmt(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;

  // dfmt and nfmt can appear in either order, and each is optional.
  for (int I = 0; I < 2; ++I) {
    if (Dfmt == DFMT_UNDEF && !tryParseFmt("dfmt", DFMT_MAX, Dfmt))
      return MatchOperand_ParseFail;

    if (Nfmt == NFMT_UNDEF && !tryParseFmt("nfmt", NFMT_MAX, Nfmt)) {
      return MatchOperand_ParseFail;
    }
    // Skip optional comma between dfmt/nfmt
    // but guard against 2 commas following each other:
    // the comma is consumed only when exactly one of the two halves has
    // been parsed so far and the token after it is not another comma.
    if ((Dfmt == DFMT_UNDEF) != (Nfmt == NFMT_UNDEF) &&
        !peekToken().is(AsmToken::Comma)) {
      trySkipToken(AsmToken::Comma);
    }
  }

  // Neither half present: this is not a dfmt/nfmt operand at all.
  if (Dfmt == DFMT_UNDEF && Nfmt == NFMT_UNDEF)
    return MatchOperand_NoMatch;

  // An omitted half takes its default value.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  Format = encodeDfmtNfmt(Dfmt, Nfmt);
  return MatchOperand_Success;
}
   5621 
   5622 OperandMatchResultTy
   5623 AMDGPUAsmParser::parseUfmt(int64_t &Format) {
   5624   using namespace llvm::AMDGPU::MTBUFFormat;
   5625 
   5626   int64_t Fmt = UFMT_UNDEF;
   5627 
   5628   if (!tryParseFmt("format", UFMT_MAX, Fmt))
   5629     return MatchOperand_ParseFail;
   5630 
   5631   if (Fmt == UFMT_UNDEF)
   5632     return MatchOperand_NoMatch;
   5633 
   5634   Format = Fmt;
   5635   return MatchOperand_Success;
   5636 }
   5637 
   5638 bool AMDGPUAsmParser::matchDfmtNfmt(int64_t &Dfmt,
   5639                                     int64_t &Nfmt,
   5640                                     StringRef FormatStr,
   5641                                     SMLoc Loc) {
   5642   using namespace llvm::AMDGPU::MTBUFFormat;
   5643   int64_t Format;
   5644 
   5645   Format = getDfmt(FormatStr);
   5646   if (Format != DFMT_UNDEF) {
   5647     Dfmt = Format;
   5648     return true;
   5649   }
   5650 
   5651   Format = getNfmt(FormatStr, getSTI());
   5652   if (Format != NFMT_UNDEF) {
   5653     Nfmt = Format;
   5654     return true;
   5655   }
   5656 
   5657   Error(Loc, "unsupported format");
   5658   return false;
   5659 }
   5660 
// Parse a symbolic split format: one or two comma-separated format names
// (a data format and/or a numeric format, in either order). The first name
// has already been lexed into FormatStr by the caller.
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicSplitFormat(StringRef FormatStr,
                                          SMLoc FormatLoc,
                                          int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Dfmt = DFMT_UNDEF;
  int64_t Nfmt = NFMT_UNDEF;
  if (!matchDfmtNfmt(Dfmt, Nfmt, FormatStr, FormatLoc))
    return MatchOperand_ParseFail;

  if (trySkipToken(AsmToken::Comma)) {
    StringRef Str;
    SMLoc Loc = getLoc();
    if (!parseId(Str, "expected a format string") ||
        !matchDfmtNfmt(Dfmt, Nfmt, Str, Loc)) {
      return MatchOperand_ParseFail;
    }
    // After two names, a still-undefined Dfmt means both names were
    // numeric formats; a still-undefined Nfmt means both were data formats.
    if (Dfmt == DFMT_UNDEF) {
      Error(Loc, "duplicate numeric format");
      return MatchOperand_ParseFail;
    } else if (Nfmt == NFMT_UNDEF) {
      Error(Loc, "duplicate data format");
      return MatchOperand_ParseFail;
    }
  }

  // An omitted half takes its default value.
  Dfmt = (Dfmt == DFMT_UNDEF) ? DFMT_DEFAULT : Dfmt;
  Nfmt = (Nfmt == NFMT_UNDEF) ? NFMT_DEFAULT : Nfmt;

  // GFX10+ encodes the pair as a unified format; reject combinations that
  // have no unified equivalent.
  if (isGFX10Plus()) {
    auto Ufmt = convertDfmtNfmt2Ufmt(Dfmt, Nfmt);
    if (Ufmt == UFMT_UNDEF) {
      Error(FormatLoc, "unsupported format");
      return MatchOperand_ParseFail;
    }
    Format = Ufmt;
  } else {
    Format = encodeDfmtNfmt(Dfmt, Nfmt);
  }

  return MatchOperand_Success;
}
   5704 
   5705 OperandMatchResultTy
   5706 AMDGPUAsmParser::parseSymbolicUnifiedFormat(StringRef FormatStr,
   5707                                             SMLoc Loc,
   5708                                             int64_t &Format) {
   5709   using namespace llvm::AMDGPU::MTBUFFormat;
   5710 
   5711   auto Id = getUnifiedFormat(FormatStr);
   5712   if (Id == UFMT_UNDEF)
   5713     return MatchOperand_NoMatch;
   5714 
   5715   if (!isGFX10Plus()) {
   5716     Error(Loc, "unified format is not supported on this GPU");
   5717     return MatchOperand_ParseFail;
   5718   }
   5719 
   5720   Format = Id;
   5721   return MatchOperand_Success;
   5722 }
   5723 
   5724 OperandMatchResultTy
   5725 AMDGPUAsmParser::parseNumericFormat(int64_t &Format) {
   5726   using namespace llvm::AMDGPU::MTBUFFormat;
   5727   SMLoc Loc = getLoc();
   5728 
   5729   if (!parseExpr(Format))
   5730     return MatchOperand_ParseFail;
   5731   if (!isValidFormatEncoding(Format, getSTI())) {
   5732     Error(Loc, "out of range format");
   5733     return MatchOperand_ParseFail;
   5734   }
   5735 
   5736   return MatchOperand_Success;
   5737 }
   5738 
// Parse "format:" followed either by a bracketed symbolic format,
// e.g. format:[BUF_FMT_...], or by a numeric format expression.
OperandMatchResultTy
AMDGPUAsmParser::parseSymbolicOrNumericFormat(int64_t &Format) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  if (!trySkipId("format", AsmToken::Colon))
    return MatchOperand_NoMatch;

  if (trySkipToken(AsmToken::LBrac)) {
    StringRef FormatStr;
    SMLoc Loc = getLoc();
    if (!parseId(FormatStr, "expected a format string"))
      return MatchOperand_ParseFail;

    // Try a unified format name first; fall back to split dfmt/nfmt names.
    auto Res = parseSymbolicUnifiedFormat(FormatStr, Loc, Format);
    if (Res == MatchOperand_NoMatch)
      Res = parseSymbolicSplitFormat(FormatStr, Loc, Format);
    if (Res != MatchOperand_Success)
      return Res;

    if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
      return MatchOperand_ParseFail;

    return MatchOperand_Success;
  }

  return parseNumericFormat(Format);
}
   5766 
// Parse the MTBUF format operand together with the soffset operand that
// follows it. The format may appear either before soffset (legacy dfmt/nfmt
// or GFX10+ "format:<n>" syntax) or after it (symbolic "format:[...]"
// syntax); specifying it twice is an error.
OperandMatchResultTy
AMDGPUAsmParser::parseFORMAT(OperandVector &Operands) {
  using namespace llvm::AMDGPU::MTBUFFormat;

  int64_t Format = getDefaultFormatEncoding(getSTI());
  OperandMatchResultTy Res;
  SMLoc Loc = getLoc();

  // Parse legacy format syntax.
  Res = isGFX10Plus() ? parseUfmt(Format) : parseDfmtNfmt(Format);
  if (Res == MatchOperand_ParseFail)
    return Res;

  bool FormatFound = (Res == MatchOperand_Success);

  // A format operand is always pushed, holding the default encoding when no
  // format was parsed; it may be overwritten below once soffset is known.
  Operands.push_back(
    AMDGPUOperand::CreateImm(this, Format, Loc, AMDGPUOperand::ImmTyFORMAT));

  if (FormatFound)
    trySkipToken(AsmToken::Comma);

  if (isToken(AsmToken::EndOfStatement)) {
    // We are expecting an soffset operand,
    // but let matcher handle the error.
    return MatchOperand_Success;
  }

  // Parse soffset.
  Res = parseRegOrImm(Operands);
  if (Res != MatchOperand_Success)
    return Res;

  trySkipToken(AsmToken::Comma);

  if (!FormatFound) {
    // Look for the symbolic syntax after soffset; on success, patch the
    // format operand pushed above (now second-to-last, just before soffset).
    Res = parseSymbolicOrNumericFormat(Format);
    if (Res == MatchOperand_ParseFail)
      return Res;
    if (Res == MatchOperand_Success) {
      auto Size = Operands.size();
      AMDGPUOperand &Op = static_cast<AMDGPUOperand &>(*Operands[Size - 2]);
      assert(Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyFORMAT);
      Op.setImm(Format);
    }
    return MatchOperand_Success;
  }

  // A format was already parsed before soffset; a second one is an error.
  if (isId("format") && peekToken().is(AsmToken::Colon)) {
    Error(getLoc(), "duplicate format");
    return MatchOperand_ParseFail;
  }
  return MatchOperand_Success;
}
   5820 
   5821 //===----------------------------------------------------------------------===//
   5822 // ds
   5823 //===----------------------------------------------------------------------===//
   5824 
   5825 void AMDGPUAsmParser::cvtDSOffset01(MCInst &Inst,
   5826                                     const OperandVector &Operands) {
   5827   OptionalImmIndexMap OptionalIdx;
   5828 
   5829   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
   5830     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
   5831 
   5832     // Add the register arguments
   5833     if (Op.isReg()) {
   5834       Op.addRegOperands(Inst, 1);
   5835       continue;
   5836     }
   5837 
   5838     // Handle optional arguments
   5839     OptionalIdx[Op.getImmTy()] = i;
   5840   }
   5841 
   5842   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset0);
   5843   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset1);
   5844   addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
   5845 
   5846   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
   5847 }
   5848 
   5849 void AMDGPUAsmParser::cvtDSImpl(MCInst &Inst, const OperandVector &Operands,
   5850                                 bool IsGdsHardcoded) {
   5851   OptionalImmIndexMap OptionalIdx;
   5852 
   5853   for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
   5854     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
   5855 
   5856     // Add the register arguments
   5857     if (Op.isReg()) {
   5858       Op.addRegOperands(Inst, 1);
   5859       continue;
   5860     }
   5861 
   5862     if (Op.isToken() && Op.getToken() == "gds") {
   5863       IsGdsHardcoded = true;
   5864       continue;
   5865     }
   5866 
   5867     // Handle optional arguments
   5868     OptionalIdx[Op.getImmTy()] = i;
   5869   }
   5870 
   5871   AMDGPUOperand::ImmTy OffsetType =
   5872     (Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx10 ||
   5873      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_gfx6_gfx7 ||
   5874      Inst.getOpcode() == AMDGPU::DS_SWIZZLE_B32_vi) ? AMDGPUOperand::ImmTySwizzle :
   5875                                                       AMDGPUOperand::ImmTyOffset;
   5876 
   5877   addOptionalImmOperand(Inst, Operands, OptionalIdx, OffsetType);
   5878 
   5879   if (!IsGdsHardcoded) {
   5880     addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyGDS);
   5881   }
   5882   Inst.addOperand(MCOperand::createReg(AMDGPU::M0)); // m0
   5883 }
   5884 
// Convert parsed exp operands into an MCInst, computing the 'en' (enable)
// mask from which of the four sources are registers rather than 'off'.
void AMDGPUAsmParser::cvtExp(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  // MCInst operand index of each of the four source operands.
  unsigned OperandIdx[4];
  unsigned EnMask = 0;
  int SrcIdx = 0;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Op.addRegOperands(Inst, 1);
      ++SrcIdx;
      continue;
    }

    // An 'off' source becomes NoRegister; its bit stays clear in EnMask.
    if (Op.isOff()) {
      assert(SrcIdx < 4);
      OperandIdx[SrcIdx] = Inst.size();
      Inst.addOperand(MCOperand::createReg(AMDGPU::NoRegister));
      ++SrcIdx;
      continue;
    }

    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyExpTgt) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // The 'done' token produces no operand here.
    if (Op.isToken() && Op.getToken() == "done")
      continue;

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  assert(SrcIdx == 4);

  bool Compr = false;
  if (OptionalIdx.find(AMDGPUOperand::ImmTyExpCompr) != OptionalIdx.end()) {
    // Compressed export: move src2 into src1's slot and clear the upper
    // two source operands.
    Compr = true;
    Inst.getOperand(OperandIdx[1]) = Inst.getOperand(OperandIdx[2]);
    Inst.getOperand(OperandIdx[2]).setReg(AMDGPU::NoRegister);
    Inst.getOperand(OperandIdx[3]).setReg(AMDGPU::NoRegister);
  }

  // Each live source sets one bit (two adjacent bits when compressed).
  for (auto i = 0; i < SrcIdx; ++i) {
    if (Inst.getOperand(OperandIdx[i]).getReg() != AMDGPU::NoRegister) {
      EnMask |= Compr? (0x3 << i * 2) : (0x1 << i);
    }
  }

  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpVM);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyExpCompr);

  Inst.addOperand(MCOperand::createImm(EnMask));
}
   5945 
   5946 //===----------------------------------------------------------------------===//
   5947 // s_waitcnt
   5948 //===----------------------------------------------------------------------===//
   5949 
   5950 static bool
   5951 encodeCnt(
   5952   const AMDGPU::IsaVersion ISA,
   5953   int64_t &IntVal,
   5954   int64_t CntVal,
   5955   bool Saturate,
   5956   unsigned (*encode)(const IsaVersion &Version, unsigned, unsigned),
   5957   unsigned (*decode)(const IsaVersion &Version, unsigned))
   5958 {
   5959   bool Failed = false;
   5960 
   5961   IntVal = encode(ISA, IntVal, CntVal);
   5962   if (CntVal != decode(ISA, IntVal)) {
   5963     if (Saturate) {
   5964       IntVal = encode(ISA, IntVal, -1);
   5965     } else {
   5966       Failed = true;
   5967     }
   5968   }
   5969   return Failed;
   5970 }
   5971 
// Parse one "<name>(<value>)" counter specification for s_waitcnt and fold
// its value into IntVal. A "_sat" suffix on the counter name clamps an
// out-of-range value instead of reporting an error.
bool AMDGPUAsmParser::parseCnt(int64_t &IntVal) {

  SMLoc CntLoc = getLoc();
  StringRef CntName = getTokenStr();

  if (!skipToken(AsmToken::Identifier, "expected a counter name") ||
      !skipToken(AsmToken::LParen, "expected a left parenthesis"))
    return false;

  int64_t CntVal;
  SMLoc ValLoc = getLoc();
  if (!parseExpr(CntVal))
    return false;

  AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());

  bool Failed = true;
  bool Sat = CntName.endswith("_sat");

  if (CntName == "vmcnt" || CntName == "vmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeVmcnt, decodeVmcnt);
  } else if (CntName == "expcnt" || CntName == "expcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeExpcnt, decodeExpcnt);
  } else if (CntName == "lgkmcnt" || CntName == "lgkmcnt_sat") {
    Failed = encodeCnt(ISA, IntVal, CntVal, Sat, encodeLgkmcnt, decodeLgkmcnt);
  } else {
    Error(CntLoc, "invalid counter name " + CntName);
    return false;
  }

  if (Failed) {
    Error(ValLoc, "too large value for " + CntName);
    return false;
  }

  if (!skipToken(AsmToken::RParen, "expected a closing parenthesis"))
    return false;

  // Counters may be separated by '&' or ','; a trailing separator with
  // nothing after it is an error.
  if (trySkipToken(AsmToken::Amp) || trySkipToken(AsmToken::Comma)) {
    if (isToken(AsmToken::EndOfStatement)) {
      Error(getLoc(), "expected a counter name");
      return false;
    }
  }

  return true;
}
   6019 
   6020 OperandMatchResultTy
   6021 AMDGPUAsmParser::parseSWaitCntOps(OperandVector &Operands) {
   6022   AMDGPU::IsaVersion ISA = AMDGPU::getIsaVersion(getSTI().getCPU());
   6023   int64_t Waitcnt = getWaitcntBitMask(ISA);
   6024   SMLoc S = getLoc();
   6025 
   6026   if (isToken(AsmToken::Identifier) && peekToken().is(AsmToken::LParen)) {
   6027     while (!isToken(AsmToken::EndOfStatement)) {
   6028       if (!parseCnt(Waitcnt))
   6029         return MatchOperand_ParseFail;
   6030     }
   6031   } else {
   6032     if (!parseExpr(Waitcnt))
   6033       return MatchOperand_ParseFail;
   6034   }
   6035 
   6036   Operands.push_back(AMDGPUOperand::CreateImm(this, Waitcnt, S));
   6037   return MatchOperand_Success;
   6038 }
   6039 
bool
AMDGPUOperand::isSWaitCnt() const {
  // Any immediate is accepted as an s_waitcnt operand.
  return isImm();
}
   6044 
   6045 //===----------------------------------------------------------------------===//
   6046 // hwreg
   6047 //===----------------------------------------------------------------------===//
   6048 
// Parse the body of a hwreg(...) operand: the register (by name or by a
// numeric expression), optionally followed by ",<offset>,<width>". The
// closing parenthesis is consumed here as well.
bool
AMDGPUAsmParser::parseHwregBody(OperandInfoTy &HwReg,
                                OperandInfoTy &Offset,
                                OperandInfoTy &Width) {
  using namespace llvm::AMDGPU::Hwreg;

  // The register may be specified by name or using a numeric code.
  // Note: the condition assigns HwReg.Id as a side effect.
  HwReg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) &&
      (HwReg.Id = getHwregId(getTokenStr())) >= 0) {
    HwReg.IsSymbolic = true;
    lex(); // skip register name
  } else if (!parseExpr(HwReg.Id, "a register name")) {
    return false;
  }

  // hwreg(<reg>) with no offset/width: defaults apply.
  if (trySkipToken(AsmToken::RParen))
    return true;

  // parse optional params
  if (!skipToken(AsmToken::Comma, "expected a comma or a closing parenthesis"))
    return false;

  Offset.Loc = getLoc();
  if (!parseExpr(Offset.Id))
    return false;

  if (!skipToken(AsmToken::Comma, "expected a comma"))
    return false;

  Width.Loc = getLoc();
  return parseExpr(Width.Id) &&
         skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
   6083 
   6084 bool
   6085 AMDGPUAsmParser::validateHwreg(const OperandInfoTy &HwReg,
   6086                                const OperandInfoTy &Offset,
   6087                                const OperandInfoTy &Width) {
   6088 
   6089   using namespace llvm::AMDGPU::Hwreg;
   6090 
   6091   if (HwReg.IsSymbolic && !isValidHwreg(HwReg.Id, getSTI())) {
   6092     Error(HwReg.Loc,
   6093           "specified hardware register is not supported on this GPU");
   6094     return false;
   6095   }
   6096   if (!isValidHwreg(HwReg.Id)) {
   6097     Error(HwReg.Loc,
   6098           "invalid code of hardware register: only 6-bit values are legal");
   6099     return false;
   6100   }
   6101   if (!isValidHwregOffset(Offset.Id)) {
   6102     Error(Offset.Loc, "invalid bit offset: only 5-bit values are legal");
   6103     return false;
   6104   }
   6105   if (!isValidHwregWidth(Width.Id)) {
   6106     Error(Width.Loc,
   6107           "invalid bitfield width: only values from 1 to 32 are legal");
   6108     return false;
   6109   }
   6110   return true;
   6111 }
   6112 
   6113 OperandMatchResultTy
   6114 AMDGPUAsmParser::parseHwreg(OperandVector &Operands) {
   6115   using namespace llvm::AMDGPU::Hwreg;
   6116 
   6117   int64_t ImmVal = 0;
   6118   SMLoc Loc = getLoc();
   6119 
   6120   if (trySkipId("hwreg", AsmToken::LParen)) {
   6121     OperandInfoTy HwReg(ID_UNKNOWN_);
   6122     OperandInfoTy Offset(OFFSET_DEFAULT_);
   6123     OperandInfoTy Width(WIDTH_DEFAULT_);
   6124     if (parseHwregBody(HwReg, Offset, Width) &&
   6125         validateHwreg(HwReg, Offset, Width)) {
   6126       ImmVal = encodeHwreg(HwReg.Id, Offset.Id, Width.Id);
   6127     } else {
   6128       return MatchOperand_ParseFail;
   6129     }
   6130   } else if (parseExpr(ImmVal, "a hwreg macro")) {
   6131     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
   6132       Error(Loc, "invalid immediate: only 16-bit values are legal");
   6133       return MatchOperand_ParseFail;
   6134     }
   6135   } else {
   6136     return MatchOperand_ParseFail;
   6137   }
   6138 
   6139   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTyHwreg));
   6140   return MatchOperand_Success;
   6141 }
   6142 
bool AMDGPUOperand::isHwreg() const {
  // True for immediates tagged by parseHwreg.
  return isImmTy(ImmTyHwreg);
}
   6146 
   6147 //===----------------------------------------------------------------------===//
   6148 // sendmsg
   6149 //===----------------------------------------------------------------------===//
   6150 
// Parse the body of a sendmsg(...) operand: the message (by name or by a
// numeric expression), optionally followed by ",<operation>[,<stream>]".
// The closing parenthesis is consumed here as well.
bool
AMDGPUAsmParser::parseSendMsgBody(OperandInfoTy &Msg,
                                  OperandInfoTy &Op,
                                  OperandInfoTy &Stream) {
  using namespace llvm::AMDGPU::SendMsg;

  // Note: the conditions below assign Msg.Id / Op.Id as side effects.
  Msg.Loc = getLoc();
  if (isToken(AsmToken::Identifier) && (Msg.Id = getMsgId(getTokenStr())) >= 0) {
    Msg.IsSymbolic = true;
    lex(); // skip message name
  } else if (!parseExpr(Msg.Id, "a message name")) {
    return false;
  }

  if (trySkipToken(AsmToken::Comma)) {
    Op.IsDefined = true;
    Op.Loc = getLoc();
    if (isToken(AsmToken::Identifier) &&
        (Op.Id = getMsgOpId(Msg.Id, getTokenStr())) >= 0) {
      lex(); // skip operation name
    } else if (!parseExpr(Op.Id, "an operation name")) {
      return false;
    }

    if (trySkipToken(AsmToken::Comma)) {
      Stream.IsDefined = true;
      Stream.Loc = getLoc();
      if (!parseExpr(Stream.Id))
        return false;
    }
  }

  return skipToken(AsmToken::RParen, "expected a closing parenthesis");
}
   6185 
   6186 bool
   6187 AMDGPUAsmParser::validateSendMsg(const OperandInfoTy &Msg,
   6188                                  const OperandInfoTy &Op,
   6189                                  const OperandInfoTy &Stream) {
   6190   using namespace llvm::AMDGPU::SendMsg;
   6191 
   6192   // Validation strictness depends on whether message is specified
   6193   // in a symbolc or in a numeric form. In the latter case
   6194   // only encoding possibility is checked.
   6195   bool Strict = Msg.IsSymbolic;
   6196 
   6197   if (!isValidMsgId(Msg.Id, getSTI(), Strict)) {
   6198     Error(Msg.Loc, "invalid message id");
   6199     return false;
   6200   }
   6201   if (Strict && (msgRequiresOp(Msg.Id) != Op.IsDefined)) {
   6202     if (Op.IsDefined) {
   6203       Error(Op.Loc, "message does not support operations");
   6204     } else {
   6205       Error(Msg.Loc, "missing message operation");
   6206     }
   6207     return false;
   6208   }
   6209   if (!isValidMsgOp(Msg.Id, Op.Id, getSTI(), Strict)) {
   6210     Error(Op.Loc, "invalid operation id");
   6211     return false;
   6212   }
   6213   if (Strict && !msgSupportsStream(Msg.Id, Op.Id) && Stream.IsDefined) {
   6214     Error(Stream.Loc, "message operation does not support streams");
   6215     return false;
   6216   }
   6217   if (!isValidMsgStream(Msg.Id, Op.Id, Stream.Id, getSTI(), Strict)) {
   6218     Error(Stream.Loc, "invalid message stream id");
   6219     return false;
   6220   }
   6221   return true;
   6222 }
   6223 
   6224 OperandMatchResultTy
   6225 AMDGPUAsmParser::parseSendMsgOp(OperandVector &Operands) {
   6226   using namespace llvm::AMDGPU::SendMsg;
   6227 
   6228   int64_t ImmVal = 0;
   6229   SMLoc Loc = getLoc();
   6230 
   6231   if (trySkipId("sendmsg", AsmToken::LParen)) {
   6232     OperandInfoTy Msg(ID_UNKNOWN_);
   6233     OperandInfoTy Op(OP_NONE_);
   6234     OperandInfoTy Stream(STREAM_ID_NONE_);
   6235     if (parseSendMsgBody(Msg, Op, Stream) &&
   6236         validateSendMsg(Msg, Op, Stream)) {
   6237       ImmVal = encodeMsg(Msg.Id, Op.Id, Stream.Id);
   6238     } else {
   6239       return MatchOperand_ParseFail;
   6240     }
   6241   } else if (parseExpr(ImmVal, "a sendmsg macro")) {
   6242     if (ImmVal < 0 || !isUInt<16>(ImmVal)) {
   6243       Error(Loc, "invalid immediate: only 16-bit values are legal");
   6244       return MatchOperand_ParseFail;
   6245     }
   6246   } else {
   6247     return MatchOperand_ParseFail;
   6248   }
   6249 
   6250   Operands.push_back(AMDGPUOperand::CreateImm(this, ImmVal, Loc, AMDGPUOperand::ImmTySendMsg));
   6251   return MatchOperand_Success;
   6252 }
   6253 
bool AMDGPUOperand::isSendMsg() const {
  // True for immediates tagged by parseSendMsgOp.
  return isImmTy(ImmTySendMsg);
}
   6257 
   6258 //===----------------------------------------------------------------------===//
   6259 // v_interp
   6260 //===----------------------------------------------------------------------===//
   6261 
   6262 OperandMatchResultTy AMDGPUAsmParser::parseInterpSlot(OperandVector &Operands) {
   6263   StringRef Str;
   6264   SMLoc S = getLoc();
   6265 
   6266   if (!parseId(Str))
   6267     return MatchOperand_NoMatch;
   6268 
   6269   int Slot = StringSwitch<int>(Str)
   6270     .Case("p10", 0)
   6271     .Case("p20", 1)
   6272     .Case("p0", 2)
   6273     .Default(-1);
   6274 
   6275   if (Slot == -1) {
   6276     Error(S, "invalid interpolation slot");
   6277     return MatchOperand_ParseFail;
   6278   }
   6279 
   6280   Operands.push_back(AMDGPUOperand::CreateImm(this, Slot, S,
   6281                                               AMDGPUOperand::ImmTyInterpSlot));
   6282   return MatchOperand_Success;
   6283 }
   6284 
// Parse an interpolation attribute of the form "attr<N>.<chan>",
// e.g. "attr3.x", producing an InterpAttr and an AttrChan operand.
OperandMatchResultTy AMDGPUAsmParser::parseInterpAttr(OperandVector &Operands) {
  StringRef Str;
  SMLoc S = getLoc();

  if (!parseId(Str))
    return MatchOperand_NoMatch;

  if (!Str.startswith("attr")) {
    Error(S, "invalid interpolation attribute");
    return MatchOperand_ParseFail;
  }

  // The last two characters select the channel: ".x", ".y", ".z" or ".w".
  StringRef Chan = Str.take_back(2);
  int AttrChan = StringSwitch<int>(Chan)
    .Case(".x", 0)
    .Case(".y", 1)
    .Case(".z", 2)
    .Case(".w", 3)
    .Default(-1);
  if (AttrChan == -1) {
    Error(S, "invalid or missing interpolation attribute channel");
    return MatchOperand_ParseFail;
  }

  // Strip the "attr" prefix and the 2-char channel suffix, leaving the
  // attribute number.
  Str = Str.drop_back(2).drop_front(4);

  uint8_t Attr;
  if (Str.getAsInteger(10, Attr)) {
    Error(S, "invalid or missing interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  if (Attr > 63) {
    Error(S, "out of bounds interpolation attribute number");
    return MatchOperand_ParseFail;
  }

  // Point the channel operand's location at the ".<chan>" suffix.
  SMLoc SChan = SMLoc::getFromPointer(Chan.data());

  Operands.push_back(AMDGPUOperand::CreateImm(this, Attr, S,
                                              AMDGPUOperand::ImmTyInterpAttr));
  Operands.push_back(AMDGPUOperand::CreateImm(this, AttrChan, SChan,
                                              AMDGPUOperand::ImmTyAttrChan));
  return MatchOperand_Success;
}
   6330 
   6331 //===----------------------------------------------------------------------===//
   6332 // exp
   6333 //===----------------------------------------------------------------------===//
   6334 
   6335 OperandMatchResultTy AMDGPUAsmParser::parseExpTgt(OperandVector &Operands) {
   6336   using namespace llvm::AMDGPU::Exp;
   6337 
   6338   StringRef Str;
   6339   SMLoc S = getLoc();
   6340 
   6341   if (!parseId(Str))
   6342     return MatchOperand_NoMatch;
   6343 
   6344   unsigned Id = getTgtId(Str);
   6345   if (Id == ET_INVALID || !isSupportedTgtId(Id, getSTI())) {
   6346     Error(S, (Id == ET_INVALID) ?
   6347                 "invalid exp target" :
   6348                 "exp target is not supported on this GPU");
   6349     return MatchOperand_ParseFail;
   6350   }
   6351 
   6352   Operands.push_back(AMDGPUOperand::CreateImm(this, Id, S,
   6353                                               AMDGPUOperand::ImmTyExpTgt));
   6354   return MatchOperand_Success;
   6355 }
   6356 
   6357 //===----------------------------------------------------------------------===//
   6358 // parser helpers
   6359 //===----------------------------------------------------------------------===//
   6360 
   6361 bool
   6362 AMDGPUAsmParser::isId(const AsmToken &Token, const StringRef Id) const {
   6363   return Token.is(AsmToken::Identifier) && Token.getString() == Id;
   6364 }
   6365 
bool
AMDGPUAsmParser::isId(const StringRef Id) const {
  // True if the current token is the identifier Id.
  return isId(getToken(), Id);
}
   6370 
bool
AMDGPUAsmParser::isToken(const AsmToken::TokenKind Kind) const {
  // True if the current token has the given kind.
  return getTokenKind() == Kind;
}
   6375 
   6376 bool
   6377 AMDGPUAsmParser::trySkipId(const StringRef Id) {
   6378   if (isId(Id)) {
   6379     lex();
   6380     return true;
   6381   }
   6382   return false;
   6383 }
   6384 
   6385 bool
   6386 AMDGPUAsmParser::trySkipId(const StringRef Pref, const StringRef Id) {
   6387   if (isToken(AsmToken::Identifier)) {
   6388     StringRef Tok = getTokenStr();
   6389     if (Tok.startswith(Pref) && Tok.drop_front(Pref.size()) == Id) {
   6390       lex();
   6391       return true;
   6392     }
   6393   }
   6394   return false;
   6395 }
   6396 
   6397 bool
   6398 AMDGPUAsmParser::trySkipId(const StringRef Id, const AsmToken::TokenKind Kind) {
   6399   if (isId(Id) && peekToken().is(Kind)) {
   6400     lex();
   6401     lex();
   6402     return true;
   6403   }
   6404   return false;
   6405 }
   6406 
   6407 bool
   6408 AMDGPUAsmParser::trySkipToken(const AsmToken::TokenKind Kind) {
   6409   if (isToken(Kind)) {
   6410     lex();
   6411     return true;
   6412   }
   6413   return false;
   6414 }
   6415 
   6416 bool
   6417 AMDGPUAsmParser::skipToken(const AsmToken::TokenKind Kind,
   6418                            const StringRef ErrMsg) {
   6419   if (!trySkipToken(Kind)) {
   6420     Error(getLoc(), ErrMsg);
   6421     return false;
   6422   }
   6423   return true;
   6424 }
   6425 
   6426 bool
   6427 AMDGPUAsmParser::parseExpr(int64_t &Imm, StringRef Expected) {
   6428   SMLoc S = getLoc();
   6429 
   6430   const MCExpr *Expr;
   6431   if (Parser.parseExpression(Expr))
   6432     return false;
   6433 
   6434   if (Expr->evaluateAsAbsolute(Imm))
   6435     return true;
   6436 
   6437   if (Expected.empty()) {
   6438     Error(S, "expected absolute expression");
   6439   } else {
   6440     Error(S, Twine("expected ", Expected) +
   6441              Twine(" or an absolute expression"));
   6442   }
   6443   return false;
   6444 }
   6445 
   6446 bool
   6447 AMDGPUAsmParser::parseExpr(OperandVector &Operands) {
   6448   SMLoc S = getLoc();
   6449 
   6450   const MCExpr *Expr;
   6451   if (Parser.parseExpression(Expr))
   6452     return false;
   6453 
   6454   int64_t IntVal;
   6455   if (Expr->evaluateAsAbsolute(IntVal)) {
   6456     Operands.push_back(AMDGPUOperand::CreateImm(this, IntVal, S));
   6457   } else {
   6458     Operands.push_back(AMDGPUOperand::CreateExpr(this, Expr, S));
   6459   }
   6460   return true;
   6461 }
   6462 
   6463 bool
   6464 AMDGPUAsmParser::parseString(StringRef &Val, const StringRef ErrMsg) {
   6465   if (isToken(AsmToken::String)) {
   6466     Val = getToken().getStringContents();
   6467     lex();
   6468     return true;
   6469   } else {
   6470     Error(getLoc(), ErrMsg);
   6471     return false;
   6472   }
   6473 }
   6474 
   6475 bool
   6476 AMDGPUAsmParser::parseId(StringRef &Val, const StringRef ErrMsg) {
   6477   if (isToken(AsmToken::Identifier)) {
   6478     Val = getTokenStr();
   6479     lex();
   6480     return true;
   6481   } else {
   6482     if (!ErrMsg.empty())
   6483       Error(getLoc(), ErrMsg);
   6484     return false;
   6485   }
   6486 }
   6487 
   6488 AsmToken
   6489 AMDGPUAsmParser::getToken() const {
   6490   return Parser.getTok();
   6491 }
   6492 
   6493 AsmToken
   6494 AMDGPUAsmParser::peekToken() {
   6495   return isToken(AsmToken::EndOfStatement) ? getToken() : getLexer().peekTok();
   6496 }
   6497 
   6498 void
   6499 AMDGPUAsmParser::peekTokens(MutableArrayRef<AsmToken> Tokens) {
   6500   auto TokCount = getLexer().peekTokens(Tokens);
   6501 
   6502   for (auto Idx = TokCount; Idx < Tokens.size(); ++Idx)
   6503     Tokens[Idx] = AsmToken(AsmToken::Error, "");
   6504 }
   6505 
   6506 AsmToken::TokenKind
   6507 AMDGPUAsmParser::getTokenKind() const {
   6508   return getLexer().getKind();
   6509 }
   6510 
   6511 SMLoc
   6512 AMDGPUAsmParser::getLoc() const {
   6513   return getToken().getLoc();
   6514 }
   6515 
   6516 StringRef
   6517 AMDGPUAsmParser::getTokenStr() const {
   6518   return getToken().getString();
   6519 }
   6520 
   6521 void
   6522 AMDGPUAsmParser::lex() {
   6523   Parser.Lex();
   6524 }
   6525 
   6526 SMLoc
   6527 AMDGPUAsmParser::getOperandLoc(std::function<bool(const AMDGPUOperand&)> Test,
   6528                                const OperandVector &Operands) const {
   6529   for (unsigned i = Operands.size() - 1; i > 0; --i) {
   6530     AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
   6531     if (Test(Op))
   6532       return Op.getStartLoc();
   6533   }
   6534   return ((AMDGPUOperand &)*Operands[0]).getStartLoc();
   6535 }
   6536 
   6537 SMLoc
   6538 AMDGPUAsmParser::getImmLoc(AMDGPUOperand::ImmTy Type,
   6539                            const OperandVector &Operands) const {
   6540   auto Test = [=](const AMDGPUOperand& Op) { return Op.isImmTy(Type); };
   6541   return getOperandLoc(Test, Operands);
   6542 }
   6543 
   6544 SMLoc
   6545 AMDGPUAsmParser::getRegLoc(unsigned Reg,
   6546                            const OperandVector &Operands) const {
   6547   auto Test = [=](const AMDGPUOperand& Op) {
   6548     return Op.isRegKind() && Op.getReg() == Reg;
   6549   };
   6550   return getOperandLoc(Test, Operands);
   6551 }
   6552 
   6553 SMLoc
   6554 AMDGPUAsmParser::getLitLoc(const OperandVector &Operands) const {
   6555   auto Test = [](const AMDGPUOperand& Op) {
   6556     return Op.IsImmKindLiteral() || Op.isExpr();
   6557   };
   6558   return getOperandLoc(Test, Operands);
   6559 }
   6560 
   6561 SMLoc
   6562 AMDGPUAsmParser::getConstLoc(const OperandVector &Operands) const {
   6563   auto Test = [](const AMDGPUOperand& Op) {
   6564     return Op.isImmKindConst();
   6565   };
   6566   return getOperandLoc(Test, Operands);
   6567 }
   6568 
   6569 //===----------------------------------------------------------------------===//
   6570 // swizzle
   6571 //===----------------------------------------------------------------------===//
   6572 
   6573 LLVM_READNONE
   6574 static unsigned
   6575 encodeBitmaskPerm(const unsigned AndMask,
   6576                   const unsigned OrMask,
   6577                   const unsigned XorMask) {
   6578   using namespace llvm::AMDGPU::Swizzle;
   6579 
   6580   return BITMASK_PERM_ENC |
   6581          (AndMask << BITMASK_AND_SHIFT) |
   6582          (OrMask  << BITMASK_OR_SHIFT)  |
   6583          (XorMask << BITMASK_XOR_SHIFT);
   6584 }
   6585 
   6586 bool
   6587 AMDGPUAsmParser::parseSwizzleOperand(int64_t &Op,
   6588                                      const unsigned MinVal,
   6589                                      const unsigned MaxVal,
   6590                                      const StringRef ErrMsg,
   6591                                      SMLoc &Loc) {
   6592   if (!skipToken(AsmToken::Comma, "expected a comma")) {
   6593     return false;
   6594   }
   6595   Loc = getLoc();
   6596   if (!parseExpr(Op)) {
   6597     return false;
   6598   }
   6599   if (Op < MinVal || Op > MaxVal) {
   6600     Error(Loc, ErrMsg);
   6601     return false;
   6602   }
   6603 
   6604   return true;
   6605 }
   6606 
   6607 bool
   6608 AMDGPUAsmParser::parseSwizzleOperands(const unsigned OpNum, int64_t* Op,
   6609                                       const unsigned MinVal,
   6610                                       const unsigned MaxVal,
   6611                                       const StringRef ErrMsg) {
   6612   SMLoc Loc;
   6613   for (unsigned i = 0; i < OpNum; ++i) {
   6614     if (!parseSwizzleOperand(Op[i], MinVal, MaxVal, ErrMsg, Loc))
   6615       return false;
   6616   }
   6617 
   6618   return true;
   6619 }
   6620 
   6621 bool
   6622 AMDGPUAsmParser::parseSwizzleQuadPerm(int64_t &Imm) {
   6623   using namespace llvm::AMDGPU::Swizzle;
   6624 
   6625   int64_t Lane[LANE_NUM];
   6626   if (parseSwizzleOperands(LANE_NUM, Lane, 0, LANE_MAX,
   6627                            "expected a 2-bit lane id")) {
   6628     Imm = QUAD_PERM_ENC;
   6629     for (unsigned I = 0; I < LANE_NUM; ++I) {
   6630       Imm |= Lane[I] << (LANE_SHIFT * I);
   6631     }
   6632     return true;
   6633   }
   6634   return false;
   6635 }
   6636 
   6637 bool
   6638 AMDGPUAsmParser::parseSwizzleBroadcast(int64_t &Imm) {
   6639   using namespace llvm::AMDGPU::Swizzle;
   6640 
   6641   SMLoc Loc;
   6642   int64_t GroupSize;
   6643   int64_t LaneIdx;
   6644 
   6645   if (!parseSwizzleOperand(GroupSize,
   6646                            2, 32,
   6647                            "group size must be in the interval [2,32]",
   6648                            Loc)) {
   6649     return false;
   6650   }
   6651   if (!isPowerOf2_64(GroupSize)) {
   6652     Error(Loc, "group size must be a power of two");
   6653     return false;
   6654   }
   6655   if (parseSwizzleOperand(LaneIdx,
   6656                           0, GroupSize - 1,
   6657                           "lane id must be in the interval [0,group size - 1]",
   6658                           Loc)) {
   6659     Imm = encodeBitmaskPerm(BITMASK_MAX - GroupSize + 1, LaneIdx, 0);
   6660     return true;
   6661   }
   6662   return false;
   6663 }
   6664 
   6665 bool
   6666 AMDGPUAsmParser::parseSwizzleReverse(int64_t &Imm) {
   6667   using namespace llvm::AMDGPU::Swizzle;
   6668 
   6669   SMLoc Loc;
   6670   int64_t GroupSize;
   6671 
   6672   if (!parseSwizzleOperand(GroupSize,
   6673                            2, 32,
   6674                            "group size must be in the interval [2,32]",
   6675                            Loc)) {
   6676     return false;
   6677   }
   6678   if (!isPowerOf2_64(GroupSize)) {
   6679     Error(Loc, "group size must be a power of two");
   6680     return false;
   6681   }
   6682 
   6683   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize - 1);
   6684   return true;
   6685 }
   6686 
   6687 bool
   6688 AMDGPUAsmParser::parseSwizzleSwap(int64_t &Imm) {
   6689   using namespace llvm::AMDGPU::Swizzle;
   6690 
   6691   SMLoc Loc;
   6692   int64_t GroupSize;
   6693 
   6694   if (!parseSwizzleOperand(GroupSize,
   6695                            1, 16,
   6696                            "group size must be in the interval [1,16]",
   6697                            Loc)) {
   6698     return false;
   6699   }
   6700   if (!isPowerOf2_64(GroupSize)) {
   6701     Error(Loc, "group size must be a power of two");
   6702     return false;
   6703   }
   6704 
   6705   Imm = encodeBitmaskPerm(BITMASK_MAX, 0, GroupSize);
   6706   return true;
   6707 }
   6708 
   6709 bool
   6710 AMDGPUAsmParser::parseSwizzleBitmaskPerm(int64_t &Imm) {
   6711   using namespace llvm::AMDGPU::Swizzle;
   6712 
   6713   if (!skipToken(AsmToken::Comma, "expected a comma")) {
   6714     return false;
   6715   }
   6716 
   6717   StringRef Ctl;
   6718   SMLoc StrLoc = getLoc();
   6719   if (!parseString(Ctl)) {
   6720     return false;
   6721   }
   6722   if (Ctl.size() != BITMASK_WIDTH) {
   6723     Error(StrLoc, "expected a 5-character mask");
   6724     return false;
   6725   }
   6726 
   6727   unsigned AndMask = 0;
   6728   unsigned OrMask = 0;
   6729   unsigned XorMask = 0;
   6730 
   6731   for (size_t i = 0; i < Ctl.size(); ++i) {
   6732     unsigned Mask = 1 << (BITMASK_WIDTH - 1 - i);
   6733     switch(Ctl[i]) {
   6734     default:
   6735       Error(StrLoc, "invalid mask");
   6736       return false;
   6737     case '0':
   6738       break;
   6739     case '1':
   6740       OrMask |= Mask;
   6741       break;
   6742     case 'p':
   6743       AndMask |= Mask;
   6744       break;
   6745     case 'i':
   6746       AndMask |= Mask;
   6747       XorMask |= Mask;
   6748       break;
   6749     }
   6750   }
   6751 
   6752   Imm = encodeBitmaskPerm(AndMask, OrMask, XorMask);
   6753   return true;
   6754 }
   6755 
   6756 bool
   6757 AMDGPUAsmParser::parseSwizzleOffset(int64_t &Imm) {
   6758 
   6759   SMLoc OffsetLoc = getLoc();
   6760 
   6761   if (!parseExpr(Imm, "a swizzle macro")) {
   6762     return false;
   6763   }
   6764   if (!isUInt<16>(Imm)) {
   6765     Error(OffsetLoc, "expected a 16-bit offset");
   6766     return false;
   6767   }
   6768   return true;
   6769 }
   6770 
   6771 bool
   6772 AMDGPUAsmParser::parseSwizzleMacro(int64_t &Imm) {
   6773   using namespace llvm::AMDGPU::Swizzle;
   6774 
   6775   if (skipToken(AsmToken::LParen, "expected a left parentheses")) {
   6776 
   6777     SMLoc ModeLoc = getLoc();
   6778     bool Ok = false;
   6779 
   6780     if (trySkipId(IdSymbolic[ID_QUAD_PERM])) {
   6781       Ok = parseSwizzleQuadPerm(Imm);
   6782     } else if (trySkipId(IdSymbolic[ID_BITMASK_PERM])) {
   6783       Ok = parseSwizzleBitmaskPerm(Imm);
   6784     } else if (trySkipId(IdSymbolic[ID_BROADCAST])) {
   6785       Ok = parseSwizzleBroadcast(Imm);
   6786     } else if (trySkipId(IdSymbolic[ID_SWAP])) {
   6787       Ok = parseSwizzleSwap(Imm);
   6788     } else if (trySkipId(IdSymbolic[ID_REVERSE])) {
   6789       Ok = parseSwizzleReverse(Imm);
   6790     } else {
   6791       Error(ModeLoc, "expected a swizzle mode");
   6792     }
   6793 
   6794     return Ok && skipToken(AsmToken::RParen, "expected a closing parentheses");
   6795   }
   6796 
   6797   return false;
   6798 }
   6799 
   6800 OperandMatchResultTy
   6801 AMDGPUAsmParser::parseSwizzleOp(OperandVector &Operands) {
   6802   SMLoc S = getLoc();
   6803   int64_t Imm = 0;
   6804 
   6805   if (trySkipId("offset")) {
   6806 
   6807     bool Ok = false;
   6808     if (skipToken(AsmToken::Colon, "expected a colon")) {
   6809       if (trySkipId("swizzle")) {
   6810         Ok = parseSwizzleMacro(Imm);
   6811       } else {
   6812         Ok = parseSwizzleOffset(Imm);
   6813       }
   6814     }
   6815 
   6816     Operands.push_back(AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTySwizzle));
   6817 
   6818     return Ok? MatchOperand_Success : MatchOperand_ParseFail;
   6819   } else {
   6820     // Swizzle "offset" operand is optional.
   6821     // If it is omitted, try parsing other optional operands.
   6822     return parseOptionalOpr(Operands);
   6823   }
   6824 }
   6825 
   6826 bool
   6827 AMDGPUOperand::isSwizzle() const {
   6828   return isImmTy(ImmTySwizzle);
   6829 }
   6830 
   6831 //===----------------------------------------------------------------------===//
   6832 // VGPR Index Mode
   6833 //===----------------------------------------------------------------------===//
   6834 
   6835 int64_t AMDGPUAsmParser::parseGPRIdxMacro() {
   6836 
   6837   using namespace llvm::AMDGPU::VGPRIndexMode;
   6838 
   6839   if (trySkipToken(AsmToken::RParen)) {
   6840     return OFF;
   6841   }
   6842 
   6843   int64_t Imm = 0;
   6844 
   6845   while (true) {
   6846     unsigned Mode = 0;
   6847     SMLoc S = getLoc();
   6848 
   6849     for (unsigned ModeId = ID_MIN; ModeId <= ID_MAX; ++ModeId) {
   6850       if (trySkipId(IdSymbolic[ModeId])) {
   6851         Mode = 1 << ModeId;
   6852         break;
   6853       }
   6854     }
   6855 
   6856     if (Mode == 0) {
   6857       Error(S, (Imm == 0)?
   6858                "expected a VGPR index mode or a closing parenthesis" :
   6859                "expected a VGPR index mode");
   6860       return UNDEF;
   6861     }
   6862 
   6863     if (Imm & Mode) {
   6864       Error(S, "duplicate VGPR index mode");
   6865       return UNDEF;
   6866     }
   6867     Imm |= Mode;
   6868 
   6869     if (trySkipToken(AsmToken::RParen))
   6870       break;
   6871     if (!skipToken(AsmToken::Comma,
   6872                    "expected a comma or a closing parenthesis"))
   6873       return UNDEF;
   6874   }
   6875 
   6876   return Imm;
   6877 }
   6878 
   6879 OperandMatchResultTy
   6880 AMDGPUAsmParser::parseGPRIdxMode(OperandVector &Operands) {
   6881 
   6882   using namespace llvm::AMDGPU::VGPRIndexMode;
   6883 
   6884   int64_t Imm = 0;
   6885   SMLoc S = getLoc();
   6886 
   6887   if (trySkipId("gpr_idx", AsmToken::LParen)) {
   6888     Imm = parseGPRIdxMacro();
   6889     if (Imm == UNDEF)
   6890       return MatchOperand_ParseFail;
   6891   } else {
   6892     if (getParser().parseAbsoluteExpression(Imm))
   6893       return MatchOperand_ParseFail;
   6894     if (Imm < 0 || !isUInt<4>(Imm)) {
   6895       Error(S, "invalid immediate: only 4-bit values are legal");
   6896       return MatchOperand_ParseFail;
   6897     }
   6898   }
   6899 
   6900   Operands.push_back(
   6901       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyGprIdxMode));
   6902   return MatchOperand_Success;
   6903 }
   6904 
   6905 bool AMDGPUOperand::isGPRIdxMode() const {
   6906   return isImmTy(ImmTyGprIdxMode);
   6907 }
   6908 
   6909 //===----------------------------------------------------------------------===//
   6910 // sopp branch targets
   6911 //===----------------------------------------------------------------------===//
   6912 
   6913 OperandMatchResultTy
   6914 AMDGPUAsmParser::parseSOppBrTarget(OperandVector &Operands) {
   6915 
   6916   // Make sure we are not parsing something
   6917   // that looks like a label or an expression but is not.
   6918   // This will improve error messages.
   6919   if (isRegister() || isModifier())
   6920     return MatchOperand_NoMatch;
   6921 
   6922   if (!parseExpr(Operands))
   6923     return MatchOperand_ParseFail;
   6924 
   6925   AMDGPUOperand &Opr = ((AMDGPUOperand &)*Operands[Operands.size() - 1]);
   6926   assert(Opr.isImm() || Opr.isExpr());
   6927   SMLoc Loc = Opr.getStartLoc();
   6928 
   6929   // Currently we do not support arbitrary expressions as branch targets.
   6930   // Only labels and absolute expressions are accepted.
   6931   if (Opr.isExpr() && !Opr.isSymbolRefExpr()) {
   6932     Error(Loc, "expected an absolute expression or a label");
   6933   } else if (Opr.isImm() && !Opr.isS16Imm()) {
   6934     Error(Loc, "expected a 16-bit signed jump offset");
   6935   }
   6936 
   6937   return MatchOperand_Success;
   6938 }
   6939 
   6940 //===----------------------------------------------------------------------===//
   6941 // Boolean holding registers
   6942 //===----------------------------------------------------------------------===//
   6943 
   6944 OperandMatchResultTy
   6945 AMDGPUAsmParser::parseBoolReg(OperandVector &Operands) {
   6946   return parseReg(Operands);
   6947 }
   6948 
   6949 //===----------------------------------------------------------------------===//
   6950 // mubuf
   6951 //===----------------------------------------------------------------------===//
   6952 
   6953 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCPol() const {
   6954   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCPol);
   6955 }
   6956 
// Convert parsed MUBUF operands into MCInst operands.
// Handles atomic no-return opcode selection, the tied destination of
// atomic-with-return forms, and the lds/non-lds opcode mismatch quirk.
void AMDGPUAsmParser::cvtMubufImpl(MCInst &Inst,
                                   const OperandVector &Operands,
                                   bool IsAtomic,
                                   bool IsLds) {
  bool IsLdsOpcode = IsLds;
  bool HasLdsModifier = false;
  OptionalImmIndexMap OptionalIdx;
  unsigned FirstOperandIdx = 1;
  bool IsAtomicReturn = false;

  if (IsAtomic) {
    // GLC in the cache-policy operand selects the atomic-with-return form.
    for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
      if (!Op.isCPol())
        continue;
      IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
      break;
    }

    // Without GLC, switch to the no-return opcode when one exists.
    if (!IsAtomicReturn) {
      int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
      if (NewOpc != -1)
        Inst.setOpcode(NewOpc);
    }

    // Re-derive the flag from the (possibly rewritten) opcode.
    IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                      SIInstrFlags::IsAtomicRet;
  }

  for (unsigned i = FirstOperandIdx, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Insert a tied src for atomic return dst.
      // This cannot be postponed as subsequent calls to
      // addImmOperands rely on correct number of MC operands.
      if (IsAtomicReturn && i == FirstOperandIdx)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    HasLdsModifier |= Op.isLDS();

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // This is a workaround for an llvm quirk which may result in an
  // incorrect instruction selection. Lds and non-lds versions of
  // MUBUF instructions are identical except that lds versions
  // have mandatory 'lds' modifier. However this modifier follows
  // optional modifiers and llvm asm matcher regards this 'lds'
  // modifier as an optional one. As a result, an lds version
  // of opcode may be selected even if it has no 'lds' modifier.
  if (IsLdsOpcode && !HasLdsModifier) {
    int NoLdsOpcode = AMDGPU::getMUBUFNoLdsInst(Inst.getOpcode());
    if (NoLdsOpcode != -1) { // Got lds version - correct it.
      Inst.setOpcode(NoLdsOpcode);
      IsLdsOpcode = false;
    }
  }

  // Append deferred optional immediates in the order the opcode expects.
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);

  if (!IsLdsOpcode) { // tfe is not legal with lds opcodes
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  }
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
   7042 
// Convert parsed MTBUF operands into MCInst operands, deferring optional
// immediate modifiers so they can be appended in the opcode's order.
void AMDGPUAsmParser::cvtMtbuf(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Append deferred optional immediates in the order the opcode expects.
  addOptionalImmOperand(Inst, Operands, OptionalIdx,
                        AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyFORMAT);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySWZ);
}
   7079 
   7080 //===----------------------------------------------------------------------===//
   7081 // mimg
   7082 //===----------------------------------------------------------------------===//
   7083 
// Convert parsed MIMG operands into MCInst operands. For atomics, the
// single destination register is repeated as a tied source. Optional
// immediate modifiers are appended in a subtarget-dependent order.
void AMDGPUAsmParser::cvtMIMG(MCInst &Inst, const OperandVector &Operands,
                              bool IsAtomic) {
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (IsAtomic) {
    // Add src, same as dst
    assert(Desc.getNumDefs() == 1);
    ((AMDGPUOperand &)*Operands[I - 1]).addRegOperands(Inst, 1);
  }

  OptionalImmIndexMap OptionalIdx;

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
    } else if (Op.isImmModifier()) {
      // Defer optional immediate modifiers until all registers are added.
      OptionalIdx[Op.getImmTy()] = I;
    } else if (!Op.isToken()) {
      llvm_unreachable("unexpected operand type");
    }
  }

  bool IsGFX10Plus = isGFX10Plus();

  // Append deferred modifiers; which ones exist depends on the subtarget
  // (e.g. dim/a16 are GFX10+, da is pre-GFX10).
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDMask);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDim, -1);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyUNorm);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyR128A16);
  if (IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyA16);
  if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::tfe) != -1)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyTFE);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyLWE);
  if (!IsGFX10Plus)
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDA);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyD16);
}
   7130 
   7131 void AMDGPUAsmParser::cvtMIMGAtomic(MCInst &Inst, const OperandVector &Operands) {
   7132   cvtMIMG(Inst, Operands, true);
   7133 }
   7134 
// Convert parsed SMEM atomic operands into MCInst operands. GLC in the
// cache-policy operand selects the atomic-with-return form; without it
// the no-return opcode is substituted when available. The with-return
// form needs a tied source duplicating the destination register.
void AMDGPUAsmParser::cvtSMEMAtomic(MCInst &Inst, const OperandVector &Operands) {
  OptionalImmIndexMap OptionalIdx;
  bool IsAtomicReturn = false;

  // Scan for the cache-policy operand to find out whether GLC is set.
  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);
    if (!Op.isCPol())
      continue;
    IsAtomicReturn = Op.getImm() & AMDGPU::CPol::GLC;
    break;
  }

  if (!IsAtomicReturn) {
    int NewOpc = AMDGPU::getAtomicNoRetOp(Inst.getOpcode());
    if (NewOpc != -1)
      Inst.setOpcode(NewOpc);
  }

  // Re-derive the flag from the (possibly rewritten) opcode.
  IsAtomicReturn =  MII.get(Inst.getOpcode()).TSFlags &
                    SIInstrFlags::IsAtomicRet;

  for (unsigned i = 1, e = Operands.size(); i != e; ++i) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[i]);

    // Add the register arguments
    if (Op.isReg()) {
      Op.addRegOperands(Inst, 1);
      // Tie the atomic-return source to the destination (operand 1).
      if (IsAtomicReturn && i == 1)
        Op.addRegOperands(Inst, 1);
      continue;
    }

    // Handle the case where soffset is an immediate
    if (Op.isImm() && Op.getImmTy() == AMDGPUOperand::ImmTyNone) {
      Op.addImmOperands(Inst, 1);
      continue;
    }

    // Handle tokens like 'offen' which are sometimes hard-coded into the
    // asm string.  There are no MCInst operands for these.
    if (Op.isToken()) {
      continue;
    }
    assert(Op.isImm());

    // Handle optional arguments
    OptionalIdx[Op.getImmTy()] = i;
  }

  // Only add the offset when the opcode still expects one at this position.
  if ((int)Inst.getNumOperands() <=
      AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::offset))
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOffset);
  addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyCPol, 0);
}
   7189 
   7190 void AMDGPUAsmParser::cvtIntersectRay(MCInst &Inst,
   7191                                       const OperandVector &Operands) {
   7192   for (unsigned I = 1; I < Operands.size(); ++I) {
   7193     auto &Operand = (AMDGPUOperand &)*Operands[I];
   7194     if (Operand.isReg())
   7195       Operand.addRegOperands(Inst, 1);
   7196   }
   7197 
   7198   Inst.addOperand(MCOperand::createImm(1)); // a16
   7199 }
   7200 
   7201 //===----------------------------------------------------------------------===//
   7202 // smrd
   7203 //===----------------------------------------------------------------------===//
   7204 
   7205 bool AMDGPUOperand::isSMRDOffset8() const {
   7206   return isImm() && isUInt<8>(getImm());
   7207 }
   7208 
   7209 bool AMDGPUOperand::isSMEMOffset() const {
   7210   return isImm(); // Offset range is checked later by validator.
   7211 }
   7212 
   7213 bool AMDGPUOperand::isSMRDLiteralOffset() const {
   7214   // 32-bit literals are only supported on CI and we only want to use them
   7215   // when the offset is > 8-bits.
   7216   return isImm() && !isUInt<8>(getImm()) && isUInt<32>(getImm());
   7217 }
   7218 
   7219 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDOffset8() const {
   7220   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
   7221 }
   7222 
   7223 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMEMOffset() const {
   7224   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
   7225 }
   7226 
   7227 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultSMRDLiteralOffset() const {
   7228   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
   7229 }
   7230 
   7231 AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFlatOffset() const {
   7232   return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyOffset);
   7233 }
   7234 
   7235 //===----------------------------------------------------------------------===//
   7236 // vop3
   7237 //===----------------------------------------------------------------------===//
   7238 
// Map an omod multiplier (1, 2 or 4) onto its encoded value (0, 1 or 2).
// Any other multiplier is rejected and left unchanged.
static bool ConvertOmodMul(int64_t &Mul) {
  switch (Mul) {
  case 1:
  case 2:
  case 4:
    Mul >>= 1;
    return true;
  default:
    return false;
  }
}
   7246 
// Map an omod divisor (1 or 2) onto its encoded value (0 or 3).
// Any other divisor is rejected and left unchanged.
static bool ConvertOmodDiv(int64_t &Div) {
  switch (Div) {
  case 1:
    Div = 0;
    return true;
  case 2:
    Div = 3;
    return true;
  default:
    return false;
  }
}
   7260 
// Both bound_ctrl:0 and bound_ctrl:1 are accepted and encoded as 1 —
// this is intentional, for compatibility with sp3 (see bug 35397).
// Any other value is rejected and left unchanged.
static bool ConvertBoundCtrl(int64_t &BoundCtrl) {
  if (BoundCtrl != 0 && BoundCtrl != 1)
    return false;
  BoundCtrl = 1;
  return true;
}
   7271 
// Note: the order in this table matches the order of operands in AsmString.
// Each entry maps an optional-operand name to its immediate type, whether
// it is a bare token (no ":value" part), and an optional value converter.
// NOTE(review): "d16" appears twice below; since the table order mirrors
// AsmString operand order, the duplicate looks intentional — confirm
// before deduplicating.
static const OptionalOperand AMDGPUOptionalOperandTable[] = {
  {"offen",   AMDGPUOperand::ImmTyOffen, true, nullptr},
  {"idxen",   AMDGPUOperand::ImmTyIdxen, true, nullptr},
  {"addr64",  AMDGPUOperand::ImmTyAddr64, true, nullptr},
  {"offset0", AMDGPUOperand::ImmTyOffset0, false, nullptr},
  {"offset1", AMDGPUOperand::ImmTyOffset1, false, nullptr},
  {"gds",     AMDGPUOperand::ImmTyGDS, true, nullptr},
  {"lds",     AMDGPUOperand::ImmTyLDS, true, nullptr},
  {"offset",  AMDGPUOperand::ImmTyOffset, false, nullptr},
  {"inst_offset", AMDGPUOperand::ImmTyInstOffset, false, nullptr},
  // Cache policy has no single name; it is matched specially.
  {"",        AMDGPUOperand::ImmTyCPol, false, nullptr},
  {"swz",     AMDGPUOperand::ImmTySWZ, true, nullptr},
  {"tfe",     AMDGPUOperand::ImmTyTFE, true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16, true, nullptr},
  {"high",    AMDGPUOperand::ImmTyHigh, true, nullptr},
  {"clamp",   AMDGPUOperand::ImmTyClampSI, true, nullptr},
  {"omod",    AMDGPUOperand::ImmTyOModSI, false, ConvertOmodMul},
  {"unorm",   AMDGPUOperand::ImmTyUNorm, true, nullptr},
  {"da",      AMDGPUOperand::ImmTyDA,    true, nullptr},
  {"r128",    AMDGPUOperand::ImmTyR128A16,  true, nullptr},
  {"a16",     AMDGPUOperand::ImmTyA16,  true, nullptr},
  {"lwe",     AMDGPUOperand::ImmTyLWE,   true, nullptr},
  {"d16",     AMDGPUOperand::ImmTyD16,   true, nullptr},
  {"dmask",   AMDGPUOperand::ImmTyDMask, false, nullptr},
  {"dim",     AMDGPUOperand::ImmTyDim,   false, nullptr},
  {"row_mask",   AMDGPUOperand::ImmTyDppRowMask, false, nullptr},
  {"bank_mask",  AMDGPUOperand::ImmTyDppBankMask, false, nullptr},
  {"bound_ctrl", AMDGPUOperand::ImmTyDppBoundCtrl, false, ConvertBoundCtrl},
  {"fi",         AMDGPUOperand::ImmTyDppFi, false, nullptr},
  {"dst_sel",    AMDGPUOperand::ImmTySdwaDstSel, false, nullptr},
  {"src0_sel",   AMDGPUOperand::ImmTySdwaSrc0Sel, false, nullptr},
  {"src1_sel",   AMDGPUOperand::ImmTySdwaSrc1Sel, false, nullptr},
  {"dst_unused", AMDGPUOperand::ImmTySdwaDstUnused, false, nullptr},
  {"compr", AMDGPUOperand::ImmTyExpCompr, true, nullptr },
  {"vm", AMDGPUOperand::ImmTyExpVM, true, nullptr},
  {"op_sel", AMDGPUOperand::ImmTyOpSel, false, nullptr},
  {"op_sel_hi", AMDGPUOperand::ImmTyOpSelHi, false, nullptr},
  {"neg_lo", AMDGPUOperand::ImmTyNegLo, false, nullptr},
  {"neg_hi", AMDGPUOperand::ImmTyNegHi, false, nullptr},
  {"blgp", AMDGPUOperand::ImmTyBLGP, false, nullptr},
  {"cbsz", AMDGPUOperand::ImmTyCBSZ, false, nullptr},
  {"abid", AMDGPUOperand::ImmTyABID, false, nullptr}
};
   7316 
   7317 void AMDGPUAsmParser::onBeginOfFile() {
   7318   if (!getParser().getStreamer().getTargetStreamer() ||
   7319       getSTI().getTargetTriple().getArch() == Triple::r600)
   7320     return;
   7321 
   7322   if (!getTargetStreamer().getTargetID())
   7323     getTargetStreamer().initializeTargetID(getSTI(), getSTI().getFeatureString());
   7324 
   7325   if (isHsaAbiVersion3Or4(&getSTI()))
   7326     getTargetStreamer().EmitDirectiveAMDGCNTarget();
   7327 }
   7328 
   7329 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOperand(OperandVector &Operands) {
   7330 
   7331   OperandMatchResultTy res = parseOptionalOpr(Operands);
   7332 
   7333   // This is a hack to enable hardcoded mandatory operands which follow
   7334   // optional operands.
   7335   //
   7336   // Current design assumes that all operands after the first optional operand
   7337   // are also optional. However implementation of some instructions violates
   7338   // this rule (see e.g. flat/global atomic which have hardcoded 'glc' operands).
   7339   //
   7340   // To alleviate this problem, we have to (implicitly) parse extra operands
   7341   // to make sure autogenerated parser of custom operands never hit hardcoded
   7342   // mandatory operands.
   7343 
   7344   for (unsigned i = 0; i < MAX_OPR_LOOKAHEAD; ++i) {
   7345     if (res != MatchOperand_Success ||
   7346         isToken(AsmToken::EndOfStatement))
   7347       break;
   7348 
   7349     trySkipToken(AsmToken::Comma);
   7350     res = parseOptionalOpr(Operands);
   7351   }
   7352 
   7353   return res;
   7354 }
   7355 
   7356 OperandMatchResultTy AMDGPUAsmParser::parseOptionalOpr(OperandVector &Operands) {
   7357   OperandMatchResultTy res;
   7358   for (const OptionalOperand &Op : AMDGPUOptionalOperandTable) {
   7359     // try to parse any optional operand here
   7360     if (Op.IsBit) {
   7361       res = parseNamedBit(Op.Name, Operands, Op.Type);
   7362     } else if (Op.Type == AMDGPUOperand::ImmTyOModSI) {
   7363       res = parseOModOperand(Operands);
   7364     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstSel ||
   7365                Op.Type == AMDGPUOperand::ImmTySdwaSrc0Sel ||
   7366                Op.Type == AMDGPUOperand::ImmTySdwaSrc1Sel) {
   7367       res = parseSDWASel(Operands, Op.Name, Op.Type);
   7368     } else if (Op.Type == AMDGPUOperand::ImmTySdwaDstUnused) {
   7369       res = parseSDWADstUnused(Operands);
   7370     } else if (Op.Type == AMDGPUOperand::ImmTyOpSel ||
   7371                Op.Type == AMDGPUOperand::ImmTyOpSelHi ||
   7372                Op.Type == AMDGPUOperand::ImmTyNegLo ||
   7373                Op.Type == AMDGPUOperand::ImmTyNegHi) {
   7374       res = parseOperandArrayWithPrefix(Op.Name, Operands, Op.Type,
   7375                                         Op.ConvertResult);
   7376     } else if (Op.Type == AMDGPUOperand::ImmTyDim) {
   7377       res = parseDim(Operands);
   7378     } else if (Op.Type == AMDGPUOperand::ImmTyCPol) {
   7379       res = parseCPol(Operands);
   7380     } else {
   7381       res = parseIntWithPrefix(Op.Name, Operands, Op.Type, Op.ConvertResult);
   7382     }
   7383     if (res != MatchOperand_NoMatch) {
   7384       return res;
   7385     }
   7386   }
   7387   return MatchOperand_NoMatch;
   7388 }
   7389 
   7390 OperandMatchResultTy AMDGPUAsmParser::parseOModOperand(OperandVector &Operands) {
   7391   StringRef Name = getTokenStr();
   7392   if (Name == "mul") {
   7393     return parseIntWithPrefix("mul", Operands,
   7394                               AMDGPUOperand::ImmTyOModSI, ConvertOmodMul);
   7395   }
   7396 
   7397   if (Name == "div") {
   7398     return parseIntWithPrefix("div", Operands,
   7399                               AMDGPUOperand::ImmTyOModSI, ConvertOmodDiv);
   7400   }
   7401 
   7402   return MatchOperand_NoMatch;
   7403 }
   7404 
void AMDGPUAsmParser::cvtVOP3OpSel(MCInst &Inst, const OperandVector &Operands) {
  // Convert as a regular VOP3P first; this fills in op_sel and the
  // per-source modifier operands.
  cvtVOP3P(Inst, Operands);

  int Opc = Inst.getOpcode();

  // Count how many source operands (src0..src2) this opcode actually has.
  int SrcNum;
  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  for (SrcNum = 0;
       SrcNum < 3 && AMDGPU::getNamedOperandIdx(Opc, Ops[SrcNum]) != -1;
       ++SrcNum);
  assert(SrcNum > 0);

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  unsigned OpSel = Inst.getOperand(OpSelIdx).getImm();

  // The op_sel bit just past the last source is the destination select;
  // it is carried as DST_OP_SEL inside src0_modifiers.
  if ((OpSel & (1 << SrcNum)) != 0) {
    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers);
    uint32_t ModVal = Inst.getOperand(ModIdx).getImm();
    Inst.getOperand(ModIdx).setImm(ModVal | SISrcMods::DST_OP_SEL);
  }
}
   7428 
   7429 static bool isRegOrImmWithInputMods(const MCInstrDesc &Desc, unsigned OpNum) {
   7430       // 1. This operand is input modifiers
   7431   return Desc.OpInfo[OpNum].OperandType == AMDGPU::OPERAND_INPUT_MODS
   7432       // 2. This is not last operand
   7433       && Desc.NumOperands > (OpNum + 1)
   7434       // 3. Next operand is register class
   7435       && Desc.OpInfo[OpNum + 1].RegClass != -1
   7436       // 4. Next register is not tied to any other operand
   7437       && Desc.getOperandConstraint(OpNum + 1, MCOI::OperandConstraint::TIED_TO) == -1;
   7438 }
   7439 
// Convert parsed operands of a VOP3 interpolation instruction into MCInst
// operands, deferring optional immediates so they can be appended in the
// order the encoding expects.
void AMDGPUAsmParser::cvtVOP3Interp(MCInst &Inst, const OperandVector &Operands)
{
  OptionalImmIndexMap OptionalIdx;
  unsigned Opc = Inst.getOpcode();

  // Copy the explicit result registers; parsed operand 0 is the mnemonic.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Emits two MCInst operands: the FP input modifiers and the source.
      Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
    } else if (Op.isInterpSlot() ||
               Op.isInterpAttr() ||
               Op.isAttrChan()) {
      Inst.addOperand(MCOperand::createImm(Op.getImm()));
    } else if (Op.isImmModifier()) {
      // Remember where the optional immediate was parsed; appended below.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("unhandled operand type");
    }
  }

  // Append high/clamp/omod only for opcodes that actually encode them.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::high) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyHigh);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }
}
   7478 
// Convert parsed operands of a VOP3 instruction into MCInst operands.
// Optional immediates are collected into OptionalIdx first and appended in
// the encoding's fixed order (clamp, omod) afterwards.
void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands,
                              OptionalImmIndexMap &OptionalIdx) {
  unsigned Opc = Inst.getOpcode();

  // Copy the explicit result registers; parsed operand 0 is the mnemonic.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src0_modifiers) != -1) {
    // This instruction has src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Emits two MCInst operands: the modifiers and the source itself.
        Op.addRegOrImmWithFPInputModsOperands(Inst, 2);
      } else if (Op.isImmModifier()) {
        // Defer optional immediates; appended below in a fixed order.
        OptionalIdx[Op.getImmTy()] = I;
      } else if (Op.isRegOrImm()) {
        Op.addRegOrImmOperands(Inst, 1);
      } else {
        llvm_unreachable("unhandled operand type");
      }
    }
  } else {
    // No src modifiers
    for (unsigned E = Operands.size(); I != E; ++I) {
      AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
      if (Op.isMod()) {
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        Op.addRegOrImmOperands(Inst, 1);
      }
    }
  }

  // Append clamp/omod only for opcodes that actually encode them.
  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::clamp) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI);
  }

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::omod) != -1) {
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI);
  }

  // Special case v_mac_{f16, f32} and v_fmac_{f16, f32} (gfx906/gfx10+):
  // it has src2 register operand that is tied to dst operand
  // we don't allow modifiers for this operand in assembler so src2_modifiers
  // should be 0.
  if (Opc == AMDGPU::V_MAC_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F32_e64_vi ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx6_gfx7 ||
      Opc == AMDGPU::V_MAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_MAC_F16_e64_vi ||
      Opc == AMDGPU::V_FMAC_F64_e64_gfx90a ||
      Opc == AMDGPU::V_FMAC_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F32_e64_vi ||
      Opc == AMDGPU::V_FMAC_LEGACY_F32_e64_gfx10 ||
      Opc == AMDGPU::V_FMAC_F16_e64_gfx10) {
    auto it = Inst.begin();
    std::advance(it, AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::src2_modifiers));
    it = Inst.insert(it, MCOperand::createImm(0)); // no modifiers for src2
    ++it;
    // Copy the operand to ensure it's not invalidated when Inst grows.
    Inst.insert(it, MCOperand(Inst.getOperand(0))); // src2 = dst
  }
}
   7546 
   7547 void AMDGPUAsmParser::cvtVOP3(MCInst &Inst, const OperandVector &Operands) {
   7548   OptionalImmIndexMap OptionalIdx;
   7549   cvtVOP3(Inst, Operands, OptionalIdx);
   7550 }
   7551 
// Convert a VOP3P instruction: append the packed-modifier immediates
// (op_sel, op_sel_hi, neg_lo, neg_hi) and then scatter their per-source bits
// into the individual srcN_modifiers operands.
void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands,
                               OptionalImmIndexMap &OptIdx) {
  const int Opc = Inst.getOpcode();
  const MCInstrDesc &Desc = MII.get(Opc);

  const bool IsPacked = (Desc.TSFlags & SIInstrFlags::IsPacked) != 0;

  if (AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vdst_in) != -1) {
    assert(!IsPacked);
    // NOTE(review): operand 0 (the dst) is re-added here to stand in for
    // vdst_in -- presumably the two are tied; confirm against the TD files.
    Inst.addOperand(Inst.getOperand(0));
  }

  // FIXME: This is messy. Parse the modifiers as if it was a normal VOP3
  // instruction, and then figure out where to actually put the modifiers

  int OpSelIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel);
  if (OpSelIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSel);
  }

  int OpSelHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::op_sel_hi);
  if (OpSelHiIdx != -1) {
    // Packed opcodes default op_sel_hi to all-ones, unpacked to zero.
    int DefaultVal = IsPacked ? -1 : 0;
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyOpSelHi,
                          DefaultVal);
  }

  int NegLoIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_lo);
  if (NegLoIdx != -1) {
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegLo);
    addOptionalImmOperand(Inst, Operands, OptIdx, AMDGPUOperand::ImmTyNegHi);
  }

  const int Ops[] = { AMDGPU::OpName::src0,
                      AMDGPU::OpName::src1,
                      AMDGPU::OpName::src2 };
  const int ModOps[] = { AMDGPU::OpName::src0_modifiers,
                         AMDGPU::OpName::src1_modifiers,
                         AMDGPU::OpName::src2_modifiers };

  unsigned OpSel = 0;
  unsigned OpSelHi = 0;
  unsigned NegLo = 0;
  unsigned NegHi = 0;

  if (OpSelIdx != -1)
    OpSel = Inst.getOperand(OpSelIdx).getImm();

  if (OpSelHiIdx != -1)
    OpSelHi = Inst.getOperand(OpSelHiIdx).getImm();

  if (NegLoIdx != -1) {
    int NegHiIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::neg_hi);
    NegLo = Inst.getOperand(NegLoIdx).getImm();
    NegHi = Inst.getOperand(NegHiIdx).getImm();
  }

  // Bit J of each packed immediate corresponds to source J; fold those bits
  // into the matching srcJ_modifiers operand.
  for (int J = 0; J < 3; ++J) {
    int OpIdx = AMDGPU::getNamedOperandIdx(Opc, Ops[J]);
    if (OpIdx == -1)
      break;

    uint32_t ModVal = 0;

    if ((OpSel & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_0;

    if ((OpSelHi & (1 << J)) != 0)
      ModVal |= SISrcMods::OP_SEL_1;

    if ((NegLo & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG;

    if ((NegHi & (1 << J)) != 0)
      ModVal |= SISrcMods::NEG_HI;

    int ModIdx = AMDGPU::getNamedOperandIdx(Opc, ModOps[J]);

    Inst.getOperand(ModIdx).setImm(Inst.getOperand(ModIdx).getImm() | ModVal);
  }
}
   7633 
   7634 void AMDGPUAsmParser::cvtVOP3P(MCInst &Inst, const OperandVector &Operands) {
   7635   OptionalImmIndexMap OptIdx;
   7636   cvtVOP3(Inst, Operands, OptIdx);
   7637   cvtVOP3P(Inst, Operands, OptIdx);
   7638 }
   7639 
   7640 //===----------------------------------------------------------------------===//
   7641 // dpp
   7642 //===----------------------------------------------------------------------===//
   7643 
// An operand is dpp8 iff it was parsed as an ImmTyDPP8 immediate.
bool AMDGPUOperand::isDPP8() const {
  return isImmTy(ImmTyDPP8);
}
   7647 
   7648 bool AMDGPUOperand::isDPPCtrl() const {
   7649   using namespace AMDGPU::DPP;
   7650 
   7651   bool result = isImm() && getImmTy() == ImmTyDppCtrl && isUInt<9>(getImm());
   7652   if (result) {
   7653     int64_t Imm = getImm();
   7654     return (Imm >= DppCtrl::QUAD_PERM_FIRST && Imm <= DppCtrl::QUAD_PERM_LAST) ||
   7655            (Imm >= DppCtrl::ROW_SHL_FIRST && Imm <= DppCtrl::ROW_SHL_LAST) ||
   7656            (Imm >= DppCtrl::ROW_SHR_FIRST && Imm <= DppCtrl::ROW_SHR_LAST) ||
   7657            (Imm >= DppCtrl::ROW_ROR_FIRST && Imm <= DppCtrl::ROW_ROR_LAST) ||
   7658            (Imm == DppCtrl::WAVE_SHL1) ||
   7659            (Imm == DppCtrl::WAVE_ROL1) ||
   7660            (Imm == DppCtrl::WAVE_SHR1) ||
   7661            (Imm == DppCtrl::WAVE_ROR1) ||
   7662            (Imm == DppCtrl::ROW_MIRROR) ||
   7663            (Imm == DppCtrl::ROW_HALF_MIRROR) ||
   7664            (Imm == DppCtrl::BCAST15) ||
   7665            (Imm == DppCtrl::BCAST31) ||
   7666            (Imm >= DppCtrl::ROW_SHARE_FIRST && Imm <= DppCtrl::ROW_SHARE_LAST) ||
   7667            (Imm >= DppCtrl::ROW_XMASK_FIRST && Imm <= DppCtrl::ROW_XMASK_LAST);
   7668   }
   7669   return false;
   7670 }
   7671 
   7672 //===----------------------------------------------------------------------===//
   7673 // mAI
   7674 //===----------------------------------------------------------------------===//
   7675 
   7676 bool AMDGPUOperand::isBLGP() const {
   7677   return isImm() && getImmTy() == ImmTyBLGP && isUInt<3>(getImm());
   7678 }
   7679 
   7680 bool AMDGPUOperand::isCBSZ() const {
   7681   return isImm() && getImmTy() == ImmTyCBSZ && isUInt<3>(getImm());
   7682 }
   7683 
   7684 bool AMDGPUOperand::isABID() const {
   7685   return isImm() && getImmTy() == ImmTyABID && isUInt<4>(getImm());
   7686 }
   7687 
   7688 bool AMDGPUOperand::isS16Imm() const {
   7689   return isImm() && (isInt<16>(getImm()) || isUInt<16>(getImm()));
   7690 }
   7691 
   7692 bool AMDGPUOperand::isU16Imm() const {
   7693   return isImm() && isUInt<16>(getImm());
   7694 }
   7695 
   7696 //===----------------------------------------------------------------------===//
   7697 // dim
   7698 //===----------------------------------------------------------------------===//
   7699 
// Parse the value of a dim: modifier into its MIMG encoding.
// Returns false (without reporting an error) if the tokens do not form a
// valid dim identifier.
bool AMDGPUAsmParser::parseDimId(unsigned &Encoding) {
  // We want to allow "dim:1D" etc.,
  // but the initial 1 is tokenized as an integer.
  std::string Token;
  if (isToken(AsmToken::Integer)) {
    SMLoc Loc = getToken().getEndLoc();
    Token = std::string(getTokenStr());
    lex();
    // The identifier part must start exactly where the integer ended;
    // any gap means there was whitespace, which is not a valid dim value.
    if (getLoc() != Loc)
      return false;
  }

  StringRef Suffix;
  if (!parseId(Suffix))
    return false;
  Token += Suffix;

  // Accept both the short form ("1D") and the full
  // "SQ_RSRC_IMG_1D" spelling by stripping the 12-character prefix.
  StringRef DimId = Token;
  if (DimId.startswith("SQ_RSRC_IMG_"))
    DimId = DimId.drop_front(12);

  const AMDGPU::MIMGDimInfo *DimInfo = AMDGPU::getMIMGDimInfoByAsmSuffix(DimId);
  if (!DimInfo)
    return false;

  Encoding = DimInfo->Encoding;
  return true;
}
   7728 
   7729 OperandMatchResultTy AMDGPUAsmParser::parseDim(OperandVector &Operands) {
   7730   if (!isGFX10Plus())
   7731     return MatchOperand_NoMatch;
   7732 
   7733   SMLoc S = getLoc();
   7734 
   7735   if (!trySkipId("dim", AsmToken::Colon))
   7736     return MatchOperand_NoMatch;
   7737 
   7738   unsigned Encoding;
   7739   SMLoc Loc = getLoc();
   7740   if (!parseDimId(Encoding)) {
   7741     Error(Loc, "invalid dim value");
   7742     return MatchOperand_ParseFail;
   7743   }
   7744 
   7745   Operands.push_back(AMDGPUOperand::CreateImm(this, Encoding, S,
   7746                                               AMDGPUOperand::ImmTyDim));
   7747   return MatchOperand_Success;
   7748 }
   7749 
   7750 //===----------------------------------------------------------------------===//
   7751 // dpp
   7752 //===----------------------------------------------------------------------===//
   7753 
   7754 OperandMatchResultTy AMDGPUAsmParser::parseDPP8(OperandVector &Operands) {
   7755   SMLoc S = getLoc();
   7756 
   7757   if (!isGFX10Plus() || !trySkipId("dpp8", AsmToken::Colon))
   7758     return MatchOperand_NoMatch;
   7759 
   7760   // dpp8:[%d,%d,%d,%d,%d,%d,%d,%d]
   7761 
   7762   int64_t Sels[8];
   7763 
   7764   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
   7765     return MatchOperand_ParseFail;
   7766 
   7767   for (size_t i = 0; i < 8; ++i) {
   7768     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
   7769       return MatchOperand_ParseFail;
   7770 
   7771     SMLoc Loc = getLoc();
   7772     if (getParser().parseAbsoluteExpression(Sels[i]))
   7773       return MatchOperand_ParseFail;
   7774     if (0 > Sels[i] || 7 < Sels[i]) {
   7775       Error(Loc, "expected a 3-bit value");
   7776       return MatchOperand_ParseFail;
   7777     }
   7778   }
   7779 
   7780   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
   7781     return MatchOperand_ParseFail;
   7782 
   7783   unsigned DPP8 = 0;
   7784   for (size_t i = 0; i < 8; ++i)
   7785     DPP8 |= (Sels[i] << (i * 3));
   7786 
   7787   Operands.push_back(AMDGPUOperand::CreateImm(this, DPP8, S, AMDGPUOperand::ImmTyDPP8));
   7788   return MatchOperand_Success;
   7789 }
   7790 
   7791 bool
   7792 AMDGPUAsmParser::isSupportedDPPCtrl(StringRef Ctrl,
   7793                                     const OperandVector &Operands) {
   7794   if (Ctrl == "row_newbcast")
   7795     return isGFX90A();
   7796 
   7797   if (Ctrl == "row_share" ||
   7798       Ctrl == "row_xmask")
   7799     return isGFX10Plus();
   7800 
   7801   if (Ctrl == "wave_shl" ||
   7802       Ctrl == "wave_shr" ||
   7803       Ctrl == "wave_rol" ||
   7804       Ctrl == "wave_ror" ||
   7805       Ctrl == "row_bcast")
   7806     return isVI() || isGFX9();
   7807 
   7808   return Ctrl == "row_mirror" ||
   7809          Ctrl == "row_half_mirror" ||
   7810          Ctrl == "quad_perm" ||
   7811          Ctrl == "row_shl" ||
   7812          Ctrl == "row_shr" ||
   7813          Ctrl == "row_ror";
   7814 }
   7815 
   7816 int64_t
   7817 AMDGPUAsmParser::parseDPPCtrlPerm() {
   7818   // quad_perm:[%d,%d,%d,%d]
   7819 
   7820   if (!skipToken(AsmToken::LBrac, "expected an opening square bracket"))
   7821     return -1;
   7822 
   7823   int64_t Val = 0;
   7824   for (int i = 0; i < 4; ++i) {
   7825     if (i > 0 && !skipToken(AsmToken::Comma, "expected a comma"))
   7826       return -1;
   7827 
   7828     int64_t Temp;
   7829     SMLoc Loc = getLoc();
   7830     if (getParser().parseAbsoluteExpression(Temp))
   7831       return -1;
   7832     if (Temp < 0 || Temp > 3) {
   7833       Error(Loc, "expected a 2-bit value");
   7834       return -1;
   7835     }
   7836 
   7837     Val += (Temp << i * 2);
   7838   }
   7839 
   7840   if (!skipToken(AsmToken::RBrac, "expected a closing square bracket"))
   7841     return -1;
   7842 
   7843   return Val;
   7844 }
   7845 
// Parse the numeric payload of a "ctrl:%d" DPP control and fold it into the
// control's encoding. Returns -1 (after reporting an error) on failure.
int64_t
AMDGPUAsmParser::parseDPPCtrlSel(StringRef Ctrl) {
  using namespace AMDGPU::DPP;

  // sel:%d

  int64_t Val;
  SMLoc Loc = getLoc();

  if (getParser().parseAbsoluteExpression(Val))
    return -1;

  // Base encoding plus the inclusive range of legal sel values for each
  // control name.
  struct DppCtrlCheck {
    int64_t Ctrl;
    int Lo;
    int Hi;
  };

  DppCtrlCheck Check = StringSwitch<DppCtrlCheck>(Ctrl)
    .Case("wave_shl",  {DppCtrl::WAVE_SHL1,       1,  1})
    .Case("wave_rol",  {DppCtrl::WAVE_ROL1,       1,  1})
    .Case("wave_shr",  {DppCtrl::WAVE_SHR1,       1,  1})
    .Case("wave_ror",  {DppCtrl::WAVE_ROR1,       1,  1})
    .Case("row_shl",   {DppCtrl::ROW_SHL0,        1, 15})
    .Case("row_shr",   {DppCtrl::ROW_SHR0,        1, 15})
    .Case("row_ror",   {DppCtrl::ROW_ROR0,        1, 15})
    .Case("row_share", {DppCtrl::ROW_SHARE_FIRST, 0, 15})
    .Case("row_xmask", {DppCtrl::ROW_XMASK_FIRST, 0, 15})
    .Case("row_newbcast", {DppCtrl::ROW_NEWBCAST_FIRST, 0, 15})
    .Default({-1, 0, 0});

  bool Valid;
  if (Check.Ctrl == -1) {
    // Not in the table: only row_bcast is accepted here, with the two fixed
    // payloads 15 and 31. Note Val is overwritten even when invalid; that is
    // harmless because we bail out below.
    Valid = (Ctrl == "row_bcast" && (Val == 15 || Val == 31));
    Val = (Val == 15)? DppCtrl::BCAST15 : DppCtrl::BCAST31;
  } else {
    Valid = Check.Lo <= Val && Val <= Check.Hi;
    // Lo == Hi means the sel carries no payload (e.g. wave_shl:1): use the
    // bare control encoding; otherwise OR the payload into the base value.
    Val = (Check.Lo == Check.Hi) ? Check.Ctrl : (Check.Ctrl | Val);
  }

  if (!Valid) {
    Error(Loc, Twine("invalid ", Ctrl) + Twine(" value"));
    return -1;
  }

  return Val;
}
   7893 
   7894 OperandMatchResultTy
   7895 AMDGPUAsmParser::parseDPPCtrl(OperandVector &Operands) {
   7896   using namespace AMDGPU::DPP;
   7897 
   7898   if (!isToken(AsmToken::Identifier) ||
   7899       !isSupportedDPPCtrl(getTokenStr(), Operands))
   7900     return MatchOperand_NoMatch;
   7901 
   7902   SMLoc S = getLoc();
   7903   int64_t Val = -1;
   7904   StringRef Ctrl;
   7905 
   7906   parseId(Ctrl);
   7907 
   7908   if (Ctrl == "row_mirror") {
   7909     Val = DppCtrl::ROW_MIRROR;
   7910   } else if (Ctrl == "row_half_mirror") {
   7911     Val = DppCtrl::ROW_HALF_MIRROR;
   7912   } else {
   7913     if (skipToken(AsmToken::Colon, "expected a colon")) {
   7914       if (Ctrl == "quad_perm") {
   7915         Val = parseDPPCtrlPerm();
   7916       } else {
   7917         Val = parseDPPCtrlSel(Ctrl);
   7918       }
   7919     }
   7920   }
   7921 
   7922   if (Val == -1)
   7923     return MatchOperand_ParseFail;
   7924 
   7925   Operands.push_back(
   7926     AMDGPUOperand::CreateImm(this, Val, S, AMDGPUOperand::ImmTyDppCtrl));
   7927   return MatchOperand_Success;
   7928 }
   7929 
// Default DPP row_mask when not spelled out: 0xf.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultRowMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppRowMask);
}
   7933 
// Default s_endpgm immediate when not spelled out: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultEndpgmImmOperands() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyEndpgm);
}
   7937 
// Default DPP bank_mask when not spelled out: 0xf.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBankMask() const {
  return AMDGPUOperand::CreateImm(this, 0xf, SMLoc(), AMDGPUOperand::ImmTyDppBankMask);
}
   7941 
// Default DPP bound_ctrl when not spelled out: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBoundCtrl() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppBoundCtrl);
}
   7945 
// Default DPP fi (fetch-invalid) when not spelled out: 0.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultFI() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyDppFi);
}
   7949 
// Convert parsed operands of a DPP (or DPP8) instruction into MCInst
// operands, replicating tied operands and appending the optional DPP
// immediates in the order the encoding expects.
void AMDGPUAsmParser::cvtDPP(MCInst &Inst, const OperandVector &Operands, bool IsDPP8) {
  OptionalImmIndexMap OptionalIdx;

  // Copy the explicit result registers; parsed operand 0 is the mnemonic.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  int Fi = 0;
  for (unsigned E = Operands.size(); I != E; ++I) {
    // If the next MCInst slot is tied to an earlier operand, duplicate that
    // operand before processing the parsed one.
    auto TiedTo = Desc.getOperandConstraint(Inst.getNumOperands(),
                                            MCOI::TIED_TO);
    if (TiedTo != -1) {
      assert((unsigned)TiedTo < Inst.getNumOperands());
      // handle tied old or src2 for MAC instructions
      Inst.addOperand(Inst.getOperand(TiedTo));
    }
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    // Add the register arguments
    if (Op.isReg() && validateVccOperand(Op.getReg())) {
      // VOP2b (v_add_u32, v_sub_u32 ...) dpp use "vcc" token.
      // Skip it.
      continue;
    }

    if (IsDPP8) {
      if (Op.isDPP8()) {
        Op.addImmOperands(Inst, 1);
      } else if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        // Two MCInst operands: FP input modifiers plus the register.
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isFI()) {
        // fi is folded into the dpp8 immediate below, not emitted here.
        Fi = Op.getImm();
      } else if (Op.isReg()) {
        Op.addRegOperands(Inst, 1);
      } else {
        llvm_unreachable("Invalid operand type");
      }
    } else {
      if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
        Op.addRegWithFPInputModsOperands(Inst, 2);
      } else if (Op.isDPPCtrl()) {
        Op.addImmOperands(Inst, 1);
      } else if (Op.isImm()) {
        // Handle optional arguments
        OptionalIdx[Op.getImmTy()] = I;
      } else {
        llvm_unreachable("Invalid operand type");
      }
    }
  }

  if (IsDPP8) {
    using namespace llvm::AMDGPU::DPP;
    Inst.addOperand(MCOperand::createImm(Fi? DPP8_FI_1 : DPP8_FI_0));
  } else {
    // Row/bank masks default to 0xf when not parsed.
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppRowMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBankMask, 0xf);
    addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppBoundCtrl);
    if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::fi) != -1) {
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyDppFi);
    }
  }
}
   8014 
   8015 //===----------------------------------------------------------------------===//
   8016 // sdwa
   8017 //===----------------------------------------------------------------------===//
   8018 
   8019 OperandMatchResultTy
   8020 AMDGPUAsmParser::parseSDWASel(OperandVector &Operands, StringRef Prefix,
   8021                               AMDGPUOperand::ImmTy Type) {
   8022   using namespace llvm::AMDGPU::SDWA;
   8023 
   8024   SMLoc S = getLoc();
   8025   StringRef Value;
   8026   OperandMatchResultTy res;
   8027 
   8028   SMLoc StringLoc;
   8029   res = parseStringWithPrefix(Prefix, Value, StringLoc);
   8030   if (res != MatchOperand_Success) {
   8031     return res;
   8032   }
   8033 
   8034   int64_t Int;
   8035   Int = StringSwitch<int64_t>(Value)
   8036         .Case("BYTE_0", SdwaSel::BYTE_0)
   8037         .Case("BYTE_1", SdwaSel::BYTE_1)
   8038         .Case("BYTE_2", SdwaSel::BYTE_2)
   8039         .Case("BYTE_3", SdwaSel::BYTE_3)
   8040         .Case("WORD_0", SdwaSel::WORD_0)
   8041         .Case("WORD_1", SdwaSel::WORD_1)
   8042         .Case("DWORD", SdwaSel::DWORD)
   8043         .Default(0xffffffff);
   8044 
   8045   if (Int == 0xffffffff) {
   8046     Error(StringLoc, "invalid " + Twine(Prefix) + " value");
   8047     return MatchOperand_ParseFail;
   8048   }
   8049 
   8050   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, Type));
   8051   return MatchOperand_Success;
   8052 }
   8053 
   8054 OperandMatchResultTy
   8055 AMDGPUAsmParser::parseSDWADstUnused(OperandVector &Operands) {
   8056   using namespace llvm::AMDGPU::SDWA;
   8057 
   8058   SMLoc S = getLoc();
   8059   StringRef Value;
   8060   OperandMatchResultTy res;
   8061 
   8062   SMLoc StringLoc;
   8063   res = parseStringWithPrefix("dst_unused", Value, StringLoc);
   8064   if (res != MatchOperand_Success) {
   8065     return res;
   8066   }
   8067 
   8068   int64_t Int;
   8069   Int = StringSwitch<int64_t>(Value)
   8070         .Case("UNUSED_PAD", DstUnused::UNUSED_PAD)
   8071         .Case("UNUSED_SEXT", DstUnused::UNUSED_SEXT)
   8072         .Case("UNUSED_PRESERVE", DstUnused::UNUSED_PRESERVE)
   8073         .Default(0xffffffff);
   8074 
   8075   if (Int == 0xffffffff) {
   8076     Error(StringLoc, "invalid dst_unused value");
   8077     return MatchOperand_ParseFail;
   8078   }
   8079 
   8080   Operands.push_back(AMDGPUOperand::CreateImm(this, Int, S, AMDGPUOperand::ImmTySdwaDstUnused));
   8081   return MatchOperand_Success;
   8082 }
   8083 
// Convert a parsed VOP1 SDWA instruction; delegates to cvtSDWA with the
// VOP1 operand layout.
void AMDGPUAsmParser::cvtSdwaVOP1(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP1);
}
   8087 
// Convert a parsed VOP2 SDWA instruction; delegates to cvtSDWA with the
// VOP2 operand layout.
void AMDGPUAsmParser::cvtSdwaVOP2(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2);
}
   8091 
// Convert a parsed VOP2b SDWA instruction. Both the dst and src "vcc"
// tokens are skipped (SkipDstVcc = SkipSrcVcc = true) since the encoding
// carries them implicitly.
void AMDGPUAsmParser::cvtSdwaVOP2b(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, true, true);
}
   8095 
// Convert a parsed VOP2e SDWA instruction. Only the source "vcc" token is
// skipped (SkipDstVcc = false, SkipSrcVcc = true).
void AMDGPUAsmParser::cvtSdwaVOP2e(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOP2, false, true);
}
   8099 
// Convert a parsed VOPC SDWA instruction. The dst "vcc" token is skipped
// only on VI (isVI()); on other subtargets it is kept as an operand.
void AMDGPUAsmParser::cvtSdwaVOPC(MCInst &Inst, const OperandVector &Operands) {
  cvtSDWA(Inst, Operands, SIInstrFlags::VOPC, isVI());
}
   8103 
/// Convert parsed SDWA operands in \p Operands into operands of \p Inst.
///
/// \param BasicInstType  one of SIInstrFlags::VOP1/VOP2/VOPC; selects which
///        optional SDWA operands (clamp, omod, dst_sel, dst_unused,
///        src0_sel, src1_sel) are appended and in what order.
/// \param SkipDstVcc     drop an explicit "vcc" token appearing in the dst
///        position (VOP2b/VOPC forms where vcc is implicit in the encoding).
/// \param SkipSrcVcc     drop an explicit "vcc" token appearing in the
///        src2 position (e.g. v_addc-style carry-in).
void AMDGPUAsmParser::cvtSDWA(MCInst &Inst, const OperandVector &Operands,
                              uint64_t BasicInstType,
                              bool SkipDstVcc,
                              bool SkipSrcVcc) {
  using namespace llvm::AMDGPU::SDWA;

  // Maps each optional immediate's type to its index in Operands so the
  // optional operands can be emitted later in encoding order.
  OptionalImmIndexMap OptionalIdx;
  bool SkipVcc = SkipDstVcc || SkipSrcVcc;
  bool SkippedVcc = false;

  // Operands[0] is the mnemonic token; defs start at index 1.
  unsigned I = 1;
  const MCInstrDesc &Desc = MII.get(Inst.getOpcode());
  for (unsigned J = 0; J < Desc.getNumDefs(); ++J) {
    ((AMDGPUOperand &)*Operands[I++]).addRegOperands(Inst, 1);
  }

  for (unsigned E = Operands.size(); I != E; ++I) {
    AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[I]);
    if (SkipVcc && !SkippedVcc && Op.isReg() &&
        (Op.getReg() == AMDGPU::VCC || Op.getReg() == AMDGPU::VCC_LO)) {
      // VOP2b (v_add_u32, v_sub_u32 ...) sdwa use "vcc" token as dst.
      // Skip it if it's 2nd (e.g. v_add_i32_sdwa v1, vcc, v2, v3)
      // or 4th (v_addc_u32_sdwa v1, vcc, v2, v3, vcc) operand.
      // Skip VCC only if we didn't skip it on previous iteration.
      // Note that src0 and src1 occupy 2 slots each because of modifiers.
      if (BasicInstType == SIInstrFlags::VOP2 &&
          ((SkipDstVcc && Inst.getNumOperands() == 1) ||
           (SkipSrcVcc && Inst.getNumOperands() == 5))) {
        SkippedVcc = true;
        continue;
      } else if (BasicInstType == SIInstrFlags::VOPC &&
                 Inst.getNumOperands() == 0) {
        SkippedVcc = true;
        continue;
      }
    }
    if (isRegOrImmWithInputMods(Desc, Inst.getNumOperands())) {
      // Sources with modifiers contribute two MCInst operands (mods + value).
      Op.addRegOrImmWithInputModsOperands(Inst, 2);
    } else if (Op.isImm()) {
      // Handle optional arguments: record where this optional immediate is;
      // it is appended later in encoding order.
      OptionalIdx[Op.getImmTy()] = I;
    } else {
      llvm_unreachable("Invalid operand type");
    }
    SkippedVcc = false;
  }

  if (Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx10 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_gfx9 &&
      Inst.getOpcode() != AMDGPU::V_NOP_sdwa_vi) {
    // v_nop_sdwa_vi/gfx9/gfx10 has no optional sdwa arguments.
    // Append the optional immediates (with defaults when absent) in the
    // order the encoding expects for each basic instruction type.
    switch (BasicInstType) {
    case SIInstrFlags::VOP1:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOP2:
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::omod) != -1) {
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyOModSI, 0);
      }
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstSel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaDstUnused, DstUnused::UNUSED_PRESERVE);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    case SIInstrFlags::VOPC:
      // VOPC has no dst_sel/dst_unused; clamp only when the opcode has it.
      if (AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::clamp) != -1)
        addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTyClampSI, 0);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc0Sel, SdwaSel::DWORD);
      addOptionalImmOperand(Inst, Operands, OptionalIdx, AMDGPUOperand::ImmTySdwaSrc1Sel, SdwaSel::DWORD);
      break;

    default:
      llvm_unreachable("Invalid instruction type. Only VOP1, VOP2 and VOPC allowed");
    }
  }

  // special case v_mac_{f16, f32}:
  // it has src2 register operand that is tied to dst operand
  if (Inst.getOpcode() == AMDGPU::V_MAC_F32_sdwa_vi ||
      Inst.getOpcode() == AMDGPU::V_MAC_F16_sdwa_vi)  {
    auto it = Inst.begin();
    std::advance(
      it, AMDGPU::getNamedOperandIdx(Inst.getOpcode(), AMDGPU::OpName::src2));
    Inst.insert(it, Inst.getOperand(0)); // src2 = dst
  }
}
   8199 
   8200 //===----------------------------------------------------------------------===//
   8201 // mAI
   8202 //===----------------------------------------------------------------------===//
   8203 
// Default value (0) for an omitted blgp modifier on MAI instructions.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultBLGP() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyBLGP);
}
   8207 
// Default value (0) for an omitted cbsz modifier on MAI instructions.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultCBSZ() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyCBSZ);
}
   8211 
// Default value (0) for an omitted abid modifier on MAI instructions.
AMDGPUOperand::Ptr AMDGPUAsmParser::defaultABID() const {
  return AMDGPUOperand::CreateImm(this, 0, SMLoc(), AMDGPUOperand::ImmTyABID);
}
   8215 
/// Force static initialization: register this asm parser with both the
/// AMDGPU (R600) and GCN targets so llvm-mc and friends can find it.
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeAMDGPUAsmParser() {
  RegisterMCAsmParser<AMDGPUAsmParser> A(getTheAMDGPUTarget());
  RegisterMCAsmParser<AMDGPUAsmParser> B(getTheGCNTarget());
}
   8221 
   8222 #define GET_REGISTER_MATCHER
   8223 #define GET_MATCHER_IMPLEMENTATION
   8224 #define GET_MNEMONIC_SPELL_CHECKER
   8225 #define GET_MNEMONIC_CHECKER
   8226 #include "AMDGPUGenAsmMatcher.inc"
   8227 
   8228 // This fuction should be defined after auto-generated include so that we have
   8229 // MatchClassKind enum defined
   8230 unsigned AMDGPUAsmParser::validateTargetOperandClass(MCParsedAsmOperand &Op,
   8231                                                      unsigned Kind) {
   8232   // Tokens like "glc" would be parsed as immediate operands in ParseOperand().
   8233   // But MatchInstructionImpl() expects to meet token and fails to validate
   8234   // operand. This method checks if we are given immediate operand but expect to
   8235   // get corresponding token.
   8236   AMDGPUOperand &Operand = (AMDGPUOperand&)Op;
   8237   switch (Kind) {
   8238   case MCK_addr64:
   8239     return Operand.isAddr64() ? Match_Success : Match_InvalidOperand;
   8240   case MCK_gds:
   8241     return Operand.isGDS() ? Match_Success : Match_InvalidOperand;
   8242   case MCK_lds:
   8243     return Operand.isLDS() ? Match_Success : Match_InvalidOperand;
   8244   case MCK_idxen:
   8245     return Operand.isIdxen() ? Match_Success : Match_InvalidOperand;
   8246   case MCK_offen:
   8247     return Operand.isOffen() ? Match_Success : Match_InvalidOperand;
   8248   case MCK_SSrcB32:
   8249     // When operands have expression values, they will return true for isToken,
   8250     // because it is not possible to distinguish between a token and an
   8251     // expression at parse time. MatchInstructionImpl() will always try to
   8252     // match an operand as a token, when isToken returns true, and when the
   8253     // name of the expression is not a valid token, the match will fail,
   8254     // so we need to handle it here.
   8255     return Operand.isSSrcB32() ? Match_Success : Match_InvalidOperand;
   8256   case MCK_SSrcF32:
   8257     return Operand.isSSrcF32() ? Match_Success : Match_InvalidOperand;
   8258   case MCK_SoppBrTarget:
   8259     return Operand.isSoppBrTarget() ? Match_Success : Match_InvalidOperand;
   8260   case MCK_VReg32OrOff:
   8261     return Operand.isVReg32OrOff() ? Match_Success : Match_InvalidOperand;
   8262   case MCK_InterpSlot:
   8263     return Operand.isInterpSlot() ? Match_Success : Match_InvalidOperand;
   8264   case MCK_Attr:
   8265     return Operand.isInterpAttr() ? Match_Success : Match_InvalidOperand;
   8266   case MCK_AttrChan:
   8267     return Operand.isAttrChan() ? Match_Success : Match_InvalidOperand;
   8268   case MCK_ImmSMEMOffset:
   8269     return Operand.isSMEMOffset() ? Match_Success : Match_InvalidOperand;
   8270   case MCK_SReg_64:
   8271   case MCK_SReg_64_XEXEC:
   8272     // Null is defined as a 32-bit register but
   8273     // it should also be enabled with 64-bit operands.
   8274     // The following code enables it for SReg_64 operands
   8275     // used as source and destination. Remaining source
   8276     // operands are handled in isInlinableImm.
   8277     return Operand.isNull() ? Match_Success : Match_InvalidOperand;
   8278   default:
   8279     return Match_InvalidOperand;
   8280   }
   8281 }
   8282 
   8283 //===----------------------------------------------------------------------===//
   8284 // endpgm
   8285 //===----------------------------------------------------------------------===//
   8286 
   8287 OperandMatchResultTy AMDGPUAsmParser::parseEndpgmOp(OperandVector &Operands) {
   8288   SMLoc S = getLoc();
   8289   int64_t Imm = 0;
   8290 
   8291   if (!parseExpr(Imm)) {
   8292     // The operand is optional, if not present default to 0
   8293     Imm = 0;
   8294   }
   8295 
   8296   if (!isUInt<16>(Imm)) {
   8297     Error(S, "expected a 16-bit value");
   8298     return MatchOperand_ParseFail;
   8299   }
   8300 
   8301   Operands.push_back(
   8302       AMDGPUOperand::CreateImm(this, Imm, S, AMDGPUOperand::ImmTyEndpgm));
   8303   return MatchOperand_Success;
   8304 }
   8305 
// True if this operand is an immediate of type ImmTyEndpgm (the optional
// immediate parsed by parseEndpgmOp).
bool AMDGPUOperand::isEndpgm() const { return isImmTy(ImmTyEndpgm); }
   8307