Home | History | Annotate | Line # | Download | only in AMDGPU
      1 //===-- SIDefines.h - SI Helper Macros ----------------------*- C++ -*-===//
      2 //
      3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
      4 // See https://llvm.org/LICENSE.txt for license information.
      5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
      6 //
      7 /// \file
      8 //===----------------------------------------------------------------------===//
      9 
     10 #include "llvm/MC/MCInstrDesc.h"
     11 
     12 #ifndef LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
     13 #define LLVM_LIB_TARGET_AMDGPU_SIDEFINES_H
     14 
     15 namespace llvm {
     16 
     17 namespace SIInstrFlags {
// Instruction flag bits; every enumerator occupies exactly one bit.
// This needs to be kept in sync with the field bits in InstSI.
enum : uint64_t {
  // Low bits - basic encoding information.
  SALU = UINT64_C(1) << 0,
  VALU = UINT64_C(1) << 1,

  // SALU instruction formats.
  SOP1 = UINT64_C(1) << 2,
  SOP2 = UINT64_C(1) << 3,
  SOPC = UINT64_C(1) << 4,
  SOPK = UINT64_C(1) << 5,
  SOPP = UINT64_C(1) << 6,

  // VALU instruction formats.
  VOP1 = UINT64_C(1) << 7,
  VOP2 = UINT64_C(1) << 8,
  VOPC = UINT64_C(1) << 9,

  // TODO: Should this be split into VOP3 a and b?
  VOP3 = UINT64_C(1) << 10,
  // Bit 11 is not assigned to any flag in this enum.
  VOP3P = UINT64_C(1) << 12,

  VINTRP = UINT64_C(1) << 13,
  SDWA = UINT64_C(1) << 14,
  DPP = UINT64_C(1) << 15,
  TRANS = UINT64_C(1) << 16,

  // Memory instruction formats.
  MUBUF = UINT64_C(1) << 17,
  MTBUF = UINT64_C(1) << 18,
  SMRD = UINT64_C(1) << 19,
  MIMG = UINT64_C(1) << 20,
  EXP = UINT64_C(1) << 21,
  FLAT = UINT64_C(1) << 22,
  DS = UINT64_C(1) << 23,

  // Pseudo instruction formats.
  VGPRSpill = UINT64_C(1) << 24,
  SGPRSpill = UINT64_C(1) << 25,

  // Bits 26..31 are not assigned to any flag in this enum.

  // High bits - other information.
  VM_CNT = UINT64_C(1) << 32,
  EXP_CNT = UINT64_C(1) << 33,
  LGKM_CNT = UINT64_C(1) << 34,

  WQM = UINT64_C(1) << 35,
  DisableWQM = UINT64_C(1) << 36,
  Gather4 = UINT64_C(1) << 37,
  SOPK_ZEXT = UINT64_C(1) << 38,
  SCALAR_STORE = UINT64_C(1) << 39,
  FIXED_SIZE = UINT64_C(1) << 40,
  VOPAsmPrefer32Bit = UINT64_C(1) << 41,
  VOP3_OPSEL = UINT64_C(1) << 42,
  maybeAtomic = UINT64_C(1) << 43,
  renamedInGFX9 = UINT64_C(1) << 44,

  // Is a clamp on FP type.
  FPClamp = UINT64_C(1) << 45,

  // Is an integer clamp.
  IntClamp = UINT64_C(1) << 46,

  // Clamps lo component of register.
  ClampLo = UINT64_C(1) << 47,

  // Clamps hi component of register.
  // ClampLo and ClampHi are both set for a packed clamp.
  ClampHi = UINT64_C(1) << 48,

  // Is a packed VOP3P instruction.
  IsPacked = UINT64_C(1) << 49,

  // Is a D16 buffer instruction.
  D16Buf = UINT64_C(1) << 50,

  // FLAT instruction accesses FLAT_GLBL segment.
  FlatGlobal = UINT64_C(1) << 51,

  // Uses floating point double precision rounding mode.
  FPDPRounding = UINT64_C(1) << 52,

  // Instruction is FP atomic.
  FPAtomic = UINT64_C(1) << 53,

  // Is a MFMA instruction.
  IsMAI = UINT64_C(1) << 54,

  // Is a DOT instruction.
  IsDOT = UINT64_C(1) << 55,

  // FLAT instruction accesses FLAT_SCRATCH segment.
  FlatScratch = UINT64_C(1) << 56,

  // Atomic without return.
  IsAtomicNoRet = UINT64_C(1) << 57,

  // Atomic with return.
  IsAtomicRet = UINT64_C(1) << 58
};
    117 
// v_cmp_class_* etc. use a 10-bit mask for what operation is checked.
// The result is true if any of these tests are true.
// Each enumerator below is one bit of that mask (bit 0 through bit 9).
enum ClassFlags : unsigned {
  S_NAN       = 0x001, // Signaling NaN
  Q_NAN       = 0x002, // Quiet NaN
  N_INFINITY  = 0x004, // Negative infinity
  N_NORMAL    = 0x008, // Negative normal
  N_SUBNORMAL = 0x010, // Negative subnormal
  N_ZERO      = 0x020, // Negative zero
  P_ZERO      = 0x040, // Positive zero
  P_SUBNORMAL = 0x080, // Positive subnormal
  P_NORMAL    = 0x100, // Positive normal
  P_INFINITY  = 0x200  // Positive infinity
};
    132 }
    133 
    134 namespace AMDGPU {
    135   enum OperandType : unsigned {
    136     /// Operands with register or 32-bit immediate
    137     OPERAND_REG_IMM_INT32 = MCOI::OPERAND_FIRST_TARGET,
    138     OPERAND_REG_IMM_INT64,
    139     OPERAND_REG_IMM_INT16,
    140     OPERAND_REG_IMM_FP32,
    141     OPERAND_REG_IMM_FP64,
    142     OPERAND_REG_IMM_FP16,
    143     OPERAND_REG_IMM_V2FP16,
    144     OPERAND_REG_IMM_V2INT16,
    145     OPERAND_REG_IMM_V2INT32,
    146     OPERAND_REG_IMM_V2FP32,
    147 
    148     /// Operands with register or inline constant
    149     OPERAND_REG_INLINE_C_INT16,
    150     OPERAND_REG_INLINE_C_INT32,
    151     OPERAND_REG_INLINE_C_INT64,
    152     OPERAND_REG_INLINE_C_FP16,
    153     OPERAND_REG_INLINE_C_FP32,
    154     OPERAND_REG_INLINE_C_FP64,
    155     OPERAND_REG_INLINE_C_V2INT16,
    156     OPERAND_REG_INLINE_C_V2FP16,
    157     OPERAND_REG_INLINE_C_V2INT32,
    158     OPERAND_REG_INLINE_C_V2FP32,
    159 
    160     /// Operands with an AccVGPR register or inline constant
    161     OPERAND_REG_INLINE_AC_INT16,
    162     OPERAND_REG_INLINE_AC_INT32,
    163     OPERAND_REG_INLINE_AC_FP16,
    164     OPERAND_REG_INLINE_AC_FP32,
    165     OPERAND_REG_INLINE_AC_FP64,
    166     OPERAND_REG_INLINE_AC_V2INT16,
    167     OPERAND_REG_INLINE_AC_V2FP16,
    168     OPERAND_REG_INLINE_AC_V2INT32,
    169     OPERAND_REG_INLINE_AC_V2FP32,
    170 
    171     OPERAND_REG_IMM_FIRST = OPERAND_REG_IMM_INT32,
    172     OPERAND_REG_IMM_LAST = OPERAND_REG_IMM_V2FP32,
    173 
    174     OPERAND_REG_INLINE_C_FIRST = OPERAND_REG_INLINE_C_INT16,
    175     OPERAND_REG_INLINE_C_LAST = OPERAND_REG_INLINE_AC_V2FP32,
    176 
    177     OPERAND_REG_INLINE_AC_FIRST = OPERAND_REG_INLINE_AC_INT16,
    178     OPERAND_REG_INLINE_AC_LAST = OPERAND_REG_INLINE_AC_V2FP32,
    179 
    180     OPERAND_SRC_FIRST = OPERAND_REG_IMM_INT32,
    181     OPERAND_SRC_LAST = OPERAND_REG_INLINE_C_LAST,
    182 
    183     // Operand for source modifiers for VOP instructions
    184     OPERAND_INPUT_MODS,
    185 
    186     // Operand for SDWA instructions
    187     OPERAND_SDWA_VOPC_DST,
    188 
    189     /// Operand with 32-bit immediate that uses the constant bus.
    190     OPERAND_KIMM32,
    191     OPERAND_KIMM16
    192   };
    193 }
    194 
// Input operand modifiers bit-masks.
// NEG and SEXT share the same bit-mask because they can't be set
// simultaneously. Aliased masks are spelled in terms of the enumerator they
// share a bit with.
namespace SISrcMods {
  enum : unsigned {
    NEG        = 0x1,      // Floating-point negate modifier
    ABS        = 0x2,      // Floating-point absolute modifier
    SEXT       = NEG,      // Integer sign-extend modifier
    NEG_HI     = ABS,      // Floating-point negate high packed component modifier.
    OP_SEL_0   = 0x4,
    OP_SEL_1   = 0x8,
    DST_OP_SEL = OP_SEL_1  // VOP3 dst op_sel (share mask with OP_SEL_1)
  };
}
    208 
// Output modifier encodings; the four values are consecutive starting at 0.
namespace SIOutMods {
  enum : unsigned {
    NONE = 0,
    MUL2, // 1
    MUL4, // 2
    DIV2  // 3
  };
}
    217 
namespace AMDGPU {
namespace VGPRIndexMode {

// Ids of the symbolic names. Each id is also the bit position of the matching
// *_ENABLE flag in EncBits.
enum Id : unsigned {
  ID_SRC0 = 0,
  ID_SRC1 = 1,
  ID_SRC2 = 2,
  ID_DST  = 3,

  ID_MIN = ID_SRC0,
  ID_MAX = ID_DST
};

// Encoded enable bits, one per Id, plus the combined mask.
enum EncBits : unsigned {
  OFF = 0,

  SRC0_ENABLE = 1 << ID_SRC0,
  SRC1_ENABLE = 1 << ID_SRC1,
  SRC2_ENABLE = 1 << ID_SRC2,
  DST_ENABLE  = 1 << ID_DST,

  // Union of all four enable bits.
  ENABLE_MASK = SRC0_ENABLE | SRC1_ENABLE | SRC2_ENABLE | DST_ENABLE,

  // Lies outside ENABLE_MASK.
  UNDEF = 0xFFFF
};

} // namespace VGPRIndexMode
} // namespace AMDGPU
    243 
// Assembler variant ids; consecutive values starting at 0.
namespace AMDGPUAsmVariants {
  enum : unsigned {
    DEFAULT = 0,
    VOP3,  // 1
    SDWA,  // 2
    SDWA9, // 3
    DPP    // 4
  };
}
    253 
namespace AMDGPU {
namespace EncValues { // Encoding values of enum9/8/7 operands

enum : unsigned {
  // SGPR range; the upper bound differs between SI and GFX10.
  SGPR_MIN       = 0,
  SGPR_MAX_SI    = 101,
  SGPR_MAX_GFX10 = 105,

  // TTMP ranges; the lower bound differs between VI and GFX9+.
  TTMP_GFX9PLUS_MIN = 108,
  TTMP_GFX9PLUS_MAX = 123,
  TTMP_VI_MIN       = 112,
  TTMP_VI_MAX       = 123,

  // Inline integer constants.
  INLINE_INTEGER_C_MIN          = 128,
  INLINE_INTEGER_C_POSITIVE_MAX = 192, // 64
  INLINE_INTEGER_C_MAX          = 208,

  // Inline floating-point constants.
  INLINE_FLOATING_C_MIN = 240,
  INLINE_FLOATING_C_MAX = 248,

  LITERAL_CONST = 255,

  // VGPR range.
  VGPR_MIN = 256,
  VGPR_MAX = 511
};

} // namespace EncValues
} // namespace AMDGPU
    277 
    278 namespace AMDGPU {
namespace CPol {

// Single-bit flags plus their union. Bit 3 is not assigned to any flag here.
enum CPol {
  GLC = 1 << 0,
  SLC = 1 << 1,
  DLC = 1 << 2,
  SCC = 1 << 4,
  ALL = GLC | SLC | DLC | SCC
};

} // namespace CPol
    290 
namespace SendMsg { // Encoding of SIMM16 used in s_sendmsg* insns.

enum Id { // Message ID, width(4) [3:0].
  ID_UNKNOWN_ = -1,

  // Field layout.
  ID_SHIFT_ = 0,
  ID_WIDTH_ = 4,
  ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_),

  // Message ids.
  ID_INTERRUPT = 1,
  ID_GS = 2,
  ID_GS_DONE = 3,
  ID_SAVEWAVE = 4,           // added in GFX8
  ID_STALL_WAVE_GEN = 5,     // added in GFX9
  ID_HALT_WAVES = 6,         // added in GFX9
  ID_ORDERED_PS_DONE = 7,    // added in GFX9
  ID_EARLY_PRIM_DEALLOC = 8, // added in GFX9, removed in GFX10
  ID_GS_ALLOC_REQ = 9,       // added in GFX9
  ID_GET_DOORBELL = 10,      // added in GFX9
  ID_GET_DDID = 11,          // added in GFX10
  ID_SYSMSG = 15,
  // Must directly follow ID_SYSMSG: its value is implicit (ID_SYSMSG + 1).
  ID_GAPS_LAST_, // Indicate that sequence has gaps.
  ID_GAPS_FIRST_ = ID_INTERRUPT
};

enum Op { // Both GS and SYS operation IDs.
  OP_UNKNOWN_ = -1,
  OP_NONE_ = 0,

  // Bits used for operation encoding
  OP_SHIFT_ = 4,
  OP_WIDTH_ = 3,
  OP_MASK_ = (((1 << OP_WIDTH_) - 1) << OP_SHIFT_),

  // GS operations are encoded in bits 5:4
  OP_GS_NOP = 0,
  OP_GS_CUT = 1,
  OP_GS_EMIT = 2,
  OP_GS_EMIT_CUT = 3,
  OP_GS_LAST_, // implicit: one past OP_GS_EMIT_CUT
  OP_GS_FIRST_ = OP_GS_NOP,

  // SYS operations are encoded in bits 6:4
  OP_SYS_ECC_ERR_INTERRUPT = 1,
  OP_SYS_REG_RD = 2,
  OP_SYS_HOST_TRAP_ACK = 3,
  OP_SYS_TTRACE_PC = 4,
  OP_SYS_LAST_, // implicit: one past OP_SYS_TTRACE_PC
  OP_SYS_FIRST_ = OP_SYS_ECC_ERR_INTERRUPT,
};

enum StreamId : unsigned { // Stream ID, (2) [9:8].
  // Field layout.
  STREAM_ID_SHIFT_ = 8,
  STREAM_ID_WIDTH_ = 2,
  STREAM_ID_MASK_ = (((1 << STREAM_ID_WIDTH_) - 1) << STREAM_ID_SHIFT_),

  // Values.
  STREAM_ID_NONE_ = 0,
  STREAM_ID_DEFAULT_ = 0,
  STREAM_ID_FIRST_ = STREAM_ID_DEFAULT_,
  STREAM_ID_LAST_ = 4
};

} // namespace SendMsg
    348 
namespace Hwreg { // Encoding of SIMM16 used in s_setreg/getreg* insns.

enum Id { // HwRegCode, (6) [5:0]
  ID_UNKNOWN_ = -1,

  // Field layout.
  ID_SHIFT_ = 0,
  ID_WIDTH_ = 6,
  ID_MASK_ = (((1 << ID_WIDTH_) - 1) << ID_SHIFT_),

  // Register codes.
  ID_MODE = 1,
  ID_STATUS = 2,
  ID_TRAPSTS = 3,
  ID_HW_ID = 4,
  ID_GPR_ALLOC = 5,
  ID_LDS_ALLOC = 6,
  ID_IB_STS = 7,
  ID_MEM_BASES = 15,
  ID_TBA_LO = 16,
  ID_TBA_HI = 17,
  ID_TMA_LO = 18,
  ID_TMA_HI = 19,
  ID_FLAT_SCR_LO = 20,
  ID_FLAT_SCR_HI = 21,
  ID_XNACK_MASK = 22,
  ID_POPS_PACKER = 25,
  ID_SHADER_CYCLES = 29,

  // Markers delimiting the codes that have symbolic names.
  ID_SYMBOLIC_FIRST_ = 1, // There are corresponding symbolic names defined.
  ID_SYMBOLIC_FIRST_GFX9_ = ID_MEM_BASES,
  ID_SYMBOLIC_FIRST_GFX10_ = ID_TBA_LO,
  ID_SYMBOLIC_FIRST_GFX1030_ = ID_SHADER_CYCLES,
  ID_SYMBOLIC_LAST_ = 30
};

enum Offset : unsigned { // Offset, (5) [10:6]
  // Field layout.
  OFFSET_SHIFT_ = 6,
  OFFSET_WIDTH_ = 5,
  OFFSET_MASK_ = (((1 << OFFSET_WIDTH_) - 1) << OFFSET_SHIFT_),

  // Values.
  OFFSET_DEFAULT_ = 0,
  OFFSET_MEM_VIOL = 8,
  OFFSET_SRC_SHARED_BASE = 16,
  OFFSET_SRC_PRIVATE_BASE = 0
};

enum WidthMinusOne : unsigned { // WidthMinusOne, (5) [15:11]
  // Field layout.
  WIDTH_M1_SHIFT_ = 11,
  WIDTH_M1_WIDTH_ = 5,
  WIDTH_M1_MASK_ = (((1 << WIDTH_M1_WIDTH_) - 1) << WIDTH_M1_SHIFT_),

  // Values.
  WIDTH_M1_DEFAULT_ = 31,
  WIDTH_M1_SRC_SHARED_BASE = 15,
  WIDTH_M1_SRC_PRIVATE_BASE = 15
};

// Some values from WidthMinusOne mapped into Width domain.
enum Width : unsigned {
  WIDTH_DEFAULT_ = WIDTH_M1_DEFAULT_ + 1,
};

// Bit masks for fields of the value named by ID_MODE (per the enum name).
enum ModeRegisterMasks : uint32_t {
  FP_ROUND_MASK   = 0xfu << 0, // Bits 0..3
  FP_DENORM_MASK  = 0xfu << 4, // Bits 4..7
  DX10_CLAMP_MASK = 1u << 8,
  IEEE_MODE_MASK  = 1u << 9,
  LOD_CLAMP_MASK  = 1u << 10,
  DEBUG_MASK      = 1u << 11,

  // EXCP_EN fields.
  EXCP_EN_INVALID_MASK        = 1u << 12,
  EXCP_EN_INPUT_DENORMAL_MASK = 1u << 13,
  EXCP_EN_FLOAT_DIV0_MASK     = 1u << 14,
  EXCP_EN_OVERFLOW_MASK       = 1u << 15,
  EXCP_EN_UNDERFLOW_MASK      = 1u << 16,
  EXCP_EN_INEXACT_MASK        = 1u << 17,
  EXCP_EN_INT_DIV0_MASK       = 1u << 18,

  GPR_IDX_EN_MASK = 1u << 27,
  VSKIP_MASK      = 1u << 28,
  CSP_MASK        = 0x7u << 29 // Bits 29..31
};

} // namespace Hwreg
    430 
namespace MTBUFFormat {

// Data format codes 0..15, plus the position (DFMT_SHIFT) and mask (DFMT_MASK)
// used to pack a code into a merged format value.
enum DataFormat : int64_t {
  DFMT_INVALID     = 0,
  DFMT_8           = 1,
  DFMT_16          = 2,
  DFMT_8_8         = 3,
  DFMT_32          = 4,
  DFMT_16_16       = 5,
  DFMT_10_11_11    = 6,
  DFMT_11_11_10    = 7,
  DFMT_10_10_10_2  = 8,
  DFMT_2_10_10_10  = 9,
  DFMT_8_8_8_8     = 10,
  DFMT_32_32       = 11,
  DFMT_16_16_16_16 = 12,
  DFMT_32_32_32    = 13,
  DFMT_32_32_32_32 = 14,
  DFMT_RESERVED_15 = 15,

  DFMT_MIN = DFMT_INVALID,
  DFMT_MAX = DFMT_RESERVED_15,

  DFMT_UNDEF = -1,
  DFMT_DEFAULT = DFMT_8,

  DFMT_SHIFT = 0,
  DFMT_MASK = 0xF
};

// Numeric format codes 0..7, plus the position (NFMT_SHIFT) and mask
// (NFMT_MASK) used to pack a code into a merged format value.
enum NumFormat : int64_t {
  NFMT_UNORM      = 0,
  NFMT_SNORM      = 1,
  NFMT_USCALED    = 2,
  NFMT_SSCALED    = 3,
  NFMT_UINT       = 4,
  NFMT_SINT       = 5,
  NFMT_RESERVED_6 = 6,                // VI and GFX9
  NFMT_SNORM_OGL  = NFMT_RESERVED_6,  // SI and CI only
  NFMT_FLOAT      = 7,

  NFMT_MIN = NFMT_UNORM,
  NFMT_MAX = NFMT_FLOAT,

  NFMT_UNDEF = -1,
  NFMT_DEFAULT = NFMT_UNORM,

  NFMT_SHIFT = 4,
  NFMT_MASK = 7
};

// A DataFormat and a NumFormat packed into one value using the shifts and
// masks declared in the two enums above.
enum MergedFormat : int64_t {
  DFMT_NFMT_UNDEF = -1,

  DFMT_NFMT_DEFAULT = ((DFMT_DEFAULT & DFMT_MASK) << DFMT_SHIFT) |
                      ((NFMT_DEFAULT & NFMT_MASK) << NFMT_SHIFT),

  DFMT_NFMT_MASK = (DFMT_MASK << DFMT_SHIFT) | (NFMT_MASK << NFMT_SHIFT),

  DFMT_NFMT_MAX = DFMT_NFMT_MASK
};

// Unified format codes. Values are assigned implicitly and consecutively from
// UFMT_INVALID = 0, so the ORDER of the enumerators is significant. The value
// range of each group is noted in its header comment.
enum UnifiedFormat : int64_t {
  UFMT_INVALID = 0,

  // 8: 1..6
  UFMT_8_UNORM, UFMT_8_SNORM, UFMT_8_USCALED,
  UFMT_8_SSCALED, UFMT_8_UINT, UFMT_8_SINT,

  // 16: 7..13
  UFMT_16_UNORM, UFMT_16_SNORM, UFMT_16_USCALED,
  UFMT_16_SSCALED, UFMT_16_UINT, UFMT_16_SINT,
  UFMT_16_FLOAT,

  // 8_8: 14..19
  UFMT_8_8_UNORM, UFMT_8_8_SNORM, UFMT_8_8_USCALED,
  UFMT_8_8_SSCALED, UFMT_8_8_UINT, UFMT_8_8_SINT,

  // 32: 20..22
  UFMT_32_UINT, UFMT_32_SINT, UFMT_32_FLOAT,

  // 16_16: 23..29
  UFMT_16_16_UNORM, UFMT_16_16_SNORM, UFMT_16_16_USCALED,
  UFMT_16_16_SSCALED, UFMT_16_16_UINT, UFMT_16_16_SINT,
  UFMT_16_16_FLOAT,

  // 10_11_11: 30..36
  UFMT_10_11_11_UNORM, UFMT_10_11_11_SNORM, UFMT_10_11_11_USCALED,
  UFMT_10_11_11_SSCALED, UFMT_10_11_11_UINT, UFMT_10_11_11_SINT,
  UFMT_10_11_11_FLOAT,

  // 11_11_10: 37..43
  UFMT_11_11_10_UNORM, UFMT_11_11_10_SNORM, UFMT_11_11_10_USCALED,
  UFMT_11_11_10_SSCALED, UFMT_11_11_10_UINT, UFMT_11_11_10_SINT,
  UFMT_11_11_10_FLOAT,

  // 10_10_10_2: 44..49
  UFMT_10_10_10_2_UNORM, UFMT_10_10_10_2_SNORM, UFMT_10_10_10_2_USCALED,
  UFMT_10_10_10_2_SSCALED, UFMT_10_10_10_2_UINT, UFMT_10_10_10_2_SINT,

  // 2_10_10_10: 50..55
  UFMT_2_10_10_10_UNORM, UFMT_2_10_10_10_SNORM, UFMT_2_10_10_10_USCALED,
  UFMT_2_10_10_10_SSCALED, UFMT_2_10_10_10_UINT, UFMT_2_10_10_10_SINT,

  // 8_8_8_8: 56..61
  UFMT_8_8_8_8_UNORM, UFMT_8_8_8_8_SNORM, UFMT_8_8_8_8_USCALED,
  UFMT_8_8_8_8_SSCALED, UFMT_8_8_8_8_UINT, UFMT_8_8_8_8_SINT,

  // 32_32: 62..64
  UFMT_32_32_UINT, UFMT_32_32_SINT, UFMT_32_32_FLOAT,

  // 16_16_16_16: 65..71
  UFMT_16_16_16_16_UNORM, UFMT_16_16_16_16_SNORM, UFMT_16_16_16_16_USCALED,
  UFMT_16_16_16_16_SSCALED, UFMT_16_16_16_16_UINT, UFMT_16_16_16_16_SINT,
  UFMT_16_16_16_16_FLOAT,

  // 32_32_32: 72..74
  UFMT_32_32_32_UINT, UFMT_32_32_32_SINT, UFMT_32_32_32_FLOAT,

  // 32_32_32_32: 75..77
  UFMT_32_32_32_32_UINT, UFMT_32_32_32_32_SINT, UFMT_32_32_32_32_FLOAT,

  UFMT_FIRST = UFMT_INVALID,
  UFMT_LAST = UFMT_32_32_32_32_FLOAT,

  // Larger than UFMT_LAST: not every value up to UFMT_MAX has a name here.
  UFMT_MAX = 127,

  UFMT_UNDEF = -1,
  UFMT_DEFAULT = UFMT_8_UNORM
};

} // namespace MTBUFFormat
    596 
namespace Swizzle { // Encoding of swizzle macro used in ds_swizzle_b32.

enum Id : unsigned { // id of symbolic names
  ID_QUAD_PERM    = 0,
  ID_BITMASK_PERM = 1,
  ID_SWAP         = 2,
  ID_REVERSE      = 3,
  ID_BROADCAST    = 4
};

enum EncBits : unsigned {

  // Swizzle mode encodings: each mode has an encoding value and the mask
  // under which that value is declared.

  QUAD_PERM_ENC_MASK    = 0xFF00,
  QUAD_PERM_ENC         = 0x8000,

  BITMASK_PERM_ENC_MASK = 0x8000,
  BITMASK_PERM_ENC      = 0x0000,

  // QUAD_PERM encodings

  LANE_NUM              = 4,
  LANE_SHIFT            = 2,
  LANE_MASK             = 0x3,
  LANE_MAX              = LANE_MASK,

  // BITMASK_PERM encodings

  BITMASK_WIDTH         = 5,
  BITMASK_MASK          = 0x1F,
  BITMASK_MAX           = BITMASK_MASK,

  // Shift amounts of the AND / OR / XOR masks; successive shifts differ by
  // BITMASK_WIDTH.
  BITMASK_AND_SHIFT     = 0,
  BITMASK_OR_SHIFT      = 5,
  BITMASK_XOR_SHIFT     = 10
};

} // namespace Swizzle
    636 
namespace SDWA {

// Selector values; consecutive starting at 0.
enum SdwaSel : unsigned {
  BYTE_0 = 0,
  BYTE_1, // 1
  BYTE_2, // 2
  BYTE_3, // 3
  WORD_0, // 4
  WORD_1, // 5
  DWORD,  // 6
};

// Destination-unused modes; consecutive starting at 0.
enum DstUnused : unsigned {
  UNUSED_PAD = 0,
  UNUSED_SEXT,     // 1
  UNUSED_PRESERVE, // 2
};

enum SDWA9EncValues : unsigned {
  // Masks.
  SRC_SGPR_MASK      = 0x100,
  SRC_VGPR_MASK      = 0xFF,
  VOPC_DST_VCC_MASK  = 0x80,
  VOPC_DST_SGPR_MASK = 0x7F,

  // Source encoding ranges: VGPRs occupy 0..255; SGPR and TTMP encodings
  // start at 256 (i.e. have the SRC_SGPR_MASK bit set).
  SRC_VGPR_MIN       = 0,
  SRC_VGPR_MAX       = 255,
  SRC_SGPR_MIN       = 256,
  SRC_SGPR_MAX_SI    = 357,
  SRC_SGPR_MAX_GFX10 = 361,
  SRC_TTMP_MIN       = 364,
  SRC_TTMP_MAX       = 379,
};

} // namespace SDWA
    671 
namespace DPP {

// DPP control values. Several values carry two names (an "unused" marker and
// a zero-amount operation); the second name is written as an alias of the
// first so the sharing is explicit.
// clang-format off
enum DppCtrl : unsigned {
  // Quad permutations.
  QUAD_PERM_FIRST   = 0x000,
  QUAD_PERM_ID      = 0x0E4, // identity permutation
  QUAD_PERM_LAST    = 0x0FF,

  // Row shift left.
  DPP_UNUSED1       = 0x100,
  ROW_SHL0          = DPP_UNUSED1,
  ROW_SHL_FIRST     = 0x101,
  ROW_SHL_LAST      = 0x10F,

  // Row shift right.
  DPP_UNUSED2       = 0x110,
  ROW_SHR0          = DPP_UNUSED2,
  ROW_SHR_FIRST     = 0x111,
  ROW_SHR_LAST      = 0x11F,

  // Row rotate right.
  DPP_UNUSED3       = 0x120,
  ROW_ROR0          = DPP_UNUSED3,
  ROW_ROR_FIRST     = 0x121,
  ROW_ROR_LAST      = 0x12F,

  // Wave operations, each followed by a three-value unused range.
  WAVE_SHL1         = 0x130,
  DPP_UNUSED4_FIRST = 0x131,
  DPP_UNUSED4_LAST  = 0x133,
  WAVE_ROL1         = 0x134,
  DPP_UNUSED5_FIRST = 0x135,
  DPP_UNUSED5_LAST  = 0x137,
  WAVE_SHR1         = 0x138,
  DPP_UNUSED6_FIRST = 0x139,
  DPP_UNUSED6_LAST  = 0x13B,
  WAVE_ROR1         = 0x13C,
  DPP_UNUSED7_FIRST = 0x13D,
  DPP_UNUSED7_LAST  = 0x13F,

  // Mirrors and broadcasts.
  ROW_MIRROR        = 0x140,
  ROW_HALF_MIRROR   = 0x141,
  BCAST15           = 0x142,
  BCAST31           = 0x143,
  DPP_UNUSED8_FIRST = 0x144,
  DPP_UNUSED8_LAST  = 0x14F,

  // ROW_NEWBCAST and ROW_SHARE name the same range 0x150..0x15F.
  ROW_NEWBCAST_FIRST= 0x150,
  ROW_NEWBCAST_LAST = 0x15F,
  ROW_SHARE0        = ROW_NEWBCAST_FIRST,
  ROW_SHARE_FIRST   = ROW_NEWBCAST_FIRST,
  ROW_SHARE_LAST    = ROW_NEWBCAST_LAST,

  ROW_XMASK0        = 0x160,
  ROW_XMASK_FIRST   = ROW_XMASK0,
  ROW_XMASK_LAST    = 0x16F,

  DPP_LAST          = ROW_XMASK_LAST
};
// clang-format on

enum DppFiMode {
  DPP_FI_0  = 0,
  DPP_FI_1  = 1,
  DPP8_FI_0 = 0xE9,
  DPP8_FI_1 = 0xEA,
};

} // namespace DPP
    729 
namespace Exp {

// Export target codes, the highest index allowed within each target group,
// and an invalid marker.
enum Target : unsigned {
  // Target codes.
  ET_MRT0     = 0,
  ET_MRT7     = 7,
  ET_MRTZ     = 8,
  ET_NULL     = 9,
  ET_POS0     = 12,
  ET_POS3     = 15,
  ET_POS4     = 16,      // GFX10+
  ET_POS_LAST = ET_POS4, // Highest pos used on any subtarget
  ET_PRIM     = 20,      // GFX10+
  ET_PARAM0   = 32,
  ET_PARAM31  = 63,

  // Highest index per group; 0 for the single-entry groups.
  ET_NULL_MAX_IDX  = 0,
  ET_MRTZ_MAX_IDX  = 0,
  ET_PRIM_MAX_IDX  = 0,
  ET_MRT_MAX_IDX   = 7,
  ET_POS_MAX_IDX   = 4,
  ET_PARAM_MAX_IDX = 31,

  ET_INVALID = 255,
};

} // namespace Exp
    756 
namespace VOP3PEncoding {

// Single-bit masks within a 64-bit value. Note that OP_SEL_HI_2 (bit 14) is
// not adjacent to OP_SEL_HI_0 / OP_SEL_HI_1 (bits 59 and 60).
enum OpSel : uint64_t {
  OP_SEL_HI_0 = UINT64_C(0x0800000000000000), // bit 59
  OP_SEL_HI_1 = UINT64_C(0x1000000000000000), // bit 60
  OP_SEL_HI_2 = UINT64_C(0x0000000000004000), // bit 14
};

} // namespace VOP3PEncoding
    766 
    767 } // namespace AMDGPU
    768 
// SPI_SHADER_PGM_RSRC* register addresses and bit-field helpers.
// Naming convention, as visible in the definitions:
//   R_<addr>_<REG>        register address
//   S_<addr>_<FIELD>(x)   mask x to the field width and shift it into place
//   G_<addr>_<FIELD>(x)   extract the field from a register value x
//   C_<addr>_<FIELD>      mask that clears the field (all other bits set)

// PS, RSRC1.
#define R_00B028_SPI_SHADER_PGM_RSRC1_PS 0x00B028
#define   S_00B028_VGPRS(x) (((x) & 0x3F) << 0)        // bits 5:0
#define   S_00B028_SGPRS(x) (((x) & 0x0F) << 6)        // bits 9:6
#define   S_00B028_MEM_ORDERED(x) (((x) & 0x1) << 25)  // bit 25
#define   G_00B028_MEM_ORDERED(x) (((x) >> 25) & 0x1)
#define   C_00B028_MEM_ORDERED 0xFDFFFFFF

// PS, RSRC2.
#define R_00B02C_SPI_SHADER_PGM_RSRC2_PS 0x00B02C
#define   S_00B02C_EXTRA_LDS_SIZE(x) (((x) & 0xFF) << 8)  // bits 15:8

// VS, RSRC1.
#define R_00B128_SPI_SHADER_PGM_RSRC1_VS 0x00B128
#define   S_00B128_MEM_ORDERED(x) (((x) & 0x1) << 27)  // bit 27
#define   G_00B128_MEM_ORDERED(x) (((x) >> 27) & 0x1)
#define   C_00B128_MEM_ORDERED 0xF7FFFFFF

// GS, RSRC1.
#define R_00B228_SPI_SHADER_PGM_RSRC1_GS 0x00B228
#define   S_00B228_WGP_MODE(x) (((x) & 0x1) << 27)     // bit 27
#define   G_00B228_WGP_MODE(x) (((x) >> 27) & 0x1)
#define   C_00B228_WGP_MODE 0xF7FFFFFF
#define   S_00B228_MEM_ORDERED(x) (((x) & 0x1) << 25)  // bit 25
#define   G_00B228_MEM_ORDERED(x) (((x) >> 25) & 0x1)
#define   C_00B228_MEM_ORDERED 0xFDFFFFFF

// ES, RSRC1 (address only).
#define R_00B328_SPI_SHADER_PGM_RSRC1_ES 0x00B328

// HS, RSRC1.
#define R_00B428_SPI_SHADER_PGM_RSRC1_HS 0x00B428
#define   S_00B428_WGP_MODE(x) (((x) & 0x1) << 26)     // bit 26
#define   G_00B428_WGP_MODE(x) (((x) >> 26) & 0x1)
#define   C_00B428_WGP_MODE 0xFBFFFFFF
#define   S_00B428_MEM_ORDERED(x) (((x) & 0x1) << 24)  // bit 24
#define   G_00B428_MEM_ORDERED(x) (((x) >> 24) & 0x1)
#define   C_00B428_MEM_ORDERED 0xFEFFFFFF

// LS, RSRC1 (address only).
#define R_00B528_SPI_SHADER_PGM_RSRC1_LS 0x00B528
    801 
// COMPUTE_PGM_RSRC2 register (address 0x00B84C) and its bit fields.
// For each field: S_*(x) masks x to the field width and shifts it into place,
// G_*(x) extracts the field from a register value, and C_* is the mask that
// clears the field (the bitwise complement of the field's in-place mask).
#define R_00B84C_COMPUTE_PGM_RSRC2                                      0x00B84C
// SCRATCH_EN: bit 0.
#define   S_00B84C_SCRATCH_EN(x)                                      (((x) & 0x1) << 0)
#define   G_00B84C_SCRATCH_EN(x)                                      (((x) >> 0) & 0x1)
#define   C_00B84C_SCRATCH_EN                                         0xFFFFFFFE
// USER_SGPR: bits 5:1.
#define   S_00B84C_USER_SGPR(x)                                       (((x) & 0x1F) << 1)
#define   G_00B84C_USER_SGPR(x)                                       (((x) >> 1) & 0x1F)
#define   C_00B84C_USER_SGPR                                          0xFFFFFFC1
// TRAP_HANDLER: bit 6.
#define   S_00B84C_TRAP_HANDLER(x)                                    (((x) & 0x1) << 6)
#define   G_00B84C_TRAP_HANDLER(x)                                    (((x) >> 6) & 0x1)
#define   C_00B84C_TRAP_HANDLER                                       0xFFFFFFBF
// TGID_X_EN / TGID_Y_EN / TGID_Z_EN: bits 7, 8 and 9.
#define   S_00B84C_TGID_X_EN(x)                                       (((x) & 0x1) << 7)
#define   G_00B84C_TGID_X_EN(x)                                       (((x) >> 7) & 0x1)
#define   C_00B84C_TGID_X_EN                                          0xFFFFFF7F
#define   S_00B84C_TGID_Y_EN(x)                                       (((x) & 0x1) << 8)
#define   G_00B84C_TGID_Y_EN(x)                                       (((x) >> 8) & 0x1)
#define   C_00B84C_TGID_Y_EN                                          0xFFFFFEFF
#define   S_00B84C_TGID_Z_EN(x)                                       (((x) & 0x1) << 9)
#define   G_00B84C_TGID_Z_EN(x)                                       (((x) >> 9) & 0x1)
#define   C_00B84C_TGID_Z_EN                                          0xFFFFFDFF
// TG_SIZE_EN: bit 10.
#define   S_00B84C_TG_SIZE_EN(x)                                      (((x) & 0x1) << 10)
#define   G_00B84C_TG_SIZE_EN(x)                                      (((x) >> 10) & 0x1)
#define   C_00B84C_TG_SIZE_EN                                         0xFFFFFBFF
// TIDIG_COMP_CNT: bits 12:11.
#define   S_00B84C_TIDIG_COMP_CNT(x)                                  (((x) & 0x03) << 11)
#define   G_00B84C_TIDIG_COMP_CNT(x)                                  (((x) >> 11) & 0x03)
#define   C_00B84C_TIDIG_COMP_CNT                                     0xFFFFE7FF
/* CIK */
// EXCP_EN_MSB: bits 14:13.
#define   S_00B84C_EXCP_EN_MSB(x)                                     (((x) & 0x03) << 13)
#define   G_00B84C_EXCP_EN_MSB(x)                                     (((x) >> 13) & 0x03)
#define   C_00B84C_EXCP_EN_MSB                                        0xFFFF9FFF
/*     */
// LDS_SIZE: bits 23:15.
#define   S_00B84C_LDS_SIZE(x)                                        (((x) & 0x1FF) << 15)
#define   G_00B84C_LDS_SIZE(x)                                        (((x) >> 15) & 0x1FF)
#define   C_00B84C_LDS_SIZE                                           0xFF007FFF
// EXCP_EN: bits 30:24. The clear mask is ~(0x7F << 24); it was previously
// defined with no value, which made any use of it expand to nothing.
#define   S_00B84C_EXCP_EN(x)                                         (((x) & 0x7F) << 24)
#define   G_00B84C_EXCP_EN(x)                                         (((x) >> 24) & 0x7F)
#define   C_00B84C_EXCP_EN                                            0x80FFFFFF
    838 
// SPI_PS_INPUT_ENA / SPI_PS_INPUT_ADDR register addresses.
#define R_0286CC_SPI_PS_INPUT_ENA 0x0286CC
#define R_0286D0_SPI_PS_INPUT_ADDR 0x0286D0

// COMPUTE_PGM_RSRC1 register (address 0x00B848) and its bit fields.
// S_*(x) places a value into the field, G_*(x) extracts it from a register
// value, C_* is the mask that clears the field.
#define R_00B848_COMPUTE_PGM_RSRC1 0x00B848
// VGPRS: bits 5:0.
#define   S_00B848_VGPRS(x) (((x) & 0x3F) << 0)
#define   G_00B848_VGPRS(x) (((x) >> 0) & 0x3F)
#define   C_00B848_VGPRS 0xFFFFFFC0
// SGPRS: bits 9:6.
#define   S_00B848_SGPRS(x) (((x) & 0x0F) << 6)
#define   G_00B848_SGPRS(x) (((x) >> 6) & 0x0F)
#define   C_00B848_SGPRS 0xFFFFFC3F
// PRIORITY: bits 11:10.
#define   S_00B848_PRIORITY(x) (((x) & 0x03) << 10)
#define   G_00B848_PRIORITY(x) (((x) >> 10) & 0x03)
#define   C_00B848_PRIORITY 0xFFFFF3FF
// FLOAT_MODE: bits 19:12.
#define   S_00B848_FLOAT_MODE(x) (((x) & 0xFF) << 12)
#define   G_00B848_FLOAT_MODE(x) (((x) >> 12) & 0xFF)
#define   C_00B848_FLOAT_MODE 0xFFF00FFF
// PRIV: bit 20.
#define   S_00B848_PRIV(x) (((x) & 0x1) << 20)
#define   G_00B848_PRIV(x) (((x) >> 20) & 0x1)
#define   C_00B848_PRIV 0xFFEFFFFF
// DX10_CLAMP: bit 21.
#define   S_00B848_DX10_CLAMP(x) (((x) & 0x1) << 21)
#define   G_00B848_DX10_CLAMP(x) (((x) >> 21) & 0x1)
#define   C_00B848_DX10_CLAMP 0xFFDFFFFF
// DEBUG_MODE: bit 22.
#define   S_00B848_DEBUG_MODE(x) (((x) & 0x1) << 22)
#define   G_00B848_DEBUG_MODE(x) (((x) >> 22) & 0x1)
#define   C_00B848_DEBUG_MODE 0xFFBFFFFF
// IEEE_MODE: bit 23.
#define   S_00B848_IEEE_MODE(x) (((x) & 0x1) << 23)
#define   G_00B848_IEEE_MODE(x) (((x) >> 23) & 0x1)
#define   C_00B848_IEEE_MODE 0xFF7FFFFF
// WGP_MODE: bit 29.
#define   S_00B848_WGP_MODE(x) (((x) & 0x1) << 29)
#define   G_00B848_WGP_MODE(x) (((x) >> 29) & 0x1)
#define   C_00B848_WGP_MODE 0xDFFFFFFF
// MEM_ORDERED: bit 30.
#define   S_00B848_MEM_ORDERED(x) (((x) & 0x1) << 30)
#define   G_00B848_MEM_ORDERED(x) (((x) >> 30) & 0x1)
#define   C_00B848_MEM_ORDERED 0xBFFFFFFF
// FWD_PROGRESS: bit 31.
#define   S_00B848_FWD_PROGRESS(x) (((x) & 0x1) << 31)
#define   G_00B848_FWD_PROGRESS(x) (((x) >> 31) & 0x1)
#define   C_00B848_FWD_PROGRESS 0x7FFFFFFF

    876 
    877 
// Helpers for setting FLOAT_MODE.
// The low nibble holds the rounding modes and the high nibble holds the
// denormal modes; each nibble has a 2-bit single-precision field followed by
// a 2-bit double-precision field.

// Rounding mode values.
#define FP_ROUND_ROUND_TO_NEAREST 0
#define FP_ROUND_ROUND_TO_INF 1
#define FP_ROUND_ROUND_TO_NEGINF 2
#define FP_ROUND_ROUND_TO_ZERO 3

// Bits 3:0 control rounding mode. 1:0 control single precision, 3:2 double
// precision.
#define FP_ROUND_MODE_SP(x) ((x) & 0x3)
#define FP_ROUND_MODE_DP(x) (((x) & 0x3) << 2)

// Denormal handling values.
#define FP_DENORM_FLUSH_IN_FLUSH_OUT 0
#define FP_DENORM_FLUSH_OUT 1
#define FP_DENORM_FLUSH_IN 2
#define FP_DENORM_FLUSH_NONE 3

// Bits 7:4 control denormal handling. 5:4 control single precision, 7:6 double
// precision.
#define FP_DENORM_MODE_SP(x) (((x) & 0x3) << 4)
#define FP_DENORM_MODE_DP(x) (((x) & 0x3) << 6)
    899 
// Scratch ring size registers. WAVESIZE is a 13-bit field at bits 24:12 in
// both the compute and the SPI register.
#define R_00B860_COMPUTE_TMPRING_SIZE 0x00B860
#define   S_00B860_WAVESIZE(x) (((x) & 0x1FFF) << 12)

#define R_0286E8_SPI_TMPRING_SIZE 0x0286E8
#define   S_0286E8_WAVESIZE(x) (((x) & 0x1FFF) << 12)

// Single-bit *_W32_EN fields, one per shader stage, and the registers that
// hold them.
#define R_028B54_VGT_SHADER_STAGES_EN 0x028B54
#define   S_028B54_HS_W32_EN(x) (((x) & 0x1) << 21)  // bit 21
#define   S_028B54_GS_W32_EN(x) (((x) & 0x1) << 22)  // bit 22
#define   S_028B54_VS_W32_EN(x) (((x) & 0x1) << 23)  // bit 23
#define R_0286D8_SPI_PS_IN_CONTROL 0x0286D8
#define   S_0286D8_PS_W32_EN(x) (((x) & 0x1) << 15)  // bit 15
#define R_00B800_COMPUTE_DISPATCH_INITIATOR 0x00B800
#define   S_00B800_CS_W32_EN(x) (((x) & 0x1) << 15)  // bit 15

// Unlike the R_* macros above, these two do not carry an address in their
// names.
#define R_SPILLED_SGPRS 0x4
#define R_SPILLED_VGPRS 0x8
    917 } // End namespace llvm
    918 
    919 #endif
    920