Home | History | Annotate | Line # | Download | only in ir3
      1 /*
      2  * Copyright (c) 2013 Rob Clark <robdclark (at) gmail.com>
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
     20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
     21  * SOFTWARE.
     22  */
     23 
     24 #ifndef INSTR_A3XX_H_
     25 #define INSTR_A3XX_H_
     26 
     27 #define PACKED __attribute__((__packed__))
     28 
     29 #include <assert.h>
     30 #include <stdbool.h>
     31 #include <stdint.h>
     32 #include <stdio.h>
     33 
     34 /* clang-format off */
/* Optional hook invoked by ir3_assert() when a check fails.  It is declared
 * weak, and ir3_assert() tests the symbol's address before calling it, so a
 * program is not required to provide a definition.  It is also declared
 * noreturn: an implementation must not return to its caller.
 *
 * ir3_assert() passes the stringified failing expression as expr, and
 * __FILE__, __LINE__ and __func__ of the call site as file, line and func.
 */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
     37 /* clang-format on */
     38 
/* A wrapper for assert() that allows overriding the handling of a failed
 * assert.  This is needed for tools like crashdec, which may attempt to
 * disassemble memory that does not actually contain valid instructions.
 *
 * When expr is false, ir3_assert_handler() is called if its address is
 * non-null (it is declared weak); since it is declared noreturn, control is
 * not expected to come back from it.  Otherwise the plain assert() fires.
 *
 * NOTE: on the failure path expr is evaluated a second time by the trailing
 * assert(), so it must be free of side effects.  With NDEBUG defined that
 * assert() expands to nothing, so a failed check without a handler simply
 * falls through.
 */
#define ir3_assert(expr)                                                       \
   do {                                                                        \
      if (!(expr)) {                                                           \
         if (ir3_assert_handler) {                                             \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);           \
         }                                                                     \
         assert(expr);                                                         \
      }                                                                        \
   } while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Pack an instruction category and a per-category opcode into one value:
 * the opcode sits in the low NOPC_BITS bits and the category above them.
 *
 * The category is scaled with a multiplication rather than a left shift
 * because the meta opcodes use category -1, and left-shifting a negative
 * value is undefined behaviour in C.  The resulting values are the same as
 * the shift produces on two's complement targets.  Both arguments are
 * parenthesized so that any expression can be passed safely.
 */
#define _OPC(cat, opc) (((cat) * (1 << NOPC_BITS)) | (opc))
     57 
/* clang-format off */
/* Instruction opcodes.  Each value is built with _OPC(category, opcode).
 * Besides the per-category opcodes, the list contains entries that the
 * comments below label "logical", "virtual", "macro" or "meta".
 */
typedef enum {
   /* category 0: */
   OPC_NOP             = _OPC(0, 0),
   OPC_B               = _OPC(0, 1),
   OPC_JUMP            = _OPC(0, 2),
   OPC_CALL            = _OPC(0, 3),
   OPC_RET             = _OPC(0, 4),
   OPC_KILL            = _OPC(0, 5),
   OPC_END             = _OPC(0, 6),
   OPC_EMIT            = _OPC(0, 7),
   OPC_CUT             = _OPC(0, 8),
   OPC_CHMASK          = _OPC(0, 9),
   OPC_CHSH            = _OPC(0, 10),
   OPC_FLOW_REV        = _OPC(0, 11),

   OPC_BKT             = _OPC(0, 16),
   OPC_STKS            = _OPC(0, 17),
   OPC_STKR            = _OPC(0, 18),
   OPC_XSET            = _OPC(0, 19),
   OPC_XCLR            = _OPC(0, 20),
   OPC_GETONE          = _OPC(0, 21),
   OPC_DBG             = _OPC(0, 22),
   OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
   OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */

   OPC_PREDT           = _OPC(0, 29),   /* predicated true */
   OPC_PREDF           = _OPC(0, 30),   /* predicated false */
   OPC_PREDE           = _OPC(0, 31),   /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR              = _OPC(0, 40),
   OPC_BRAO            = _OPC(0, 41),
   OPC_BRAA            = _OPC(0, 42),
   OPC_BRAC            = _OPC(0, 43),
   OPC_BANY            = _OPC(0, 44),
   OPC_BALL            = _OPC(0, 45),
   OPC_BRAX            = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE          = _OPC(0, 47),

   /* category 1: */
   OPC_MOV             = _OPC(1, 0),
   OPC_MOVP            = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK          = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ            = _OPC(1, 4),
   OPC_GAT            = _OPC(1, 5),
   OPC_SCT            = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED       = _OPC(1, 40),
   OPC_MOV_CONST       = _OPC(1, 41),
   OPC_MOV_GPR         = _OPC(1, 42),
   OPC_MOV_RELGPR      = _OPC(1, 43),
   OPC_MOV_RELCONST    = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO    = _OPC(1, 50),
   OPC_ANY_MACRO       = _OPC(1, 51),
   OPC_ALL_MACRO       = _OPC(1, 52),
   OPC_ELECT_MACRO     = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),

   /* category 2: */
   OPC_ADD_F           = _OPC(2, 0),
   OPC_MIN_F           = _OPC(2, 1),
   OPC_MAX_F           = _OPC(2, 2),
   OPC_MUL_F           = _OPC(2, 3),
   OPC_SIGN_F          = _OPC(2, 4),
   OPC_CMPS_F          = _OPC(2, 5),
   OPC_ABSNEG_F        = _OPC(2, 6),
   OPC_CMPV_F          = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F         = _OPC(2, 9),
   OPC_CEIL_F          = _OPC(2, 10),
   OPC_RNDNE_F         = _OPC(2, 11),
   OPC_RNDAZ_F         = _OPC(2, 12),
   OPC_TRUNC_F         = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U           = _OPC(2, 16),
   OPC_ADD_S           = _OPC(2, 17),
   OPC_SUB_U           = _OPC(2, 18),
   OPC_SUB_S           = _OPC(2, 19),
   OPC_CMPS_U          = _OPC(2, 20),
   OPC_CMPS_S          = _OPC(2, 21),
   OPC_MIN_U           = _OPC(2, 22),
   OPC_MIN_S           = _OPC(2, 23),
   OPC_MAX_U           = _OPC(2, 24),
   OPC_MAX_S           = _OPC(2, 25),
   OPC_ABSNEG_S        = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B           = _OPC(2, 28),
   OPC_OR_B            = _OPC(2, 29),
   OPC_NOT_B           = _OPC(2, 30),
   OPC_XOR_B           = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U          = _OPC(2, 33),
   OPC_CMPV_S          = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U          = _OPC(2, 50),
   OPC_BFREV_B         = _OPC(2, 51),
   OPC_CLZ_S           = _OPC(2, 52),
   OPC_CLZ_B           = _OPC(2, 53),
   OPC_SHL_B           = _OPC(2, 54),
   OPC_SHR_B           = _OPC(2, 55),
   OPC_ASHR_B          = _OPC(2, 56),
   OPC_BARY_F          = _OPC(2, 57),
   OPC_MGEN_B          = _OPC(2, 58),
   OPC_GETBIT_B        = _OPC(2, 59),
   OPC_SETRM           = _OPC(2, 60),
   OPC_CBITS_B         = _OPC(2, 61),
   OPC_SHB             = _OPC(2, 62),
   OPC_MSAD            = _OPC(2, 63),

   /* category 3: */
   OPC_MAD_U16         = _OPC(3, 0),
   OPC_MADSH_U16       = _OPC(3, 1),
   OPC_MAD_S16         = _OPC(3, 2),
   OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
   OPC_MAD_U24         = _OPC(3, 4),
   OPC_MAD_S24         = _OPC(3, 5),
   OPC_MAD_F16         = _OPC(3, 6),
   OPC_MAD_F32         = _OPC(3, 7),
   OPC_SEL_B16         = _OPC(3, 8),
   OPC_SEL_B32         = _OPC(3, 9),
   OPC_SEL_S16         = _OPC(3, 10),
   OPC_SEL_S32         = _OPC(3, 11),
   OPC_SEL_F16         = _OPC(3, 12),
   OPC_SEL_F32         = _OPC(3, 13),
   OPC_SAD_S16         = _OPC(3, 14),
   OPC_SAD_S32         = _OPC(3, 15),
   OPC_SHLG_B16        = _OPC(3, 16),

   /* category 4: */
   OPC_RCP             = _OPC(4, 0),
   OPC_RSQ             = _OPC(4, 1),
   OPC_LOG2            = _OPC(4, 2),
   OPC_EXP2            = _OPC(4, 3),
   OPC_SIN             = _OPC(4, 4),
   OPC_COS             = _OPC(4, 5),
   OPC_SQRT            = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
    * still use the same opc as highp
    */
   OPC_HRSQ            = _OPC(4, 9),
   OPC_HLOG2           = _OPC(4, 10),
   OPC_HEXP2           = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM            = _OPC(5, 0),
   OPC_ISAML           = _OPC(5, 1),
   OPC_ISAMM           = _OPC(5, 2),
   OPC_SAM             = _OPC(5, 3),
   OPC_SAMB            = _OPC(5, 4),
   OPC_SAML            = _OPC(5, 5),
   OPC_SAMGQ           = _OPC(5, 6),
   OPC_GETLOD          = _OPC(5, 7),
   OPC_CONV            = _OPC(5, 8),
   OPC_CONVM           = _OPC(5, 9),
   OPC_GETSIZE         = _OPC(5, 10),
   OPC_GETBUF          = _OPC(5, 11),
   OPC_GETPOS          = _OPC(5, 12),
   OPC_GETINFO         = _OPC(5, 13),
   OPC_DSX             = _OPC(5, 14),
   OPC_DSY             = _OPC(5, 15),
   OPC_GATHER4R        = _OPC(5, 16),
   OPC_GATHER4G        = _OPC(5, 17),
   OPC_GATHER4B        = _OPC(5, 18),
   OPC_GATHER4A        = _OPC(5, 19),
   OPC_SAMGP0          = _OPC(5, 20),
   OPC_SAMGP1          = _OPC(5, 21),
   OPC_SAMGP2          = _OPC(5, 22),
   OPC_SAMGP3          = _OPC(5, 23),
   OPC_DSXPP_1         = _OPC(5, 24),
   OPC_DSYPP_1         = _OPC(5, 25),
   OPC_RGETPOS         = _OPC(5, 26),
   OPC_RGETINFO        = _OPC(5, 27),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO     = _OPC(5, 32),
   OPC_DSYPP_MACRO     = _OPC(5, 33),

   /* category 6: */
   OPC_LDG             = _OPC(6, 0),        /* load-global */
   OPC_LDL             = _OPC(6, 1),
   OPC_LDP             = _OPC(6, 2),
   OPC_STG             = _OPC(6, 3),        /* store-global */
   OPC_STL             = _OPC(6, 4),
   OPC_STP             = _OPC(6, 5),
   OPC_LDIB            = _OPC(6, 6),
   OPC_G2L             = _OPC(6, 7),
   OPC_L2G             = _OPC(6, 8),
   OPC_PREFETCH        = _OPC(6, 9),
   OPC_LDLW            = _OPC(6, 10),
   OPC_STLW            = _OPC(6, 11),
   OPC_RESFMT          = _OPC(6, 14),
   OPC_RESINFO         = _OPC(6, 15),
   OPC_ATOMIC_ADD      = _OPC(6, 16),
   OPC_ATOMIC_SUB      = _OPC(6, 17),
   OPC_ATOMIC_XCHG     = _OPC(6, 18),
   OPC_ATOMIC_INC      = _OPC(6, 19),
   OPC_ATOMIC_DEC      = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
   OPC_ATOMIC_MIN      = _OPC(6, 22),
   OPC_ATOMIC_MAX      = _OPC(6, 23),
   OPC_ATOMIC_AND      = _OPC(6, 24),
   OPC_ATOMIC_OR       = _OPC(6, 25),
   OPC_ATOMIC_XOR      = _OPC(6, 26),
   OPC_LDGB            = _OPC(6, 27),
   OPC_STGB            = _OPC(6, 28),
   OPC_STIB            = _OPC(6, 29),
   OPC_LDC             = _OPC(6, 30),
   OPC_LDLV            = _OPC(6, 31),
   OPC_PIPR            = _OPC(6, 32), /* ??? */
   OPC_PIPC            = _OPC(6, 33), /* ??? */
   OPC_EMIT2           = _OPC(6, 34), /* ??? */
   OPC_ENDLS           = _OPC(6, 35), /* ??? */
   OPC_GETSPID         = _OPC(6, 36), /* SP ID */
   OPC_GETWID          = _OPC(6, 37), /* wavefront ID */

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC             = _OPC(6, 40),
   OPC_RESINFO_B       = _OPC(6, 41),
   OPC_LDIB_B          = _OPC(6, 42),
   OPC_STIB_B          = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD      = _OPC(6, 44),
   OPC_ATOMIC_B_SUB      = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG     = _OPC(6, 46),
   OPC_ATOMIC_B_INC      = _OPC(6, 47),
   OPC_ATOMIC_B_DEC      = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG  = _OPC(6, 49),
   OPC_ATOMIC_B_MIN      = _OPC(6, 50),
   OPC_ATOMIC_B_MAX      = _OPC(6, 51),
   OPC_ATOMIC_B_AND      = _OPC(6, 52),
   OPC_ATOMIC_B_OR       = _OPC(6, 53),
   OPC_ATOMIC_B_XOR      = _OPC(6, 54),

   OPC_LDG_A           = _OPC(6, 55),
   OPC_STG_A           = _OPC(6, 56),

   OPC_SPILL_MACRO     = _OPC(6, 57),
   OPC_RELOAD_MACRO    = _OPC(6, 58),

   /* category 7: */
   OPC_BAR             = _OPC(7, 0),
   OPC_FENCE           = _OPC(7, 1),

   /* meta instructions (category -1): */
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT      = _OPC(-1, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT      = _OPC(-1, 2),
   OPC_META_COLLECT    = _OPC(-1, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(-1, 4),

   /* Parallel copies have multiple destinations, and copy each source
    * to its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register
    * shuffling is required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(-1, 5),
   OPC_META_PHI = _OPC(-1, 6),
} opc_t;
/* clang-format on */
    347 
/* Split an opc_t back into the two fields packed by _OPC(): opc_cat() yields
 * the category (the bits above the low NOPC_BITS) as an int, and opc_op()
 * yields the low NOPC_BITS bits as an unsigned.
 *
 * NOTE(review): meta opcodes are built with category -1, so opc_cat() right
 * shifts a negative value for them; C leaves that result
 * implementation-defined (compilers using an arithmetic shift give -1).
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

/* Declared here, defined elsewhere.  Presumably returns the printable name
 * of opc -- confirm against the definition.
 */
const char *disasm_a3xx_instr_name(opc_t opc);
    352 
/* Scalar data types, distinguished by kind (float, unsigned, signed) and by
 * bit width.
 */
typedef enum {
   TYPE_F16 = 0, /* float,    16 bits */
   TYPE_F32 = 1, /* float,    32 bits */
   TYPE_U16 = 2, /* unsigned, 16 bits */
   TYPE_U32 = 3, /* unsigned, 32 bits */
   TYPE_S16 = 4, /* signed,   16 bits */
   TYPE_S32 = 5, /* signed,   32 bits */
   TYPE_U8  = 6, /* unsigned,  8 bits */
   TYPE_S8  = 7, /* signed,    8 bits -- XXX value is assumed, not confirmed */
} type_t;
    363 
    364 static inline uint32_t
    365 type_size(type_t type)
    366 {
    367    switch (type) {
    368    case TYPE_F32:
    369    case TYPE_U32:
    370    case TYPE_S32:
    371       return 32;
    372    case TYPE_F16:
    373    case TYPE_U16:
    374    case TYPE_S16:
    375       return 16;
    376    case TYPE_U8:
    377    case TYPE_S8:
    378       return 8;
    379    default:
    380       ir3_assert(0); /* invalid type */
    381       return 0;
    382    }
    383 }
    384 
    385 static inline int
    386 type_float(type_t type)
    387 {
    388    return (type == TYPE_F32) || (type == TYPE_F16);
    389 }
    390 
    391 static inline int
    392 type_uint(type_t type)
    393 {
    394    return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
    395 }
    396 
    397 static inline int
    398 type_sint(type_t type)
    399 {
    400    return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
    401 }
    402 
/* Rounding mode selector. */
typedef enum {
   ROUND_ZERO    = 0,
   ROUND_EVEN    = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;
    409 
/* Build a packed register id from a register number and a component.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * The component takes the low two bits (only the low two bits of comp are
 * used) and the register number sits above them.  The arithmetic is done on
 * unsigned values so that a negative or very large num cannot trigger the
 * undefined behaviour of a signed left shift; results for ordinary
 * non-negative inputs are unchanged.
 */
static inline uint32_t
regid(int num, int comp)
{
   return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3u);
}
    421 
/* Register id treated as "not a valid register": register 63, component x. */
#define INVALID_REG     regid(63, 0)
/* True when r differs from INVALID_REG. */
#define VALIDREG(r)     ((r) != INVALID_REG)
/* Passes val to COND() together with the validity of r.  COND is not defined
 * in the visible part of this header, so it has to be supplied by whoever
 * uses this macro.
 */
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
    429 
/* Branch variants; each comment gives the matching mnemonic. */
typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR    = 1, /* brao */
   BRANCH_AND   = 2, /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY   = 4, /* bany */
   BRANCH_ALL   = 5, /* ball */
   BRANCH_X     = 6, /* brax ??? */
} brtype_t;
    439 
/* Descriptor mode, meaningful when is_bindless_s2en = 1.  It selects whether
 * bindless is enabled and, if it is, where the (base, index) pair for the
 * sampler and for the texture comes from.  The instruction embeds a single
 * base, and the texture always uses it.
 */
typedef enum {
   /* Traditional GL binding model.  Texture and sampler index are read from
    * src3, which is not presumed to be uniform.  This is backwards-compatible
    * with earlier generations, where this field was always 0 and
    * nonuniform-indexed sampling always worked.
    */
   CAT5_NONUNIFORM             = 0,

   /* Sampler base: low 3 bits of a1.x.  Sampler and texture index: src3,
    * which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM    = 1,

   /* Texture and sampler share the same base.  Sampler and texture index:
    * src3, which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM    = 2,

   /* Sampler base: low 3 bits of a1.x.  Sampler and texture index: src3,
    * which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Traditional GL binding model.  Texture and sampler index are read from
    * src3, which is presumed to be uniform.
    */
   CAT5_UNIFORM                = 4,

   /* Texture and sampler share the same base.  Sampler and texture index:
    * src3, which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM       = 5,

   /* Texture and sampler share the same base.  Sampler index: low 4 bits of
    * src3.  Texture index: the high 4 bits.
    */
   CAT5_BINDLESS_IMM           = 6,

   /* Sampler base: low 3 bits of a1.x.  Texture index: the next 8 bits of
    * a1.x.  Sampler index: an immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM        = 7,
} cat5_desc_mode_t;
    490 
/* Counterpart of cat5_desc_mode_t: selects how the descriptor is loaded. */
typedef enum {
   /* Old GL binding model, immediate index. */
   CAT6_IMM                 = 0,

   /* NOTE(review): the next two were left undocumented; by analogy with the
    * bindless entries below they presumably pair the old binding model with
    * a uniform / non-uniform register index -- confirm.
    */
   CAT6_UNIFORM             = 1,
   CAT6_NONUNIFORM          = 2,

   /* Bindless model, immediate index. */
   CAT6_BINDLESS_IMM        = 4,

   /* Bindless model, uniform register index. */
   CAT6_BINDLESS_UNIFORM    = 5,

   /* Bindless model, with a register index that isn't guaranteed to be
    * uniform.  Presumably the hardware checks whether the indices are equal
    * and splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
    516 
    517 static inline bool
    518 is_sat_compatible(opc_t opc)
    519 {
    520    /* On a6xx saturation doesn't work on cat4 */
    521    if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
    522       return false;
    523 
    524    switch (opc) {
    525    /* On a3xx and a6xx saturation doesn't work on bary.f */
    526    case OPC_BARY_F:
    527    /* On a6xx saturation doesn't work on sel.* */
    528    case OPC_SEL_B16:
    529    case OPC_SEL_B32:
    530    case OPC_SEL_S16:
    531    case OPC_SEL_S32:
    532    case OPC_SEL_F16:
    533    case OPC_SEL_F32:
    534       return false;
    535    default:
    536       return true;
    537    }
    538 }
    539 
    540 static inline bool
    541 is_mad(opc_t opc)
    542 {
    543    switch (opc) {
    544    case OPC_MAD_U16:
    545    case OPC_MAD_S16:
    546    case OPC_MAD_U24:
    547    case OPC_MAD_S24:
    548    case OPC_MAD_F16:
    549    case OPC_MAD_F32:
    550       return true;
    551    default:
    552       return false;
    553    }
    554 }
    555 
    556 static inline bool
    557 is_madsh(opc_t opc)
    558 {
    559    switch (opc) {
    560    case OPC_MADSH_U16:
    561    case OPC_MADSH_M16:
    562       return true;
    563    default:
    564       return false;
    565    }
    566 }
    567 
    568 static inline bool
    569 is_atomic(opc_t opc)
    570 {
    571    switch (opc) {
    572    case OPC_ATOMIC_ADD:
    573    case OPC_ATOMIC_SUB:
    574    case OPC_ATOMIC_XCHG:
    575    case OPC_ATOMIC_INC:
    576    case OPC_ATOMIC_DEC:
    577    case OPC_ATOMIC_CMPXCHG:
    578    case OPC_ATOMIC_MIN:
    579    case OPC_ATOMIC_MAX:
    580    case OPC_ATOMIC_AND:
    581    case OPC_ATOMIC_OR:
    582    case OPC_ATOMIC_XOR:
    583       return true;
    584    default:
    585       return false;
    586    }
    587 }
    588 
    589 static inline bool
    590 is_ssbo(opc_t opc)
    591 {
    592    switch (opc) {
    593    case OPC_RESFMT:
    594    case OPC_RESINFO:
    595    case OPC_LDGB:
    596    case OPC_STGB:
    597    case OPC_STIB:
    598       return true;
    599    default:
    600       return false;
    601    }
    602 }
    603 
    604 static inline bool
    605 is_isam(opc_t opc)
    606 {
    607    switch (opc) {
    608    case OPC_ISAM:
    609    case OPC_ISAML:
    610    case OPC_ISAMM:
    611       return true;
    612    default:
    613       return false;
    614    }
    615 }
    616 
    617 static inline bool
    618 is_cat2_float(opc_t opc)
    619 {
    620    switch (opc) {
    621    case OPC_ADD_F:
    622    case OPC_MIN_F:
    623    case OPC_MAX_F:
    624    case OPC_MUL_F:
    625    case OPC_SIGN_F:
    626    case OPC_CMPS_F:
    627    case OPC_ABSNEG_F:
    628    case OPC_CMPV_F:
    629    case OPC_FLOOR_F:
    630    case OPC_CEIL_F:
    631    case OPC_RNDNE_F:
    632    case OPC_RNDAZ_F:
    633    case OPC_TRUNC_F:
    634       return true;
    635 
    636    default:
    637       return false;
    638    }
    639 }
    640 
    641 static inline bool
    642 is_cat3_float(opc_t opc)
    643 {
    644    switch (opc) {
    645    case OPC_MAD_F16:
    646    case OPC_MAD_F32:
    647    case OPC_SEL_F16:
    648    case OPC_SEL_F32:
    649       return true;
    650    default:
    651       return false;
    652    }
    653 }
    654 
    655 #endif /* INSTR_A3XX_H_ */
    656