17e102996Smaya/*
27e102996Smaya * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
37e102996Smaya *
47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a
57e102996Smaya * copy of this software and associated documentation files (the "Software"),
67e102996Smaya * to deal in the Software without restriction, including without limitation
77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the
97e102996Smaya * Software is furnished to do so, subject to the following conditions:
107e102996Smaya *
117e102996Smaya * The above copyright notice and this permission notice (including the next
127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the
137e102996Smaya * Software.
147e102996Smaya *
157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217e102996Smaya * SOFTWARE.
227e102996Smaya */
237e102996Smaya
247e102996Smaya#ifndef INSTR_A3XX_H_
257e102996Smaya#define INSTR_A3XX_H_
267e102996Smaya
/* Shorthand for the GCC/Clang type attribute that lays an aggregate out
 * without padding between its members.
 */
#define PACKED __attribute__((__packed__))
287e102996Smaya
297ec681f3Smrg#include <assert.h>
307ec681f3Smrg#include <stdbool.h>
317e102996Smaya#include <stdint.h>
327e102996Smaya#include <stdio.h>
337e102996Smaya
/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak, __noreturn__));
/* clang-format on */

/* assert() variant whose failure handling can be overridden.
 *
 * ir3_assert_handler is declared weak, so its address is only non-NULL when
 * something in the final link defines it.  In that case a failed check is
 * routed to the handler (which does not return); otherwise the macro falls
 * back to plain assert().  Tools like crashdec need this, because they may
 * try to disassemble memory that does not actually hold valid instructions.
 *
 * On the failure path the trailing assert() evaluates the expression again,
 * so the expression should be free of side effects.
 */
#define ir3_assert(expr)                                                       \
   do {                                                                        \
      if (expr)                                                                \
         break;                                                                \
      if (ir3_assert_handler)                                                  \
         ir3_assert_handler(#expr, __FILE__, __LINE__, __func__);              \
      assert(expr);                                                            \
   } while (0)
/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Pack an instruction category and the opcode within that category into a
 * single opc_t value: the opcode takes the low NOPC_BITS bits and the
 * category sits above them.
 *
 * The category is scaled by multiplication rather than with '<<' because
 * the meta opcodes use category -1, and left-shifting a negative value is
 * undefined behaviour in C (and not a strict integer constant expression).
 * For non-negative categories the result is the same as a shift.  Both
 * arguments are parenthesized so that arbitrary expressions can be passed.
 */
#define _OPC(cat, opc) (((cat) * (1 << NOPC_BITS)) | (opc))
577e102996Smaya
587ec681f3Smrg/* clang-format off */
597e102996Smayatypedef enum {
607ec681f3Smrg   /* category 0: */
617ec681f3Smrg   OPC_NOP             = _OPC(0, 0),
627ec681f3Smrg   OPC_B               = _OPC(0, 1),
637ec681f3Smrg   OPC_JUMP            = _OPC(0, 2),
647ec681f3Smrg   OPC_CALL            = _OPC(0, 3),
657ec681f3Smrg   OPC_RET             = _OPC(0, 4),
667ec681f3Smrg   OPC_KILL            = _OPC(0, 5),
677ec681f3Smrg   OPC_END             = _OPC(0, 6),
687ec681f3Smrg   OPC_EMIT            = _OPC(0, 7),
697ec681f3Smrg   OPC_CUT             = _OPC(0, 8),
707ec681f3Smrg   OPC_CHMASK          = _OPC(0, 9),
717ec681f3Smrg   OPC_CHSH            = _OPC(0, 10),
727ec681f3Smrg   OPC_FLOW_REV        = _OPC(0, 11),
737ec681f3Smrg
747ec681f3Smrg   OPC_BKT             = _OPC(0, 16),
757ec681f3Smrg   OPC_STKS            = _OPC(0, 17),
767ec681f3Smrg   OPC_STKR            = _OPC(0, 18),
777ec681f3Smrg   OPC_XSET            = _OPC(0, 19),
787ec681f3Smrg   OPC_XCLR            = _OPC(0, 20),
797ec681f3Smrg   OPC_GETONE          = _OPC(0, 21),
807ec681f3Smrg   OPC_DBG             = _OPC(0, 22),
817ec681f3Smrg   OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
827ec681f3Smrg   OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */
837ec681f3Smrg
847ec681f3Smrg   OPC_PREDT           = _OPC(0, 29),   /* predicated true */
857ec681f3Smrg   OPC_PREDF           = _OPC(0, 30),   /* predicated false */
867ec681f3Smrg   OPC_PREDE           = _OPC(0, 31),   /* predicated end */
877ec681f3Smrg
887ec681f3Smrg   /* Logical opcodes for different branch instruction variations: */
897ec681f3Smrg   OPC_BR              = _OPC(0, 40),
907ec681f3Smrg   OPC_BRAO            = _OPC(0, 41),
917ec681f3Smrg   OPC_BRAA            = _OPC(0, 42),
927ec681f3Smrg   OPC_BRAC            = _OPC(0, 43),
937ec681f3Smrg   OPC_BANY            = _OPC(0, 44),
947ec681f3Smrg   OPC_BALL            = _OPC(0, 45),
957ec681f3Smrg   OPC_BRAX            = _OPC(0, 46),
967ec681f3Smrg
977ec681f3Smrg   /* Logical opcode to distinguish kill and demote */
987ec681f3Smrg   OPC_DEMOTE          = _OPC(0, 47),
997ec681f3Smrg
1007ec681f3Smrg   /* category 1: */
1017ec681f3Smrg   OPC_MOV             = _OPC(1, 0),
1027ec681f3Smrg   OPC_MOVP            = _OPC(1, 1),
1037ec681f3Smrg   /* swz, gat, sct */
1047ec681f3Smrg   OPC_MOVMSK          = _OPC(1, 3),
1057ec681f3Smrg
1067ec681f3Smrg   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
1077ec681f3Smrg    * replaces the repeat field:
1087ec681f3Smrg    */
1097ec681f3Smrg   OPC_SWZ            = _OPC(1, 4),
1107ec681f3Smrg   OPC_GAT            = _OPC(1, 5),
1117ec681f3Smrg   OPC_SCT            = _OPC(1, 6),
1127ec681f3Smrg
1137ec681f3Smrg   /* Logical opcodes for different variants of mov: */
1147ec681f3Smrg   OPC_MOV_IMMED       = _OPC(1, 40),
1157ec681f3Smrg   OPC_MOV_CONST       = _OPC(1, 41),
1167ec681f3Smrg   OPC_MOV_GPR         = _OPC(1, 42),
1177ec681f3Smrg   OPC_MOV_RELGPR      = _OPC(1, 43),
1187ec681f3Smrg   OPC_MOV_RELCONST    = _OPC(1, 44),
1197ec681f3Smrg
1207ec681f3Smrg   /* Macros that expand to an if statement + move */
1217ec681f3Smrg   OPC_BALLOT_MACRO    = _OPC(1, 50),
1227ec681f3Smrg   OPC_ANY_MACRO       = _OPC(1, 51),
1237ec681f3Smrg   OPC_ALL_MACRO       = _OPC(1, 52),
1247ec681f3Smrg   OPC_ELECT_MACRO     = _OPC(1, 53),
1257ec681f3Smrg   OPC_READ_COND_MACRO = _OPC(1, 54),
1267ec681f3Smrg   OPC_READ_FIRST_MACRO = _OPC(1, 55),
1277ec681f3Smrg   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),
1287ec681f3Smrg
1297ec681f3Smrg   /* category 2: */
1307ec681f3Smrg   OPC_ADD_F           = _OPC(2, 0),
1317ec681f3Smrg   OPC_MIN_F           = _OPC(2, 1),
1327ec681f3Smrg   OPC_MAX_F           = _OPC(2, 2),
1337ec681f3Smrg   OPC_MUL_F           = _OPC(2, 3),
1347ec681f3Smrg   OPC_SIGN_F          = _OPC(2, 4),
1357ec681f3Smrg   OPC_CMPS_F          = _OPC(2, 5),
1367ec681f3Smrg   OPC_ABSNEG_F        = _OPC(2, 6),
1377ec681f3Smrg   OPC_CMPV_F          = _OPC(2, 7),
1387ec681f3Smrg   /* 8 - invalid */
1397ec681f3Smrg   OPC_FLOOR_F         = _OPC(2, 9),
1407ec681f3Smrg   OPC_CEIL_F          = _OPC(2, 10),
1417ec681f3Smrg   OPC_RNDNE_F         = _OPC(2, 11),
1427ec681f3Smrg   OPC_RNDAZ_F         = _OPC(2, 12),
1437ec681f3Smrg   OPC_TRUNC_F         = _OPC(2, 13),
1447ec681f3Smrg   /* 14-15 - invalid */
1457ec681f3Smrg   OPC_ADD_U           = _OPC(2, 16),
1467ec681f3Smrg   OPC_ADD_S           = _OPC(2, 17),
1477ec681f3Smrg   OPC_SUB_U           = _OPC(2, 18),
1487ec681f3Smrg   OPC_SUB_S           = _OPC(2, 19),
1497ec681f3Smrg   OPC_CMPS_U          = _OPC(2, 20),
1507ec681f3Smrg   OPC_CMPS_S          = _OPC(2, 21),
1517ec681f3Smrg   OPC_MIN_U           = _OPC(2, 22),
1527ec681f3Smrg   OPC_MIN_S           = _OPC(2, 23),
1537ec681f3Smrg   OPC_MAX_U           = _OPC(2, 24),
1547ec681f3Smrg   OPC_MAX_S           = _OPC(2, 25),
1557ec681f3Smrg   OPC_ABSNEG_S        = _OPC(2, 26),
1567ec681f3Smrg   /* 27 - invalid */
1577ec681f3Smrg   OPC_AND_B           = _OPC(2, 28),
1587ec681f3Smrg   OPC_OR_B            = _OPC(2, 29),
1597ec681f3Smrg   OPC_NOT_B           = _OPC(2, 30),
1607ec681f3Smrg   OPC_XOR_B           = _OPC(2, 31),
1617ec681f3Smrg   /* 32 - invalid */
1627ec681f3Smrg   OPC_CMPV_U          = _OPC(2, 33),
1637ec681f3Smrg   OPC_CMPV_S          = _OPC(2, 34),
1647ec681f3Smrg   /* 35-47 - invalid */
1657ec681f3Smrg   OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
1667ec681f3Smrg   OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
1677ec681f3Smrg   OPC_MULL_U          = _OPC(2, 50),
1687ec681f3Smrg   OPC_BFREV_B         = _OPC(2, 51),
1697ec681f3Smrg   OPC_CLZ_S           = _OPC(2, 52),
1707ec681f3Smrg   OPC_CLZ_B           = _OPC(2, 53),
1717ec681f3Smrg   OPC_SHL_B           = _OPC(2, 54),
1727ec681f3Smrg   OPC_SHR_B           = _OPC(2, 55),
1737ec681f3Smrg   OPC_ASHR_B          = _OPC(2, 56),
1747ec681f3Smrg   OPC_BARY_F          = _OPC(2, 57),
1757ec681f3Smrg   OPC_MGEN_B          = _OPC(2, 58),
1767ec681f3Smrg   OPC_GETBIT_B        = _OPC(2, 59),
1777ec681f3Smrg   OPC_SETRM           = _OPC(2, 60),
1787ec681f3Smrg   OPC_CBITS_B         = _OPC(2, 61),
1797ec681f3Smrg   OPC_SHB             = _OPC(2, 62),
1807ec681f3Smrg   OPC_MSAD            = _OPC(2, 63),
1817ec681f3Smrg
1827ec681f3Smrg   /* category 3: */
1837ec681f3Smrg   OPC_MAD_U16         = _OPC(3, 0),
1847ec681f3Smrg   OPC_MADSH_U16       = _OPC(3, 1),
1857ec681f3Smrg   OPC_MAD_S16         = _OPC(3, 2),
1867ec681f3Smrg   OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
1877ec681f3Smrg   OPC_MAD_U24         = _OPC(3, 4),
1887ec681f3Smrg   OPC_MAD_S24         = _OPC(3, 5),
1897ec681f3Smrg   OPC_MAD_F16         = _OPC(3, 6),
1907ec681f3Smrg   OPC_MAD_F32         = _OPC(3, 7),
1917ec681f3Smrg   OPC_SEL_B16         = _OPC(3, 8),
1927ec681f3Smrg   OPC_SEL_B32         = _OPC(3, 9),
1937ec681f3Smrg   OPC_SEL_S16         = _OPC(3, 10),
1947ec681f3Smrg   OPC_SEL_S32         = _OPC(3, 11),
1957ec681f3Smrg   OPC_SEL_F16         = _OPC(3, 12),
1967ec681f3Smrg   OPC_SEL_F32         = _OPC(3, 13),
1977ec681f3Smrg   OPC_SAD_S16         = _OPC(3, 14),
1987ec681f3Smrg   OPC_SAD_S32         = _OPC(3, 15),
1997ec681f3Smrg   OPC_SHLG_B16        = _OPC(3, 16),
2007ec681f3Smrg
2017ec681f3Smrg   /* category 4: */
2027ec681f3Smrg   OPC_RCP             = _OPC(4, 0),
2037ec681f3Smrg   OPC_RSQ             = _OPC(4, 1),
2047ec681f3Smrg   OPC_LOG2            = _OPC(4, 2),
2057ec681f3Smrg   OPC_EXP2            = _OPC(4, 3),
2067ec681f3Smrg   OPC_SIN             = _OPC(4, 4),
2077ec681f3Smrg   OPC_COS             = _OPC(4, 5),
2087ec681f3Smrg   OPC_SQRT            = _OPC(4, 6),
2097ec681f3Smrg   /* NOTE that these are 8+opc from their highp equivs, so it's possible
2107ec681f3Smrg    * that the high order bit in the opc field has been repurposed for
2117ec681f3Smrg    * half-precision use?  But note that other ops (rcp/lsin/cos/sqrt)
2127ec681f3Smrg    * still use the same opc as highp
2137ec681f3Smrg    */
2147ec681f3Smrg   OPC_HRSQ            = _OPC(4, 9),
2157ec681f3Smrg   OPC_HLOG2           = _OPC(4, 10),
2167ec681f3Smrg   OPC_HEXP2           = _OPC(4, 11),
2177ec681f3Smrg
2187ec681f3Smrg   /* category 5: */
2197ec681f3Smrg   OPC_ISAM            = _OPC(5, 0),
2207ec681f3Smrg   OPC_ISAML           = _OPC(5, 1),
2217ec681f3Smrg   OPC_ISAMM           = _OPC(5, 2),
2227ec681f3Smrg   OPC_SAM             = _OPC(5, 3),
2237ec681f3Smrg   OPC_SAMB            = _OPC(5, 4),
2247ec681f3Smrg   OPC_SAML            = _OPC(5, 5),
2257ec681f3Smrg   OPC_SAMGQ           = _OPC(5, 6),
2267ec681f3Smrg   OPC_GETLOD          = _OPC(5, 7),
2277ec681f3Smrg   OPC_CONV            = _OPC(5, 8),
2287ec681f3Smrg   OPC_CONVM           = _OPC(5, 9),
2297ec681f3Smrg   OPC_GETSIZE         = _OPC(5, 10),
2307ec681f3Smrg   OPC_GETBUF          = _OPC(5, 11),
2317ec681f3Smrg   OPC_GETPOS          = _OPC(5, 12),
2327ec681f3Smrg   OPC_GETINFO         = _OPC(5, 13),
2337ec681f3Smrg   OPC_DSX             = _OPC(5, 14),
2347ec681f3Smrg   OPC_DSY             = _OPC(5, 15),
2357ec681f3Smrg   OPC_GATHER4R        = _OPC(5, 16),
2367ec681f3Smrg   OPC_GATHER4G        = _OPC(5, 17),
2377ec681f3Smrg   OPC_GATHER4B        = _OPC(5, 18),
2387ec681f3Smrg   OPC_GATHER4A        = _OPC(5, 19),
2397ec681f3Smrg   OPC_SAMGP0          = _OPC(5, 20),
2407ec681f3Smrg   OPC_SAMGP1          = _OPC(5, 21),
2417ec681f3Smrg   OPC_SAMGP2          = _OPC(5, 22),
2427ec681f3Smrg   OPC_SAMGP3          = _OPC(5, 23),
2437ec681f3Smrg   OPC_DSXPP_1         = _OPC(5, 24),
2447ec681f3Smrg   OPC_DSYPP_1         = _OPC(5, 25),
2457ec681f3Smrg   OPC_RGETPOS         = _OPC(5, 26),
2467ec681f3Smrg   OPC_RGETINFO        = _OPC(5, 27),
2477ec681f3Smrg   /* cat5 meta instructions, placed above the cat5 opc field's size */
2487ec681f3Smrg   OPC_DSXPP_MACRO     = _OPC(5, 32),
2497ec681f3Smrg   OPC_DSYPP_MACRO     = _OPC(5, 33),
2507ec681f3Smrg
2517ec681f3Smrg   /* category 6: */
2527ec681f3Smrg   OPC_LDG             = _OPC(6, 0),        /* load-global */
2537ec681f3Smrg   OPC_LDL             = _OPC(6, 1),
2547ec681f3Smrg   OPC_LDP             = _OPC(6, 2),
2557ec681f3Smrg   OPC_STG             = _OPC(6, 3),        /* store-global */
2567ec681f3Smrg   OPC_STL             = _OPC(6, 4),
2577ec681f3Smrg   OPC_STP             = _OPC(6, 5),
2587ec681f3Smrg   OPC_LDIB            = _OPC(6, 6),
2597ec681f3Smrg   OPC_G2L             = _OPC(6, 7),
2607ec681f3Smrg   OPC_L2G             = _OPC(6, 8),
2617ec681f3Smrg   OPC_PREFETCH        = _OPC(6, 9),
2627ec681f3Smrg   OPC_LDLW            = _OPC(6, 10),
2637ec681f3Smrg   OPC_STLW            = _OPC(6, 11),
2647ec681f3Smrg   OPC_RESFMT          = _OPC(6, 14),
2657ec681f3Smrg   OPC_RESINFO         = _OPC(6, 15),
2667ec681f3Smrg   OPC_ATOMIC_ADD      = _OPC(6, 16),
2677ec681f3Smrg   OPC_ATOMIC_SUB      = _OPC(6, 17),
2687ec681f3Smrg   OPC_ATOMIC_XCHG     = _OPC(6, 18),
2697ec681f3Smrg   OPC_ATOMIC_INC      = _OPC(6, 19),
2707ec681f3Smrg   OPC_ATOMIC_DEC      = _OPC(6, 20),
2717ec681f3Smrg   OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
2727ec681f3Smrg   OPC_ATOMIC_MIN      = _OPC(6, 22),
2737ec681f3Smrg   OPC_ATOMIC_MAX      = _OPC(6, 23),
2747ec681f3Smrg   OPC_ATOMIC_AND      = _OPC(6, 24),
2757ec681f3Smrg   OPC_ATOMIC_OR       = _OPC(6, 25),
2767ec681f3Smrg   OPC_ATOMIC_XOR      = _OPC(6, 26),
2777ec681f3Smrg   OPC_LDGB            = _OPC(6, 27),
2787ec681f3Smrg   OPC_STGB            = _OPC(6, 28),
2797ec681f3Smrg   OPC_STIB            = _OPC(6, 29),
2807ec681f3Smrg   OPC_LDC             = _OPC(6, 30),
2817ec681f3Smrg   OPC_LDLV            = _OPC(6, 31),
2827ec681f3Smrg   OPC_PIPR            = _OPC(6, 32), /* ??? */
2837ec681f3Smrg   OPC_PIPC            = _OPC(6, 33), /* ??? */
2847ec681f3Smrg   OPC_EMIT2           = _OPC(6, 34), /* ??? */
2857ec681f3Smrg   OPC_ENDLS           = _OPC(6, 35), /* ??? */
2867ec681f3Smrg   OPC_GETSPID         = _OPC(6, 36), /* SP ID */
2877ec681f3Smrg   OPC_GETWID          = _OPC(6, 37), /* wavefront ID */
2887ec681f3Smrg
2897ec681f3Smrg   /* Logical opcodes for things that differ in a6xx+ */
2907ec681f3Smrg   OPC_STC             = _OPC(6, 40),
2917ec681f3Smrg   OPC_RESINFO_B       = _OPC(6, 41),
2927ec681f3Smrg   OPC_LDIB_B          = _OPC(6, 42),
2937ec681f3Smrg   OPC_STIB_B          = _OPC(6, 43),
2947ec681f3Smrg
2957ec681f3Smrg   /* Logical opcodes for different atomic instruction variations: */
2967ec681f3Smrg   OPC_ATOMIC_B_ADD      = _OPC(6, 44),
2977ec681f3Smrg   OPC_ATOMIC_B_SUB      = _OPC(6, 45),
2987ec681f3Smrg   OPC_ATOMIC_B_XCHG     = _OPC(6, 46),
2997ec681f3Smrg   OPC_ATOMIC_B_INC      = _OPC(6, 47),
3007ec681f3Smrg   OPC_ATOMIC_B_DEC      = _OPC(6, 48),
3017ec681f3Smrg   OPC_ATOMIC_B_CMPXCHG  = _OPC(6, 49),
3027ec681f3Smrg   OPC_ATOMIC_B_MIN      = _OPC(6, 50),
3037ec681f3Smrg   OPC_ATOMIC_B_MAX      = _OPC(6, 51),
3047ec681f3Smrg   OPC_ATOMIC_B_AND      = _OPC(6, 52),
3057ec681f3Smrg   OPC_ATOMIC_B_OR       = _OPC(6, 53),
3067ec681f3Smrg   OPC_ATOMIC_B_XOR      = _OPC(6, 54),
3077ec681f3Smrg
3087ec681f3Smrg   OPC_LDG_A           = _OPC(6, 55),
3097ec681f3Smrg   OPC_STG_A           = _OPC(6, 56),
3107ec681f3Smrg
3117ec681f3Smrg   OPC_SPILL_MACRO     = _OPC(6, 57),
3127ec681f3Smrg   OPC_RELOAD_MACRO    = _OPC(6, 58),
3137ec681f3Smrg
3147ec681f3Smrg   /* category 7: */
3157ec681f3Smrg   OPC_BAR             = _OPC(7, 0),
3167ec681f3Smrg   OPC_FENCE           = _OPC(7, 1),
3177ec681f3Smrg
3187ec681f3Smrg   /* meta instructions (category -1): */
3197ec681f3Smrg   /* placeholder instr to mark shader inputs: */
3207ec681f3Smrg   OPC_META_INPUT      = _OPC(-1, 0),
3217ec681f3Smrg   /* The "collect" and "split" instructions are used for keeping
3227ec681f3Smrg    * track of instructions that write to multiple dst registers
3237ec681f3Smrg    * (split) like texture sample instructions, or read multiple
3247ec681f3Smrg    * consecutive scalar registers (collect) (bary.f, texture samp)
3257ec681f3Smrg    *
3267ec681f3Smrg    * A "split" extracts a scalar component from a vecN, and a
3277ec681f3Smrg    * "collect" gathers multiple scalar components into a vecN
3287ec681f3Smrg    */
3297ec681f3Smrg   OPC_META_SPLIT      = _OPC(-1, 2),
3307ec681f3Smrg   OPC_META_COLLECT    = _OPC(-1, 3),
3317ec681f3Smrg
3327ec681f3Smrg   /* placeholder for texture fetches that run before FS invocation
3337ec681f3Smrg    * starts:
3347ec681f3Smrg    */
3357ec681f3Smrg   OPC_META_TEX_PREFETCH = _OPC(-1, 4),
3367ec681f3Smrg
3377ec681f3Smrg   /* Parallel copies have multiple destinations, and copy each destination
3387ec681f3Smrg    * to its corresponding source. This happens "in parallel," meaning that
3397ec681f3Smrg    * it happens as-if every source is read first and then every destination
3407ec681f3Smrg    * is stored. These are produced in RA when register shuffling is
3417ec681f3Smrg    * required, and then lowered away immediately afterwards.
3427ec681f3Smrg    */
3437ec681f3Smrg   OPC_META_PARALLEL_COPY = _OPC(-1, 5),
3447ec681f3Smrg   OPC_META_PHI = _OPC(-1, 6),
3457e102996Smaya} opc_t;
3467ec681f3Smrg/* clang-format on */
3477e102996Smaya
/* Take a packed opcode value apart again: opc_cat() yields the category
 * (everything above the low NOPC_BITS bits, as an int) and opc_op() the
 * opcode within the category (the low NOPC_BITS bits, as an unsigned).
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
#define opc_op(opc)  ((unsigned)((opc) & ((1u << NOPC_BITS) - 1u)))
3507e102996Smaya
/* Defined outside this header.
 * NOTE(review): presumably maps an opcode to its printable name for the
 * disassembler; whether it can return NULL is not visible here -- confirm
 * against the definition.
 */
const char *disasm_a3xx_instr_name(opc_t opc);
3527ec681f3Smrg
/* Data type tags.  The helpers below classify them as float (F), unsigned
 * integer (U) or signed integer (S); the numeric suffix is the width in bits
 * that type_size() reports.
 */
typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_U8  = 6,
   TYPE_S8  = 7, /* XXX assumed, not confirmed */
} type_t;
3637e102996Smaya
3647ec681f3Smrgstatic inline uint32_t
3657ec681f3Smrgtype_size(type_t type)
3667e102996Smaya{
3677ec681f3Smrg   switch (type) {
3687ec681f3Smrg   case TYPE_F32:
3697ec681f3Smrg   case TYPE_U32:
3707ec681f3Smrg   case TYPE_S32:
3717ec681f3Smrg      return 32;
3727ec681f3Smrg   case TYPE_F16:
3737ec681f3Smrg   case TYPE_U16:
3747ec681f3Smrg   case TYPE_S16:
3757ec681f3Smrg      return 16;
3767ec681f3Smrg   case TYPE_U8:
3777ec681f3Smrg   case TYPE_S8:
3787ec681f3Smrg      return 8;
3797ec681f3Smrg   default:
3807ec681f3Smrg      ir3_assert(0); /* invalid type */
3817ec681f3Smrg      return 0;
3827ec681f3Smrg   }
3837e102996Smaya}
3847e102996Smaya
3857ec681f3Smrgstatic inline int
3867ec681f3Smrgtype_float(type_t type)
3877e102996Smaya{
3887ec681f3Smrg   return (type == TYPE_F32) || (type == TYPE_F16);
3897e102996Smaya}
3907e102996Smaya
3917ec681f3Smrgstatic inline int
3927ec681f3Smrgtype_uint(type_t type)
3937e102996Smaya{
3947ec681f3Smrg   return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
3957e102996Smaya}
3967e102996Smaya
3977ec681f3Smrgstatic inline int
3987ec681f3Smrgtype_sint(type_t type)
3997e102996Smaya{
4007ec681f3Smrg   return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
4017e102996Smaya}
4027e102996Smaya
/* Rounding mode selector.
 * NOTE(review): going by the names these are toward-zero, to-nearest-even,
 * toward +infinity and toward -infinity; where the value is consumed is not
 * visible in this header.
 */
typedef enum {
   ROUND_ZERO    = 0,
   ROUND_EVEN    = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;
4097ec681f3Smrg
/* Build a register id out of a register number and a component selector.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * Only the low two bits of comp are kept; num is placed in the bits above
 * them.
 */
static inline uint32_t
regid(int num, int comp)
{
   const int comp_bits = comp & 0x3;
   const int num_bits = num << 2;

   return num_bits | comp_bits;
}

/* Register number 63 (component x) is the "no register" marker. */
#define INVALID_REG     regid(63, 0)
#define VALIDREG(r)     ((r) != INVALID_REG)
/* NOTE(review): COND() is not defined in this header, so it has to come from
 * whoever includes it; presumably it yields val only when the condition
 * holds -- confirm.
 */
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */
4297ec681f3Smrg
/* Branch variations; the trailing comment on each entry is the matching
 * instruction mnemonic.
 */
typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR    = 1, /* brao */
   BRANCH_AND   = 2, /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY   = 4, /* bany */
   BRANCH_ALL   = 5, /* ball */
   BRANCH_X     = 6, /* brax ??? */
} brtype_t;
4397ec681f3Smrg
/* Descriptor mode for cat5 instructions.  With is_bindless_s2en = 1 this
 * selects whether bindless is enabled and, if it is, where the
 * (base, index) pair for the sampler and for the texture comes from.  The
 * instruction embeds a single base, and that base is always the one used
 * for the texture.
 */
typedef enum {
   /* Traditional GL binding model: texture and sampler index are taken
    * from src3, which is not presumed to be uniform.  This is
    * backwards-compatible with earlier generations, where the field was
    * always 0 and nonuniform-indexed sampling always worked.
    */
   CAT5_NONUNIFORM = 0,

   /* Sampler base from the low 3 bits of a1.x; sampler and texture index
    * from src3, which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* Texture and sampler share the same base; sampler and texture index
    * from src3, which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* Sampler base from the low 3 bits of a1.x; sampler and texture index
    * from src3, which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Traditional GL binding model: texture and sampler index are taken
    * from src3, which is presumed to be uniform.
    */
   CAT5_UNIFORM = 4,

   /* Texture and sampler share the same base; sampler and texture index
    * from src3, which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* Texture and sampler share the same base; the sampler index is the
    * low 4 bits of src3 and the texture index the high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* Sampler base from the low 3 bits of a1.x and texture index from the
    * next 8 bits of a1.x; the sampler index is an immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;
4907ec681f3Smrg
/* Descriptor mode for cat6 instructions; like cat5_desc_mode_t it
 * describes how the descriptor is loaded.
 */
typedef enum {
   /* Old GL binding model, immediate index. */
   CAT6_IMM = 0,

   /* NOTE(review): the next two carry no description of their own; by
    * analogy with the bindless entries they presumably are the old
    * binding model with a uniform / possibly non-uniform register index
    * -- confirm.
    */
   CAT6_UNIFORM = 1,
   CAT6_NONUNIFORM = 2,

   /* Bindless model, immediate index. */
   CAT6_BINDLESS_IMM = 4,

   /* Bindless model, uniform register index. */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Bindless model, with a register index that isn't guaranteed to be
    * uniform.  This presumably checks whether the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;
5167ec681f3Smrg
5177ec681f3Smrgstatic inline bool
5187ec681f3Smrgis_sat_compatible(opc_t opc)
5197e102996Smaya{
5207ec681f3Smrg   /* On a6xx saturation doesn't work on cat4 */
5217ec681f3Smrg   if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
5227ec681f3Smrg      return false;
5237ec681f3Smrg
5247ec681f3Smrg   switch (opc) {
5257ec681f3Smrg   /* On a3xx and a6xx saturation doesn't work on bary.f */
5267ec681f3Smrg   case OPC_BARY_F:
5277ec681f3Smrg   /* On a6xx saturation doesn't work on sel.* */
5287ec681f3Smrg   case OPC_SEL_B16:
5297ec681f3Smrg   case OPC_SEL_B32:
5307ec681f3Smrg   case OPC_SEL_S16:
5317ec681f3Smrg   case OPC_SEL_S32:
5327ec681f3Smrg   case OPC_SEL_F16:
5337ec681f3Smrg   case OPC_SEL_F32:
5347ec681f3Smrg      return false;
5357ec681f3Smrg   default:
5367ec681f3Smrg      return true;
5377ec681f3Smrg   }
5387e102996Smaya}
5397e102996Smaya
5407ec681f3Smrgstatic inline bool
5417ec681f3Smrgis_mad(opc_t opc)
5427e102996Smaya{
5437ec681f3Smrg   switch (opc) {
5447ec681f3Smrg   case OPC_MAD_U16:
5457ec681f3Smrg   case OPC_MAD_S16:
5467ec681f3Smrg   case OPC_MAD_U24:
5477ec681f3Smrg   case OPC_MAD_S24:
5487ec681f3Smrg   case OPC_MAD_F16:
5497ec681f3Smrg   case OPC_MAD_F32:
5507ec681f3Smrg      return true;
5517ec681f3Smrg   default:
5527ec681f3Smrg      return false;
5537ec681f3Smrg   }
5547e102996Smaya}
5557e102996Smaya
5567ec681f3Smrgstatic inline bool
5577ec681f3Smrgis_madsh(opc_t opc)
5587e102996Smaya{
5597ec681f3Smrg   switch (opc) {
5607ec681f3Smrg   case OPC_MADSH_U16:
5617ec681f3Smrg   case OPC_MADSH_M16:
5627ec681f3Smrg      return true;
5637ec681f3Smrg   default:
5647ec681f3Smrg      return false;
5657ec681f3Smrg   }
5667e102996Smaya}
5677e102996Smaya
5687ec681f3Smrgstatic inline bool
5697ec681f3Smrgis_atomic(opc_t opc)
5707e102996Smaya{
5717ec681f3Smrg   switch (opc) {
5727ec681f3Smrg   case OPC_ATOMIC_ADD:
5737ec681f3Smrg   case OPC_ATOMIC_SUB:
5747ec681f3Smrg   case OPC_ATOMIC_XCHG:
5757ec681f3Smrg   case OPC_ATOMIC_INC:
5767ec681f3Smrg   case OPC_ATOMIC_DEC:
5777ec681f3Smrg   case OPC_ATOMIC_CMPXCHG:
5787ec681f3Smrg   case OPC_ATOMIC_MIN:
5797ec681f3Smrg   case OPC_ATOMIC_MAX:
5807ec681f3Smrg   case OPC_ATOMIC_AND:
5817ec681f3Smrg   case OPC_ATOMIC_OR:
5827ec681f3Smrg   case OPC_ATOMIC_XOR:
5837ec681f3Smrg      return true;
5847ec681f3Smrg   default:
5857ec681f3Smrg      return false;
5867ec681f3Smrg   }
5877e102996Smaya}
5887e102996Smaya
5897ec681f3Smrgstatic inline bool
5907ec681f3Smrgis_ssbo(opc_t opc)
5917e102996Smaya{
5927ec681f3Smrg   switch (opc) {
5937ec681f3Smrg   case OPC_RESFMT:
5947ec681f3Smrg   case OPC_RESINFO:
5957ec681f3Smrg   case OPC_LDGB:
5967ec681f3Smrg   case OPC_STGB:
5977ec681f3Smrg   case OPC_STIB:
5987ec681f3Smrg      return true;
5997ec681f3Smrg   default:
6007ec681f3Smrg      return false;
6017ec681f3Smrg   }
6027e102996Smaya}
6037e102996Smaya
6047ec681f3Smrgstatic inline bool
6057ec681f3Smrgis_isam(opc_t opc)
6067e102996Smaya{
6077ec681f3Smrg   switch (opc) {
6087ec681f3Smrg   case OPC_ISAM:
6097ec681f3Smrg   case OPC_ISAML:
6107ec681f3Smrg   case OPC_ISAMM:
6117ec681f3Smrg      return true;
6127ec681f3Smrg   default:
6137ec681f3Smrg      return false;
6147ec681f3Smrg   }
6157e102996Smaya}
6167e102996Smaya
6177ec681f3Smrgstatic inline bool
6187ec681f3Smrgis_cat2_float(opc_t opc)
6197e102996Smaya{
6207ec681f3Smrg   switch (opc) {
6217ec681f3Smrg   case OPC_ADD_F:
6227ec681f3Smrg   case OPC_MIN_F:
6237ec681f3Smrg   case OPC_MAX_F:
6247ec681f3Smrg   case OPC_MUL_F:
6257ec681f3Smrg   case OPC_SIGN_F:
6267ec681f3Smrg   case OPC_CMPS_F:
6277ec681f3Smrg   case OPC_ABSNEG_F:
6287ec681f3Smrg   case OPC_CMPV_F:
6297ec681f3Smrg   case OPC_FLOOR_F:
6307ec681f3Smrg   case OPC_CEIL_F:
6317ec681f3Smrg   case OPC_RNDNE_F:
6327ec681f3Smrg   case OPC_RNDAZ_F:
6337ec681f3Smrg   case OPC_TRUNC_F:
6347ec681f3Smrg      return true;
6357ec681f3Smrg
6367ec681f3Smrg   default:
6377ec681f3Smrg      return false;
6387ec681f3Smrg   }
6397e102996Smaya}
6407e102996Smaya
6417ec681f3Smrgstatic inline bool
6427ec681f3Smrgis_cat3_float(opc_t opc)
6437e102996Smaya{
6447ec681f3Smrg   switch (opc) {
6457ec681f3Smrg   case OPC_MAD_F16:
6467ec681f3Smrg   case OPC_MAD_F32:
6477ec681f3Smrg   case OPC_SEL_F16:
6487ec681f3Smrg   case OPC_SEL_F32:
6497ec681f3Smrg      return true;
6507ec681f3Smrg   default:
6517ec681f3Smrg      return false;
6527ec681f3Smrg   }
6537e102996Smaya}
6547e102996Smaya
6557e102996Smaya#endif /* INSTR_A3XX_H_ */
656