/*
 * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#ifndef INSTR_A3XX_H_
#define INSTR_A3XX_H_

/* Shorthand for the GCC/Clang "packed" type attribute. */
#define PACKED __attribute__((__packed__))

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Optional hook invoked by ir3_assert() on failure.  It is declared weak, so
 * ir3_assert() tests the symbol against NULL before calling it, and noreturn,
 * so a handler must not return to the caller.
 */
/* clang-format off */
void ir3_assert_handler(const char *expr, const char *file, int line,
                        const char *func) __attribute__((weak)) __attribute__((__noreturn__));
/* clang-format on */

/* A wrapper for assert() that allows overriding handling of a failed
 * assert.  This is needed for tools like crashdec which can want to
 * attempt to disassemble memory that might not actually be valid
 * instructions.
 *
 * NOTE: on failure `expr` is evaluated a second time by the trailing
 * assert(), so it must be free of side effects.
 */
#define ir3_assert(expr) \
   do { \
      if (!(expr)) { \
         if (ir3_assert_handler) { \
            ir3_assert_handler(#expr, __FILE__, __LINE__, __func__); \
         } \
         assert(expr); \
      } \
   } while (0)

/* size of largest OPC field of all the instruction categories: */
#define NOPC_BITS 6

/* Build an opc_t value: the category goes in the bits above NOPC_BITS and the
 * per-category opcode in the low NOPC_BITS bits.
 *
 * The category is scaled by multiplication rather than `<<` because the meta
 * instructions use category -1, and left-shifting a negative value is
 * undefined behaviour in C.  The resulting values are unchanged.
 */
#define _OPC(cat, opc) (((cat) * (1 << NOPC_BITS)) | (opc))

/* clang-format off */
typedef enum {
   /* category 0: */
   OPC_NOP             = _OPC(0, 0),
   OPC_B               = _OPC(0, 1),
   OPC_JUMP            = _OPC(0, 2),
   OPC_CALL            = _OPC(0, 3),
   OPC_RET             = _OPC(0, 4),
   OPC_KILL            = _OPC(0, 5),
   OPC_END             = _OPC(0, 6),
   OPC_EMIT            = _OPC(0, 7),
   OPC_CUT             = _OPC(0, 8),
   OPC_CHMASK          = _OPC(0, 9),
   OPC_CHSH            = _OPC(0, 10),
   OPC_FLOW_REV        = _OPC(0, 11),

   OPC_BKT             = _OPC(0, 16),
   OPC_STKS            = _OPC(0, 17),
   OPC_STKR            = _OPC(0, 18),
   OPC_XSET            = _OPC(0, 19),
   OPC_XCLR            = _OPC(0, 20),
   OPC_GETONE          = _OPC(0, 21),
   OPC_DBG             = _OPC(0, 22),
   OPC_SHPS            = _OPC(0, 23),   /* shader prologue start */
   OPC_SHPE            = _OPC(0, 24),   /* shader prologue end */

   OPC_PREDT           = _OPC(0, 29),   /* predicated true */
   OPC_PREDF           = _OPC(0, 30),   /* predicated false */
   OPC_PREDE           = _OPC(0, 31),   /* predicated end */

   /* Logical opcodes for different branch instruction variations: */
   OPC_BR              = _OPC(0, 40),
   OPC_BRAO            = _OPC(0, 41),
   OPC_BRAA            = _OPC(0, 42),
   OPC_BRAC            = _OPC(0, 43),
   OPC_BANY            = _OPC(0, 44),
   OPC_BALL            = _OPC(0, 45),
   OPC_BRAX            = _OPC(0, 46),

   /* Logical opcode to distinguish kill and demote */
   OPC_DEMOTE          = _OPC(0, 47),

   /* category 1: */
   OPC_MOV             = _OPC(1, 0),
   OPC_MOVP            = _OPC(1, 1),
   /* swz, gat, sct */
   OPC_MOVMSK          = _OPC(1, 3),

   /* Virtual opcodes for instructions differentiated via a "sub-opcode" that
    * replaces the repeat field:
    */
   OPC_SWZ             = _OPC(1, 4),
   OPC_GAT             = _OPC(1, 5),
   OPC_SCT             = _OPC(1, 6),

   /* Logical opcodes for different variants of mov: */
   OPC_MOV_IMMED       = _OPC(1, 40),
   OPC_MOV_CONST       = _OPC(1, 41),
   OPC_MOV_GPR         = _OPC(1, 42),
   OPC_MOV_RELGPR      = _OPC(1, 43),
   OPC_MOV_RELCONST    = _OPC(1, 44),

   /* Macros that expand to an if statement + move */
   OPC_BALLOT_MACRO    = _OPC(1, 50),
   OPC_ANY_MACRO       = _OPC(1, 51),
   OPC_ALL_MACRO       = _OPC(1, 52),
   OPC_ELECT_MACRO     = _OPC(1, 53),
   OPC_READ_COND_MACRO = _OPC(1, 54),
   OPC_READ_FIRST_MACRO = _OPC(1, 55),
   OPC_SWZ_SHARED_MACRO = _OPC(1, 56),

   /* category 2: */
   OPC_ADD_F           = _OPC(2, 0),
   OPC_MIN_F           = _OPC(2, 1),
   OPC_MAX_F           = _OPC(2, 2),
   OPC_MUL_F           = _OPC(2, 3),
   OPC_SIGN_F          = _OPC(2, 4),
   OPC_CMPS_F          = _OPC(2, 5),
   OPC_ABSNEG_F        = _OPC(2, 6),
   OPC_CMPV_F          = _OPC(2, 7),
   /* 8 - invalid */
   OPC_FLOOR_F         = _OPC(2, 9),
   OPC_CEIL_F          = _OPC(2, 10),
   OPC_RNDNE_F         = _OPC(2, 11),
   OPC_RNDAZ_F         = _OPC(2, 12),
   OPC_TRUNC_F         = _OPC(2, 13),
   /* 14-15 - invalid */
   OPC_ADD_U           = _OPC(2, 16),
   OPC_ADD_S           = _OPC(2, 17),
   OPC_SUB_U           = _OPC(2, 18),
   OPC_SUB_S           = _OPC(2, 19),
   OPC_CMPS_U          = _OPC(2, 20),
   OPC_CMPS_S          = _OPC(2, 21),
   OPC_MIN_U           = _OPC(2, 22),
   OPC_MIN_S           = _OPC(2, 23),
   OPC_MAX_U           = _OPC(2, 24),
   OPC_MAX_S           = _OPC(2, 25),
   OPC_ABSNEG_S        = _OPC(2, 26),
   /* 27 - invalid */
   OPC_AND_B           = _OPC(2, 28),
   OPC_OR_B            = _OPC(2, 29),
   OPC_NOT_B           = _OPC(2, 30),
   OPC_XOR_B           = _OPC(2, 31),
   /* 32 - invalid */
   OPC_CMPV_U          = _OPC(2, 33),
   OPC_CMPV_S          = _OPC(2, 34),
   /* 35-47 - invalid */
   OPC_MUL_U24         = _OPC(2, 48), /* 24b mul into 32b result */
   OPC_MUL_S24         = _OPC(2, 49), /* 24b mul into 32b result with sign extension */
   OPC_MULL_U          = _OPC(2, 50),
   OPC_BFREV_B         = _OPC(2, 51),
   OPC_CLZ_S           = _OPC(2, 52),
   OPC_CLZ_B           = _OPC(2, 53),
   OPC_SHL_B           = _OPC(2, 54),
   OPC_SHR_B           = _OPC(2, 55),
   OPC_ASHR_B          = _OPC(2, 56),
   OPC_BARY_F          = _OPC(2, 57),
   OPC_MGEN_B          = _OPC(2, 58),
   OPC_GETBIT_B        = _OPC(2, 59),
   OPC_SETRM           = _OPC(2, 60),
   OPC_CBITS_B         = _OPC(2, 61),
   OPC_SHB             = _OPC(2, 62),
   OPC_MSAD            = _OPC(2, 63),

   /* category 3: */
   OPC_MAD_U16         = _OPC(3, 0),
   OPC_MADSH_U16       = _OPC(3, 1),
   OPC_MAD_S16         = _OPC(3, 2),
   OPC_MADSH_M16       = _OPC(3, 3),   /* should this be .s16? */
   OPC_MAD_U24         = _OPC(3, 4),
   OPC_MAD_S24         = _OPC(3, 5),
   OPC_MAD_F16         = _OPC(3, 6),
   OPC_MAD_F32         = _OPC(3, 7),
   OPC_SEL_B16         = _OPC(3, 8),
   OPC_SEL_B32         = _OPC(3, 9),
   OPC_SEL_S16         = _OPC(3, 10),
   OPC_SEL_S32         = _OPC(3, 11),
   OPC_SEL_F16         = _OPC(3, 12),
   OPC_SEL_F32         = _OPC(3, 13),
   OPC_SAD_S16         = _OPC(3, 14),
   OPC_SAD_S32         = _OPC(3, 15),
   OPC_SHLG_B16        = _OPC(3, 16),

   /* category 4: */
   OPC_RCP             = _OPC(4, 0),
   OPC_RSQ             = _OPC(4, 1),
   OPC_LOG2            = _OPC(4, 2),
   OPC_EXP2            = _OPC(4, 3),
   OPC_SIN             = _OPC(4, 4),
   OPC_COS             = _OPC(4, 5),
   OPC_SQRT            = _OPC(4, 6),
   /* NOTE that these are 8+opc from their highp equivs, so it's possible
    * that the high order bit in the opc field has been repurposed for
    * half-precision use?  But note that other ops (rcp/sin/cos/sqrt)
    * still use the same opc as highp
    */
   OPC_HRSQ            = _OPC(4, 9),
   OPC_HLOG2           = _OPC(4, 10),
   OPC_HEXP2           = _OPC(4, 11),

   /* category 5: */
   OPC_ISAM            = _OPC(5, 0),
   OPC_ISAML           = _OPC(5, 1),
   OPC_ISAMM           = _OPC(5, 2),
   OPC_SAM             = _OPC(5, 3),
   OPC_SAMB            = _OPC(5, 4),
   OPC_SAML            = _OPC(5, 5),
   OPC_SAMGQ           = _OPC(5, 6),
   OPC_GETLOD          = _OPC(5, 7),
   OPC_CONV            = _OPC(5, 8),
   OPC_CONVM           = _OPC(5, 9),
   OPC_GETSIZE         = _OPC(5, 10),
   OPC_GETBUF          = _OPC(5, 11),
   OPC_GETPOS          = _OPC(5, 12),
   OPC_GETINFO         = _OPC(5, 13),
   OPC_DSX             = _OPC(5, 14),
   OPC_DSY             = _OPC(5, 15),
   OPC_GATHER4R        = _OPC(5, 16),
   OPC_GATHER4G        = _OPC(5, 17),
   OPC_GATHER4B        = _OPC(5, 18),
   OPC_GATHER4A        = _OPC(5, 19),
   OPC_SAMGP0          = _OPC(5, 20),
   OPC_SAMGP1          = _OPC(5, 21),
   OPC_SAMGP2          = _OPC(5, 22),
   OPC_SAMGP3          = _OPC(5, 23),
   OPC_DSXPP_1         = _OPC(5, 24),
   OPC_DSYPP_1         = _OPC(5, 25),
   OPC_RGETPOS         = _OPC(5, 26),
   OPC_RGETINFO        = _OPC(5, 27),
   /* cat5 meta instructions, placed above the cat5 opc field's size */
   OPC_DSXPP_MACRO     = _OPC(5, 32),
   OPC_DSYPP_MACRO     = _OPC(5, 33),

   /* category 6: */
   OPC_LDG             = _OPC(6, 0),   /* load-global */
   OPC_LDL             = _OPC(6, 1),
   OPC_LDP             = _OPC(6, 2),
   OPC_STG             = _OPC(6, 3),   /* store-global */
   OPC_STL             = _OPC(6, 4),
   OPC_STP             = _OPC(6, 5),
   OPC_LDIB            = _OPC(6, 6),
   OPC_G2L             = _OPC(6, 7),
   OPC_L2G             = _OPC(6, 8),
   OPC_PREFETCH        = _OPC(6, 9),
   OPC_LDLW            = _OPC(6, 10),
   OPC_STLW            = _OPC(6, 11),
   OPC_RESFMT          = _OPC(6, 14),
   OPC_RESINFO         = _OPC(6, 15),
   OPC_ATOMIC_ADD      = _OPC(6, 16),
   OPC_ATOMIC_SUB      = _OPC(6, 17),
   OPC_ATOMIC_XCHG     = _OPC(6, 18),
   OPC_ATOMIC_INC      = _OPC(6, 19),
   OPC_ATOMIC_DEC      = _OPC(6, 20),
   OPC_ATOMIC_CMPXCHG  = _OPC(6, 21),
   OPC_ATOMIC_MIN      = _OPC(6, 22),
   OPC_ATOMIC_MAX      = _OPC(6, 23),
   OPC_ATOMIC_AND      = _OPC(6, 24),
   OPC_ATOMIC_OR       = _OPC(6, 25),
   OPC_ATOMIC_XOR      = _OPC(6, 26),
   OPC_LDGB            = _OPC(6, 27),
   OPC_STGB            = _OPC(6, 28),
   OPC_STIB            = _OPC(6, 29),
   OPC_LDC             = _OPC(6, 30),
   OPC_LDLV            = _OPC(6, 31),
   OPC_PIPR            = _OPC(6, 32), /* ??? */
   OPC_PIPC            = _OPC(6, 33), /* ??? */
   OPC_EMIT2           = _OPC(6, 34), /* ??? */
   OPC_ENDLS           = _OPC(6, 35), /* ??? */
   OPC_GETSPID         = _OPC(6, 36), /* SP ID */
   OPC_GETWID          = _OPC(6, 37), /* wavefront ID */

   /* Logical opcodes for things that differ in a6xx+ */
   OPC_STC             = _OPC(6, 40),
   OPC_RESINFO_B       = _OPC(6, 41),
   OPC_LDIB_B          = _OPC(6, 42),
   OPC_STIB_B          = _OPC(6, 43),

   /* Logical opcodes for different atomic instruction variations: */
   OPC_ATOMIC_B_ADD     = _OPC(6, 44),
   OPC_ATOMIC_B_SUB     = _OPC(6, 45),
   OPC_ATOMIC_B_XCHG    = _OPC(6, 46),
   OPC_ATOMIC_B_INC     = _OPC(6, 47),
   OPC_ATOMIC_B_DEC     = _OPC(6, 48),
   OPC_ATOMIC_B_CMPXCHG = _OPC(6, 49),
   OPC_ATOMIC_B_MIN     = _OPC(6, 50),
   OPC_ATOMIC_B_MAX     = _OPC(6, 51),
   OPC_ATOMIC_B_AND     = _OPC(6, 52),
   OPC_ATOMIC_B_OR      = _OPC(6, 53),
   OPC_ATOMIC_B_XOR     = _OPC(6, 54),

   OPC_LDG_A           = _OPC(6, 55),
   OPC_STG_A           = _OPC(6, 56),

   OPC_SPILL_MACRO     = _OPC(6, 57),
   OPC_RELOAD_MACRO    = _OPC(6, 58),

   /* category 7: */
   OPC_BAR             = _OPC(7, 0),
   OPC_FENCE           = _OPC(7, 1),

   /* meta instructions (category -1): */
   /* placeholder instr to mark shader inputs: */
   OPC_META_INPUT      = _OPC(-1, 0),
   /* The "collect" and "split" instructions are used for keeping
    * track of instructions that write to multiple dst registers
    * (split) like texture sample instructions, or read multiple
    * consecutive scalar registers (collect) (bary.f, texture samp)
    *
    * A "split" extracts a scalar component from a vecN, and a
    * "collect" gathers multiple scalar components into a vecN
    */
   OPC_META_SPLIT      = _OPC(-1, 2),
   OPC_META_COLLECT    = _OPC(-1, 3),

   /* placeholder for texture fetches that run before FS invocation
    * starts:
    */
   OPC_META_TEX_PREFETCH = _OPC(-1, 4),

   /* Parallel copies have multiple destinations, and copy each source
    * to its corresponding destination. This happens "in parallel," meaning
    * that it happens as-if every source is read first and then every
    * destination is stored. These are produced in RA when register shuffling
    * is required, and then lowered away immediately afterwards.
    */
   OPC_META_PARALLEL_COPY = _OPC(-1, 5),
   OPC_META_PHI        = _OPC(-1, 6),
} opc_t;
/* clang-format on */

/* Extract the category from an opc_t.  Meta opcodes are negative, so this
 * relies on `>>` of a negative value being an arithmetic shift (which is
 * implementation-defined in ISO C) to yield -1 for them.
 */
#define opc_cat(opc) ((int)((opc) >> NOPC_BITS))
/* Extract the per-category opcode (the low NOPC_BITS bits) from an opc_t. */
#define opc_op(opc)  ((unsigned)((opc) & ((1 << NOPC_BITS) - 1)))

const char *disasm_a3xx_instr_name(opc_t opc);

typedef enum {
   TYPE_F16 = 0,
   TYPE_F32 = 1,
   TYPE_U16 = 2,
   TYPE_U32 = 3,
   TYPE_S16 = 4,
   TYPE_S32 = 5,
   TYPE_U8 = 6,
   TYPE_S8 = 7, // XXX I assume?
} type_t;

/* Return the width in bits (8, 16 or 32) of `type`.  Any value outside the
 * type_t enumerators trips ir3_assert() and, if that returns, yields 0.
 */
static inline uint32_t
type_size(type_t type)
{
   switch (type) {
   case TYPE_F32:
   case TYPE_U32:
   case TYPE_S32:
      return 32;
   case TYPE_F16:
   case TYPE_U16:
   case TYPE_S16:
      return 16;
   case TYPE_U8:
   case TYPE_S8:
      return 8;
   default:
      ir3_assert(0); /* invalid type */
      return 0;
   }
}

/* Non-zero iff `type` is one of the floating point types (F16/F32). */
static inline int
type_float(type_t type)
{
   return (type == TYPE_F32) || (type == TYPE_F16);
}

/* Non-zero iff `type` is one of the unsigned integer types (U8/U16/U32). */
static inline int
type_uint(type_t type)
{
   return (type == TYPE_U32) || (type == TYPE_U16) || (type == TYPE_U8);
}

/* Non-zero iff `type` is one of the signed integer types (S8/S16/S32). */
static inline int
type_sint(type_t type)
{
   return (type == TYPE_S32) || (type == TYPE_S16) || (type == TYPE_S8);
}

typedef enum {
   ROUND_ZERO = 0,
   ROUND_EVEN = 1,
   ROUND_POS_INF = 2,
   ROUND_NEG_INF = 3,
} round_t;

/* Pack a register number and component into a single register id: `num`
 * occupies the bits above the low two, `comp` (masked to two bits) the low
 * two.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 */
static inline uint32_t
regid(int num, int comp)
{
   return (num << 2) | (comp & 0x3);
}

#define INVALID_REG     regid(63, 0)
#define VALIDREG(r)     ((r) != INVALID_REG)
/* NOTE: COND() is not defined in this header; it must be provided by the
 * including code before CONDREG() is used.
 */
#define CONDREG(r, val) COND(VALIDREG(r), (val))

/* special registers: */
#define REG_A0 61 /* address register */
#define REG_P0 62 /* predicate register */

typedef enum {
   BRANCH_PLAIN = 0, /* br */
   BRANCH_OR = 1,    /* brao */
   BRANCH_AND = 2,   /* braa */
   BRANCH_CONST = 3, /* brac */
   BRANCH_ANY = 4,   /* bany */
   BRANCH_ALL = 5,   /* ball */
   BRANCH_X = 6,     /* brax ??? */
} brtype_t;

/* With is_bindless_s2en = 1, this determines whether bindless is enabled and
 * if so, how to get the (base, index) pair for both sampler and texture.
 * There is a single base embedded in the instruction, which is always used
 * for the texture.
 */
typedef enum {
   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is not presumed to be uniform. This is
    * backwards-compatible with earlier generations, where this field was
    * always 0 and nonuniform-indexed sampling always worked.
    */
   CAT5_NONUNIFORM = 0,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_A1_UNIFORM = 1,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is *not* presumed to be uniform.
    */
   CAT5_BINDLESS_NONUNIFORM = 2,

   /* The sampler base comes from the low 3 bits of a1.x, and the sampler
    * and texture index come from src3 which is *not* presumed to be
    * uniform.
    */
   CAT5_BINDLESS_A1_NONUNIFORM = 3,

   /* Use traditional GL binding model, get texture and sampler index
    * from src3 which is presumed to be uniform.
    */
   CAT5_UNIFORM = 4,

   /* The texture and sampler share the same base, and the sampler and
    * texture index come from src3 which is presumed to be uniform.
    */
   CAT5_BINDLESS_UNIFORM = 5,

   /* The texture and sampler share the same base, get sampler index from low
    * 4 bits of src3 and texture index from high 4 bits.
    */
   CAT5_BINDLESS_IMM = 6,

   /* The sampler base comes from the low 3 bits of a1.x, and the texture
    * index comes from the next 8 bits of a1.x. The sampler index is an
    * immediate in src3.
    */
   CAT5_BINDLESS_A1_IMM = 7,
} cat5_desc_mode_t;

/* Similar to cat5_desc_mode_t, describes how the descriptor is loaded.
 */
typedef enum {
   /* Use old GL binding model with an immediate index. */
   CAT6_IMM = 0,

   CAT6_UNIFORM = 1,

   CAT6_NONUNIFORM = 2,

   /* Use the bindless model, with an immediate index.
    */
   CAT6_BINDLESS_IMM = 4,

   /* Use the bindless model, with a uniform register index.
    */
   CAT6_BINDLESS_UNIFORM = 5,

   /* Use the bindless model, with a register index that isn't guaranteed
    * to be uniform. This presumably checks if the indices are equal and
    * splits up the load/store, because it works the way you would
    * expect.
    */
   CAT6_BINDLESS_NONUNIFORM = 6,
} cat6_desc_mode_t;

/* True iff `opc` is a cat2 or cat3 opcode other than bary.f and sel.*. */
static inline bool
is_sat_compatible(opc_t opc)
{
   /* On a6xx saturation doesn't work on cat4 */
   if (opc_cat(opc) != 2 && opc_cat(opc) != 3)
      return false;

   switch (opc) {
   /* On a3xx and a6xx saturation doesn't work on bary.f */
   case OPC_BARY_F:
   /* On a6xx saturation doesn't work on sel.* */
   case OPC_SEL_B16:
   case OPC_SEL_B32:
   case OPC_SEL_S16:
   case OPC_SEL_S32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return false;
   default:
      return true;
   }
}

/* True iff `opc` is one of the OPC_MAD_* opcodes (the OPC_MADSH_* opcodes
 * are not included).
 */
static inline bool
is_mad(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_U16:
   case OPC_MAD_S16:
   case OPC_MAD_U24:
   case OPC_MAD_S24:
   case OPC_MAD_F16:
   case OPC_MAD_F32:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is OPC_MADSH_U16 or OPC_MADSH_M16. */
static inline bool
is_madsh(opc_t opc)
{
   switch (opc) {
   case OPC_MADSH_U16:
   case OPC_MADSH_M16:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is one of the OPC_ATOMIC_* opcodes.  The logical
 * OPC_ATOMIC_B_* variants are not matched.
 */
static inline bool
is_atomic(opc_t opc)
{
   switch (opc) {
   case OPC_ATOMIC_ADD:
   case OPC_ATOMIC_SUB:
   case OPC_ATOMIC_XCHG:
   case OPC_ATOMIC_INC:
   case OPC_ATOMIC_DEC:
   case OPC_ATOMIC_CMPXCHG:
   case OPC_ATOMIC_MIN:
   case OPC_ATOMIC_MAX:
   case OPC_ATOMIC_AND:
   case OPC_ATOMIC_OR:
   case OPC_ATOMIC_XOR:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is one of resfmt, resinfo, ldgb, stgb or stib. */
static inline bool
is_ssbo(opc_t opc)
{
   switch (opc) {
   case OPC_RESFMT:
   case OPC_RESINFO:
   case OPC_LDGB:
   case OPC_STGB:
   case OPC_STIB:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is one of isam, isaml or isamm. */
static inline bool
is_isam(opc_t opc)
{
   switch (opc) {
   case OPC_ISAM:
   case OPC_ISAML:
   case OPC_ISAMM:
      return true;
   default:
      return false;
   }
}

/* True iff `opc` is one of the cat2 opcodes with an _F suffix, excluding
 * OPC_BARY_F.
 */
static inline bool
is_cat2_float(opc_t opc)
{
   switch (opc) {
   case OPC_ADD_F:
   case OPC_MIN_F:
   case OPC_MAX_F:
   case OPC_MUL_F:
   case OPC_SIGN_F:
   case OPC_CMPS_F:
   case OPC_ABSNEG_F:
   case OPC_CMPV_F:
   case OPC_FLOOR_F:
   case OPC_CEIL_F:
   case OPC_RNDNE_F:
   case OPC_RNDAZ_F:
   case OPC_TRUNC_F:
      return true;

   default:
      return false;
   }
}

/* True iff `opc` is one of the cat3 float opcodes (mad.f16/f32, sel.f16/f32). */
static inline bool
is_cat3_float(opc_t opc)
{
   switch (opc) {
   case OPC_MAD_F16:
   case OPC_MAD_F32:
   case OPC_SEL_F16:
   case OPC_SEL_F32:
      return true;
   default:
      return false;
   }
}

#endif /* INSTR_A3XX_H_ */