17e102996Smaya/*
27e102996Smaya * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
37e102996Smaya *
47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a
57e102996Smaya * copy of this software and associated documentation files (the "Software"),
67e102996Smaya * to deal in the Software without restriction, including without limitation
77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the
97e102996Smaya * Software is furnished to do so, subject to the following conditions:
107e102996Smaya *
117e102996Smaya * The above copyright notice and this permission notice (including the next
127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the
137e102996Smaya * Software.
147e102996Smaya *
157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217e102996Smaya * SOFTWARE.
227e102996Smaya */
237e102996Smaya
247ec681f3Smrg#include <assert.h>
257ec681f3Smrg#include <stdbool.h>
267ec681f3Smrg#include <stdint.h>
277e102996Smaya#include <stdio.h>
287e102996Smaya#include <stdlib.h>
297e102996Smaya#include <string.h>
307e102996Smaya
317ec681f3Smrg#include <util/log.h>
327e102996Smaya#include <util/u_debug.h>
337e102996Smaya
347ec681f3Smrg#include "isa/isa.h"
357e102996Smaya
367ec681f3Smrg#include "disasm.h"
377ec681f3Smrg#include "instr-a3xx.h"
387e102996Smaya
397e102996Smayastatic enum debug_t debug;
407e102996Smaya
/*
 * Indentation prefixes indexed by nesting level.  Levels 0..9 yield that
 * many tab characters; levels 10..15 yield a literal "x".
 */
static const char *const levels[] = {
   /*  0 */ "",
   /*  1 */ "\t",
   /*  2 */ "\t\t",
   /*  3 */ "\t\t\t",
   /*  4 */ "\t\t\t\t",
   /*  5 */ "\t\t\t\t\t",
   /*  6 */ "\t\t\t\t\t\t",
   /*  7 */ "\t\t\t\t\t\t\t",
   /*  8 */ "\t\t\t\t\t\t\t\t",
   /*  9 */ "\t\t\t\t\t\t\t\t\t",
   /* 10..15 */
   "x", "x", "x", "x", "x", "x",
};
597e102996Smaya
/**
 * Per-disassembly state shared between the decode callbacks
 * (disasm_field_cb / disasm_instr_cb) and the stats printer.
 */
struct disasm_ctx {
   FILE *out;                          /* stream the output is written to */
   struct isa_decode_options *options; /* decoder options (gpu_id, stop, ...) */
   unsigned level;                     /* index into levels[] for indentation */
   unsigned extra_cycles;              /* sum of REPEAT and NOP field values */

   /**
    * nop_count/has_end are used to detect the real end of the shader.
    * Since in some cases there can be an epilogue following an `end`,
    * we look for a sequence of `nop`s following the `end`.
    */
   int nop_count; /* nops seen since the last non-nop instruction */
   bool has_end;  /* set once an `end` instruction has been seen */

   int cur_n;       /* index of the instruction currently being decoded */
   int cur_opc_cat; /* opcode category of the current instruction */

   /* Countdown armed by a category 4 instruction, see disasm_handle_last() */
   int sfu_delay;

   /**
    * Fields of the current instruction, accumulated while it is being
    * decoded and consumed once decoding is complete (ie. when the next
    * instruction starts).
    */
   struct {
      bool ss;        /* SS field was non-zero */
      uint8_t nop;    /* value of the NOP field */
      uint8_t repeat; /* value of the REPEAT field */
   } last;

   /**
    * Fields of the src or dst register currently being decoded; reset
    * after each SWIZ field.
    */
   struct {
      bool half; /* value of the most recent *HALF* field */
      bool r;    /* register is repeated (SRC*_R set, or it is a DST) */
      enum {
         FILE_GPR = 1,
         FILE_CONST = 2,
      } file;
      unsigned num; /* register number taken from the GPR/CONST field */
   } reg;

   struct shader_stats *stats; /* caller-provided statistics being filled in */
};
1047e102996Smaya
1057ec681f3Smrgstatic void
1067ec681f3Smrgprint_stats(struct disasm_ctx *ctx)
1077e102996Smaya{
1087ec681f3Smrg   if (ctx->options->gpu_id >= 600) {
1097ec681f3Smrg      /* handle MERGEREGS case.. this isn't *entirely* accurate, as
1107ec681f3Smrg       * you can have shader stages not using merged register file,
1117ec681f3Smrg       * but it is good enough for a guestimate:
1127ec681f3Smrg       */
1137ec681f3Smrg      unsigned n = (ctx->stats->halfreg + 1) / 2;
1147ec681f3Smrg
1157ec681f3Smrg      ctx->stats->halfreg = 0;
1167ec681f3Smrg      ctx->stats->fullreg = MAX2(ctx->stats->fullreg, n);
1177ec681f3Smrg   }
1187ec681f3Smrg
1197ec681f3Smrg   unsigned instructions = ctx->cur_n + ctx->extra_cycles + 1;
1207ec681f3Smrg
1217ec681f3Smrg   fprintf(ctx->out, "%sStats:\n", levels[ctx->level]);
1227ec681f3Smrg   fprintf(ctx->out,
1237ec681f3Smrg           "%s- shaderdb: %u instr, %u nops, %u non-nops, %u mov, %u cov\n",
1247ec681f3Smrg           levels[ctx->level], instructions, ctx->stats->nops,
1257ec681f3Smrg           instructions - ctx->stats->nops, ctx->stats->mov_count,
1267ec681f3Smrg           ctx->stats->cov_count);
1277ec681f3Smrg
1287ec681f3Smrg   fprintf(ctx->out,
1297ec681f3Smrg           "%s- shaderdb: %u last-baryf, %d half, %d full, %u constlen\n",
1307ec681f3Smrg           levels[ctx->level], ctx->stats->last_baryf,
1317ec681f3Smrg           DIV_ROUND_UP(ctx->stats->halfreg, 4),
1327ec681f3Smrg           DIV_ROUND_UP(ctx->stats->fullreg, 4),
1337ec681f3Smrg           DIV_ROUND_UP(ctx->stats->constlen, 4));
1347ec681f3Smrg
1357ec681f3Smrg   fprintf(
1367ec681f3Smrg      ctx->out,
1377ec681f3Smrg      "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7\n",
1387ec681f3Smrg      levels[ctx->level], ctx->stats->instrs_per_cat[0],
1397ec681f3Smrg      ctx->stats->instrs_per_cat[1], ctx->stats->instrs_per_cat[2],
1407ec681f3Smrg      ctx->stats->instrs_per_cat[3], ctx->stats->instrs_per_cat[4],
1417ec681f3Smrg      ctx->stats->instrs_per_cat[5], ctx->stats->instrs_per_cat[6],
1427ec681f3Smrg      ctx->stats->instrs_per_cat[7]);
1437ec681f3Smrg
1447ec681f3Smrg   fprintf(ctx->out, "%s- shaderdb: %u sstall, %u (ss), %u (sy)\n",
1457ec681f3Smrg           levels[ctx->level], ctx->stats->sstall, ctx->stats->ss,
1467ec681f3Smrg           ctx->stats->sy);
1477e102996Smaya}
1487e102996Smaya
1497ec681f3Smrg/* size of largest OPC field of all the instruction categories: */
1507ec681f3Smrg#define NOPC_BITS 6
1517e102996Smaya
1527ec681f3Smrgstatic const struct opc_info {
1537ec681f3Smrg   const char *name;
1547ec681f3Smrg} opcs[1 << (3 + NOPC_BITS)] = {
1557ec681f3Smrg#define OPC(cat, opc, name) [(opc)] = {#name}
1567ec681f3Smrg   /* clang-format off */
1577ec681f3Smrg   /* category 0: */
1587ec681f3Smrg   OPC(0, OPC_NOP,          nop),
1597ec681f3Smrg   OPC(0, OPC_B,            b),
1607ec681f3Smrg   OPC(0, OPC_JUMP,         jump),
1617ec681f3Smrg   OPC(0, OPC_CALL,         call),
1627ec681f3Smrg   OPC(0, OPC_RET,          ret),
1637ec681f3Smrg   OPC(0, OPC_KILL,         kill),
1647ec681f3Smrg   OPC(0, OPC_DEMOTE,       demote),
1657ec681f3Smrg   OPC(0, OPC_END,          end),
1667ec681f3Smrg   OPC(0, OPC_EMIT,         emit),
1677ec681f3Smrg   OPC(0, OPC_CUT,          cut),
1687ec681f3Smrg   OPC(0, OPC_CHMASK,       chmask),
1697ec681f3Smrg   OPC(0, OPC_CHSH,         chsh),
1707ec681f3Smrg   OPC(0, OPC_FLOW_REV,     flow_rev),
1717ec681f3Smrg   OPC(0, OPC_PREDT,        predt),
1727ec681f3Smrg   OPC(0, OPC_PREDF,        predf),
1737ec681f3Smrg   OPC(0, OPC_PREDE,        prede),
1747ec681f3Smrg   OPC(0, OPC_BKT,          bkt),
1757ec681f3Smrg   OPC(0, OPC_STKS,         stks),
1767ec681f3Smrg   OPC(0, OPC_STKR,         stkr),
1777ec681f3Smrg   OPC(0, OPC_XSET,         xset),
1787ec681f3Smrg   OPC(0, OPC_XCLR,         xclr),
1797ec681f3Smrg   OPC(0, OPC_GETONE,       getone),
1807ec681f3Smrg   OPC(0, OPC_DBG,          dbg),
1817ec681f3Smrg   OPC(0, OPC_SHPS,         shps),
1827ec681f3Smrg   OPC(0, OPC_SHPE,         shpe),
1837ec681f3Smrg
1847ec681f3Smrg   /* category 1: */
1857ec681f3Smrg   OPC(1, OPC_MOV,          ),
1867ec681f3Smrg   OPC(1, OPC_MOVMSK,       movmsk),
1877ec681f3Smrg   OPC(1, OPC_SWZ,          swz),
1887ec681f3Smrg   OPC(1, OPC_SCT,          sct),
1897ec681f3Smrg   OPC(1, OPC_GAT,          gat),
1907ec681f3Smrg   OPC(1, OPC_BALLOT_MACRO, ballot.macro),
1917ec681f3Smrg   OPC(1, OPC_ANY_MACRO,    any.macro),
1927ec681f3Smrg   OPC(1, OPC_ALL_MACRO,    all.macro),
1937ec681f3Smrg   OPC(1, OPC_ELECT_MACRO,  elect.macro),
1947ec681f3Smrg   OPC(1, OPC_READ_COND_MACRO, read_cond.macro),
1957ec681f3Smrg   OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),
1967ec681f3Smrg   OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),
1977ec681f3Smrg
1987ec681f3Smrg   /* category 2: */
1997ec681f3Smrg   OPC(2, OPC_ADD_F,        add.f),
2007ec681f3Smrg   OPC(2, OPC_MIN_F,        min.f),
2017ec681f3Smrg   OPC(2, OPC_MAX_F,        max.f),
2027ec681f3Smrg   OPC(2, OPC_MUL_F,        mul.f),
2037ec681f3Smrg   OPC(2, OPC_SIGN_F,       sign.f),
2047ec681f3Smrg   OPC(2, OPC_CMPS_F,       cmps.f),
2057ec681f3Smrg   OPC(2, OPC_ABSNEG_F,     absneg.f),
2067ec681f3Smrg   OPC(2, OPC_CMPV_F,       cmpv.f),
2077ec681f3Smrg   OPC(2, OPC_FLOOR_F,      floor.f),
2087ec681f3Smrg   OPC(2, OPC_CEIL_F,       ceil.f),
2097ec681f3Smrg   OPC(2, OPC_RNDNE_F,      rndne.f),
2107ec681f3Smrg   OPC(2, OPC_RNDAZ_F,      rndaz.f),
2117ec681f3Smrg   OPC(2, OPC_TRUNC_F,      trunc.f),
2127ec681f3Smrg   OPC(2, OPC_ADD_U,        add.u),
2137ec681f3Smrg   OPC(2, OPC_ADD_S,        add.s),
2147ec681f3Smrg   OPC(2, OPC_SUB_U,        sub.u),
2157ec681f3Smrg   OPC(2, OPC_SUB_S,        sub.s),
2167ec681f3Smrg   OPC(2, OPC_CMPS_U,       cmps.u),
2177ec681f3Smrg   OPC(2, OPC_CMPS_S,       cmps.s),
2187ec681f3Smrg   OPC(2, OPC_MIN_U,        min.u),
2197ec681f3Smrg   OPC(2, OPC_MIN_S,        min.s),
2207ec681f3Smrg   OPC(2, OPC_MAX_U,        max.u),
2217ec681f3Smrg   OPC(2, OPC_MAX_S,        max.s),
2227ec681f3Smrg   OPC(2, OPC_ABSNEG_S,     absneg.s),
2237ec681f3Smrg   OPC(2, OPC_AND_B,        and.b),
2247ec681f3Smrg   OPC(2, OPC_OR_B,         or.b),
2257ec681f3Smrg   OPC(2, OPC_NOT_B,        not.b),
2267ec681f3Smrg   OPC(2, OPC_XOR_B,        xor.b),
2277ec681f3Smrg   OPC(2, OPC_CMPV_U,       cmpv.u),
2287ec681f3Smrg   OPC(2, OPC_CMPV_S,       cmpv.s),
2297ec681f3Smrg   OPC(2, OPC_MUL_U24,      mul.u24),
2307ec681f3Smrg   OPC(2, OPC_MUL_S24,      mul.s24),
2317ec681f3Smrg   OPC(2, OPC_MULL_U,       mull.u),
2327ec681f3Smrg   OPC(2, OPC_BFREV_B,      bfrev.b),
2337ec681f3Smrg   OPC(2, OPC_CLZ_S,        clz.s),
2347ec681f3Smrg   OPC(2, OPC_CLZ_B,        clz.b),
2357ec681f3Smrg   OPC(2, OPC_SHL_B,        shl.b),
2367ec681f3Smrg   OPC(2, OPC_SHR_B,        shr.b),
2377ec681f3Smrg   OPC(2, OPC_ASHR_B,       ashr.b),
2387ec681f3Smrg   OPC(2, OPC_BARY_F,       bary.f),
2397ec681f3Smrg   OPC(2, OPC_MGEN_B,       mgen.b),
2407ec681f3Smrg   OPC(2, OPC_GETBIT_B,     getbit.b),
2417ec681f3Smrg   OPC(2, OPC_SETRM,        setrm),
2427ec681f3Smrg   OPC(2, OPC_CBITS_B,      cbits.b),
2437ec681f3Smrg   OPC(2, OPC_SHB,          shb),
2447ec681f3Smrg   OPC(2, OPC_MSAD,         msad),
2457ec681f3Smrg
2467ec681f3Smrg   /* category 3: */
2477ec681f3Smrg   OPC(3, OPC_MAD_U16,      mad.u16),
2487ec681f3Smrg   OPC(3, OPC_MADSH_U16,    madsh.u16),
2497ec681f3Smrg   OPC(3, OPC_MAD_S16,      mad.s16),
2507ec681f3Smrg   OPC(3, OPC_MADSH_M16,    madsh.m16),
2517ec681f3Smrg   OPC(3, OPC_MAD_U24,      mad.u24),
2527ec681f3Smrg   OPC(3, OPC_MAD_S24,      mad.s24),
2537ec681f3Smrg   OPC(3, OPC_MAD_F16,      mad.f16),
2547ec681f3Smrg   OPC(3, OPC_MAD_F32,      mad.f32),
2557ec681f3Smrg   OPC(3, OPC_SEL_B16,      sel.b16),
2567ec681f3Smrg   OPC(3, OPC_SEL_B32,      sel.b32),
2577ec681f3Smrg   OPC(3, OPC_SEL_S16,      sel.s16),
2587ec681f3Smrg   OPC(3, OPC_SEL_S32,      sel.s32),
2597ec681f3Smrg   OPC(3, OPC_SEL_F16,      sel.f16),
2607ec681f3Smrg   OPC(3, OPC_SEL_F32,      sel.f32),
2617ec681f3Smrg   OPC(3, OPC_SAD_S16,      sad.s16),
2627ec681f3Smrg   OPC(3, OPC_SAD_S32,      sad.s32),
2637ec681f3Smrg   OPC(3, OPC_SHLG_B16,     shlg.b16),
2647ec681f3Smrg
2657ec681f3Smrg   /* category 4: */
2667ec681f3Smrg   OPC(4, OPC_RCP,          rcp),
2677ec681f3Smrg   OPC(4, OPC_RSQ,          rsq),
2687ec681f3Smrg   OPC(4, OPC_LOG2,         log2),
2697ec681f3Smrg   OPC(4, OPC_EXP2,         exp2),
2707ec681f3Smrg   OPC(4, OPC_SIN,          sin),
2717ec681f3Smrg   OPC(4, OPC_COS,          cos),
2727ec681f3Smrg   OPC(4, OPC_SQRT,         sqrt),
2737ec681f3Smrg   OPC(4, OPC_HRSQ,         hrsq),
2747ec681f3Smrg   OPC(4, OPC_HLOG2,        hlog2),
2757ec681f3Smrg   OPC(4, OPC_HEXP2,        hexp2),
2767ec681f3Smrg
2777ec681f3Smrg   /* category 5: */
2787ec681f3Smrg   OPC(5, OPC_ISAM,         isam),
2797ec681f3Smrg   OPC(5, OPC_ISAML,        isaml),
2807ec681f3Smrg   OPC(5, OPC_ISAMM,        isamm),
2817ec681f3Smrg   OPC(5, OPC_SAM,          sam),
2827ec681f3Smrg   OPC(5, OPC_SAMB,         samb),
2837ec681f3Smrg   OPC(5, OPC_SAML,         saml),
2847ec681f3Smrg   OPC(5, OPC_SAMGQ,        samgq),
2857ec681f3Smrg   OPC(5, OPC_GETLOD,       getlod),
2867ec681f3Smrg   OPC(5, OPC_CONV,         conv),
2877ec681f3Smrg   OPC(5, OPC_CONVM,        convm),
2887ec681f3Smrg   OPC(5, OPC_GETSIZE,      getsize),
2897ec681f3Smrg   OPC(5, OPC_GETBUF,       getbuf),
2907ec681f3Smrg   OPC(5, OPC_GETPOS,       getpos),
2917ec681f3Smrg   OPC(5, OPC_GETINFO,      getinfo),
2927ec681f3Smrg   OPC(5, OPC_DSX,          dsx),
2937ec681f3Smrg   OPC(5, OPC_DSY,          dsy),
2947ec681f3Smrg   OPC(5, OPC_GATHER4R,     gather4r),
2957ec681f3Smrg   OPC(5, OPC_GATHER4G,     gather4g),
2967ec681f3Smrg   OPC(5, OPC_GATHER4B,     gather4b),
2977ec681f3Smrg   OPC(5, OPC_GATHER4A,     gather4a),
2987ec681f3Smrg   OPC(5, OPC_SAMGP0,       samgp0),
2997ec681f3Smrg   OPC(5, OPC_SAMGP1,       samgp1),
3007ec681f3Smrg   OPC(5, OPC_SAMGP2,       samgp2),
3017ec681f3Smrg   OPC(5, OPC_SAMGP3,       samgp3),
3027ec681f3Smrg   OPC(5, OPC_DSXPP_1,      dsxpp.1),
3037ec681f3Smrg   OPC(5, OPC_DSYPP_1,      dsypp.1),
3047ec681f3Smrg   OPC(5, OPC_RGETPOS,      rgetpos),
3057ec681f3Smrg   OPC(5, OPC_RGETINFO,     rgetinfo),
3067ec681f3Smrg   /* macros are needed here for ir3_print */
3077ec681f3Smrg   OPC(5, OPC_DSXPP_MACRO,  dsxpp.macro),
3087ec681f3Smrg   OPC(5, OPC_DSYPP_MACRO,  dsypp.macro),
3097ec681f3Smrg
3107ec681f3Smrg
3117ec681f3Smrg   /* category 6: */
3127ec681f3Smrg   OPC(6, OPC_LDG,          ldg),
3137ec681f3Smrg   OPC(6, OPC_LDG_A,        ldg.a),
3147ec681f3Smrg   OPC(6, OPC_LDL,          ldl),
3157ec681f3Smrg   OPC(6, OPC_LDP,          ldp),
3167ec681f3Smrg   OPC(6, OPC_STG,          stg),
3177ec681f3Smrg   OPC(6, OPC_STG_A,        stg.a),
3187ec681f3Smrg   OPC(6, OPC_STL,          stl),
3197ec681f3Smrg   OPC(6, OPC_STP,          stp),
3207ec681f3Smrg   OPC(6, OPC_LDIB,         ldib),
3217ec681f3Smrg   OPC(6, OPC_G2L,          g2l),
3227ec681f3Smrg   OPC(6, OPC_L2G,          l2g),
3237ec681f3Smrg   OPC(6, OPC_PREFETCH,     prefetch),
3247ec681f3Smrg   OPC(6, OPC_LDLW,         ldlw),
3257ec681f3Smrg   OPC(6, OPC_STLW,         stlw),
3267ec681f3Smrg   OPC(6, OPC_RESFMT,       resfmt),
3277ec681f3Smrg   OPC(6, OPC_RESINFO,      resinfo),
3287ec681f3Smrg   OPC(6, OPC_ATOMIC_ADD,     atomic.add),
3297ec681f3Smrg   OPC(6, OPC_ATOMIC_SUB,     atomic.sub),
3307ec681f3Smrg   OPC(6, OPC_ATOMIC_XCHG,    atomic.xchg),
3317ec681f3Smrg   OPC(6, OPC_ATOMIC_INC,     atomic.inc),
3327ec681f3Smrg   OPC(6, OPC_ATOMIC_DEC,     atomic.dec),
3337ec681f3Smrg   OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),
3347ec681f3Smrg   OPC(6, OPC_ATOMIC_MIN,     atomic.min),
3357ec681f3Smrg   OPC(6, OPC_ATOMIC_MAX,     atomic.max),
3367ec681f3Smrg   OPC(6, OPC_ATOMIC_AND,     atomic.and),
3377ec681f3Smrg   OPC(6, OPC_ATOMIC_OR,      atomic.or),
3387ec681f3Smrg   OPC(6, OPC_ATOMIC_XOR,     atomic.xor),
3397ec681f3Smrg   OPC(6, OPC_LDGB,         ldgb),
3407ec681f3Smrg   OPC(6, OPC_STGB,         stgb),
3417ec681f3Smrg   OPC(6, OPC_STIB,         stib),
3427ec681f3Smrg   OPC(6, OPC_LDC,          ldc),
3437ec681f3Smrg   OPC(6, OPC_LDLV,         ldlv),
3447ec681f3Smrg   OPC(6, OPC_PIPR,         pipr),
3457ec681f3Smrg   OPC(6, OPC_PIPC,         pipc),
3467ec681f3Smrg   OPC(6, OPC_EMIT2,        emit),
3477ec681f3Smrg   OPC(6, OPC_ENDLS,        endls),
3487ec681f3Smrg   OPC(6, OPC_GETSPID,      getspid),
3497ec681f3Smrg   OPC(6, OPC_GETWID,       getwid),
3507ec681f3Smrg
3517ec681f3Smrg   OPC(6, OPC_SPILL_MACRO,  spill.macro),
3527ec681f3Smrg   OPC(6, OPC_RELOAD_MACRO, reload.macro),
3537ec681f3Smrg
3547ec681f3Smrg   OPC(7, OPC_BAR,          bar),
3557ec681f3Smrg   OPC(7, OPC_FENCE,        fence),
3567ec681f3Smrg/* clang-format on */
3577ec681f3Smrg#undef OPC
3587e102996Smaya};
3597e102996Smaya
/*
 * Look up the opcs[] entry for a decoded instruction.
 *
 * Expects a `struct disasm_ctx *ctx` to be in scope at the expansion site.
 * The GPU id is reached through ctx->options, since struct disasm_ctx has
 * no gpu_id member of its own.
 *
 * NOTE(review): no user of this macro is visible in the reviewed source.
 */
#define GETINFO(instr)                                                         \
   (&(opcs[((instr)->opc_cat << NOPC_BITS) |                                   \
           instr_opc((instr), ctx->options->gpu_id)]))
3627e102996Smaya
3637ec681f3Smrgconst char *
3647ec681f3Smrgdisasm_a3xx_instr_name(opc_t opc)
3657e102996Smaya{
3667ec681f3Smrg   if (opc_cat(opc) == -1)
3677ec681f3Smrg      return "??meta??";
3687ec681f3Smrg   return opcs[opc].name;
3697e102996Smaya}
3707e102996Smaya
3717ec681f3Smrgstatic void
3727ec681f3Smrgdisasm_field_cb(void *d, const char *field_name, struct isa_decode_value *val)
3737e102996Smaya{
3747ec681f3Smrg   struct disasm_ctx *ctx = d;
3757ec681f3Smrg
3767ec681f3Smrg   if (!strcmp(field_name, "NAME")) {
3777ec681f3Smrg      if (!strcmp("nop", val->str)) {
3787ec681f3Smrg         if (ctx->has_end) {
3797ec681f3Smrg            ctx->nop_count++;
3807ec681f3Smrg            if (ctx->nop_count > 3) {
3817ec681f3Smrg               ctx->options->stop = true;
3827ec681f3Smrg            }
3837ec681f3Smrg         }
3847ec681f3Smrg         ctx->stats->nops += 1 + ctx->last.repeat;
3857ec681f3Smrg      } else {
3867ec681f3Smrg         ctx->nop_count = 0;
3877ec681f3Smrg      }
3887ec681f3Smrg
3897ec681f3Smrg      if (!strcmp("end", val->str)) {
3907ec681f3Smrg         ctx->has_end = true;
3917ec681f3Smrg         ctx->nop_count = 0;
3927ec681f3Smrg      } else if (!strcmp("chsh", val->str)) {
3937ec681f3Smrg         ctx->options->stop = true;
3947ec681f3Smrg      } else if (!strcmp("bary.f", val->str)) {
3957ec681f3Smrg         ctx->stats->last_baryf = ctx->cur_n;
3967ec681f3Smrg      }
3977ec681f3Smrg   } else if (!strcmp(field_name, "REPEAT")) {
3987ec681f3Smrg      ctx->extra_cycles += val->num;
3997ec681f3Smrg      ctx->stats->instrs_per_cat[ctx->cur_opc_cat] += val->num;
4007ec681f3Smrg      ctx->last.repeat = val->num;
4017ec681f3Smrg   } else if (!strcmp(field_name, "NOP")) {
4027ec681f3Smrg      ctx->extra_cycles += val->num;
4037ec681f3Smrg      ctx->stats->instrs_per_cat[0] += val->num;
4047ec681f3Smrg      ctx->stats->nops += val->num;
4057ec681f3Smrg      ctx->last.nop = val->num;
4067ec681f3Smrg   } else if (!strcmp(field_name, "SY")) {
4077ec681f3Smrg      ctx->stats->sy += val->num;
4087ec681f3Smrg   } else if (!strcmp(field_name, "SS")) {
4097ec681f3Smrg      ctx->stats->ss += val->num;
4107ec681f3Smrg      ctx->last.ss = !!val->num;
4117ec681f3Smrg   } else if (!strcmp(field_name, "CONST")) {
4127ec681f3Smrg      ctx->reg.num = val->num;
4137ec681f3Smrg      ctx->reg.file = FILE_CONST;
4147ec681f3Smrg   } else if (!strcmp(field_name, "GPR")) {
4157ec681f3Smrg      /* don't count GPR regs r48.x (shared) or higher: */
4167ec681f3Smrg      if (val->num < 48) {
4177ec681f3Smrg         ctx->reg.num = val->num;
4187ec681f3Smrg         ctx->reg.file = FILE_GPR;
4197ec681f3Smrg      }
4207ec681f3Smrg   } else if (!strcmp(field_name, "SRC_R") || !strcmp(field_name, "SRC1_R") ||
4217ec681f3Smrg              !strcmp(field_name, "SRC2_R") || !strcmp(field_name, "SRC3_R")) {
4227ec681f3Smrg      ctx->reg.r = val->num;
4237ec681f3Smrg   } else if (!strcmp(field_name, "DST")) {
4247ec681f3Smrg      /* Dest register is always repeated
4257ec681f3Smrg       *
4267ec681f3Smrg       * Note that this doesn't really properly handle instructions
4277ec681f3Smrg       * that write multiple components.. the old disasm didn't handle
4287ec681f3Smrg       * that case either.
4297ec681f3Smrg       */
4307ec681f3Smrg      ctx->reg.r = true;
4317ec681f3Smrg   } else if (strstr(field_name, "HALF")) {
4327ec681f3Smrg      ctx->reg.half = val->num;
4337ec681f3Smrg   } else if (!strcmp(field_name, "SWIZ")) {
4347ec681f3Smrg      unsigned num = (ctx->reg.num << 2) | val->num;
4357ec681f3Smrg      if (ctx->reg.r)
4367ec681f3Smrg         num += ctx->last.repeat;
4377ec681f3Smrg
4387ec681f3Smrg      if (ctx->reg.file == FILE_CONST) {
4397ec681f3Smrg         ctx->stats->constlen = MAX2(ctx->stats->constlen, num);
4407ec681f3Smrg      } else if (ctx->reg.file == FILE_GPR) {
4417ec681f3Smrg         if (ctx->reg.half) {
4427ec681f3Smrg            ctx->stats->halfreg = MAX2(ctx->stats->halfreg, num);
4437ec681f3Smrg         } else {
4447ec681f3Smrg            ctx->stats->fullreg = MAX2(ctx->stats->fullreg, num);
4457ec681f3Smrg         }
4467ec681f3Smrg      }
4477ec681f3Smrg
4487ec681f3Smrg      memset(&ctx->reg, 0, sizeof(ctx->reg));
4497ec681f3Smrg   }
4507e102996Smaya}
4517e102996Smaya
4527ec681f3Smrg/**
4537ec681f3Smrg * Handle stat updates dealt with at the end of instruction decoding,
4547ec681f3Smrg * ie. before beginning of next instruction
4557ec681f3Smrg */
4567ec681f3Smrgstatic void
4577ec681f3Smrgdisasm_handle_last(struct disasm_ctx *ctx)
4587e102996Smaya{
4597ec681f3Smrg   if (ctx->last.ss) {
4607ec681f3Smrg      ctx->stats->sstall += ctx->sfu_delay;
4617ec681f3Smrg      ctx->sfu_delay = 0;
4627ec681f3Smrg   }
4637ec681f3Smrg
4647ec681f3Smrg   if (ctx->cur_opc_cat == 4) {
4657ec681f3Smrg      ctx->sfu_delay = 10;
4667ec681f3Smrg   } else {
4677ec681f3Smrg      int n = MIN2(ctx->sfu_delay, 1 + ctx->last.repeat + ctx->last.nop);
4687ec681f3Smrg      ctx->sfu_delay -= n;
4697ec681f3Smrg   }
4707ec681f3Smrg
4717ec681f3Smrg   memset(&ctx->last, 0, sizeof(ctx->last));
4727e102996Smaya}
4737e102996Smaya
4747ec681f3Smrgstatic void
4757ec681f3Smrgdisasm_instr_cb(void *d, unsigned n, void *instr)
4767e102996Smaya{
4777ec681f3Smrg   struct disasm_ctx *ctx = d;
4787ec681f3Smrg   uint32_t *dwords = (uint32_t *)instr;
4797ec681f3Smrg   uint64_t val = dwords[1];
4807ec681f3Smrg   val = val << 32;
4817ec681f3Smrg   val |= dwords[0];
4827ec681f3Smrg
4837ec681f3Smrg   unsigned opc_cat = val >> 61;
4847ec681f3Smrg
4857ec681f3Smrg   /* There are some cases where we can get instr_cb called multiple
4867ec681f3Smrg    * times per instruction (like when we need an extra line for branch
4877ec681f3Smrg    * target labels), don't update stats in these cases:
4887ec681f3Smrg    */
4897ec681f3Smrg   if (n != ctx->cur_n) {
4907ec681f3Smrg      if (n > 0) {
4917ec681f3Smrg         disasm_handle_last(ctx);
4927ec681f3Smrg      }
4937ec681f3Smrg      ctx->stats->instrs_per_cat[opc_cat]++;
4947ec681f3Smrg      ctx->cur_n = n;
4957ec681f3Smrg
4967ec681f3Smrg      /* mov vs cov stats are a bit harder to fish out of the field
4977ec681f3Smrg       * names, because current ir3-cat1.xml doesn't use {NAME} for
4987ec681f3Smrg       * this distinction.  So for now just handle this case with
4997ec681f3Smrg       * some hand-coded parsing:
5007ec681f3Smrg       */
5017ec681f3Smrg      if (opc_cat == 1) {
5027ec681f3Smrg         unsigned opc = (val >> 57) & 0x3;
5037ec681f3Smrg         unsigned src_type = (val >> 50) & 0x7;
5047ec681f3Smrg         unsigned dst_type = (val >> 46) & 0x7;
5057ec681f3Smrg
5067ec681f3Smrg         if (opc == 0) {
5077ec681f3Smrg            if (src_type == dst_type) {
5087ec681f3Smrg               ctx->stats->mov_count++;
5097ec681f3Smrg            } else {
5107ec681f3Smrg               ctx->stats->cov_count++;
5117ec681f3Smrg            }
5127ec681f3Smrg         }
5137ec681f3Smrg      }
5147ec681f3Smrg   }
5157ec681f3Smrg
5167ec681f3Smrg   ctx->cur_opc_cat = opc_cat;
5177ec681f3Smrg
5187ec681f3Smrg   if (debug & PRINT_RAW) {
5197ec681f3Smrg      fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],
5207ec681f3Smrg              opc_cat, n, ctx->extra_cycles + n, dwords[1], dwords[0]);
5217ec681f3Smrg   }
5227e102996Smaya}
5237e102996Smaya
5247ec681f3Smrgint
5257ec681f3Smrgdisasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,
5267ec681f3Smrg                 unsigned gpu_id, struct shader_stats *stats)
5277e102996Smaya{
5287ec681f3Smrg   struct isa_decode_options decode_options = {
5297ec681f3Smrg      .gpu_id = gpu_id,
5307ec681f3Smrg      .show_errors = true,
5317ec681f3Smrg      .max_errors = 5,
5327ec681f3Smrg      .branch_labels = true,
5337ec681f3Smrg      .field_cb = disasm_field_cb,
5347ec681f3Smrg      .instr_cb = disasm_instr_cb,
5357ec681f3Smrg   };
5367ec681f3Smrg   struct disasm_ctx ctx = {
5377ec681f3Smrg      .out = out,
5387ec681f3Smrg      .level = level,
5397ec681f3Smrg      .options = &decode_options,
5407ec681f3Smrg      .stats = stats,
5417ec681f3Smrg      .cur_n = -1,
5427ec681f3Smrg   };
5437ec681f3Smrg
5447ec681f3Smrg   memset(stats, 0, sizeof(*stats));
5457ec681f3Smrg
5467ec681f3Smrg   decode_options.cbdata = &ctx;
5477ec681f3Smrg
5487ec681f3Smrg   isa_decode(dwords, sizedwords * 4, out, &decode_options);
5497ec681f3Smrg
5507ec681f3Smrg   disasm_handle_last(&ctx);
5517ec681f3Smrg
5527ec681f3Smrg   if (debug & PRINT_STATS)
5537ec681f3Smrg      print_stats(&ctx);
5547ec681f3Smrg
5557ec681f3Smrg   return 0;
5567e102996Smaya}
5577e102996Smaya
5587ec681f3Smrgvoid
5597ec681f3Smrgdisasm_a3xx_set_debug(enum debug_t d)
5607e102996Smaya{
5617ec681f3Smrg   debug = d;
5627e102996Smaya}
5637e102996Smaya
5647ec681f3Smrg#include <setjmp.h>
5657e102996Smaya
/* Recovery point used by TRY() / ir3_assert_handler() */
static jmp_buf jmp_env;
/* True only while a TRY() body is executing, ie. while jmp_env is usable */
static bool jmp_env_valid;
5687e102996Smaya
5697ec681f3Smrgvoid
5707ec681f3Smrgir3_assert_handler(const char *expr, const char *file, int line,
5717ec681f3Smrg                   const char *func)
5727e102996Smaya{
5737ec681f3Smrg   mesa_loge("%s:%u: %s: Assertion `%s' failed.", file, line, func, expr);
5747ec681f3Smrg   if (jmp_env_valid)
5757ec681f3Smrg      longjmp(jmp_env, 1);
5767ec681f3Smrg   abort();
5777e102996Smaya}
5787e102996Smaya
/*
 * Run statement `x` with jmp_env armed, so that a longjmp(jmp_env, ...)
 * taken while `x` executes resumes right after `x` instead of completing
 * it.  jmp_env_valid is set only for the duration of `x`.  Nesting is not
 * supported (asserted).
 */
#define TRY(x)                                                                 \
   do {                                                                        \
      assert(!jmp_env_valid);                                                  \
      if (!setjmp(jmp_env)) {                                                  \
         jmp_env_valid = true;                                                 \
         x;                                                                    \
      }                                                                        \
      jmp_env_valid = false;                                                   \
   } while (0)
5887ec681f3Smrg
5897ec681f3Smrgint
5907ec681f3Smrgdisasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,
5917ec681f3Smrg            unsigned gpu_id)
5927e102996Smaya{
5937ec681f3Smrg   struct shader_stats stats;
5947ec681f3Smrg   return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);
5957e102996Smaya}
5967e102996Smaya
5977ec681f3Smrgint
5987ec681f3Smrgtry_disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,
5997ec681f3Smrg                unsigned gpu_id)
6007e102996Smaya{
6017ec681f3Smrg   struct shader_stats stats;
6027ec681f3Smrg   int ret = -1;
6037ec681f3Smrg   TRY(ret = disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats));
6047ec681f3Smrg   return ret;
6057e102996Smaya}
606