17e102996Smaya/* 27e102996Smaya * Copyright (c) 2013 Rob Clark <robdclark@gmail.com> 37e102996Smaya * 47e102996Smaya * Permission is hereby granted, free of charge, to any person obtaining a 57e102996Smaya * copy of this software and associated documentation files (the "Software"), 67e102996Smaya * to deal in the Software without restriction, including without limitation 77e102996Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87e102996Smaya * and/or sell copies of the Software, and to permit persons to whom the 97e102996Smaya * Software is furnished to do so, subject to the following conditions: 107e102996Smaya * 117e102996Smaya * The above copyright notice and this permission notice (including the next 127e102996Smaya * paragraph) shall be included in all copies or substantial portions of the 137e102996Smaya * Software. 147e102996Smaya * 157e102996Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167e102996Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177e102996Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187e102996Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197e102996Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 207e102996Smaya * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 217e102996Smaya * SOFTWARE. 227e102996Smaya */ 237e102996Smaya 247ec681f3Smrg#include <assert.h> 257ec681f3Smrg#include <stdbool.h> 267ec681f3Smrg#include <stdint.h> 277e102996Smaya#include <stdio.h> 287e102996Smaya#include <stdlib.h> 297e102996Smaya#include <string.h> 307e102996Smaya 317ec681f3Smrg#include <util/log.h> 327e102996Smaya#include <util/u_debug.h> 337e102996Smaya 347ec681f3Smrg#include "isa/isa.h" 357e102996Smaya 367ec681f3Smrg#include "disasm.h" 377ec681f3Smrg#include "instr-a3xx.h" 387e102996Smaya 397e102996Smayastatic enum debug_t debug; 407e102996Smaya 417e102996Smayastatic const char *levels[] = { 427ec681f3Smrg "", 437ec681f3Smrg "\t", 447ec681f3Smrg "\t\t", 457ec681f3Smrg "\t\t\t", 467ec681f3Smrg "\t\t\t\t", 477ec681f3Smrg "\t\t\t\t\t", 487ec681f3Smrg "\t\t\t\t\t\t", 497ec681f3Smrg "\t\t\t\t\t\t\t", 507ec681f3Smrg "\t\t\t\t\t\t\t\t", 517ec681f3Smrg "\t\t\t\t\t\t\t\t\t", 527ec681f3Smrg "x", 537ec681f3Smrg "x", 547ec681f3Smrg "x", 557ec681f3Smrg "x", 567ec681f3Smrg "x", 577ec681f3Smrg "x", 587e102996Smaya}; 597e102996Smaya 607e102996Smayastruct disasm_ctx { 617ec681f3Smrg FILE *out; 627ec681f3Smrg struct isa_decode_options *options; 637ec681f3Smrg unsigned level; 647ec681f3Smrg unsigned extra_cycles; 657ec681f3Smrg 667ec681f3Smrg /** 677ec681f3Smrg * nop_count/has_end used to detect the real end of shader. Since 687ec681f3Smrg * in some cases there can be a epilogue following an `end` we look 697ec681f3Smrg * for a sequence of `nop`s following the `end` 707ec681f3Smrg */ 717ec681f3Smrg int nop_count; /* number of nop's since non-nop instruction: */ 727ec681f3Smrg bool has_end; /* have we seen end instruction */ 737ec681f3Smrg 747ec681f3Smrg int cur_n; /* current instr # */ 757ec681f3Smrg int cur_opc_cat; /* current opc_cat */ 767ec681f3Smrg 777ec681f3Smrg int sfu_delay; 787ec681f3Smrg 797ec681f3Smrg /** 807ec681f3Smrg * State accumulated decoding fields of the current instruction, 817ec681f3Smrg * handled after decoding is complete (ie. at start of next instr) 827ec681f3Smrg */ 837ec681f3Smrg struct { 847ec681f3Smrg bool ss; 857ec681f3Smrg uint8_t nop; 867ec681f3Smrg uint8_t repeat; 877ec681f3Smrg } last; 887ec681f3Smrg 897ec681f3Smrg /** 907ec681f3Smrg * State accumulated decoding fields of src or dst register 917ec681f3Smrg */ 927ec681f3Smrg struct { 937ec681f3Smrg bool half; 947ec681f3Smrg bool r; 957ec681f3Smrg enum { 967ec681f3Smrg FILE_GPR = 1, 977ec681f3Smrg FILE_CONST = 2, 987ec681f3Smrg } file; 997ec681f3Smrg unsigned num; 1007ec681f3Smrg } reg; 1017ec681f3Smrg 1027ec681f3Smrg struct shader_stats *stats; 1037e102996Smaya}; 1047e102996Smaya 1057ec681f3Smrgstatic void 1067ec681f3Smrgprint_stats(struct disasm_ctx *ctx) 1077e102996Smaya{ 1087ec681f3Smrg if (ctx->options->gpu_id >= 600) { 1097ec681f3Smrg /* handle MERGEREGS case.. this isn't *entirely* accurate, as 1107ec681f3Smrg * you can have shader stages not using merged register file, 1117ec681f3Smrg * but it is good enough for a guestimate: 1127ec681f3Smrg */ 1137ec681f3Smrg unsigned n = (ctx->stats->halfreg + 1) / 2; 1147ec681f3Smrg 1157ec681f3Smrg ctx->stats->halfreg = 0; 1167ec681f3Smrg ctx->stats->fullreg = MAX2(ctx->stats->fullreg, n); 1177ec681f3Smrg } 1187ec681f3Smrg 1197ec681f3Smrg unsigned instructions = ctx->cur_n + ctx->extra_cycles + 1; 1207ec681f3Smrg 1217ec681f3Smrg fprintf(ctx->out, "%sStats:\n", levels[ctx->level]); 1227ec681f3Smrg fprintf(ctx->out, 1237ec681f3Smrg "%s- shaderdb: %u instr, %u nops, %u non-nops, %u mov, %u cov\n", 1247ec681f3Smrg levels[ctx->level], instructions, ctx->stats->nops, 1257ec681f3Smrg instructions - ctx->stats->nops, ctx->stats->mov_count, 1267ec681f3Smrg ctx->stats->cov_count); 1277ec681f3Smrg 1287ec681f3Smrg fprintf(ctx->out, 1297ec681f3Smrg "%s- shaderdb: %u last-baryf, %d half, %d full, %u constlen\n", 1307ec681f3Smrg levels[ctx->level], ctx->stats->last_baryf, 1317ec681f3Smrg DIV_ROUND_UP(ctx->stats->halfreg, 4), 1327ec681f3Smrg DIV_ROUND_UP(ctx->stats->fullreg, 4), 1337ec681f3Smrg DIV_ROUND_UP(ctx->stats->constlen, 4)); 1347ec681f3Smrg 1357ec681f3Smrg fprintf( 1367ec681f3Smrg ctx->out, 1377ec681f3Smrg "%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7\n", 1387ec681f3Smrg levels[ctx->level], ctx->stats->instrs_per_cat[0], 1397ec681f3Smrg ctx->stats->instrs_per_cat[1], ctx->stats->instrs_per_cat[2], 1407ec681f3Smrg ctx->stats->instrs_per_cat[3], ctx->stats->instrs_per_cat[4], 1417ec681f3Smrg ctx->stats->instrs_per_cat[5], ctx->stats->instrs_per_cat[6], 1427ec681f3Smrg ctx->stats->instrs_per_cat[7]); 1437ec681f3Smrg 1447ec681f3Smrg fprintf(ctx->out, "%s- shaderdb: %u sstall, %u (ss), %u (sy)\n", 1457ec681f3Smrg levels[ctx->level], ctx->stats->sstall, ctx->stats->ss, 1467ec681f3Smrg ctx->stats->sy); 1477e102996Smaya} 1487e102996Smaya 1497ec681f3Smrg/* size of largest OPC field of all the instruction categories: */ 1507ec681f3Smrg#define NOPC_BITS 6 1517e102996Smaya 1527ec681f3Smrgstatic const struct opc_info { 1537ec681f3Smrg const char *name; 1547ec681f3Smrg} opcs[1 << (3 + NOPC_BITS)] = { 1557ec681f3Smrg#define OPC(cat, opc, name) [(opc)] = {#name} 1567ec681f3Smrg /* clang-format off */ 1577ec681f3Smrg /* category 0: */ 1587ec681f3Smrg OPC(0, OPC_NOP, nop), 1597ec681f3Smrg OPC(0, OPC_B, b), 1607ec681f3Smrg OPC(0, OPC_JUMP, jump), 1617ec681f3Smrg OPC(0, OPC_CALL, call), 1627ec681f3Smrg OPC(0, OPC_RET, ret), 1637ec681f3Smrg OPC(0, OPC_KILL, kill), 1647ec681f3Smrg OPC(0, OPC_DEMOTE, demote), 1657ec681f3Smrg OPC(0, OPC_END, end), 1667ec681f3Smrg OPC(0, OPC_EMIT, emit), 1677ec681f3Smrg OPC(0, OPC_CUT, cut), 1687ec681f3Smrg OPC(0, OPC_CHMASK, chmask), 1697ec681f3Smrg OPC(0, OPC_CHSH, chsh), 1707ec681f3Smrg OPC(0, OPC_FLOW_REV, flow_rev), 1717ec681f3Smrg OPC(0, OPC_PREDT, predt), 1727ec681f3Smrg OPC(0, OPC_PREDF, predf), 1737ec681f3Smrg OPC(0, OPC_PREDE, prede), 1747ec681f3Smrg OPC(0, OPC_BKT, bkt), 1757ec681f3Smrg OPC(0, OPC_STKS, stks), 1767ec681f3Smrg OPC(0, OPC_STKR, stkr), 1777ec681f3Smrg OPC(0, OPC_XSET, xset), 1787ec681f3Smrg OPC(0, OPC_XCLR, xclr), 1797ec681f3Smrg OPC(0, OPC_GETONE, getone), 1807ec681f3Smrg OPC(0, OPC_DBG, dbg), 1817ec681f3Smrg OPC(0, OPC_SHPS, shps), 1827ec681f3Smrg OPC(0, OPC_SHPE, shpe), 1837ec681f3Smrg 1847ec681f3Smrg /* category 1: */ 1857ec681f3Smrg OPC(1, OPC_MOV, ), 1867ec681f3Smrg OPC(1, OPC_MOVMSK, movmsk), 1877ec681f3Smrg OPC(1, OPC_SWZ, swz), 1887ec681f3Smrg OPC(1, OPC_SCT, sct), 1897ec681f3Smrg OPC(1, OPC_GAT, gat), 1907ec681f3Smrg OPC(1, OPC_BALLOT_MACRO, ballot.macro), 1917ec681f3Smrg OPC(1, OPC_ANY_MACRO, any.macro), 1927ec681f3Smrg OPC(1, OPC_ALL_MACRO, all.macro), 1937ec681f3Smrg OPC(1, OPC_ELECT_MACRO, elect.macro), 1947ec681f3Smrg OPC(1, OPC_READ_COND_MACRO, read_cond.macro), 1957ec681f3Smrg OPC(1, OPC_READ_FIRST_MACRO, read_first.macro), 1967ec681f3Smrg OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro), 1977ec681f3Smrg 1987ec681f3Smrg /* category 2: */ 1997ec681f3Smrg OPC(2, OPC_ADD_F, add.f), 2007ec681f3Smrg OPC(2, OPC_MIN_F, min.f), 2017ec681f3Smrg OPC(2, OPC_MAX_F, max.f), 2027ec681f3Smrg OPC(2, OPC_MUL_F, mul.f), 2037ec681f3Smrg OPC(2, OPC_SIGN_F, sign.f), 2047ec681f3Smrg OPC(2, OPC_CMPS_F, cmps.f), 2057ec681f3Smrg OPC(2, OPC_ABSNEG_F, absneg.f), 2067ec681f3Smrg OPC(2, OPC_CMPV_F, cmpv.f), 2077ec681f3Smrg OPC(2, OPC_FLOOR_F, floor.f), 2087ec681f3Smrg OPC(2, OPC_CEIL_F, ceil.f), 2097ec681f3Smrg OPC(2, OPC_RNDNE_F, rndne.f), 2107ec681f3Smrg OPC(2, OPC_RNDAZ_F, rndaz.f), 2117ec681f3Smrg OPC(2, OPC_TRUNC_F, trunc.f), 2127ec681f3Smrg OPC(2, OPC_ADD_U, add.u), 2137ec681f3Smrg OPC(2, OPC_ADD_S, add.s), 2147ec681f3Smrg OPC(2, OPC_SUB_U, sub.u), 2157ec681f3Smrg OPC(2, OPC_SUB_S, sub.s), 2167ec681f3Smrg OPC(2, OPC_CMPS_U, cmps.u), 2177ec681f3Smrg OPC(2, OPC_CMPS_S, cmps.s), 2187ec681f3Smrg OPC(2, OPC_MIN_U, min.u), 2197ec681f3Smrg OPC(2, OPC_MIN_S, min.s), 2207ec681f3Smrg OPC(2, OPC_MAX_U, max.u), 2217ec681f3Smrg OPC(2, OPC_MAX_S, max.s), 2227ec681f3Smrg OPC(2, OPC_ABSNEG_S, absneg.s), 2237ec681f3Smrg OPC(2, OPC_AND_B, and.b), 2247ec681f3Smrg OPC(2, OPC_OR_B, or.b), 2257ec681f3Smrg OPC(2, OPC_NOT_B, not.b), 2267ec681f3Smrg OPC(2, OPC_XOR_B, xor.b), 2277ec681f3Smrg OPC(2, OPC_CMPV_U, cmpv.u), 2287ec681f3Smrg OPC(2, OPC_CMPV_S, cmpv.s), 2297ec681f3Smrg OPC(2, OPC_MUL_U24, mul.u24), 2307ec681f3Smrg OPC(2, OPC_MUL_S24, mul.s24), 2317ec681f3Smrg OPC(2, OPC_MULL_U, mull.u), 2327ec681f3Smrg OPC(2, OPC_BFREV_B, bfrev.b), 2337ec681f3Smrg OPC(2, OPC_CLZ_S, clz.s), 2347ec681f3Smrg OPC(2, OPC_CLZ_B, clz.b), 2357ec681f3Smrg OPC(2, OPC_SHL_B, shl.b), 2367ec681f3Smrg OPC(2, OPC_SHR_B, shr.b), 2377ec681f3Smrg OPC(2, OPC_ASHR_B, ashr.b), 2387ec681f3Smrg OPC(2, OPC_BARY_F, bary.f), 2397ec681f3Smrg OPC(2, OPC_MGEN_B, mgen.b), 2407ec681f3Smrg OPC(2, OPC_GETBIT_B, getbit.b), 2417ec681f3Smrg OPC(2, OPC_SETRM, setrm), 2427ec681f3Smrg OPC(2, OPC_CBITS_B, cbits.b), 2437ec681f3Smrg OPC(2, OPC_SHB, shb), 2447ec681f3Smrg OPC(2, OPC_MSAD, msad), 2457ec681f3Smrg 2467ec681f3Smrg /* category 3: */ 2477ec681f3Smrg OPC(3, OPC_MAD_U16, mad.u16), 2487ec681f3Smrg OPC(3, OPC_MADSH_U16, madsh.u16), 2497ec681f3Smrg OPC(3, OPC_MAD_S16, mad.s16), 2507ec681f3Smrg OPC(3, OPC_MADSH_M16, madsh.m16), 2517ec681f3Smrg OPC(3, OPC_MAD_U24, mad.u24), 2527ec681f3Smrg OPC(3, OPC_MAD_S24, mad.s24), 2537ec681f3Smrg OPC(3, OPC_MAD_F16, mad.f16), 2547ec681f3Smrg OPC(3, OPC_MAD_F32, mad.f32), 2557ec681f3Smrg OPC(3, OPC_SEL_B16, sel.b16), 2567ec681f3Smrg OPC(3, OPC_SEL_B32, sel.b32), 2577ec681f3Smrg OPC(3, OPC_SEL_S16, sel.s16), 2587ec681f3Smrg OPC(3, OPC_SEL_S32, sel.s32), 2597ec681f3Smrg OPC(3, OPC_SEL_F16, sel.f16), 2607ec681f3Smrg OPC(3, OPC_SEL_F32, sel.f32), 2617ec681f3Smrg OPC(3, OPC_SAD_S16, sad.s16), 2627ec681f3Smrg OPC(3, OPC_SAD_S32, sad.s32), 2637ec681f3Smrg OPC(3, OPC_SHLG_B16, shlg.b16), 2647ec681f3Smrg 2657ec681f3Smrg /* category 4: */ 2667ec681f3Smrg OPC(4, OPC_RCP, rcp), 2677ec681f3Smrg OPC(4, OPC_RSQ, rsq), 2687ec681f3Smrg OPC(4, OPC_LOG2, log2), 2697ec681f3Smrg OPC(4, OPC_EXP2, exp2), 2707ec681f3Smrg OPC(4, OPC_SIN, sin), 2717ec681f3Smrg OPC(4, OPC_COS, cos), 2727ec681f3Smrg OPC(4, OPC_SQRT, sqrt), 2737ec681f3Smrg OPC(4, OPC_HRSQ, hrsq), 2747ec681f3Smrg OPC(4, OPC_HLOG2, hlog2), 2757ec681f3Smrg OPC(4, OPC_HEXP2, hexp2), 2767ec681f3Smrg 2777ec681f3Smrg /* category 5: */ 2787ec681f3Smrg OPC(5, OPC_ISAM, isam), 2797ec681f3Smrg OPC(5, OPC_ISAML, isaml), 2807ec681f3Smrg OPC(5, OPC_ISAMM, isamm), 2817ec681f3Smrg OPC(5, OPC_SAM, sam), 2827ec681f3Smrg OPC(5, OPC_SAMB, samb), 2837ec681f3Smrg OPC(5, OPC_SAML, saml), 2847ec681f3Smrg OPC(5, OPC_SAMGQ, samgq), 2857ec681f3Smrg OPC(5, OPC_GETLOD, getlod), 2867ec681f3Smrg OPC(5, OPC_CONV, conv), 2877ec681f3Smrg OPC(5, OPC_CONVM, convm), 2887ec681f3Smrg OPC(5, OPC_GETSIZE, getsize), 2897ec681f3Smrg OPC(5, OPC_GETBUF, getbuf), 2907ec681f3Smrg OPC(5, OPC_GETPOS, getpos), 2917ec681f3Smrg OPC(5, OPC_GETINFO, getinfo), 2927ec681f3Smrg OPC(5, OPC_DSX, dsx), 2937ec681f3Smrg OPC(5, OPC_DSY, dsy), 2947ec681f3Smrg OPC(5, OPC_GATHER4R, gather4r), 2957ec681f3Smrg OPC(5, OPC_GATHER4G, gather4g), 2967ec681f3Smrg OPC(5, OPC_GATHER4B, gather4b), 2977ec681f3Smrg OPC(5, OPC_GATHER4A, gather4a), 2987ec681f3Smrg OPC(5, OPC_SAMGP0, samgp0), 2997ec681f3Smrg OPC(5, OPC_SAMGP1, samgp1), 3007ec681f3Smrg OPC(5, OPC_SAMGP2, samgp2), 3017ec681f3Smrg OPC(5, OPC_SAMGP3, samgp3), 3027ec681f3Smrg OPC(5, OPC_DSXPP_1, dsxpp.1), 3037ec681f3Smrg OPC(5, OPC_DSYPP_1, dsypp.1), 3047ec681f3Smrg OPC(5, OPC_RGETPOS, rgetpos), 3057ec681f3Smrg OPC(5, OPC_RGETINFO, rgetinfo), 3067ec681f3Smrg /* macros are needed here for ir3_print */ 3077ec681f3Smrg OPC(5, OPC_DSXPP_MACRO, dsxpp.macro), 3087ec681f3Smrg OPC(5, OPC_DSYPP_MACRO, dsypp.macro), 3097ec681f3Smrg 3107ec681f3Smrg 3117ec681f3Smrg /* category 6: */ 3127ec681f3Smrg OPC(6, OPC_LDG, ldg), 3137ec681f3Smrg OPC(6, OPC_LDG_A, ldg.a), 3147ec681f3Smrg OPC(6, OPC_LDL, ldl), 3157ec681f3Smrg OPC(6, OPC_LDP, ldp), 3167ec681f3Smrg OPC(6, OPC_STG, stg), 3177ec681f3Smrg OPC(6, OPC_STG_A, stg.a), 3187ec681f3Smrg OPC(6, OPC_STL, stl), 3197ec681f3Smrg OPC(6, OPC_STP, stp), 3207ec681f3Smrg OPC(6, OPC_LDIB, ldib), 3217ec681f3Smrg OPC(6, OPC_G2L, g2l), 3227ec681f3Smrg OPC(6, OPC_L2G, l2g), 3237ec681f3Smrg OPC(6, OPC_PREFETCH, prefetch), 3247ec681f3Smrg OPC(6, OPC_LDLW, ldlw), 3257ec681f3Smrg OPC(6, OPC_STLW, stlw), 3267ec681f3Smrg OPC(6, OPC_RESFMT, resfmt), 3277ec681f3Smrg OPC(6, OPC_RESINFO, resinfo), 3287ec681f3Smrg OPC(6, OPC_ATOMIC_ADD, atomic.add), 3297ec681f3Smrg OPC(6, OPC_ATOMIC_SUB, atomic.sub), 3307ec681f3Smrg OPC(6, OPC_ATOMIC_XCHG, atomic.xchg), 3317ec681f3Smrg OPC(6, OPC_ATOMIC_INC, atomic.inc), 3327ec681f3Smrg OPC(6, OPC_ATOMIC_DEC, atomic.dec), 3337ec681f3Smrg OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg), 3347ec681f3Smrg OPC(6, OPC_ATOMIC_MIN, atomic.min), 3357ec681f3Smrg OPC(6, OPC_ATOMIC_MAX, atomic.max), 3367ec681f3Smrg OPC(6, OPC_ATOMIC_AND, atomic.and), 3377ec681f3Smrg OPC(6, OPC_ATOMIC_OR, atomic.or), 3387ec681f3Smrg OPC(6, OPC_ATOMIC_XOR, atomic.xor), 3397ec681f3Smrg OPC(6, OPC_LDGB, ldgb), 3407ec681f3Smrg OPC(6, OPC_STGB, stgb), 3417ec681f3Smrg OPC(6, OPC_STIB, stib), 3427ec681f3Smrg OPC(6, OPC_LDC, ldc), 3437ec681f3Smrg OPC(6, OPC_LDLV, ldlv), 3447ec681f3Smrg OPC(6, OPC_PIPR, pipr), 3457ec681f3Smrg OPC(6, OPC_PIPC, pipc), 3467ec681f3Smrg OPC(6, OPC_EMIT2, emit), 3477ec681f3Smrg OPC(6, OPC_ENDLS, endls), 3487ec681f3Smrg OPC(6, OPC_GETSPID, getspid), 3497ec681f3Smrg OPC(6, OPC_GETWID, getwid), 3507ec681f3Smrg 3517ec681f3Smrg OPC(6, OPC_SPILL_MACRO, spill.macro), 3527ec681f3Smrg OPC(6, OPC_RELOAD_MACRO, reload.macro), 3537ec681f3Smrg 3547ec681f3Smrg OPC(7, OPC_BAR, bar), 3557ec681f3Smrg OPC(7, OPC_FENCE, fence), 3567ec681f3Smrg/* clang-format on */ 3577ec681f3Smrg#undef OPC 3587e102996Smaya}; 3597e102996Smaya 3607ec681f3Smrg#define GETINFO(instr) \ 3617ec681f3Smrg (&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)])) 3627e102996Smaya 3637ec681f3Smrgconst char * 3647ec681f3Smrgdisasm_a3xx_instr_name(opc_t opc) 3657e102996Smaya{ 3667ec681f3Smrg if (opc_cat(opc) == -1) 3677ec681f3Smrg return "??meta??"; 3687ec681f3Smrg return opcs[opc].name; 3697e102996Smaya} 3707e102996Smaya 3717ec681f3Smrgstatic void 3727ec681f3Smrgdisasm_field_cb(void *d, const char *field_name, struct isa_decode_value *val) 3737e102996Smaya{ 3747ec681f3Smrg struct disasm_ctx *ctx = d; 3757ec681f3Smrg 3767ec681f3Smrg if (!strcmp(field_name, "NAME")) { 3777ec681f3Smrg if (!strcmp("nop", val->str)) { 3787ec681f3Smrg if (ctx->has_end) { 3797ec681f3Smrg ctx->nop_count++; 3807ec681f3Smrg if (ctx->nop_count > 3) { 3817ec681f3Smrg ctx->options->stop = true; 3827ec681f3Smrg } 3837ec681f3Smrg } 3847ec681f3Smrg ctx->stats->nops += 1 + ctx->last.repeat; 3857ec681f3Smrg } else { 3867ec681f3Smrg ctx->nop_count = 0; 3877ec681f3Smrg } 3887ec681f3Smrg 3897ec681f3Smrg if (!strcmp("end", val->str)) { 3907ec681f3Smrg ctx->has_end = true; 3917ec681f3Smrg ctx->nop_count = 0; 3927ec681f3Smrg } else if (!strcmp("chsh", val->str)) { 3937ec681f3Smrg ctx->options->stop = true; 3947ec681f3Smrg } else if (!strcmp("bary.f", val->str)) { 3957ec681f3Smrg ctx->stats->last_baryf = ctx->cur_n; 3967ec681f3Smrg } 3977ec681f3Smrg } else if (!strcmp(field_name, "REPEAT")) { 3987ec681f3Smrg ctx->extra_cycles += val->num; 3997ec681f3Smrg ctx->stats->instrs_per_cat[ctx->cur_opc_cat] += val->num; 4007ec681f3Smrg ctx->last.repeat = val->num; 4017ec681f3Smrg } else if (!strcmp(field_name, "NOP")) { 4027ec681f3Smrg ctx->extra_cycles += val->num; 4037ec681f3Smrg ctx->stats->instrs_per_cat[0] += val->num; 4047ec681f3Smrg ctx->stats->nops += val->num; 4057ec681f3Smrg ctx->last.nop = val->num; 4067ec681f3Smrg } else if (!strcmp(field_name, "SY")) { 4077ec681f3Smrg ctx->stats->sy += val->num; 4087ec681f3Smrg } else if (!strcmp(field_name, "SS")) { 4097ec681f3Smrg ctx->stats->ss += val->num; 4107ec681f3Smrg ctx->last.ss = !!val->num; 4117ec681f3Smrg } else if (!strcmp(field_name, "CONST")) { 4127ec681f3Smrg ctx->reg.num = val->num; 4137ec681f3Smrg ctx->reg.file = FILE_CONST; 4147ec681f3Smrg } else if (!strcmp(field_name, "GPR")) { 4157ec681f3Smrg /* don't count GPR regs r48.x (shared) or higher: */ 4167ec681f3Smrg if (val->num < 48) { 4177ec681f3Smrg ctx->reg.num = val->num; 4187ec681f3Smrg ctx->reg.file = FILE_GPR; 4197ec681f3Smrg } 4207ec681f3Smrg } else if (!strcmp(field_name, "SRC_R") || !strcmp(field_name, "SRC1_R") || 4217ec681f3Smrg !strcmp(field_name, "SRC2_R") || !strcmp(field_name, "SRC3_R")) { 4227ec681f3Smrg ctx->reg.r = val->num; 4237ec681f3Smrg } else if (!strcmp(field_name, "DST")) { 4247ec681f3Smrg /* Dest register is always repeated 4257ec681f3Smrg * 4267ec681f3Smrg * Note that this doesn't really properly handle instructions 4277ec681f3Smrg * that write multiple components.. the old disasm didn't handle 4287ec681f3Smrg * that case either. 4297ec681f3Smrg */ 4307ec681f3Smrg ctx->reg.r = true; 4317ec681f3Smrg } else if (strstr(field_name, "HALF")) { 4327ec681f3Smrg ctx->reg.half = val->num; 4337ec681f3Smrg } else if (!strcmp(field_name, "SWIZ")) { 4347ec681f3Smrg unsigned num = (ctx->reg.num << 2) | val->num; 4357ec681f3Smrg if (ctx->reg.r) 4367ec681f3Smrg num += ctx->last.repeat; 4377ec681f3Smrg 4387ec681f3Smrg if (ctx->reg.file == FILE_CONST) { 4397ec681f3Smrg ctx->stats->constlen = MAX2(ctx->stats->constlen, num); 4407ec681f3Smrg } else if (ctx->reg.file == FILE_GPR) { 4417ec681f3Smrg if (ctx->reg.half) { 4427ec681f3Smrg ctx->stats->halfreg = MAX2(ctx->stats->halfreg, num); 4437ec681f3Smrg } else { 4447ec681f3Smrg ctx->stats->fullreg = MAX2(ctx->stats->fullreg, num); 4457ec681f3Smrg } 4467ec681f3Smrg } 4477ec681f3Smrg 4487ec681f3Smrg memset(&ctx->reg, 0, sizeof(ctx->reg)); 4497ec681f3Smrg } 4507e102996Smaya} 4517e102996Smaya 4527ec681f3Smrg/** 4537ec681f3Smrg * Handle stat updates dealt with at the end of instruction decoding, 4547ec681f3Smrg * ie. before beginning of next instruction 4557ec681f3Smrg */ 4567ec681f3Smrgstatic void 4577ec681f3Smrgdisasm_handle_last(struct disasm_ctx *ctx) 4587e102996Smaya{ 4597ec681f3Smrg if (ctx->last.ss) { 4607ec681f3Smrg ctx->stats->sstall += ctx->sfu_delay; 4617ec681f3Smrg ctx->sfu_delay = 0; 4627ec681f3Smrg } 4637ec681f3Smrg 4647ec681f3Smrg if (ctx->cur_opc_cat == 4) { 4657ec681f3Smrg ctx->sfu_delay = 10; 4667ec681f3Smrg } else { 4677ec681f3Smrg int n = MIN2(ctx->sfu_delay, 1 + ctx->last.repeat + ctx->last.nop); 4687ec681f3Smrg ctx->sfu_delay -= n; 4697ec681f3Smrg } 4707ec681f3Smrg 4717ec681f3Smrg memset(&ctx->last, 0, sizeof(ctx->last)); 4727e102996Smaya} 4737e102996Smaya 4747ec681f3Smrgstatic void 4757ec681f3Smrgdisasm_instr_cb(void *d, unsigned n, void *instr) 4767e102996Smaya{ 4777ec681f3Smrg struct disasm_ctx *ctx = d; 4787ec681f3Smrg uint32_t *dwords = (uint32_t *)instr; 4797ec681f3Smrg uint64_t val = dwords[1]; 4807ec681f3Smrg val = val << 32; 4817ec681f3Smrg val |= dwords[0]; 4827ec681f3Smrg 4837ec681f3Smrg unsigned opc_cat = val >> 61; 4847ec681f3Smrg 4857ec681f3Smrg /* There are some cases where we can get instr_cb called multiple 4867ec681f3Smrg * times per instruction (like when we need an extra line for branch 4877ec681f3Smrg * target labels), don't update stats in these cases: 4887ec681f3Smrg */ 4897ec681f3Smrg if (n != ctx->cur_n) { 4907ec681f3Smrg if (n > 0) { 4917ec681f3Smrg disasm_handle_last(ctx); 4927ec681f3Smrg } 4937ec681f3Smrg ctx->stats->instrs_per_cat[opc_cat]++; 4947ec681f3Smrg ctx->cur_n = n; 4957ec681f3Smrg 4967ec681f3Smrg /* mov vs cov stats are a bit harder to fish out of the field 4977ec681f3Smrg * names, because current ir3-cat1.xml doesn't use {NAME} for 4987ec681f3Smrg * this distinction. So for now just handle this case with 4997ec681f3Smrg * some hand-coded parsing: 5007ec681f3Smrg */ 5017ec681f3Smrg if (opc_cat == 1) { 5027ec681f3Smrg unsigned opc = (val >> 57) & 0x3; 5037ec681f3Smrg unsigned src_type = (val >> 50) & 0x7; 5047ec681f3Smrg unsigned dst_type = (val >> 46) & 0x7; 5057ec681f3Smrg 5067ec681f3Smrg if (opc == 0) { 5077ec681f3Smrg if (src_type == dst_type) { 5087ec681f3Smrg ctx->stats->mov_count++; 5097ec681f3Smrg } else { 5107ec681f3Smrg ctx->stats->cov_count++; 5117ec681f3Smrg } 5127ec681f3Smrg } 5137ec681f3Smrg } 5147ec681f3Smrg } 5157ec681f3Smrg 5167ec681f3Smrg ctx->cur_opc_cat = opc_cat; 5177ec681f3Smrg 5187ec681f3Smrg if (debug & PRINT_RAW) { 5197ec681f3Smrg fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level], 5207ec681f3Smrg opc_cat, n, ctx->extra_cycles + n, dwords[1], dwords[0]); 5217ec681f3Smrg } 5227e102996Smaya} 5237e102996Smaya 5247ec681f3Smrgint 5257ec681f3Smrgdisasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out, 5267ec681f3Smrg unsigned gpu_id, struct shader_stats *stats) 5277e102996Smaya{ 5287ec681f3Smrg struct isa_decode_options decode_options = { 5297ec681f3Smrg .gpu_id = gpu_id, 5307ec681f3Smrg .show_errors = true, 5317ec681f3Smrg .max_errors = 5, 5327ec681f3Smrg .branch_labels = true, 5337ec681f3Smrg .field_cb = disasm_field_cb, 5347ec681f3Smrg .instr_cb = disasm_instr_cb, 5357ec681f3Smrg }; 5367ec681f3Smrg struct disasm_ctx ctx = { 5377ec681f3Smrg .out = out, 5387ec681f3Smrg .level = level, 5397ec681f3Smrg .options = &decode_options, 5407ec681f3Smrg .stats = stats, 5417ec681f3Smrg .cur_n = -1, 5427ec681f3Smrg }; 5437ec681f3Smrg 5447ec681f3Smrg memset(stats, 0, sizeof(*stats)); 5457ec681f3Smrg 5467ec681f3Smrg decode_options.cbdata = &ctx; 5477ec681f3Smrg 5487ec681f3Smrg isa_decode(dwords, sizedwords * 4, out, &decode_options); 5497ec681f3Smrg 5507ec681f3Smrg disasm_handle_last(&ctx); 5517ec681f3Smrg 5527ec681f3Smrg if (debug & PRINT_STATS) 5537ec681f3Smrg print_stats(&ctx); 5547ec681f3Smrg 5557ec681f3Smrg return 0; 5567e102996Smaya} 5577e102996Smaya 5587ec681f3Smrgvoid 5597ec681f3Smrgdisasm_a3xx_set_debug(enum debug_t d) 5607e102996Smaya{ 5617ec681f3Smrg debug = d; 5627e102996Smaya} 5637e102996Smaya 5647ec681f3Smrg#include <setjmp.h> 5657e102996Smaya 5667ec681f3Smrgstatic bool jmp_env_valid; 5677ec681f3Smrgstatic jmp_buf jmp_env; 5687e102996Smaya 5697ec681f3Smrgvoid 5707ec681f3Smrgir3_assert_handler(const char *expr, const char *file, int line, 5717ec681f3Smrg const char *func) 5727e102996Smaya{ 5737ec681f3Smrg mesa_loge("%s:%u: %s: Assertion `%s' failed.", file, line, func, expr); 5747ec681f3Smrg if (jmp_env_valid) 5757ec681f3Smrg longjmp(jmp_env, 1); 5767ec681f3Smrg abort(); 5777e102996Smaya} 5787e102996Smaya 5797ec681f3Smrg#define TRY(x) \ 5807ec681f3Smrg do { \ 5817ec681f3Smrg assert(!jmp_env_valid); \ 5827ec681f3Smrg if (setjmp(jmp_env) == 0) { \ 5837ec681f3Smrg jmp_env_valid = true; \ 5847ec681f3Smrg x; \ 5857ec681f3Smrg } \ 5867ec681f3Smrg jmp_env_valid = false; \ 5877ec681f3Smrg } while (0) 5887ec681f3Smrg 5897ec681f3Smrgint 5907ec681f3Smrgdisasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, 5917ec681f3Smrg unsigned gpu_id) 5927e102996Smaya{ 5937ec681f3Smrg struct shader_stats stats; 5947ec681f3Smrg return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats); 5957e102996Smaya} 5967e102996Smaya 5977ec681f3Smrgint 5987ec681f3Smrgtry_disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out, 5997ec681f3Smrg unsigned gpu_id) 6007e102996Smaya{ 6017ec681f3Smrg struct shader_stats stats; 6027ec681f3Smrg int ret = -1; 6037ec681f3Smrg TRY(ret = disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats)); 6047ec681f3Smrg return ret; 6057e102996Smaya} 606