17ec681f3Smrg/*
27ec681f3Smrg * Copyright (c) 2012 Rob Clark <robdclark@gmail.com>
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
207ec681f3Smrg * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
217ec681f3Smrg * SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#include <assert.h>
257ec681f3Smrg#include <ctype.h>
267ec681f3Smrg#include <err.h>
277ec681f3Smrg#include <errno.h>
287ec681f3Smrg#include <fcntl.h>
297ec681f3Smrg#include <inttypes.h>
307ec681f3Smrg#include <signal.h>
317ec681f3Smrg#include <stdarg.h>
327ec681f3Smrg#include <stdbool.h>
337ec681f3Smrg#include <stdint.h>
347ec681f3Smrg#include <stdio.h>
357ec681f3Smrg#include <stdlib.h>
367ec681f3Smrg#include <string.h>
377ec681f3Smrg#include <unistd.h>
387ec681f3Smrg#include <sys/stat.h>
397ec681f3Smrg#include <sys/types.h>
407ec681f3Smrg#include <sys/wait.h>
417ec681f3Smrg
427ec681f3Smrg#include "freedreno_pm4.h"
437ec681f3Smrg
447ec681f3Smrg#include "buffers.h"
457ec681f3Smrg#include "cffdec.h"
467ec681f3Smrg#include "disasm.h"
477ec681f3Smrg#include "redump.h"
487ec681f3Smrg#include "rnnutil.h"
497ec681f3Smrg#include "script.h"
507ec681f3Smrg
517ec681f3Smrg/* ************************************************************************* */
527ec681f3Smrg/* originally based on kernel recovery dump code: */
537ec681f3Smrg
/* Decoder configuration for the current run; assigned by cffdec_init(). */
static const struct cffdec_options *options;

/* Not referenced in this part of the file. */
static bool needs_wfi;
/* Copy of options->summary, taken by cffdec_init(); consulted by quiet(). */
static bool summary;
/* The texture-state register callbacks only dump while this is set. */
static bool in_summary;
/* Not referenced in this part of the file. */
static int vertices;
607ec681f3Smrg
617ec681f3Smrgstatic inline unsigned
627ec681f3Smrgregcnt(void)
637ec681f3Smrg{
647ec681f3Smrg   if (options->gpu_id >= 500)
657ec681f3Smrg      return 0xffff;
667ec681f3Smrg   else
677ec681f3Smrg      return 0x7fff;
687ec681f3Smrg}
697ec681f3Smrg
707ec681f3Smrgstatic int
717ec681f3Smrgis_64b(void)
727ec681f3Smrg{
737ec681f3Smrg   return options->gpu_id >= 500;
747ec681f3Smrg}
757ec681f3Smrg
/* Not referenced in this part of the file. */
static int draws[4];

/* Per-IB-level decode state; the entry for the current level is ibs[ib]. */
static struct {
   uint64_t base;
   uint32_t size; /* in dwords */
   /* Generally cmdstream consists of multiple IB calls to different
    * buffers, which are themselves often re-used for each tile.  The
    * triggered flag serves two purposes, to help make it clearer which
    * part of the cmdstream is before vs after the GPU hang:
    *
    * 1) if in IB2 we are past the point within the IB2 buffer where
    *    the GPU hung, but IB1 is not past the point within its buffer
    *    where the GPU had hung, then we know the GPU hang happens on a
    *    future use of that IB2 buffer.
    *
    * 2) if in an IB1 or IB2 buffer that is not the one where the GPU
    *    hung, but we have already passed the trigger point at the same
    *    IB level, we know that we are past the point where the GPU had
    *    hung.
    *
    * So this is a one-way switch, false->true.  And a higher numbered
    * IB level isn't considered triggered unless the lower numbered IB
    * level is.
    */
   bool triggered;
} ibs[4];

/* Current IB level; used as the index into ibs[] and options->ibs[]. */
static int ib;

/* Reset to zero by cffdec_init(). */
static int draw_count;
/* Compared against options->draw_filter in quiet(). */
static int current_draw_count;
1057ec681f3Smrg
/* Query mode: to handle symbolic register name queries, parsing of the
 * query strings has to be deferred until the gpu_id is known and the rnn
 * database is loaded.  init_rnn() allocates this array and stores one
 * resolved value per entry of options->querystrs.
 */
static int *queryvals = NULL;
1117ec681f3Smrg
1127ec681f3Smrgstatic bool
1137ec681f3Smrgquiet(int lvl)
1147ec681f3Smrg{
1157ec681f3Smrg   if ((options->draw_filter != -1) &&
1167ec681f3Smrg       (options->draw_filter != current_draw_count))
1177ec681f3Smrg      return true;
1187ec681f3Smrg   if ((lvl >= 3) && (summary || options->querystrs || options->script))
1197ec681f3Smrg      return true;
1207ec681f3Smrg   if ((lvl >= 2) && (options->querystrs || options->script))
1217ec681f3Smrg      return true;
1227ec681f3Smrg   return false;
1237ec681f3Smrg}
1247ec681f3Smrg
/* printf()-style output to stdout that is dropped when quiet(lvl) says
 * the given verbosity level is currently suppressed.
 */
void
printl(int lvl, const char *fmt, ...)
{
   if (!quiet(lvl)) {
      va_list ap;

      va_start(ap, fmt);
      vprintf(fmt, ap);
      va_end(ap);
   }
}
1357ec681f3Smrg
/* Indentation prefixes indexed by nesting level: entry i holds i + 1 tab
 * characters for levels 0..8.  Levels 9..14 hold the marker string "x".
 */
static const char *levels[] = {
   [0] = "\t",
   [1] = "\t\t",
   [2] = "\t\t\t",
   [3] = "\t\t\t\t",
   [4] = "\t\t\t\t\t",
   [5] = "\t\t\t\t\t\t",
   [6] = "\t\t\t\t\t\t\t",
   [7] = "\t\t\t\t\t\t\t\t",
   [8] = "\t\t\t\t\t\t\t\t\t",
   [9] = "x",
   [10] = "x",
   [11] = "x",
   [12] = "x",
   [13] = "x",
   [14] = "x",
};
1537ec681f3Smrg
/* Source selector passed to dump_tex_samp().  The register callbacks in
 * this part of the file always pass STATE_SRC_DIRECT.
 */
enum state_src_t {
   STATE_SRC_DIRECT = 0,
   STATE_SRC_INDIRECT = 1,
   STATE_SRC_BINDLESS = 2,
};
1597ec681f3Smrg
/* Forward declarations of SDS (CP_SET_DRAW_STATE) helpers: */
static void load_all_groups(int level);
static void disable_all_groups(void);

/* Forward declarations of the texture state dumpers called from the
 * TEX_SAMP / TEX_CONST register callbacks below:
 */
static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src,
                          int num_unit, int level);
static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);
1677ec681f3Smrg
1687ec681f3Smrgstatic bool
1697ec681f3Smrghighlight_gpuaddr(uint64_t gpuaddr)
1707ec681f3Smrg{
1717ec681f3Smrg   if (!options->ibs[ib].base)
1727ec681f3Smrg      return false;
1737ec681f3Smrg
1747ec681f3Smrg   if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered)
1757ec681f3Smrg      return false;
1767ec681f3Smrg
1777ec681f3Smrg   if (ibs[ib].triggered)
1787ec681f3Smrg      return options->color;
1797ec681f3Smrg
1807ec681f3Smrg   if (options->ibs[ib].base != ibs[ib].base)
1817ec681f3Smrg      return false;
1827ec681f3Smrg
1837ec681f3Smrg   uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);
1847ec681f3Smrg   uint64_t end = ibs[ib].base + 4 * ibs[ib].size;
1857ec681f3Smrg
1867ec681f3Smrg   bool triggered = (start <= gpuaddr) && (gpuaddr <= end);
1877ec681f3Smrg
1887ec681f3Smrg   ibs[ib].triggered |= triggered;
1897ec681f3Smrg
1907ec681f3Smrg   if (triggered)
1917ec681f3Smrg      printf("ESTIMATED CRASH LOCATION!\n");
1927ec681f3Smrg
1937ec681f3Smrg   return triggered & options->color;
1947ec681f3Smrg}
1957ec681f3Smrg
1967ec681f3Smrgstatic void
1977ec681f3Smrgdump_hex(uint32_t *dwords, uint32_t sizedwords, int level)
1987ec681f3Smrg{
1997ec681f3Smrg   int i, j;
2007ec681f3Smrg   int lastzero = 1;
2017ec681f3Smrg
2027ec681f3Smrg   if (quiet(2))
2037ec681f3Smrg      return;
2047ec681f3Smrg
2057ec681f3Smrg   for (i = 0; i < sizedwords; i += 8) {
2067ec681f3Smrg      int zero = 1;
2077ec681f3Smrg
2087ec681f3Smrg      /* always show first row: */
2097ec681f3Smrg      if (i == 0)
2107ec681f3Smrg         zero = 0;
2117ec681f3Smrg
2127ec681f3Smrg      for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)
2137ec681f3Smrg         if (dwords[i + j])
2147ec681f3Smrg            zero = 0;
2157ec681f3Smrg
2167ec681f3Smrg      if (zero && !lastzero)
2177ec681f3Smrg         printf("*\n");
2187ec681f3Smrg
2197ec681f3Smrg      lastzero = zero;
2207ec681f3Smrg
2217ec681f3Smrg      if (zero)
2227ec681f3Smrg         continue;
2237ec681f3Smrg
2247ec681f3Smrg      uint64_t addr = gpuaddr(&dwords[i]);
2257ec681f3Smrg      bool highlight = highlight_gpuaddr(addr);
2267ec681f3Smrg
2277ec681f3Smrg      if (highlight)
2287ec681f3Smrg         printf("\x1b[0;1;31m");
2297ec681f3Smrg
2307ec681f3Smrg      if (is_64b()) {
2317ec681f3Smrg         printf("%016" PRIx64 ":%s", addr, levels[level]);
2327ec681f3Smrg      } else {
2337ec681f3Smrg         printf("%08x:%s", (uint32_t)addr, levels[level]);
2347ec681f3Smrg      }
2357ec681f3Smrg
2367ec681f3Smrg      if (highlight)
2377ec681f3Smrg         printf("\x1b[0m");
2387ec681f3Smrg
2397ec681f3Smrg      printf("%04x:", i * 4);
2407ec681f3Smrg
2417ec681f3Smrg      for (j = 0; (j < 8) && (i + j < sizedwords); j++) {
2427ec681f3Smrg         printf(" %08x", dwords[i + j]);
2437ec681f3Smrg      }
2447ec681f3Smrg
2457ec681f3Smrg      printf("\n");
2467ec681f3Smrg   }
2477ec681f3Smrg}
2487ec681f3Smrg
2497ec681f3Smrgstatic void
2507ec681f3Smrgdump_float(float *dwords, uint32_t sizedwords, int level)
2517ec681f3Smrg{
2527ec681f3Smrg   int i;
2537ec681f3Smrg   for (i = 0; i < sizedwords; i++) {
2547ec681f3Smrg      if ((i % 8) == 0) {
2557ec681f3Smrg         if (is_64b()) {
2567ec681f3Smrg            printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);
2577ec681f3Smrg         } else {
2587ec681f3Smrg            printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);
2597ec681f3Smrg         }
2607ec681f3Smrg      } else {
2617ec681f3Smrg         printf(" ");
2627ec681f3Smrg      }
2637ec681f3Smrg      printf("%8f", *(dwords++));
2647ec681f3Smrg      if ((i % 8) == 7)
2657ec681f3Smrg         printf("\n");
2667ec681f3Smrg   }
2677ec681f3Smrg   if (i % 8)
2687ec681f3Smrg      printf("\n");
2697ec681f3Smrg}
2707ec681f3Smrg
/* Split a packed dword into an address part and a flags part.
 *
 * Background from the original author: the surface format is believed to
 * live in the low bits, i.e.
 *    #define RB_COLOR_INFO__COLOR_FORMAT_MASK   0x0000000fL
 * Comments in sys2gmem_tex_const indicate that the address is [31:12], but
 * it looks like at least some of the bits above the format have a
 * different meaning.
 *
 * dword:   packed value
 * gpuaddr: receives the bits of dword outside "mask"
 * flags:   receives the bits of dword inside "mask"
 * mask:    selects the flag bits
 */
static void
parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,
                 uint32_t mask)
{
   assert(!is_64b()); /* this is only used on a2xx */

   const uint32_t flag_bits = dword & mask;

   /* Clearing the flag bits leaves exactly dword & ~mask. */
   *gpuaddr = dword ^ flag_bits;
   *flags = flag_bits;
}
2847ec681f3Smrg
2857ec681f3Smrgstatic uint32_t type0_reg_vals[0xffff + 1];
2867ec681f3Smrgstatic uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /
2877ec681f3Smrg                                   8]; /* written since last draw */
2887ec681f3Smrgstatic uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];
2897ec681f3Smrgstatic uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];
2907ec681f3Smrg
2917ec681f3Smrgstatic bool
2927ec681f3Smrgreg_rewritten(uint32_t regbase)
2937ec681f3Smrg{
2947ec681f3Smrg   return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));
2957ec681f3Smrg}
2967ec681f3Smrg
2977ec681f3Smrgbool
2987ec681f3Smrgreg_written(uint32_t regbase)
2997ec681f3Smrg{
3007ec681f3Smrg   return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));
3017ec681f3Smrg}
3027ec681f3Smrg
3037ec681f3Smrgstatic void
3047ec681f3Smrgclear_rewritten(void)
3057ec681f3Smrg{
3067ec681f3Smrg   memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));
3077ec681f3Smrg}
3087ec681f3Smrg
3097ec681f3Smrgstatic void
3107ec681f3Smrgclear_written(void)
3117ec681f3Smrg{
3127ec681f3Smrg   memset(type0_reg_written, 0, sizeof(type0_reg_written));
3137ec681f3Smrg   clear_rewritten();
3147ec681f3Smrg}
3157ec681f3Smrg
3167ec681f3Smrguint32_t
3177ec681f3Smrgreg_lastval(uint32_t regbase)
3187ec681f3Smrg{
3197ec681f3Smrg   return lastvals[regbase];
3207ec681f3Smrg}
3217ec681f3Smrg
3227ec681f3Smrgstatic void
3237ec681f3Smrgclear_lastvals(void)
3247ec681f3Smrg{
3257ec681f3Smrg   memset(lastvals, 0, sizeof(lastvals));
3267ec681f3Smrg}
3277ec681f3Smrg
3287ec681f3Smrguint32_t
3297ec681f3Smrgreg_val(uint32_t regbase)
3307ec681f3Smrg{
3317ec681f3Smrg   return type0_reg_vals[regbase];
3327ec681f3Smrg}
3337ec681f3Smrg
3347ec681f3Smrgvoid
3357ec681f3Smrgreg_set(uint32_t regbase, uint32_t val)
3367ec681f3Smrg{
3377ec681f3Smrg   assert(regbase < regcnt());
3387ec681f3Smrg   type0_reg_vals[regbase] = val;
3397ec681f3Smrg   type0_reg_written[regbase / 8] |= (1 << (regbase % 8));
3407ec681f3Smrg   type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));
3417ec681f3Smrg}
3427ec681f3Smrg
3437ec681f3Smrgstatic void
3447ec681f3Smrgreg_dump_scratch(const char *name, uint32_t dword, int level)
3457ec681f3Smrg{
3467ec681f3Smrg   unsigned r;
3477ec681f3Smrg
3487ec681f3Smrg   if (quiet(3))
3497ec681f3Smrg      return;
3507ec681f3Smrg
3517ec681f3Smrg   r = regbase("CP_SCRATCH[0].REG");
3527ec681f3Smrg
3537ec681f3Smrg   // if not, try old a2xx/a3xx version:
3547ec681f3Smrg   if (!r)
3557ec681f3Smrg      r = regbase("CP_SCRATCH_REG0");
3567ec681f3Smrg
3577ec681f3Smrg   if (!r)
3587ec681f3Smrg      return;
3597ec681f3Smrg
3607ec681f3Smrg   printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),
3617ec681f3Smrg          reg_val(r + 6), reg_val(r + 7));
3627ec681f3Smrg}
3637ec681f3Smrg
/* Hex-dump "sizedwords" dwords of the buffer at "gpuaddr", one level
 * deeper than "level".  Does nothing when quiet(quietlvl) is true or when
 * hostptr() has no mapping for the address.
 */
static void
dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)
{
   if (quiet(quietlvl))
      return;

   void *buf = hostptr(gpuaddr);

   if (!buf)
      return;

   dump_hex(buf, sizedwords, level + 1);
}
3777ec681f3Smrg
/* Hex-dump the first 64 dwords at "gpuaddr", at quiet level 3. */
static void
dump_gpuaddr(uint64_t gpuaddr, int level)
{
   const int sizedwords = 64;
   const int quietlvl = 3;

   dump_gpuaddr_size(gpuaddr, level, sizedwords, quietlvl);
}
3837ec681f3Smrg
/* Register callback: treat the 32-bit register value as a GPU address and
 * hex-dump the memory it points at.  "name" is not used.
 */
static void
reg_dump_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   dump_gpuaddr(addr, level);
}
3897ec681f3Smrg
/* Low 32 bits of a 64-bit address, latched by reg_gpuaddr_lo() so that the
 * callback for the matching _HI register can assemble the full address.
 */
uint32_t gpuaddr_lo;

/* Register callback for the _LO half of a LO/HI address pair: only stores
 * the value.  "name" and "level" are not used.
 */
static void
reg_gpuaddr_lo(const char *name, uint32_t dword, int level)
{
   gpuaddr_lo = dword;
}
3967ec681f3Smrg
3977ec681f3Smrgstatic void
3987ec681f3Smrgreg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)
3997ec681f3Smrg{
4007ec681f3Smrg   dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);
4017ec681f3Smrg}
4027ec681f3Smrg
/* 64-bit register callback: hex-dump the memory at the address held in
 * "qword".  "name" is not used.
 */
static void
reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   dump_gpuaddr(addr, level);
}
4087ec681f3Smrg
4097ec681f3Smrgstatic void
4107ec681f3Smrgdump_shader(const char *ext, void *buf, int bufsz)
4117ec681f3Smrg{
4127ec681f3Smrg   if (options->dump_shaders) {
4137ec681f3Smrg      static int n = 0;
4147ec681f3Smrg      char filename[16];
4157ec681f3Smrg      int fd;
4167ec681f3Smrg      sprintf(filename, "%04d.%s", n++, ext);
4177ec681f3Smrg      fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);
4187ec681f3Smrg      if (fd != -1) {
4197ec681f3Smrg         write(fd, buf, bufsz);
4207ec681f3Smrg         close(fd);
4217ec681f3Smrg      }
4227ec681f3Smrg   }
4237ec681f3Smrg}
4247ec681f3Smrg
4257ec681f3Smrgstatic void
4267ec681f3Smrgdisasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)
4277ec681f3Smrg{
4287ec681f3Smrg   void *buf;
4297ec681f3Smrg
4307ec681f3Smrg   gpuaddr &= 0xfffffffffffffff0;
4317ec681f3Smrg
4327ec681f3Smrg   if (quiet(3))
4337ec681f3Smrg      return;
4347ec681f3Smrg
4357ec681f3Smrg   buf = hostptr(gpuaddr);
4367ec681f3Smrg   if (buf) {
4377ec681f3Smrg      uint32_t sizedwords = hostlen(gpuaddr) / 4;
4387ec681f3Smrg      const char *ext;
4397ec681f3Smrg
4407ec681f3Smrg      dump_hex(buf, min(64, sizedwords), level + 1);
4417ec681f3Smrg      try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id);
4427ec681f3Smrg
4437ec681f3Smrg      /* this is a bit ugly way, but oh well.. */
4447ec681f3Smrg      if (strstr(name, "SP_VS_OBJ")) {
4457ec681f3Smrg         ext = "vo3";
4467ec681f3Smrg      } else if (strstr(name, "SP_FS_OBJ")) {
4477ec681f3Smrg         ext = "fo3";
4487ec681f3Smrg      } else if (strstr(name, "SP_GS_OBJ")) {
4497ec681f3Smrg         ext = "go3";
4507ec681f3Smrg      } else if (strstr(name, "SP_CS_OBJ")) {
4517ec681f3Smrg         ext = "co3";
4527ec681f3Smrg      } else {
4537ec681f3Smrg         ext = NULL;
4547ec681f3Smrg      }
4557ec681f3Smrg
4567ec681f3Smrg      if (ext)
4577ec681f3Smrg         dump_shader(ext, buf, sizedwords * 4);
4587ec681f3Smrg   }
4597ec681f3Smrg}
4607ec681f3Smrg
/* Register callback: treat the 32-bit register value as the address of a
 * shader and dump/disassemble it.
 */
static void
reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)
{
   const uint64_t addr = dword;

   disasm_gpuaddr(name, addr, level);
}
4667ec681f3Smrg
4677ec681f3Smrgstatic void
4687ec681f3Smrgreg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)
4697ec681f3Smrg{
4707ec681f3Smrg   disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);
4717ec681f3Smrg}
4727ec681f3Smrg
/* 64-bit register callback: dump/disassemble the shader at the address
 * held in "qword".
 */
static void
reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)
{
   const uint64_t addr = qword;

   disasm_gpuaddr(name, addr, level);
}
4787ec681f3Smrg
/* Find the value of the TEX_COUNT register that corresponds to the named
 * TEX_SAMP/TEX_CONST register.
 *
 * The lookup name is built by keeping everything in "name" up to the first
 * "CONST" (or, failing that, the first "SAMP") and appending "COUNT".
 * Returns 0 when the name contains neither substring.
 *
 * Note, this kinda assumes an equal # of samplers and textures, but not
 * really sure if there is a much better option.  I suppose on a6xx we
 * could instead decode the bitfields in SP_xS_CONFIG
 */
static int
get_tex_count(const char *name)
{
   /* "COUNT" is never more than one char longer than what it replaces,
    * so strlen(name) + 5 always leaves room for the terminator.
    */
   char count_reg[strlen(name) + 5];
   const char *suffix = strstr(name, "CONST");

   if (!suffix)
      suffix = strstr(name, "SAMP");
   if (!suffix)
      return 0;

   const size_t prefix_len = suffix - name;

   memcpy(count_reg, name, prefix_len);
   /* sizeof includes the terminating NUL. */
   memcpy(count_reg + prefix_len, "COUNT", sizeof("COUNT"));

   return reg_val(regbase(count_reg));
}
5047ec681f3Smrg
5057ec681f3Smrgstatic void
5067ec681f3Smrgreg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)
5077ec681f3Smrg{
5087ec681f3Smrg   if (!in_summary)
5097ec681f3Smrg      return;
5107ec681f3Smrg
5117ec681f3Smrg   int num_unit = get_tex_count(name);
5127ec681f3Smrg   uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
5137ec681f3Smrg   void *buf = hostptr(gpuaddr);
5147ec681f3Smrg
5157ec681f3Smrg   if (!buf)
5167ec681f3Smrg      return;
5177ec681f3Smrg
5187ec681f3Smrg   dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);
5197ec681f3Smrg}
5207ec681f3Smrg
5217ec681f3Smrgstatic void
5227ec681f3Smrgreg_dump_tex_const_hi(const char *name, uint32_t dword, int level)
5237ec681f3Smrg{
5247ec681f3Smrg   if (!in_summary)
5257ec681f3Smrg      return;
5267ec681f3Smrg
5277ec681f3Smrg   int num_unit = get_tex_count(name);
5287ec681f3Smrg   uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);
5297ec681f3Smrg   void *buf = hostptr(gpuaddr);
5307ec681f3Smrg
5317ec681f3Smrg   if (!buf)
5327ec681f3Smrg      return;
5337ec681f3Smrg
5347ec681f3Smrg   dump_tex_const(buf, num_unit, level + 1);
5357ec681f3Smrg}
5367ec681f3Smrg
5377ec681f3Smrg/*
5387ec681f3Smrg * Registers with special handling (rnndec_decode() handles rest):
5397ec681f3Smrg */
5407ec681f3Smrg#define REG(x, fxn)    { #x, fxn }
5417ec681f3Smrg#define REG64(x, fxn)  { #x, .fxn64 = fxn, .is_reg64 = true }
5427ec681f3Smrgstatic struct {
5437ec681f3Smrg   const char *regname;
5447ec681f3Smrg   void (*fxn)(const char *name, uint32_t dword, int level);
5457ec681f3Smrg   void (*fxn64)(const char *name, uint64_t qword, int level);
5467ec681f3Smrg   uint32_t regbase;
5477ec681f3Smrg   bool is_reg64;
5487ec681f3Smrg} reg_a2xx[] = {
5497ec681f3Smrg      REG(CP_SCRATCH_REG0, reg_dump_scratch),
5507ec681f3Smrg      REG(CP_SCRATCH_REG1, reg_dump_scratch),
5517ec681f3Smrg      REG(CP_SCRATCH_REG2, reg_dump_scratch),
5527ec681f3Smrg      REG(CP_SCRATCH_REG3, reg_dump_scratch),
5537ec681f3Smrg      REG(CP_SCRATCH_REG4, reg_dump_scratch),
5547ec681f3Smrg      REG(CP_SCRATCH_REG5, reg_dump_scratch),
5557ec681f3Smrg      REG(CP_SCRATCH_REG6, reg_dump_scratch),
5567ec681f3Smrg      REG(CP_SCRATCH_REG7, reg_dump_scratch),
5577ec681f3Smrg      {NULL},
5587ec681f3Smrg}, reg_a3xx[] = {
5597ec681f3Smrg      REG(CP_SCRATCH_REG0, reg_dump_scratch),
5607ec681f3Smrg      REG(CP_SCRATCH_REG1, reg_dump_scratch),
5617ec681f3Smrg      REG(CP_SCRATCH_REG2, reg_dump_scratch),
5627ec681f3Smrg      REG(CP_SCRATCH_REG3, reg_dump_scratch),
5637ec681f3Smrg      REG(CP_SCRATCH_REG4, reg_dump_scratch),
5647ec681f3Smrg      REG(CP_SCRATCH_REG5, reg_dump_scratch),
5657ec681f3Smrg      REG(CP_SCRATCH_REG6, reg_dump_scratch),
5667ec681f3Smrg      REG(CP_SCRATCH_REG7, reg_dump_scratch),
5677ec681f3Smrg      REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),
5687ec681f3Smrg      REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
5697ec681f3Smrg      REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),
5707ec681f3Smrg      REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),
5717ec681f3Smrg      REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),
5727ec681f3Smrg      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
5737ec681f3Smrg      {NULL},
5747ec681f3Smrg}, reg_a4xx[] = {
5757ec681f3Smrg      REG(CP_SCRATCH[0].REG, reg_dump_scratch),
5767ec681f3Smrg      REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),
5777ec681f3Smrg      REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),
5787ec681f3Smrg      REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),
5797ec681f3Smrg      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
5807ec681f3Smrg      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
5817ec681f3Smrg      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
5827ec681f3Smrg      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
5837ec681f3Smrg      REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),
5847ec681f3Smrg      REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),
5857ec681f3Smrg      REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),
5867ec681f3Smrg      REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),
5877ec681f3Smrg      REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),
5887ec681f3Smrg      REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),
5897ec681f3Smrg      REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),
5907ec681f3Smrg      REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),
5917ec681f3Smrg      REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),
5927ec681f3Smrg      REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),
5937ec681f3Smrg      REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),
5947ec681f3Smrg      REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),
5957ec681f3Smrg      REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
5967ec681f3Smrg      REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
5977ec681f3Smrg      REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
5987ec681f3Smrg      REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
5997ec681f3Smrg      REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),
6007ec681f3Smrg      {NULL},
6017ec681f3Smrg}, reg_a5xx[] = {
6027ec681f3Smrg      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
6037ec681f3Smrg      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
6047ec681f3Smrg      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
6057ec681f3Smrg      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
6067ec681f3Smrg      REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),
6077ec681f3Smrg      REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
6087ec681f3Smrg      REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),
6097ec681f3Smrg      REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
6107ec681f3Smrg      REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),
6117ec681f3Smrg      REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
6127ec681f3Smrg      REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),
6137ec681f3Smrg      REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
6147ec681f3Smrg      REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),
6157ec681f3Smrg      REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
6167ec681f3Smrg      REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),
6177ec681f3Smrg      REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),
6187ec681f3Smrg      REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),
6197ec681f3Smrg      REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),
6207ec681f3Smrg      REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),
6217ec681f3Smrg      REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
6227ec681f3Smrg      REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),
6237ec681f3Smrg      REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),
6247ec681f3Smrg      REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),
6257ec681f3Smrg      REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
6267ec681f3Smrg      REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),
6277ec681f3Smrg      REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),
6287ec681f3Smrg      REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),
6297ec681f3Smrg      REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
6307ec681f3Smrg      REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),
6317ec681f3Smrg      REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),
6327ec681f3Smrg      REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),
6337ec681f3Smrg      REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
6347ec681f3Smrg      REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),
6357ec681f3Smrg      REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),
6367ec681f3Smrg      REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),
6377ec681f3Smrg      REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
6387ec681f3Smrg      REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),
6397ec681f3Smrg      REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),
6407ec681f3Smrg      REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),
6417ec681f3Smrg      REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),
6427ec681f3Smrg      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),
6437ec681f3Smrg      REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),
6447ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),
6457ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),
6467ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),
6477ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),
6487ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),
6497ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),
6507ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),
6517ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),
6527ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),
6537ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),
6547ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),
6557ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),
6567ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),
6577ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),
6587ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),
6597ec681f3Smrg//      REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),
6607ec681f3Smrg//      REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),
6617ec681f3Smrg//      REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),
6627ec681f3Smrg//      REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),
6637ec681f3Smrg//      REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),
6647ec681f3Smrg//      REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),
6657ec681f3Smrg//      REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
6667ec681f3Smrg//      REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),
6677ec681f3Smrg//      REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),
6687ec681f3Smrg//      REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),
6697ec681f3Smrg//      REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),
6707ec681f3Smrg
6717ec681f3Smrg//      REG(RB_2D_SRC_LO, reg_gpuaddr_lo),
6727ec681f3Smrg//      REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),
6737ec681f3Smrg//      REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),
6747ec681f3Smrg//      REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),
6757ec681f3Smrg//      REG(RB_2D_DST_LO, reg_gpuaddr_lo),
6767ec681f3Smrg//      REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),
6777ec681f3Smrg//      REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),
6787ec681f3Smrg//      REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),
6797ec681f3Smrg
6807ec681f3Smrg      {NULL},
6817ec681f3Smrg}, reg_a6xx[] = {
6827ec681f3Smrg      REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),
6837ec681f3Smrg      REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),
6847ec681f3Smrg      REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),
6857ec681f3Smrg      REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),
6867ec681f3Smrg
6877ec681f3Smrg      REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),
6887ec681f3Smrg      REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),
6897ec681f3Smrg      REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),
6907ec681f3Smrg      REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),
6917ec681f3Smrg      REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),
6927ec681f3Smrg      REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),
6937ec681f3Smrg
6947ec681f3Smrg      REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),
6957ec681f3Smrg      REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),
6967ec681f3Smrg      REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),
6977ec681f3Smrg      REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),
6987ec681f3Smrg      REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),
6997ec681f3Smrg      REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),
7007ec681f3Smrg      REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),
7017ec681f3Smrg      REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),
7027ec681f3Smrg      REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),
7037ec681f3Smrg      REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),
7047ec681f3Smrg      REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),
7057ec681f3Smrg      REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),
7067ec681f3Smrg
7077ec681f3Smrg      {NULL},
7087ec681f3Smrg}, *type0_reg;
7097ec681f3Smrg
7107ec681f3Smrgstatic struct rnn *rnn;
7117ec681f3Smrg
7127ec681f3Smrgstatic void
7137ec681f3Smrginit_rnn(const char *gpuname)
7147ec681f3Smrg{
7157ec681f3Smrg   rnn = rnn_new(!options->color);
7167ec681f3Smrg
7177ec681f3Smrg   rnn_load(rnn, gpuname);
7187ec681f3Smrg
7197ec681f3Smrg   if (options->querystrs) {
7207ec681f3Smrg      int i;
7217ec681f3Smrg      queryvals = calloc(options->nquery, sizeof(queryvals[0]));
7227ec681f3Smrg
7237ec681f3Smrg      for (i = 0; i < options->nquery; i++) {
7247ec681f3Smrg         int val = strtol(options->querystrs[i], NULL, 0);
7257ec681f3Smrg
7267ec681f3Smrg         if (val == 0)
7277ec681f3Smrg            val = regbase(options->querystrs[i]);
7287ec681f3Smrg
7297ec681f3Smrg         queryvals[i] = val;
7307ec681f3Smrg         printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);
7317ec681f3Smrg      }
7327ec681f3Smrg   }
7337ec681f3Smrg
7347ec681f3Smrg   for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
7357ec681f3Smrg      type0_reg[idx].regbase = regbase(type0_reg[idx].regname);
7367ec681f3Smrg      if (!type0_reg[idx].regbase) {
7377ec681f3Smrg         printf("invalid register name: %s\n", type0_reg[idx].regname);
7387ec681f3Smrg         exit(1);
7397ec681f3Smrg      }
7407ec681f3Smrg   }
7417ec681f3Smrg}
7427ec681f3Smrg
7437ec681f3Smrgvoid
7447ec681f3Smrgreset_regs(void)
7457ec681f3Smrg{
7467ec681f3Smrg   clear_written();
7477ec681f3Smrg   clear_lastvals();
7487ec681f3Smrg   memset(&ibs, 0, sizeof(ibs));
7497ec681f3Smrg}
7507ec681f3Smrg
7517ec681f3Smrgvoid
7527ec681f3Smrgcffdec_init(const struct cffdec_options *_options)
7537ec681f3Smrg{
7547ec681f3Smrg   options = _options;
7557ec681f3Smrg   summary = options->summary;
7567ec681f3Smrg
7577ec681f3Smrg   /* in case we're decoding multiple files: */
7587ec681f3Smrg   free(queryvals);
7597ec681f3Smrg   reset_regs();
7607ec681f3Smrg   draw_count = 0;
7617ec681f3Smrg
7627ec681f3Smrg   /* TODO we need an API to free/cleanup any previous rnn */
7637ec681f3Smrg
7647ec681f3Smrg   switch (options->gpu_id) {
7657ec681f3Smrg   case 200 ... 299:
7667ec681f3Smrg      type0_reg = reg_a2xx;
7677ec681f3Smrg      init_rnn("a2xx");
7687ec681f3Smrg      break;
7697ec681f3Smrg   case 300 ... 399:
7707ec681f3Smrg      type0_reg = reg_a3xx;
7717ec681f3Smrg      init_rnn("a3xx");
7727ec681f3Smrg      break;
7737ec681f3Smrg   case 400 ... 499:
7747ec681f3Smrg      type0_reg = reg_a4xx;
7757ec681f3Smrg      init_rnn("a4xx");
7767ec681f3Smrg      break;
7777ec681f3Smrg   case 500 ... 599:
7787ec681f3Smrg      type0_reg = reg_a5xx;
7797ec681f3Smrg      init_rnn("a5xx");
7807ec681f3Smrg      break;
7817ec681f3Smrg   case 600 ... 699:
7827ec681f3Smrg      type0_reg = reg_a6xx;
7837ec681f3Smrg      init_rnn("a6xx");
7847ec681f3Smrg      break;
7857ec681f3Smrg   default:
7867ec681f3Smrg      errx(-1, "unsupported gpu");
7877ec681f3Smrg   }
7887ec681f3Smrg}
7897ec681f3Smrg
7907ec681f3Smrgconst char *
7917ec681f3Smrgpktname(unsigned opc)
7927ec681f3Smrg{
7937ec681f3Smrg   return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);
7947ec681f3Smrg}
7957ec681f3Smrg
7967ec681f3Smrgconst char *
7977ec681f3Smrgregname(uint32_t regbase, int color)
7987ec681f3Smrg{
7997ec681f3Smrg   return rnn_regname(rnn, regbase, color);
8007ec681f3Smrg}
8017ec681f3Smrg
8027ec681f3Smrguint32_t
8037ec681f3Smrgregbase(const char *name)
8047ec681f3Smrg{
8057ec681f3Smrg   return rnn_regbase(rnn, name);
8067ec681f3Smrg}
8077ec681f3Smrg
/**
 * Test whether the name of register @regbase ends with @suffix.
 *
 * Compares the tail of the name directly rather than searching with
 * strstr(): strstr() returns the *first* occurrence, so a name containing
 * the suffix both earlier and at the end would wrongly be rejected.
 *
 * Returns 1 when the name ends with @suffix, 0 otherwise (including when no
 * name is available for the register).
 */
static int
endswith(uint32_t regbase, const char *suffix)
{
   const char *name = regname(regbase, 0);

   /* NOTE(review): regname()'s backend is not visible here; guard in case it
    * yields NULL for offsets without a known name.
    */
   if (!name)
      return 0;

   size_t name_len = strlen(name);
   size_t suffix_len = strlen(suffix);

   if (suffix_len > name_len)
      return 0;

   return strcmp(name + (name_len - suffix_len), suffix) == 0;
}
8177ec681f3Smrg
8187ec681f3Smrgvoid
8197ec681f3Smrgdump_register_val(uint32_t regbase, uint32_t dword, int level)
8207ec681f3Smrg{
8217ec681f3Smrg   struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);
8227ec681f3Smrg
8237ec681f3Smrg   if (info && info->typeinfo) {
8247ec681f3Smrg      uint64_t gpuaddr = 0;
8257ec681f3Smrg      char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);
8267ec681f3Smrg      printf("%s%s: %s", levels[level], info->name, decoded);
8277ec681f3Smrg
8287ec681f3Smrg      /* Try and figure out if we are looking at a gpuaddr.. this
8297ec681f3Smrg       * might be useful for other gen's too, but at least a5xx has
8307ec681f3Smrg       * the _HI/_LO suffix we can look for.  Maybe a better approach
8317ec681f3Smrg       * would be some special annotation in the xml..
8327ec681f3Smrg       * for a6xx use "address" and "waddress" types
8337ec681f3Smrg       */
8347ec681f3Smrg      if (options->gpu_id >= 600) {
8357ec681f3Smrg         if (!strcmp(info->typeinfo->name, "address") ||
8367ec681f3Smrg             !strcmp(info->typeinfo->name, "waddress")) {
8377ec681f3Smrg            gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
8387ec681f3Smrg         }
8397ec681f3Smrg      } else if (options->gpu_id >= 500) {
8407ec681f3Smrg         if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) {
8417ec681f3Smrg            gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1);
8427ec681f3Smrg         } else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) {
8437ec681f3Smrg            gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
8447ec681f3Smrg         }
8457ec681f3Smrg      }
8467ec681f3Smrg
8477ec681f3Smrg      if (gpuaddr && hostptr(gpuaddr)) {
8487ec681f3Smrg         printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",
8497ec681f3Smrg                gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),
8507ec681f3Smrg                hostlen(gpubaseaddr(gpuaddr)));
8517ec681f3Smrg      }
8527ec681f3Smrg
8537ec681f3Smrg      printf("\n");
8547ec681f3Smrg
8557ec681f3Smrg      free(decoded);
8567ec681f3Smrg   } else if (info) {
8577ec681f3Smrg      printf("%s%s: %08x\n", levels[level], info->name, dword);
8587ec681f3Smrg   } else {
8597ec681f3Smrg      printf("%s<%04x>: %08x\n", levels[level], regbase, dword);
8607ec681f3Smrg   }
8617ec681f3Smrg
8627ec681f3Smrg   if (info) {
8637ec681f3Smrg      free(info->name);
8647ec681f3Smrg      free(info);
8657ec681f3Smrg   }
8667ec681f3Smrg}
8677ec681f3Smrg
8687ec681f3Smrgstatic void
8697ec681f3Smrgdump_register(uint32_t regbase, uint32_t dword, int level)
8707ec681f3Smrg{
8717ec681f3Smrg   if (!quiet(3)) {
8727ec681f3Smrg      dump_register_val(regbase, dword, level);
8737ec681f3Smrg   }
8747ec681f3Smrg
8757ec681f3Smrg   for (unsigned idx = 0; type0_reg[idx].regname; idx++) {
8767ec681f3Smrg      if (type0_reg[idx].regbase == regbase) {
8777ec681f3Smrg         if (type0_reg[idx].is_reg64) {
8787ec681f3Smrg            uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;
8797ec681f3Smrg            type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level);
8807ec681f3Smrg         } else {
8817ec681f3Smrg            type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);
8827ec681f3Smrg         }
8837ec681f3Smrg         break;
8847ec681f3Smrg      }
8857ec681f3Smrg   }
8867ec681f3Smrg}
8877ec681f3Smrg
/* True when @regbase lies in the half-open offset range [0x2000, 0x2400),
 * which this file treats as the banked register range.
 */
static bool
is_banked_reg(uint32_t regbase)
{
   if (regbase < 0x2000)
      return false;

   return regbase < 0x2400;
}
8937ec681f3Smrg
8947ec681f3Smrgstatic void
8957ec681f3Smrgdump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,
8967ec681f3Smrg               int level)
8977ec681f3Smrg{
8987ec681f3Smrg   while (sizedwords--) {
8997ec681f3Smrg      int last_summary = summary;
9007ec681f3Smrg
9017ec681f3Smrg      /* access to non-banked registers needs a WFI:
9027ec681f3Smrg       * TODO banked register range for a2xx??
9037ec681f3Smrg       */
9047ec681f3Smrg      if (needs_wfi && !is_banked_reg(regbase))
9057ec681f3Smrg         printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);
9067ec681f3Smrg
9077ec681f3Smrg      reg_set(regbase, *dwords);
9087ec681f3Smrg      dump_register(regbase, *dwords, level);
9097ec681f3Smrg      regbase++;
9107ec681f3Smrg      dwords++;
9117ec681f3Smrg      summary = last_summary;
9127ec681f3Smrg   }
9137ec681f3Smrg}
9147ec681f3Smrg
9157ec681f3Smrgstatic void
9167ec681f3Smrgdump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)
9177ec681f3Smrg{
9187ec681f3Smrg   struct rnndomain *dom;
9197ec681f3Smrg   int i;
9207ec681f3Smrg
9217ec681f3Smrg   dom = rnn_finddomain(rnn->db, name);
9227ec681f3Smrg
9237ec681f3Smrg   if (!dom)
9247ec681f3Smrg      return;
9257ec681f3Smrg
9267ec681f3Smrg   if (script_packet)
9277ec681f3Smrg      script_packet(dwords, sizedwords, rnn, dom);
9287ec681f3Smrg
9297ec681f3Smrg   if (quiet(2))
9307ec681f3Smrg      return;
9317ec681f3Smrg
9327ec681f3Smrg   for (i = 0; i < sizedwords; i++) {
9337ec681f3Smrg      struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);
9347ec681f3Smrg      char *decoded;
9357ec681f3Smrg      if (!(info && info->typeinfo))
9367ec681f3Smrg         break;
9377ec681f3Smrg      uint64_t value = dwords[i];
9387ec681f3Smrg      if (info->typeinfo->high >= 32 && i < sizedwords - 1) {
9397ec681f3Smrg         value |= (uint64_t)dwords[i + 1] << 32;
9407ec681f3Smrg         i++; /* skip the next dword since we're printing it now */
9417ec681f3Smrg      }
9427ec681f3Smrg      decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
9437ec681f3Smrg      /* Unlike the register printing path, we don't print the name
9447ec681f3Smrg       * of the register, so if it doesn't contain other named
9457ec681f3Smrg       * things (i.e. it isn't a bitset) then print the register
9467ec681f3Smrg       * name as if it's a bitset with a single entry. This avoids
9477ec681f3Smrg       * having to create a dummy register with a single entry to
9487ec681f3Smrg       * get a name in the decoding.
9497ec681f3Smrg       */
9507ec681f3Smrg      if (info->typeinfo->type == RNN_TTYPE_BITSET ||
9517ec681f3Smrg          info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {
9527ec681f3Smrg         printf("%s%s\n", levels[level], decoded);
9537ec681f3Smrg      } else {
9547ec681f3Smrg         printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,
9557ec681f3Smrg                info->name, rnn->vc->colors->reset, decoded);
9567ec681f3Smrg      }
9577ec681f3Smrg      free(decoded);
9587ec681f3Smrg      free(info->name);
9597ec681f3Smrg      free(info);
9607ec681f3Smrg   }
9617ec681f3Smrg}
9627ec681f3Smrg
/* Bin bounds reported in query output; for gpu_id 500..699 __do_query()
 * refreshes them from the GRAS_SC_WINDOW_SCISSOR_TL/BR register values.
 */
static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;
static unsigned mode;
/* Human-readable label of the current render mode, printed by print_mode()
 * and __do_query().
 */
static const char *render_mode;
/* Bitmask of render modes; do_query_compare() temporarily overrides
 * enable_mask while it replays state per pass and restores it afterwards.
 */
static enum {
   MODE_BINNING = 0x1,
   MODE_GMEM = 0x2,
   MODE_BYPASS = 0x4,
   MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,
} enable_mask = MODE_ALL;
/* skip_ib2 flags, reported by print_mode() as "g=" and "l=" respectively. */
static bool skip_ib2_enable_global;
static bool skip_ib2_enable_local;
9747ec681f3Smrg
9757ec681f3Smrgstatic void
9767ec681f3Smrgprint_mode(int level)
9777ec681f3Smrg{
9787ec681f3Smrg   if ((options->gpu_id >= 500) && !quiet(2)) {
9797ec681f3Smrg      printf("%smode: %s\n", levels[level], render_mode);
9807ec681f3Smrg      printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,
9817ec681f3Smrg             skip_ib2_enable_local);
9827ec681f3Smrg   }
9837ec681f3Smrg}
9847ec681f3Smrg
9857ec681f3Smrgstatic bool
9867ec681f3Smrgskip_query(void)
9877ec681f3Smrg{
9887ec681f3Smrg   switch (options->query_mode) {
9897ec681f3Smrg   case QUERY_ALL:
9907ec681f3Smrg      /* never skip: */
9917ec681f3Smrg      return false;
9927ec681f3Smrg   case QUERY_WRITTEN:
9937ec681f3Smrg      for (int i = 0; i < options->nquery; i++) {
9947ec681f3Smrg         uint32_t regbase = queryvals[i];
9957ec681f3Smrg         if (!reg_written(regbase)) {
9967ec681f3Smrg            continue;
9977ec681f3Smrg         }
9987ec681f3Smrg         if (reg_rewritten(regbase)) {
9997ec681f3Smrg            return false;
10007ec681f3Smrg         }
10017ec681f3Smrg      }
10027ec681f3Smrg      return true;
10037ec681f3Smrg   case QUERY_DELTA:
10047ec681f3Smrg      for (int i = 0; i < options->nquery; i++) {
10057ec681f3Smrg         uint32_t regbase = queryvals[i];
10067ec681f3Smrg         if (!reg_written(regbase)) {
10077ec681f3Smrg            continue;
10087ec681f3Smrg         }
10097ec681f3Smrg         uint32_t lastval = reg_val(regbase);
10107ec681f3Smrg         if (lastval != lastvals[regbase]) {
10117ec681f3Smrg            return false;
10127ec681f3Smrg         }
10137ec681f3Smrg      }
10147ec681f3Smrg      return true;
10157ec681f3Smrg   }
10167ec681f3Smrg   return true;
10177ec681f3Smrg}
10187ec681f3Smrg
10197ec681f3Smrgstatic void
10207ec681f3Smrg__do_query(const char *primtype, uint32_t num_indices)
10217ec681f3Smrg{
10227ec681f3Smrg   int n = 0;
10237ec681f3Smrg
10247ec681f3Smrg   if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {
10257ec681f3Smrg      uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));
10267ec681f3Smrg      uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));
10277ec681f3Smrg
10287ec681f3Smrg      bin_x1 = scissor_tl & 0xffff;
10297ec681f3Smrg      bin_y1 = scissor_tl >> 16;
10307ec681f3Smrg      bin_x2 = scissor_br & 0xffff;
10317ec681f3Smrg      bin_y2 = scissor_br >> 16;
10327ec681f3Smrg   }
10337ec681f3Smrg
10347ec681f3Smrg   for (int i = 0; i < options->nquery; i++) {
10357ec681f3Smrg      uint32_t regbase = queryvals[i];
10367ec681f3Smrg      if (reg_written(regbase)) {
10377ec681f3Smrg         uint32_t lastval = reg_val(regbase);
10387ec681f3Smrg         printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,
10397ec681f3Smrg                bin_y1, bin_x2, bin_y2, num_indices);
10407ec681f3Smrg         if (options->gpu_id >= 500)
10417ec681f3Smrg            printf("%s:", render_mode);
10427ec681f3Smrg         printf("\t%08x", lastval);
10437ec681f3Smrg         if (lastval != lastvals[regbase]) {
10447ec681f3Smrg            printf("!");
10457ec681f3Smrg         } else {
10467ec681f3Smrg            printf(" ");
10477ec681f3Smrg         }
10487ec681f3Smrg         if (reg_rewritten(regbase)) {
10497ec681f3Smrg            printf("+");
10507ec681f3Smrg         } else {
10517ec681f3Smrg            printf(" ");
10527ec681f3Smrg         }
10537ec681f3Smrg         dump_register_val(regbase, lastval, 0);
10547ec681f3Smrg         n++;
10557ec681f3Smrg      }
10567ec681f3Smrg   }
10577ec681f3Smrg
10587ec681f3Smrg   if (n > 1)
10597ec681f3Smrg      printf("\n");
10607ec681f3Smrg}
10617ec681f3Smrg
10627ec681f3Smrgstatic void
10637ec681f3Smrgdo_query_compare(const char *primtype, uint32_t num_indices)
10647ec681f3Smrg{
10657ec681f3Smrg   unsigned saved_enable_mask = enable_mask;
10667ec681f3Smrg   const char *saved_render_mode = render_mode;
10677ec681f3Smrg
10687ec681f3Smrg   /* in 'query-compare' mode, we want to see if the register is writtten
10697ec681f3Smrg    * or changed in any mode:
10707ec681f3Smrg    *
10717ec681f3Smrg    * (NOTE: this could cause false-positive for 'query-delta' if the reg
10727ec681f3Smrg    * is written with different values in binning vs sysmem/gmem mode, as
10737ec681f3Smrg    * we don't track previous values per-mode, but I think we can live with
10747ec681f3Smrg    * that)
10757ec681f3Smrg    */
10767ec681f3Smrg   enable_mask = MODE_ALL;
10777ec681f3Smrg
10787ec681f3Smrg   clear_rewritten();
10797ec681f3Smrg   load_all_groups(0);
10807ec681f3Smrg
10817ec681f3Smrg   if (!skip_query()) {
10827ec681f3Smrg      /* dump binning pass values: */
10837ec681f3Smrg      enable_mask = MODE_BINNING;
10847ec681f3Smrg      render_mode = "BINNING";
10857ec681f3Smrg      clear_rewritten();
10867ec681f3Smrg      load_all_groups(0);
10877ec681f3Smrg      __do_query(primtype, num_indices);
10887ec681f3Smrg
10897ec681f3Smrg      /* dump draw pass values: */
10907ec681f3Smrg      enable_mask = MODE_GMEM | MODE_BYPASS;
10917ec681f3Smrg      render_mode = "DRAW";
10927ec681f3Smrg      clear_rewritten();
10937ec681f3Smrg      load_all_groups(0);
10947ec681f3Smrg      __do_query(primtype, num_indices);
10957ec681f3Smrg
10967ec681f3Smrg      printf("\n");
10977ec681f3Smrg   }
10987ec681f3Smrg
10997ec681f3Smrg   enable_mask = saved_enable_mask;
11007ec681f3Smrg   render_mode = saved_render_mode;
11017ec681f3Smrg
11027ec681f3Smrg   disable_all_groups();
11037ec681f3Smrg}
11047ec681f3Smrg
11057ec681f3Smrg/* well, actually query and script..
11067ec681f3Smrg * NOTE: call this before dump_register_summary()
11077ec681f3Smrg */
11087ec681f3Smrgstatic void
11097ec681f3Smrgdo_query(const char *primtype, uint32_t num_indices)
11107ec681f3Smrg{
11117ec681f3Smrg   if (script_draw)
11127ec681f3Smrg      script_draw(primtype, num_indices);
11137ec681f3Smrg
11147ec681f3Smrg   if (options->query_compare) {
11157ec681f3Smrg      do_query_compare(primtype, num_indices);
11167ec681f3Smrg      return;
11177ec681f3Smrg   }
11187ec681f3Smrg
11197ec681f3Smrg   if (skip_query())
11207ec681f3Smrg      return;
11217ec681f3Smrg
11227ec681f3Smrg   __do_query(primtype, num_indices);
11237ec681f3Smrg}
11247ec681f3Smrg
11257ec681f3Smrgstatic void
11267ec681f3Smrgcp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)
11277ec681f3Smrg{
11287ec681f3Smrg   uint32_t start = dwords[1] >> 16;
11297ec681f3Smrg   uint32_t size = dwords[1] & 0xffff;
11307ec681f3Smrg   const char *type = NULL, *ext = NULL;
11317ec681f3Smrg   gl_shader_stage disasm_type;
11327ec681f3Smrg
11337ec681f3Smrg   switch (dwords[0]) {
11347ec681f3Smrg   case 0:
11357ec681f3Smrg      type = "vertex";
11367ec681f3Smrg      ext = "vo";
11377ec681f3Smrg      disasm_type = MESA_SHADER_VERTEX;
11387ec681f3Smrg      break;
11397ec681f3Smrg   case 1:
11407ec681f3Smrg      type = "fragment";
11417ec681f3Smrg      ext = "fo";
11427ec681f3Smrg      disasm_type = MESA_SHADER_FRAGMENT;
11437ec681f3Smrg      break;
11447ec681f3Smrg   default:
11457ec681f3Smrg      type = "<unknown>";
11467ec681f3Smrg      disasm_type = 0;
11477ec681f3Smrg      break;
11487ec681f3Smrg   }
11497ec681f3Smrg
11507ec681f3Smrg   printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,
11517ec681f3Smrg          size);
11527ec681f3Smrg   disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);
11537ec681f3Smrg
11547ec681f3Smrg   /* dump raw shader: */
11557ec681f3Smrg   if (ext)
11567ec681f3Smrg      dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);
11577ec681f3Smrg}
11587ec681f3Smrg
/* Decode a wide register write: the low 16 bits of dwords[0] give the first
 * register offset, and each following dword is the value for the next
 * consecutive register.  Every value is printed first and then recorded.
 */
static void
cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   const uint32_t first = dwords[0] & 0xffff;

   for (uint32_t n = 1; n < sizedwords; n++) {
      const uint32_t reg = first + (n - 1);
      const uint32_t val = dwords[n];

      dump_register(reg, val, level + 1);
      reg_set(reg, val);
   }
}
11707ec681f3Smrg
/* Kinds of state a load-state packet can carry, as classified by the
 * *_get_state_type() lookup tables.  Values start at 1, so a
 * zero-initialized table slot matches none of these kinds.
 */
enum state_t {
   TEX_SAMP = 1,
   TEX_CONST,
   TEX_MIPADDR, /* a3xx only */
   SHADER_PROG,
   SHADER_CONST,

   // image/ssbo state:
   SSBO_0,
   SSBO_1,
   SSBO_2,

   UBO,

   // unknown things, just to hexdumps:
   UNKNOWN_DWORDS,
   UNKNOWN_2DWORDS,
   UNKNOWN_4DWORDS,
};
11907ec681f3Smrg
/* State block ids; a3xx_get_state_type() uses these as the first index of
 * its lookup table (the id comes from a 3-bit field of the packet header).
 */
enum adreno_state_block {
   SB_VERT_TEX = 0,
   SB_VERT_MIPADDR = 1,
   SB_FRAG_TEX = 2,
   SB_FRAG_MIPADDR = 3,
   SB_VERT_SHADER = 4,
   SB_GEOM_SHADER = 5,
   SB_FRAG_SHADER = 6,
   SB_COMPUTE_SHADER = 7,
};
12017ec681f3Smrg
12027ec681f3Smrg/* TODO there is probably a clever way to let rnndec parse things so
12037ec681f3Smrg * we don't have to care about packet format differences across gens
12047ec681f3Smrg */
12057ec681f3Smrg
12067ec681f3Smrgstatic void
12077ec681f3Smrga3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
12087ec681f3Smrg                    enum state_t *state, enum state_src_t *src)
12097ec681f3Smrg{
12107ec681f3Smrg   unsigned state_block_id = (dwords[0] >> 19) & 0x7;
12117ec681f3Smrg   unsigned state_type = dwords[1] & 0x3;
12127ec681f3Smrg   static const struct {
12137ec681f3Smrg      gl_shader_stage stage;
12147ec681f3Smrg      enum state_t state;
12157ec681f3Smrg   } lookup[0xf][0x3] = {
12167ec681f3Smrg      [SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
12177ec681f3Smrg      [SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},
12187ec681f3Smrg      [SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
12197ec681f3Smrg      [SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
12207ec681f3Smrg      [SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
12217ec681f3Smrg      [SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
12227ec681f3Smrg      [SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
12237ec681f3Smrg      [SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
12247ec681f3Smrg   };
12257ec681f3Smrg
12267ec681f3Smrg   *stage = lookup[state_block_id][state_type].stage;
12277ec681f3Smrg   *state = lookup[state_block_id][state_type].state;
12287ec681f3Smrg   unsigned state_src = (dwords[0] >> 16) & 0x7;
12297ec681f3Smrg   if (state_src == 0 /* SS_DIRECT */)
12307ec681f3Smrg      *src = STATE_SRC_DIRECT;
12317ec681f3Smrg   else
12327ec681f3Smrg      *src = STATE_SRC_INDIRECT;
12337ec681f3Smrg}
12347ec681f3Smrg
12357ec681f3Smrgstatic enum state_src_t
12367ec681f3Smrg_get_state_src(unsigned dword0)
12377ec681f3Smrg{
12387ec681f3Smrg   switch ((dword0 >> 16) & 0x3) {
12397ec681f3Smrg   case 0: /* SS4_DIRECT / SS6_DIRECT */
12407ec681f3Smrg      return STATE_SRC_DIRECT;
12417ec681f3Smrg   case 2: /* SS4_INDIRECT / SS6_INDIRECT */
12427ec681f3Smrg      return STATE_SRC_INDIRECT;
12437ec681f3Smrg   case 1: /* SS6_BINDLESS */
12447ec681f3Smrg      return STATE_SRC_BINDLESS;
12457ec681f3Smrg   default:
12467ec681f3Smrg      return STATE_SRC_DIRECT;
12477ec681f3Smrg   }
12487ec681f3Smrg}
12497ec681f3Smrg
12507ec681f3Smrgstatic void
12517ec681f3Smrg_get_state_type(unsigned state_block_id, unsigned state_type,
12527ec681f3Smrg                gl_shader_stage *stage, enum state_t *state)
12537ec681f3Smrg{
12547ec681f3Smrg   static const struct {
12557ec681f3Smrg      gl_shader_stage stage;
12567ec681f3Smrg      enum state_t state;
12577ec681f3Smrg   } lookup[0x10][0x4] = {
12587ec681f3Smrg      // SB4_VS_TEX:
12597ec681f3Smrg      [0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},
12607ec681f3Smrg      [0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},
12617ec681f3Smrg      [0x0][2] = {MESA_SHADER_VERTEX, UBO},
12627ec681f3Smrg      // SB4_HS_TEX:
12637ec681f3Smrg      [0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},
12647ec681f3Smrg      [0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},
12657ec681f3Smrg      [0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},
12667ec681f3Smrg      // SB4_DS_TEX:
12677ec681f3Smrg      [0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},
12687ec681f3Smrg      [0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},
12697ec681f3Smrg      [0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},
12707ec681f3Smrg      // SB4_GS_TEX:
12717ec681f3Smrg      [0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},
12727ec681f3Smrg      [0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},
12737ec681f3Smrg      [0x3][2] = {MESA_SHADER_GEOMETRY, UBO},
12747ec681f3Smrg      // SB4_FS_TEX:
12757ec681f3Smrg      [0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},
12767ec681f3Smrg      [0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},
12777ec681f3Smrg      [0x4][2] = {MESA_SHADER_FRAGMENT, UBO},
12787ec681f3Smrg      // SB4_CS_TEX:
12797ec681f3Smrg      [0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},
12807ec681f3Smrg      [0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},
12817ec681f3Smrg      [0x5][2] = {MESA_SHADER_COMPUTE, UBO},
12827ec681f3Smrg      // SB4_VS_SHADER:
12837ec681f3Smrg      [0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},
12847ec681f3Smrg      [0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},
12857ec681f3Smrg      [0x8][2] = {MESA_SHADER_VERTEX, UBO},
12867ec681f3Smrg      // SB4_HS_SHADER
12877ec681f3Smrg      [0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},
12887ec681f3Smrg      [0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},
12897ec681f3Smrg      [0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},
12907ec681f3Smrg      // SB4_DS_SHADER
12917ec681f3Smrg      [0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},
12927ec681f3Smrg      [0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},
12937ec681f3Smrg      [0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},
12947ec681f3Smrg      // SB4_GS_SHADER
12957ec681f3Smrg      [0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},
12967ec681f3Smrg      [0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},
12977ec681f3Smrg      [0xb][2] = {MESA_SHADER_GEOMETRY, UBO},
12987ec681f3Smrg      // SB4_FS_SHADER:
12997ec681f3Smrg      [0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},
13007ec681f3Smrg      [0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},
13017ec681f3Smrg      [0xc][2] = {MESA_SHADER_FRAGMENT, UBO},
13027ec681f3Smrg      // SB4_CS_SHADER:
13037ec681f3Smrg      [0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},
13047ec681f3Smrg      [0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},
13057ec681f3Smrg      [0xd][2] = {MESA_SHADER_COMPUTE, UBO},
13067ec681f3Smrg      [0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */
13077ec681f3Smrg      // SB4_SSBO (shared across all stages)
13087ec681f3Smrg      [0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */
13097ec681f3Smrg      [0xe][1] = {0, SSBO_1},
13107ec681f3Smrg      [0xe][2] = {0, SSBO_2},
13117ec681f3Smrg      // SB4_CS_SSBO
13127ec681f3Smrg      [0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},
13137ec681f3Smrg      [0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},
13147ec681f3Smrg      [0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},
13157ec681f3Smrg      // unknown things
13167ec681f3Smrg      /* This looks like combined UBO state for 3d stages (a5xx and
13177ec681f3Smrg       * before??  I think a6xx has UBO state per shader stage:
13187ec681f3Smrg       */
13197ec681f3Smrg      [0x6][2] = {0, UBO},
13207ec681f3Smrg      [0x7][1] = {0, UNKNOWN_2DWORDS},
13217ec681f3Smrg   };
13227ec681f3Smrg
13237ec681f3Smrg   *stage = lookup[state_block_id][state_type].stage;
13247ec681f3Smrg   *state = lookup[state_block_id][state_type].state;
13257ec681f3Smrg}
13267ec681f3Smrg
13277ec681f3Smrgstatic void
13287ec681f3Smrga4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
13297ec681f3Smrg                    enum state_t *state, enum state_src_t *src)
13307ec681f3Smrg{
13317ec681f3Smrg   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
13327ec681f3Smrg   unsigned state_type = dwords[1] & 0x3;
13337ec681f3Smrg   _get_state_type(state_block_id, state_type, stage, state);
13347ec681f3Smrg   *src = _get_state_src(dwords[0]);
13357ec681f3Smrg}
13367ec681f3Smrg
13377ec681f3Smrgstatic void
13387ec681f3Smrga6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,
13397ec681f3Smrg                    enum state_t *state, enum state_src_t *src)
13407ec681f3Smrg{
13417ec681f3Smrg   unsigned state_block_id = (dwords[0] >> 18) & 0xf;
13427ec681f3Smrg   unsigned state_type = (dwords[0] >> 14) & 0x3;
13437ec681f3Smrg   _get_state_type(state_block_id, state_type, stage, state);
13447ec681f3Smrg   *src = _get_state_src(dwords[0]);
13457ec681f3Smrg}
13467ec681f3Smrg
13477ec681f3Smrgstatic void
13487ec681f3Smrgdump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)
13497ec681f3Smrg{
13507ec681f3Smrg   for (int i = 0; i < num_unit; i++) {
13517ec681f3Smrg      /* work-around to reduce noise for opencl blob which always
13527ec681f3Smrg       * writes the max # regardless of # of textures used
13537ec681f3Smrg       */
13547ec681f3Smrg      if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))
13557ec681f3Smrg         break;
13567ec681f3Smrg
13577ec681f3Smrg      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
13587ec681f3Smrg         dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");
13597ec681f3Smrg         dump_hex(texsamp, 2, level + 1);
13607ec681f3Smrg         texsamp += 2;
13617ec681f3Smrg      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
13627ec681f3Smrg         dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");
13637ec681f3Smrg         dump_hex(texsamp, 2, level + 1);
13647ec681f3Smrg         texsamp += 2;
13657ec681f3Smrg      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
13667ec681f3Smrg         dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");
13677ec681f3Smrg         dump_hex(texsamp, 4, level + 1);
13687ec681f3Smrg         texsamp += 4;
13697ec681f3Smrg      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
13707ec681f3Smrg         dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");
13717ec681f3Smrg         dump_hex(texsamp, 4, level + 1);
13727ec681f3Smrg         texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;
13737ec681f3Smrg      }
13747ec681f3Smrg   }
13757ec681f3Smrg}
13767ec681f3Smrg
13777ec681f3Smrgstatic void
13787ec681f3Smrgdump_tex_const(uint32_t *texconst, int num_unit, int level)
13797ec681f3Smrg{
13807ec681f3Smrg   for (int i = 0; i < num_unit; i++) {
13817ec681f3Smrg      /* work-around to reduce noise for opencl blob which always
13827ec681f3Smrg       * writes the max # regardless of # of textures used
13837ec681f3Smrg       */
13847ec681f3Smrg      if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&
13857ec681f3Smrg          (texconst[2] == 0) && (texconst[3] == 0))
13867ec681f3Smrg         break;
13877ec681f3Smrg
13887ec681f3Smrg      if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {
13897ec681f3Smrg         dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");
13907ec681f3Smrg         dump_hex(texconst, 4, level + 1);
13917ec681f3Smrg         texconst += 4;
13927ec681f3Smrg      } else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {
13937ec681f3Smrg         dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");
13947ec681f3Smrg         if (options->dump_textures) {
13957ec681f3Smrg            uint32_t addr = texconst[4] & ~0x1f;
13967ec681f3Smrg            dump_gpuaddr(addr, level - 2);
13977ec681f3Smrg         }
13987ec681f3Smrg         dump_hex(texconst, 8, level + 1);
13997ec681f3Smrg         texconst += 8;
14007ec681f3Smrg      } else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
14017ec681f3Smrg         dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");
14027ec681f3Smrg         if (options->dump_textures) {
14037ec681f3Smrg            uint64_t addr =
14047ec681f3Smrg               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
14057ec681f3Smrg            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
14067ec681f3Smrg         }
14077ec681f3Smrg         dump_hex(texconst, 12, level + 1);
14087ec681f3Smrg         texconst += 12;
14097ec681f3Smrg      } else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {
14107ec681f3Smrg         dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");
14117ec681f3Smrg         if (options->dump_textures) {
14127ec681f3Smrg            uint64_t addr =
14137ec681f3Smrg               (((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];
14147ec681f3Smrg            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
14157ec681f3Smrg         }
14167ec681f3Smrg         dump_hex(texconst, 16, level + 1);
14177ec681f3Smrg         texconst += 16;
14187ec681f3Smrg      }
14197ec681f3Smrg   }
14207ec681f3Smrg}
14217ec681f3Smrg
14227ec681f3Smrgstatic void
14237ec681f3Smrgcp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)
14247ec681f3Smrg{
14257ec681f3Smrg   gl_shader_stage stage;
14267ec681f3Smrg   enum state_t state;
14277ec681f3Smrg   enum state_src_t src;
14287ec681f3Smrg   uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;
14297ec681f3Smrg   uint64_t ext_src_addr;
14307ec681f3Smrg   void *contents;
14317ec681f3Smrg   int i;
14327ec681f3Smrg
14337ec681f3Smrg   if (quiet(2) && !options->script)
14347ec681f3Smrg      return;
14357ec681f3Smrg
14367ec681f3Smrg   if (options->gpu_id >= 600)
14377ec681f3Smrg      a6xx_get_state_type(dwords, &stage, &state, &src);
14387ec681f3Smrg   else if (options->gpu_id >= 400)
14397ec681f3Smrg      a4xx_get_state_type(dwords, &stage, &state, &src);
14407ec681f3Smrg   else
14417ec681f3Smrg      a3xx_get_state_type(dwords, &stage, &state, &src);
14427ec681f3Smrg
14437ec681f3Smrg   switch (src) {
14447ec681f3Smrg   case STATE_SRC_DIRECT:
14457ec681f3Smrg      ext_src_addr = 0;
14467ec681f3Smrg      break;
14477ec681f3Smrg   case STATE_SRC_INDIRECT:
14487ec681f3Smrg      if (is_64b()) {
14497ec681f3Smrg         ext_src_addr = dwords[1] & 0xfffffffc;
14507ec681f3Smrg         ext_src_addr |= ((uint64_t)dwords[2]) << 32;
14517ec681f3Smrg      } else {
14527ec681f3Smrg         ext_src_addr = dwords[1] & 0xfffffffc;
14537ec681f3Smrg      }
14547ec681f3Smrg
14557ec681f3Smrg      break;
14567ec681f3Smrg   case STATE_SRC_BINDLESS: {
14577ec681f3Smrg      const unsigned base_reg = stage == MESA_SHADER_COMPUTE
14587ec681f3Smrg                                   ? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")
14597ec681f3Smrg                                   : regbase("HLSQ_BINDLESS_BASE[0].ADDR");
14607ec681f3Smrg
14617ec681f3Smrg      if (is_64b()) {
14627ec681f3Smrg         const unsigned reg = base_reg + (dwords[1] >> 28) * 2;
14637ec681f3Smrg         ext_src_addr = reg_val(reg) & 0xfffffffc;
14647ec681f3Smrg         ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;
14657ec681f3Smrg      } else {
14667ec681f3Smrg         const unsigned reg = base_reg + (dwords[1] >> 28);
14677ec681f3Smrg         ext_src_addr = reg_val(reg) & 0xfffffffc;
14687ec681f3Smrg      }
14697ec681f3Smrg
14707ec681f3Smrg      ext_src_addr += 4 * (dwords[1] & 0xffffff);
14717ec681f3Smrg      break;
14727ec681f3Smrg   }
14737ec681f3Smrg   }
14747ec681f3Smrg
14757ec681f3Smrg   if (ext_src_addr)
14767ec681f3Smrg      contents = hostptr(ext_src_addr);
14777ec681f3Smrg   else
14787ec681f3Smrg      contents = is_64b() ? dwords + 3 : dwords + 2;
14797ec681f3Smrg
14807ec681f3Smrg   if (!contents)
14817ec681f3Smrg      return;
14827ec681f3Smrg
14837ec681f3Smrg   switch (state) {
14847ec681f3Smrg   case SHADER_PROG: {
14857ec681f3Smrg      const char *ext = NULL;
14867ec681f3Smrg
14877ec681f3Smrg      if (quiet(2))
14887ec681f3Smrg         return;
14897ec681f3Smrg
14907ec681f3Smrg      if (options->gpu_id >= 400)
14917ec681f3Smrg         num_unit *= 16;
14927ec681f3Smrg      else if (options->gpu_id >= 300)
14937ec681f3Smrg         num_unit *= 4;
14947ec681f3Smrg
14957ec681f3Smrg      /* shaders:
14967ec681f3Smrg       *
14977ec681f3Smrg       * note: num_unit seems to be # of instruction groups, where
14987ec681f3Smrg       * an instruction group has 4 64bit instructions.
14997ec681f3Smrg       */
15007ec681f3Smrg      if (stage == MESA_SHADER_VERTEX) {
15017ec681f3Smrg         ext = "vo3";
15027ec681f3Smrg      } else if (stage == MESA_SHADER_GEOMETRY) {
15037ec681f3Smrg         ext = "go3";
15047ec681f3Smrg      } else if (stage == MESA_SHADER_COMPUTE) {
15057ec681f3Smrg         ext = "co3";
15067ec681f3Smrg      } else if (stage == MESA_SHADER_FRAGMENT) {
15077ec681f3Smrg         ext = "fo3";
15087ec681f3Smrg      }
15097ec681f3Smrg
15107ec681f3Smrg      if (contents)
15117ec681f3Smrg         try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,
15127ec681f3Smrg                         options->gpu_id);
15137ec681f3Smrg
15147ec681f3Smrg      /* dump raw shader: */
15157ec681f3Smrg      if (ext)
15167ec681f3Smrg         dump_shader(ext, contents, num_unit * 2 * 4);
15177ec681f3Smrg
15187ec681f3Smrg      break;
15197ec681f3Smrg   }
15207ec681f3Smrg   case SHADER_CONST: {
15217ec681f3Smrg      if (quiet(2))
15227ec681f3Smrg         return;
15237ec681f3Smrg
15247ec681f3Smrg      /* uniforms/consts:
15257ec681f3Smrg       *
15267ec681f3Smrg       * note: num_unit seems to be # of pairs of dwords??
15277ec681f3Smrg       */
15287ec681f3Smrg
15297ec681f3Smrg      if (options->gpu_id >= 400)
15307ec681f3Smrg         num_unit *= 2;
15317ec681f3Smrg
15327ec681f3Smrg      dump_float(contents, num_unit * 2, level + 1);
15337ec681f3Smrg      dump_hex(contents, num_unit * 2, level + 1);
15347ec681f3Smrg
15357ec681f3Smrg      break;
15367ec681f3Smrg   }
15377ec681f3Smrg   case TEX_MIPADDR: {
15387ec681f3Smrg      uint32_t *addrs = contents;
15397ec681f3Smrg
15407ec681f3Smrg      if (quiet(2))
15417ec681f3Smrg         return;
15427ec681f3Smrg
15437ec681f3Smrg      /* mipmap consts block just appears to be array of num_unit gpu addr's: */
15447ec681f3Smrg      for (i = 0; i < num_unit; i++) {
15457ec681f3Smrg         void *ptr = hostptr(addrs[i]);
15467ec681f3Smrg         printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);
15477ec681f3Smrg         if (options->dump_textures) {
15487ec681f3Smrg            printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));
15497ec681f3Smrg            dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);
15507ec681f3Smrg         }
15517ec681f3Smrg      }
15527ec681f3Smrg      break;
15537ec681f3Smrg   }
15547ec681f3Smrg   case TEX_SAMP: {
15557ec681f3Smrg      dump_tex_samp(contents, src, num_unit, level);
15567ec681f3Smrg      break;
15577ec681f3Smrg   }
15587ec681f3Smrg   case TEX_CONST: {
15597ec681f3Smrg      dump_tex_const(contents, num_unit, level);
15607ec681f3Smrg      break;
15617ec681f3Smrg   }
15627ec681f3Smrg   case SSBO_0: {
15637ec681f3Smrg      uint32_t *ssboconst = (uint32_t *)contents;
15647ec681f3Smrg
15657ec681f3Smrg      for (i = 0; i < num_unit; i++) {
15667ec681f3Smrg         int sz = 4;
15677ec681f3Smrg         if (400 <= options->gpu_id && options->gpu_id < 500) {
15687ec681f3Smrg            dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");
15697ec681f3Smrg         } else if (500 <= options->gpu_id && options->gpu_id < 600) {
15707ec681f3Smrg            dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");
15717ec681f3Smrg         } else if (600 <= options->gpu_id && options->gpu_id < 700) {
15727ec681f3Smrg            sz = 16;
15737ec681f3Smrg            dump_domain(ssboconst, 16, level + 2, "A6XX_IBO");
15747ec681f3Smrg         }
15757ec681f3Smrg         dump_hex(ssboconst, sz, level + 1);
15767ec681f3Smrg         ssboconst += sz;
15777ec681f3Smrg      }
15787ec681f3Smrg      break;
15797ec681f3Smrg   }
15807ec681f3Smrg   case SSBO_1: {
15817ec681f3Smrg      uint32_t *ssboconst = (uint32_t *)contents;
15827ec681f3Smrg
15837ec681f3Smrg      for (i = 0; i < num_unit; i++) {
15847ec681f3Smrg         if (400 <= options->gpu_id && options->gpu_id < 500)
15857ec681f3Smrg            dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");
15867ec681f3Smrg         else if (500 <= options->gpu_id && options->gpu_id < 600)
15877ec681f3Smrg            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");
15887ec681f3Smrg         dump_hex(ssboconst, 2, level + 1);
15897ec681f3Smrg         ssboconst += 2;
15907ec681f3Smrg      }
15917ec681f3Smrg      break;
15927ec681f3Smrg   }
15937ec681f3Smrg   case SSBO_2: {
15947ec681f3Smrg      uint32_t *ssboconst = (uint32_t *)contents;
15957ec681f3Smrg
15967ec681f3Smrg      for (i = 0; i < num_unit; i++) {
15977ec681f3Smrg         /* TODO a4xx and a5xx might be same: */
15987ec681f3Smrg         if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {
15997ec681f3Smrg            dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");
16007ec681f3Smrg            dump_hex(ssboconst, 2, level + 1);
16017ec681f3Smrg         }
16027ec681f3Smrg         if (options->dump_textures) {
16037ec681f3Smrg            uint64_t addr =
16047ec681f3Smrg               (((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];
16057ec681f3Smrg            dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);
16067ec681f3Smrg         }
16077ec681f3Smrg         ssboconst += 2;
16087ec681f3Smrg      }
16097ec681f3Smrg      break;
16107ec681f3Smrg   }
16117ec681f3Smrg   case UBO: {
16127ec681f3Smrg      uint32_t *uboconst = (uint32_t *)contents;
16137ec681f3Smrg
16147ec681f3Smrg      for (i = 0; i < num_unit; i++) {
16157ec681f3Smrg         // TODO probably similar on a4xx..
16167ec681f3Smrg         if (500 <= options->gpu_id && options->gpu_id < 600)
16177ec681f3Smrg            dump_domain(uboconst, 2, level + 2, "A5XX_UBO");
16187ec681f3Smrg         else if (600 <= options->gpu_id && options->gpu_id < 700)
16197ec681f3Smrg            dump_domain(uboconst, 2, level + 2, "A6XX_UBO");
16207ec681f3Smrg         dump_hex(uboconst, 2, level + 1);
16217ec681f3Smrg         uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;
16227ec681f3Smrg      }
16237ec681f3Smrg      break;
16247ec681f3Smrg   }
16257ec681f3Smrg   case UNKNOWN_DWORDS: {
16267ec681f3Smrg      if (quiet(2))
16277ec681f3Smrg         return;
16287ec681f3Smrg      dump_hex(contents, num_unit, level + 1);
16297ec681f3Smrg      break;
16307ec681f3Smrg   }
16317ec681f3Smrg   case UNKNOWN_2DWORDS: {
16327ec681f3Smrg      if (quiet(2))
16337ec681f3Smrg         return;
16347ec681f3Smrg      dump_hex(contents, num_unit * 2, level + 1);
16357ec681f3Smrg      break;
16367ec681f3Smrg   }
16377ec681f3Smrg   case UNKNOWN_4DWORDS: {
16387ec681f3Smrg      if (quiet(2))
16397ec681f3Smrg         return;
16407ec681f3Smrg      dump_hex(contents, num_unit * 4, level + 1);
16417ec681f3Smrg      break;
16427ec681f3Smrg   }
16437ec681f3Smrg   default:
16447ec681f3Smrg      if (quiet(2))
16457ec681f3Smrg         return;
16467ec681f3Smrg      /* hmm.. */
16477ec681f3Smrg      dump_hex(contents, num_unit, level + 1);
16487ec681f3Smrg      break;
16497ec681f3Smrg   }
16507ec681f3Smrg}
16517ec681f3Smrg
16527ec681f3Smrgstatic void
16537ec681f3Smrgcp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)
16547ec681f3Smrg{
16557ec681f3Smrg   bin_x1 = dwords[1] & 0xffff;
16567ec681f3Smrg   bin_y1 = dwords[1] >> 16;
16577ec681f3Smrg   bin_x2 = dwords[2] & 0xffff;
16587ec681f3Smrg   bin_y2 = dwords[2] >> 16;
16597ec681f3Smrg}
16607ec681f3Smrg
16617ec681f3Smrgstatic void
16627ec681f3Smrgdump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
16637ec681f3Smrg                    int level)
16647ec681f3Smrg{
16657ec681f3Smrg   uint32_t w, h, p;
16667ec681f3Smrg   uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;
16677ec681f3Smrg   uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;
16687ec681f3Smrg   static const char *filter[] = {
16697ec681f3Smrg      "point",
16707ec681f3Smrg      "bilinear",
16717ec681f3Smrg      "bicubic",
16727ec681f3Smrg   };
16737ec681f3Smrg   static const char *clamp[] = {
16747ec681f3Smrg      "wrap",
16757ec681f3Smrg      "mirror",
16767ec681f3Smrg      "clamp-last-texel",
16777ec681f3Smrg   };
16787ec681f3Smrg   static const char swiznames[] = "xyzw01??";
16797ec681f3Smrg
16807ec681f3Smrg   /* see sys2gmem_tex_const[] in adreno_a2xxx.c */
16817ec681f3Smrg
16827ec681f3Smrg   /* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,
16837ec681f3Smrg    * RFMode=ZeroClamp-1, Dim=1:2d, pitch
16847ec681f3Smrg    */
16857ec681f3Smrg   p = (dwords[0] >> 22) << 5;
16867ec681f3Smrg   clamp_x = (dwords[0] >> 10) & 0x3;
16877ec681f3Smrg   clamp_y = (dwords[0] >> 13) & 0x3;
16887ec681f3Smrg   clamp_z = (dwords[0] >> 16) & 0x3;
16897ec681f3Smrg
16907ec681f3Smrg   /* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,
16917ec681f3Smrg    * NearestClamp=1:OGL Mode
16927ec681f3Smrg    */
16937ec681f3Smrg   parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);
16947ec681f3Smrg
16957ec681f3Smrg   /* Width, Height, EndianSwap=0:None */
16967ec681f3Smrg   w = (dwords[2] & 0x1fff) + 1;
16977ec681f3Smrg   h = ((dwords[2] >> 13) & 0x1fff) + 1;
16987ec681f3Smrg
16997ec681f3Smrg   /* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,
17007ec681f3Smrg    * Mip=2:BaseMap
17017ec681f3Smrg    */
17027ec681f3Smrg   mag = (dwords[3] >> 19) & 0x3;
17037ec681f3Smrg   min = (dwords[3] >> 21) & 0x3;
17047ec681f3Smrg   swiz = (dwords[3] >> 1) & 0xfff;
17057ec681f3Smrg
17067ec681f3Smrg   /* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,
17077ec681f3Smrg    * Dim3d=0
17087ec681f3Smrg    */
17097ec681f3Smrg   // XXX
17107ec681f3Smrg
17117ec681f3Smrg   /* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,
17127ec681f3Smrg    * Dim=1:2d, MipPacking=0
17137ec681f3Smrg    */
17147ec681f3Smrg   parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);
17157ec681f3Smrg
17167ec681f3Smrg   printf("%sset texture const %04x\n", levels[level], val);
17177ec681f3Smrg   printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],
17187ec681f3Smrg          clamp[clamp_y], clamp[clamp_z]);
17197ec681f3Smrg   printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],
17207ec681f3Smrg          filter[mag]);
17217ec681f3Smrg   printf("%sswizzle: %c%c%c%c\n", levels[level + 1],
17227ec681f3Smrg          swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],
17237ec681f3Smrg          swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);
17247ec681f3Smrg   printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",
17257ec681f3Smrg          levels[level + 1], gpuaddr, flags, w, h, p,
17267ec681f3Smrg          rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));
17277ec681f3Smrg   printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,
17287ec681f3Smrg          mip_flags);
17297ec681f3Smrg}
17307ec681f3Smrg
17317ec681f3Smrgstatic void
17327ec681f3Smrgdump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,
17337ec681f3Smrg                       int level)
17347ec681f3Smrg{
17357ec681f3Smrg   int i;
17367ec681f3Smrg   printf("%sset shader const %04x\n", levels[level], val);
17377ec681f3Smrg   for (i = 0; i < sizedwords;) {
17387ec681f3Smrg      uint32_t gpuaddr, flags;
17397ec681f3Smrg      parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);
17407ec681f3Smrg      void *addr = hostptr(gpuaddr);
17417ec681f3Smrg      if (addr) {
17427ec681f3Smrg         const char *fmt =
17437ec681f3Smrg            rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);
17447ec681f3Smrg         uint32_t size = dwords[i++];
17457ec681f3Smrg         printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,
17467ec681f3Smrg                size, fmt);
17477ec681f3Smrg         // TODO maybe dump these as bytes instead of dwords?
17487ec681f3Smrg         size = (size + 3) / 4; // for now convert to dwords
17497ec681f3Smrg         dump_hex(addr, min(size, 64), level + 1);
17507ec681f3Smrg         if (size > min(size, 64))
17517ec681f3Smrg            printf("%s\t\t...\n", levels[level + 1]);
17527ec681f3Smrg         dump_float(addr, min(size, 64), level + 1);
17537ec681f3Smrg         if (size > min(size, 64))
17547ec681f3Smrg            printf("%s\t\t...\n", levels[level + 1]);
17557ec681f3Smrg      }
17567ec681f3Smrg   }
17577ec681f3Smrg}
17587ec681f3Smrg
17597ec681f3Smrgstatic void
17607ec681f3Smrgcp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)
17617ec681f3Smrg{
17627ec681f3Smrg   uint32_t val = dwords[0] & 0xffff;
17637ec681f3Smrg   switch ((dwords[0] >> 16) & 0xf) {
17647ec681f3Smrg   case 0x0:
17657ec681f3Smrg      dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);
17667ec681f3Smrg      break;
17677ec681f3Smrg   case 0x1:
17687ec681f3Smrg      /* need to figure out how const space is partitioned between
17697ec681f3Smrg       * attributes, textures, etc..
17707ec681f3Smrg       */
17717ec681f3Smrg      if (val < 0x78) {
17727ec681f3Smrg         dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);
17737ec681f3Smrg      } else {
17747ec681f3Smrg         dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);
17757ec681f3Smrg      }
17767ec681f3Smrg      break;
17777ec681f3Smrg   case 0x2:
17787ec681f3Smrg      printf("%sset bool const %04x\n", levels[level], val);
17797ec681f3Smrg      break;
17807ec681f3Smrg   case 0x3:
17817ec681f3Smrg      printf("%sset loop const %04x\n", levels[level], val);
17827ec681f3Smrg      break;
17837ec681f3Smrg   case 0x4:
17847ec681f3Smrg      val += 0x2000;
17857ec681f3Smrg      if (dwords[0] & 0x80000000) {
17867ec681f3Smrg         uint32_t srcreg = dwords[1];
17877ec681f3Smrg         uint32_t dstval = dwords[2];
17887ec681f3Smrg
17897ec681f3Smrg         /* TODO: not sure what happens w/ payload != 2.. */
17907ec681f3Smrg         assert(sizedwords == 3);
17917ec681f3Smrg         assert(srcreg < ARRAY_SIZE(type0_reg_vals));
17927ec681f3Smrg
17937ec681f3Smrg         /* note: rnn_regname uses a static buf so we can't do
17947ec681f3Smrg          * two regname() calls for one printf..
17957ec681f3Smrg          */
17967ec681f3Smrg         printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);
17977ec681f3Smrg         printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);
17987ec681f3Smrg
17997ec681f3Smrg         dstval += type0_reg_vals[srcreg];
18007ec681f3Smrg
18017ec681f3Smrg         dump_registers(val, &dstval, 1, level + 1);
18027ec681f3Smrg      } else {
18037ec681f3Smrg         dump_registers(val, dwords + 1, sizedwords - 1, level + 1);
18047ec681f3Smrg      }
18057ec681f3Smrg      break;
18067ec681f3Smrg   }
18077ec681f3Smrg}
18087ec681f3Smrg
18097ec681f3Smrgstatic void dump_register_summary(int level);
18107ec681f3Smrg
18117ec681f3Smrgstatic void
18127ec681f3Smrgcp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)
18137ec681f3Smrg{
18147ec681f3Smrg   const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);
18157ec681f3Smrg   printl(2, "%sevent %s\n", levels[level], name);
18167ec681f3Smrg
18177ec681f3Smrg   if (name && (options->gpu_id > 500)) {
18187ec681f3Smrg      char eventname[64];
18197ec681f3Smrg      snprintf(eventname, sizeof(eventname), "EVENT:%s", name);
18207ec681f3Smrg      if (!strcmp(name, "BLIT")) {
18217ec681f3Smrg         do_query(eventname, 0);
18227ec681f3Smrg         print_mode(level);
18237ec681f3Smrg         dump_register_summary(level);
18247ec681f3Smrg      }
18257ec681f3Smrg   }
18267ec681f3Smrg}
18277ec681f3Smrg
18287ec681f3Smrgstatic void
18297ec681f3Smrgdump_register_summary(int level)
18307ec681f3Smrg{
18317ec681f3Smrg   uint32_t i;
18327ec681f3Smrg   bool saved_summary = summary;
18337ec681f3Smrg   summary = false;
18347ec681f3Smrg
18357ec681f3Smrg   in_summary = true;
18367ec681f3Smrg
18377ec681f3Smrg   /* dump current state of registers: */
18387ec681f3Smrg   printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);
18397ec681f3Smrg   for (i = 0; i < regcnt(); i++) {
18407ec681f3Smrg      uint32_t regbase = i;
18417ec681f3Smrg      uint32_t lastval = reg_val(regbase);
18427ec681f3Smrg      /* skip registers that haven't been updated since last draw/blit: */
18437ec681f3Smrg      if (!(options->allregs || reg_rewritten(regbase)))
18447ec681f3Smrg         continue;
18457ec681f3Smrg      if (!reg_written(regbase))
18467ec681f3Smrg         continue;
18477ec681f3Smrg      if (lastval != lastvals[regbase]) {
18487ec681f3Smrg         printl(2, "!");
18497ec681f3Smrg         lastvals[regbase] = lastval;
18507ec681f3Smrg      } else {
18517ec681f3Smrg         printl(2, " ");
18527ec681f3Smrg      }
18537ec681f3Smrg      if (reg_rewritten(regbase)) {
18547ec681f3Smrg         printl(2, "+");
18557ec681f3Smrg      } else {
18567ec681f3Smrg         printl(2, " ");
18577ec681f3Smrg      }
18587ec681f3Smrg      printl(2, "\t%08x", lastval);
18597ec681f3Smrg      if (!quiet(2)) {
18607ec681f3Smrg         dump_register(regbase, lastval, level);
18617ec681f3Smrg      }
18627ec681f3Smrg   }
18637ec681f3Smrg
18647ec681f3Smrg   clear_rewritten();
18657ec681f3Smrg
18667ec681f3Smrg   in_summary = false;
18677ec681f3Smrg
18687ec681f3Smrg   draw_count++;
18697ec681f3Smrg   summary = saved_summary;
18707ec681f3Smrg}
18717ec681f3Smrg
18727ec681f3Smrgstatic uint32_t
18737ec681f3Smrgdraw_indx_common(uint32_t *dwords, int level)
18747ec681f3Smrg{
18757ec681f3Smrg   uint32_t prim_type = dwords[1] & 0x1f;
18767ec681f3Smrg   uint32_t source_select = (dwords[1] >> 6) & 0x3;
18777ec681f3Smrg   uint32_t num_indices = dwords[2];
18787ec681f3Smrg   const char *primtype;
18797ec681f3Smrg
18807ec681f3Smrg   primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);
18817ec681f3Smrg
18827ec681f3Smrg   do_query(primtype, num_indices);
18837ec681f3Smrg
18847ec681f3Smrg   printl(2, "%sdraw:          %d\n", levels[level], draws[ib]);
18857ec681f3Smrg   printl(2, "%sprim_type:     %s (%d)\n", levels[level], primtype, prim_type);
18867ec681f3Smrg   printl(2, "%ssource_select: %s (%d)\n", levels[level],
18877ec681f3Smrg          rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);
18887ec681f3Smrg   printl(2, "%snum_indices:   %d\n", levels[level], num_indices);
18897ec681f3Smrg
18907ec681f3Smrg   vertices += num_indices;
18917ec681f3Smrg
18927ec681f3Smrg   draws[ib]++;
18937ec681f3Smrg
18947ec681f3Smrg   return num_indices;
18957ec681f3Smrg}
18967ec681f3Smrg
/*
 * Index element width used by the indexed-draw handlers.
 *
 * cp_draw_indx() and cp_draw_indx_2() assemble this value from two bits of
 * dwords[1]: bit 11 becomes bit 0 and bit 13 becomes bit 1.
 *
 * INDEX_SIZE_IGN, INDEX_SIZE_16_BIT and INDEX_SIZE_INVALID all share the
 * value 0, so they cannot be told apart once decoded; a comparison against
 * INDEX_SIZE_16_BIT also matches the other two.
 */
enum pc_di_index_size {
   INDEX_SIZE_IGN = 0,
   INDEX_SIZE_16_BIT = 0,
   INDEX_SIZE_32_BIT = 1,
   INDEX_SIZE_8_BIT = 2,
   INDEX_SIZE_INVALID = 0,
};
19047ec681f3Smrg
19057ec681f3Smrgstatic void
19067ec681f3Smrgcp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)
19077ec681f3Smrg{
19087ec681f3Smrg   uint32_t num_indices = draw_indx_common(dwords, level);
19097ec681f3Smrg
19107ec681f3Smrg   assert(!is_64b());
19117ec681f3Smrg
19127ec681f3Smrg   /* if we have an index buffer, dump that: */
19137ec681f3Smrg   if (sizedwords == 5) {
19147ec681f3Smrg      void *ptr = hostptr(dwords[3]);
19157ec681f3Smrg      printl(2, "%sgpuaddr:       %08x\n", levels[level], dwords[3]);
19167ec681f3Smrg      printl(2, "%sidx_size:      %d\n", levels[level], dwords[4]);
19177ec681f3Smrg      if (ptr) {
19187ec681f3Smrg         enum pc_di_index_size size =
19197ec681f3Smrg            ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
19207ec681f3Smrg         if (!quiet(2)) {
19217ec681f3Smrg            int i;
19227ec681f3Smrg            printf("%sidxs:         ", levels[level]);
19237ec681f3Smrg            if (size == INDEX_SIZE_8_BIT) {
19247ec681f3Smrg               uint8_t *idx = ptr;
19257ec681f3Smrg               for (i = 0; i < dwords[4]; i++)
19267ec681f3Smrg                  printf(" %u", idx[i]);
19277ec681f3Smrg            } else if (size == INDEX_SIZE_16_BIT) {
19287ec681f3Smrg               uint16_t *idx = ptr;
19297ec681f3Smrg               for (i = 0; i < dwords[4] / 2; i++)
19307ec681f3Smrg                  printf(" %u", idx[i]);
19317ec681f3Smrg            } else if (size == INDEX_SIZE_32_BIT) {
19327ec681f3Smrg               uint32_t *idx = ptr;
19337ec681f3Smrg               for (i = 0; i < dwords[4] / 4; i++)
19347ec681f3Smrg                  printf(" %u", idx[i]);
19357ec681f3Smrg            }
19367ec681f3Smrg            printf("\n");
19377ec681f3Smrg            dump_hex(ptr, dwords[4] / 4, level + 1);
19387ec681f3Smrg         }
19397ec681f3Smrg      }
19407ec681f3Smrg   }
19417ec681f3Smrg
19427ec681f3Smrg   /* don't bother dumping registers for the dummy draw_indx's.. */
19437ec681f3Smrg   if (num_indices > 0)
19447ec681f3Smrg      dump_register_summary(level);
19457ec681f3Smrg
19467ec681f3Smrg   needs_wfi = true;
19477ec681f3Smrg}
19487ec681f3Smrg
19497ec681f3Smrgstatic void
19507ec681f3Smrgcp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)
19517ec681f3Smrg{
19527ec681f3Smrg   uint32_t num_indices = draw_indx_common(dwords, level);
19537ec681f3Smrg   enum pc_di_index_size size =
19547ec681f3Smrg      ((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);
19557ec681f3Smrg   void *ptr = &dwords[3];
19567ec681f3Smrg   int sz = 0;
19577ec681f3Smrg
19587ec681f3Smrg   assert(!is_64b());
19597ec681f3Smrg
19607ec681f3Smrg   /* CP_DRAW_INDX_2 has embedded/inline idx buffer: */
19617ec681f3Smrg   if (!quiet(2)) {
19627ec681f3Smrg      int i;
19637ec681f3Smrg      printf("%sidxs:         ", levels[level]);
19647ec681f3Smrg      if (size == INDEX_SIZE_8_BIT) {
19657ec681f3Smrg         uint8_t *idx = ptr;
19667ec681f3Smrg         for (i = 0; i < num_indices; i++)
19677ec681f3Smrg            printf(" %u", idx[i]);
19687ec681f3Smrg         sz = num_indices;
19697ec681f3Smrg      } else if (size == INDEX_SIZE_16_BIT) {
19707ec681f3Smrg         uint16_t *idx = ptr;
19717ec681f3Smrg         for (i = 0; i < num_indices; i++)
19727ec681f3Smrg            printf(" %u", idx[i]);
19737ec681f3Smrg         sz = num_indices * 2;
19747ec681f3Smrg      } else if (size == INDEX_SIZE_32_BIT) {
19757ec681f3Smrg         uint32_t *idx = ptr;
19767ec681f3Smrg         for (i = 0; i < num_indices; i++)
19777ec681f3Smrg            printf(" %u", idx[i]);
19787ec681f3Smrg         sz = num_indices * 4;
19797ec681f3Smrg      }
19807ec681f3Smrg      printf("\n");
19817ec681f3Smrg      dump_hex(ptr, sz / 4, level + 1);
19827ec681f3Smrg   }
19837ec681f3Smrg
19847ec681f3Smrg   /* don't bother dumping registers for the dummy draw_indx's.. */
19857ec681f3Smrg   if (num_indices > 0)
19867ec681f3Smrg      dump_register_summary(level);
19877ec681f3Smrg}
19887ec681f3Smrg
19897ec681f3Smrgstatic void
19907ec681f3Smrgcp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)
19917ec681f3Smrg{
19927ec681f3Smrg   uint32_t num_indices = dwords[2];
19937ec681f3Smrg   uint32_t prim_type = dwords[0] & 0x1f;
19947ec681f3Smrg
19957ec681f3Smrg   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);
19967ec681f3Smrg   print_mode(level);
19977ec681f3Smrg
19987ec681f3Smrg   /* don't bother dumping registers for the dummy draw_indx's.. */
19997ec681f3Smrg   if (num_indices > 0)
20007ec681f3Smrg      dump_register_summary(level);
20017ec681f3Smrg}
20027ec681f3Smrg
20037ec681f3Smrgstatic void
20047ec681f3Smrgcp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
20057ec681f3Smrg{
20067ec681f3Smrg   uint32_t prim_type = dwords[0] & 0x1f;
20077ec681f3Smrg   uint64_t addr;
20087ec681f3Smrg
20097ec681f3Smrg   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
20107ec681f3Smrg   print_mode(level);
20117ec681f3Smrg
20127ec681f3Smrg   if (is_64b())
20137ec681f3Smrg      addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
20147ec681f3Smrg   else
20157ec681f3Smrg      addr = dwords[1];
20167ec681f3Smrg   dump_gpuaddr_size(addr, level, 0x10, 2);
20177ec681f3Smrg
20187ec681f3Smrg   if (is_64b())
20197ec681f3Smrg      addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];
20207ec681f3Smrg   else
20217ec681f3Smrg      addr = dwords[3];
20227ec681f3Smrg   dump_gpuaddr_size(addr, level, 0x10, 2);
20237ec681f3Smrg
20247ec681f3Smrg   dump_register_summary(level);
20257ec681f3Smrg}
20267ec681f3Smrg
20277ec681f3Smrgstatic void
20287ec681f3Smrgcp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
20297ec681f3Smrg{
20307ec681f3Smrg   uint32_t prim_type = dwords[0] & 0x1f;
20317ec681f3Smrg   uint64_t addr;
20327ec681f3Smrg
20337ec681f3Smrg   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
20347ec681f3Smrg   print_mode(level);
20357ec681f3Smrg
20367ec681f3Smrg   addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
20377ec681f3Smrg   dump_gpuaddr_size(addr, level, 0x10, 2);
20387ec681f3Smrg
20397ec681f3Smrg   dump_register_summary(level);
20407ec681f3Smrg}
20417ec681f3Smrg
20427ec681f3Smrgstatic void
20437ec681f3Smrgcp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)
20447ec681f3Smrg{
20457ec681f3Smrg   uint32_t prim_type = dwords[0] & 0x1f;
20467ec681f3Smrg   uint32_t count = dwords[2];
20477ec681f3Smrg
20487ec681f3Smrg   do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);
20497ec681f3Smrg   print_mode(level);
20507ec681f3Smrg
20517ec681f3Smrg   struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");
20527ec681f3Smrg   uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");
20537ec681f3Smrg   uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");
20547ec681f3Smrg   uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");
20557ec681f3Smrg
20567ec681f3Smrg   if (count_dword) {
20577ec681f3Smrg      uint64_t count_addr =
20587ec681f3Smrg         ((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];
20597ec681f3Smrg      uint32_t *buf = hostptr(count_addr);
20607ec681f3Smrg
20617ec681f3Smrg      /* Don't print more draws than this if we don't know the indirect
20627ec681f3Smrg       * count. It's possible the user will give ~0 or some other large
20637ec681f3Smrg       * value, expecting the GPU to fill in the draw count, and we don't
20647ec681f3Smrg       * want to print a gazillion draws in that case:
20657ec681f3Smrg       */
20667ec681f3Smrg      const uint32_t max_draw_count = 0x100;
20677ec681f3Smrg
20687ec681f3Smrg      /* Assume the indirect count is garbage if it's larger than this
20697ec681f3Smrg       * (quite large) value or 0. Hopefully this catches most cases.
20707ec681f3Smrg       */
20717ec681f3Smrg      const uint32_t max_indirect_draw_count = 0x10000;
20727ec681f3Smrg
20737ec681f3Smrg      if (buf) {
20747ec681f3Smrg         printf("%sindirect count: %u\n", levels[level], *buf);
20757ec681f3Smrg         if (*buf == 0 || *buf > max_indirect_draw_count) {
20767ec681f3Smrg            /* garbage value */
20777ec681f3Smrg            count = min(count, max_draw_count);
20787ec681f3Smrg         } else {
20797ec681f3Smrg            /* not garbage */
20807ec681f3Smrg            count = min(count, *buf);
20817ec681f3Smrg         }
20827ec681f3Smrg      } else {
20837ec681f3Smrg         count = min(count, max_draw_count);
20847ec681f3Smrg      }
20857ec681f3Smrg   }
20867ec681f3Smrg
20877ec681f3Smrg   if (addr_dword && stride_dword) {
20887ec681f3Smrg      uint64_t addr =
20897ec681f3Smrg         ((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];
20907ec681f3Smrg      uint32_t stride = dwords[stride_dword];
20917ec681f3Smrg
20927ec681f3Smrg      for (unsigned i = 0; i < count; i++, addr += stride) {
20937ec681f3Smrg         printf("%sdraw %d:\n", levels[level], i);
20947ec681f3Smrg         dump_gpuaddr_size(addr, level, 0x10, 2);
20957ec681f3Smrg      }
20967ec681f3Smrg   }
20977ec681f3Smrg
20987ec681f3Smrg   dump_register_summary(level);
20997ec681f3Smrg}
21007ec681f3Smrg
/*
 * Handler for the run-compute packet: runs the "COMPUTE" query with a
 * count of 1 and dumps the register summary.  The packet contents are not
 * inspected.
 */
static void
cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)
{
   (void)dwords;
   (void)sizedwords;

   do_query("COMPUTE", 1);

   dump_register_summary(level);
}
21077ec681f3Smrg
21087ec681f3Smrgstatic void
21097ec681f3Smrgcp_nop(uint32_t *dwords, uint32_t sizedwords, int level)
21107ec681f3Smrg{
21117ec681f3Smrg   const char *buf = (void *)dwords;
21127ec681f3Smrg   int i;
21137ec681f3Smrg
21147ec681f3Smrg   if (quiet(3))
21157ec681f3Smrg      return;
21167ec681f3Smrg
21177ec681f3Smrg   // blob doesn't use CP_NOP for string_marker but it does
21187ec681f3Smrg   // use it for things that end up looking like, but aren't
21197ec681f3Smrg   // ascii chars:
21207ec681f3Smrg   if (!options->decode_markers)
21217ec681f3Smrg      return;
21227ec681f3Smrg
21237ec681f3Smrg   for (i = 0; i < 4 * sizedwords; i++) {
21247ec681f3Smrg      if (buf[i] == '\0')
21257ec681f3Smrg         break;
21267ec681f3Smrg      if (isascii(buf[i]))
21277ec681f3Smrg         printf("%c", buf[i]);
21287ec681f3Smrg   }
21297ec681f3Smrg   printf("\n");
21307ec681f3Smrg}
21317ec681f3Smrg
/*
 * Handler for the indirect-buffer packet: decodes the address and size of
 * the referenced buffer and, when the buffer is available on the host,
 * decodes its commands via dump_commands().
 *
 * Packet layout as read here:
 *   is_64b():  dwords[0] = address low 32b, dwords[1] = address high 32b,
 *              dwords[2] = size
 *   otherwise: dwords[0] = address, dwords[1] = size
 *
 * The size is passed to dump_commands() unchanged.  A buffer that cannot
 * be mapped is reported on stderr.
 */
static void
cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* traverse indirect buffers */
   uint64_t ibaddr;
   uint32_t ibsize;
   uint32_t *ptr = NULL;

   if (is_64b()) {
      /* a5xx+.. low 32b then high 32b of gpu addr, then size: */
      ibaddr = dwords[0];
      ibaddr |= ((uint64_t)dwords[1]) << 32;
      ibsize = dwords[2];
   } else {
      ibaddr = dwords[0];
      ibsize = dwords[1];
   }

   if (!quiet(3)) {
      if (is_64b()) {
         printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);
      } else {
         printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);
      }
      printf("%sibsize:%08x\n", levels[level], ibsize);
   }

   /* In 'once' mode, skip a buffer has_dumped() reports for the current
    * enable_mask.
    */
   if (options->once && has_dumped(ibaddr, enable_mask))
      return;

   /* 'query-compare' mode implies 'once' mode, although we need only to
    * process the cmdstream for *any* enable_mask mode, since we are
    * comparing binning vs draw reg values at the same time, ie. it is
    * not useful to process the same draw in both binning and draw pass.
    */
   if (options->query_compare && has_dumped(ibaddr, MODE_ALL))
      return;

   /* map gpuaddr back to hostptr: */
   ptr = hostptr(ibaddr);

   if (ptr) {
      /* If the GPU hung within the target IB, the trigger point will be
       * just after the current CP_INDIRECT_BUFFER.  Because the IB is
       * executed but never returns.  Account for this by checking if
       * the IB returned:
       */
      highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));

      /* Enter one IB nesting level for the duration of the nested decode
       * and record where this IB lives; the level is left again right
       * after dump_commands() returns.
       * NOTE(review): ib is not range-checked against ibs[] here -
       * confirm the nesting depth is bounded elsewhere.
       */
      ib++;
      ibs[ib].base = ibaddr;
      ibs[ib].size = ibsize;

      dump_commands(ptr, ibsize, level);
      ib--;
   } else {
      fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
   }
}
21917ec681f3Smrg
21927ec681f3Smrgstatic void
21937ec681f3Smrgcp_start_bin(uint32_t *dwords, uint32_t sizedwords, int level)
21947ec681f3Smrg{
21957ec681f3Smrg   uint64_t ibaddr;
21967ec681f3Smrg   uint32_t ibsize;
21977ec681f3Smrg   uint32_t loopcount;
21987ec681f3Smrg   uint32_t *ptr = NULL;
21997ec681f3Smrg
22007ec681f3Smrg   loopcount = dwords[0];
22017ec681f3Smrg   ibaddr = dwords[1];
22027ec681f3Smrg   ibaddr |= ((uint64_t)dwords[2]) << 32;
22037ec681f3Smrg   ibsize = dwords[3];
22047ec681f3Smrg
22057ec681f3Smrg   /* map gpuaddr back to hostptr: */
22067ec681f3Smrg   ptr = hostptr(ibaddr);
22077ec681f3Smrg
22087ec681f3Smrg   if (ptr) {
22097ec681f3Smrg      /* If the GPU hung within the target IB, the trigger point will be
22107ec681f3Smrg       * just after the current CP_START_BIN.  Because the IB is
22117ec681f3Smrg       * executed but never returns.  Account for this by checking if
22127ec681f3Smrg       * the IB returned:
22137ec681f3Smrg       */
22147ec681f3Smrg      highlight_gpuaddr(gpuaddr(&dwords[5]));
22157ec681f3Smrg
22167ec681f3Smrg      /* TODO: we should duplicate the body of the loop after each bin, so
22177ec681f3Smrg       * that draws get the correct state. We should also figure out if there
22187ec681f3Smrg       * are any registers that can tell us what bin we're in when we hang so
22197ec681f3Smrg       * that crashdec points to the right place.
22207ec681f3Smrg       */
22217ec681f3Smrg      ib++;
22227ec681f3Smrg      for (uint32_t i = 0; i < loopcount; i++) {
22237ec681f3Smrg         ibs[ib].base = ibaddr;
22247ec681f3Smrg         ibs[ib].size = ibsize;
22257ec681f3Smrg         printf("%sbin %u\n", levels[level], i);
22267ec681f3Smrg         dump_commands(ptr, ibsize, level);
22277ec681f3Smrg         ibaddr += ibsize;
22287ec681f3Smrg         ptr += ibsize;
22297ec681f3Smrg      }
22307ec681f3Smrg      ib--;
22317ec681f3Smrg   } else {
22327ec681f3Smrg      fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);
22337ec681f3Smrg   }
22347ec681f3Smrg}
22357ec681f3Smrg
22367ec681f3Smrgstatic void
22377ec681f3Smrgcp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)
22387ec681f3Smrg{
22397ec681f3Smrg   needs_wfi = false;
22407ec681f3Smrg}
22417ec681f3Smrg
22427ec681f3Smrgstatic void
22437ec681f3Smrgcp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)
22447ec681f3Smrg{
22457ec681f3Smrg   if (quiet(2))
22467ec681f3Smrg      return;
22477ec681f3Smrg
22487ec681f3Smrg   if (is_64b()) {
22497ec681f3Smrg      uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);
22507ec681f3Smrg      printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
22517ec681f3Smrg      dump_hex(&dwords[2], sizedwords - 2, level + 1);
22527ec681f3Smrg
22537ec681f3Smrg      if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))
22547ec681f3Smrg         dump_commands(&dwords[2], sizedwords - 2, level + 1);
22557ec681f3Smrg   } else {
22567ec681f3Smrg      uint32_t gpuaddr = dwords[0];
22577ec681f3Smrg      printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);
22587ec681f3Smrg      dump_float((float *)&dwords[1], sizedwords - 1, level + 1);
22597ec681f3Smrg   }
22607ec681f3Smrg}
22617ec681f3Smrg
22627ec681f3Smrgstatic void
22637ec681f3Smrgcp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)
22647ec681f3Smrg{
22657ec681f3Smrg   uint32_t val = dwords[0] & 0xffff;
22667ec681f3Smrg   uint32_t and = dwords[1];
22677ec681f3Smrg   uint32_t or = dwords[2];
22687ec681f3Smrg   printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),
22697ec681f3Smrg          and, or);
22707ec681f3Smrg   if (needs_wfi)
22717ec681f3Smrg      printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),
22727ec681f3Smrg             and, or);
22737ec681f3Smrg   reg_set(val, (reg_val(val) & and) | or);
22747ec681f3Smrg}
22757ec681f3Smrg
22767ec681f3Smrgstatic void
22777ec681f3Smrgcp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)
22787ec681f3Smrg{
22797ec681f3Smrg   uint32_t val = dwords[0] & 0xffff;
22807ec681f3Smrg   printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));
22817ec681f3Smrg
22827ec681f3Smrg   if (quiet(2))
22837ec681f3Smrg      return;
22847ec681f3Smrg
22857ec681f3Smrg   uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);
22867ec681f3Smrg   printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);
22877ec681f3Smrg   void *ptr = hostptr(gpuaddr);
22887ec681f3Smrg   if (ptr) {
22897ec681f3Smrg      uint32_t cnt = (dwords[0] >> 19) & 0x3ff;
22907ec681f3Smrg      dump_hex(ptr, cnt, level + 1);
22917ec681f3Smrg   }
22927ec681f3Smrg}
22937ec681f3Smrg
/**
 * One draw-state group as recorded by cp_set_draw_state().
 *
 * The fields mirror what that handler extracts from each packet entry.
 */
struct draw_state {
   uint16_t enable_mask; /* bits 20..23 of the entry header */
   uint16_t flags;       /* bits 16..19 of the entry header, see FLAG_x */
   uint32_t count;       /* bits 0..15 of the entry header: size in dwords */
   uint64_t addr;        /* gpu address of the group's commands */
};

/* Currently recorded groups, indexed by group id (5 bits -> 32 entries). */
struct draw_state state[32];

/* Per-entry flags found in draw_state::flags: */
#define FLAG_DIRTY              0x1
#define FLAG_DISABLE            0x2 /* forget this one group */
#define FLAG_DISABLE_ALL_GROUPS 0x4 /* forget every group */
#define FLAG_LOAD_IMMED         0x8 /* decode the group right away */

/* Last value written by cp_set_mode() (dwords[0] of CP_SET_MODE). */
static int draw_mode;
23097ec681f3Smrg
23107ec681f3Smrgstatic void
23117ec681f3Smrgdisable_group(unsigned group_id)
23127ec681f3Smrg{
23137ec681f3Smrg   struct draw_state *ds = &state[group_id];
23147ec681f3Smrg   memset(ds, 0, sizeof(*ds));
23157ec681f3Smrg}
23167ec681f3Smrg
23177ec681f3Smrgstatic void
23187ec681f3Smrgdisable_all_groups(void)
23197ec681f3Smrg{
23207ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
23217ec681f3Smrg      disable_group(i);
23227ec681f3Smrg}
23237ec681f3Smrg
23247ec681f3Smrgstatic void
23257ec681f3Smrgload_group(unsigned group_id, int level)
23267ec681f3Smrg{
23277ec681f3Smrg   struct draw_state *ds = &state[group_id];
23287ec681f3Smrg
23297ec681f3Smrg   if (!ds->count)
23307ec681f3Smrg      return;
23317ec681f3Smrg
23327ec681f3Smrg   printl(2, "%sgroup_id: %u\n", levels[level], group_id);
23337ec681f3Smrg   printl(2, "%scount: %d\n", levels[level], ds->count);
23347ec681f3Smrg   printl(2, "%saddr: %016llx\n", levels[level], ds->addr);
23357ec681f3Smrg   printl(2, "%sflags: %x\n", levels[level], ds->flags);
23367ec681f3Smrg
23377ec681f3Smrg   if (options->gpu_id >= 600) {
23387ec681f3Smrg      printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);
23397ec681f3Smrg
23407ec681f3Smrg      if (!(ds->enable_mask & enable_mask)) {
23417ec681f3Smrg         printl(2, "%s\tskipped!\n\n", levels[level]);
23427ec681f3Smrg         return;
23437ec681f3Smrg      }
23447ec681f3Smrg   }
23457ec681f3Smrg
23467ec681f3Smrg   void *ptr = hostptr(ds->addr);
23477ec681f3Smrg   if (ptr) {
23487ec681f3Smrg      if (!quiet(2))
23497ec681f3Smrg         dump_hex(ptr, ds->count, level + 1);
23507ec681f3Smrg
23517ec681f3Smrg      ib++;
23527ec681f3Smrg      dump_commands(ptr, ds->count, level + 1);
23537ec681f3Smrg      ib--;
23547ec681f3Smrg   }
23557ec681f3Smrg}
23567ec681f3Smrg
23577ec681f3Smrgstatic void
23587ec681f3Smrgload_all_groups(int level)
23597ec681f3Smrg{
23607ec681f3Smrg   /* sanity check, we should never recursively hit recursion here, and if
23617ec681f3Smrg    * we do bad things happen:
23627ec681f3Smrg    */
23637ec681f3Smrg   static bool loading_groups = false;
23647ec681f3Smrg   if (loading_groups) {
23657ec681f3Smrg      printf("ERROR: nothing in draw state should trigger recursively loading "
23667ec681f3Smrg             "groups!\n");
23677ec681f3Smrg      return;
23687ec681f3Smrg   }
23697ec681f3Smrg   loading_groups = true;
23707ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(state); i++)
23717ec681f3Smrg      load_group(i, level);
23727ec681f3Smrg   loading_groups = false;
23737ec681f3Smrg
23747ec681f3Smrg   /* in 'query-compare' mode, defer disabling all groups until we have a
23757ec681f3Smrg    * chance to process the query:
23767ec681f3Smrg    */
23777ec681f3Smrg   if (!options->query_compare)
23787ec681f3Smrg      disable_all_groups();
23797ec681f3Smrg}
23807ec681f3Smrg
23817ec681f3Smrgstatic void
23827ec681f3Smrgcp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)
23837ec681f3Smrg{
23847ec681f3Smrg   uint32_t i;
23857ec681f3Smrg
23867ec681f3Smrg   for (i = 0; i < sizedwords;) {
23877ec681f3Smrg      struct draw_state *ds;
23887ec681f3Smrg      uint32_t count = dwords[i] & 0xffff;
23897ec681f3Smrg      uint32_t group_id = (dwords[i] >> 24) & 0x1f;
23907ec681f3Smrg      uint32_t enable_mask = (dwords[i] >> 20) & 0xf;
23917ec681f3Smrg      uint32_t flags = (dwords[i] >> 16) & 0xf;
23927ec681f3Smrg      uint64_t addr;
23937ec681f3Smrg
23947ec681f3Smrg      if (is_64b()) {
23957ec681f3Smrg         addr = dwords[i + 1];
23967ec681f3Smrg         addr |= ((uint64_t)dwords[i + 2]) << 32;
23977ec681f3Smrg         i += 3;
23987ec681f3Smrg      } else {
23997ec681f3Smrg         addr = dwords[i + 1];
24007ec681f3Smrg         i += 2;
24017ec681f3Smrg      }
24027ec681f3Smrg
24037ec681f3Smrg      if (flags & FLAG_DISABLE_ALL_GROUPS) {
24047ec681f3Smrg         disable_all_groups();
24057ec681f3Smrg         continue;
24067ec681f3Smrg      }
24077ec681f3Smrg
24087ec681f3Smrg      if (flags & FLAG_DISABLE) {
24097ec681f3Smrg         disable_group(group_id);
24107ec681f3Smrg         continue;
24117ec681f3Smrg      }
24127ec681f3Smrg
24137ec681f3Smrg      assert(group_id < ARRAY_SIZE(state));
24147ec681f3Smrg      disable_group(group_id);
24157ec681f3Smrg
24167ec681f3Smrg      ds = &state[group_id];
24177ec681f3Smrg
24187ec681f3Smrg      ds->enable_mask = enable_mask;
24197ec681f3Smrg      ds->flags = flags;
24207ec681f3Smrg      ds->count = count;
24217ec681f3Smrg      ds->addr = addr;
24227ec681f3Smrg
24237ec681f3Smrg      if (flags & FLAG_LOAD_IMMED) {
24247ec681f3Smrg         load_group(group_id, level);
24257ec681f3Smrg         disable_group(group_id);
24267ec681f3Smrg      }
24277ec681f3Smrg   }
24287ec681f3Smrg}
24297ec681f3Smrg
24307ec681f3Smrgstatic void
24317ec681f3Smrgcp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)
24327ec681f3Smrg{
24337ec681f3Smrg   draw_mode = dwords[0];
24347ec681f3Smrg}
24357ec681f3Smrg
/**
 * CP_EXEC_CS handler (execute compute shader): run the "compute" query and
 * print the register summary.
 *
 * @dwords and @sizedwords are unused; @level is the output nesting level.
 */
static void
cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)
{
   do_query("compute", 0);
   dump_register_summary(level);
}
24437ec681f3Smrg
24447ec681f3Smrgstatic void
24457ec681f3Smrgcp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)
24467ec681f3Smrg{
24477ec681f3Smrg   uint64_t addr;
24487ec681f3Smrg
24497ec681f3Smrg   if (is_64b()) {
24507ec681f3Smrg      addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];
24517ec681f3Smrg   } else {
24527ec681f3Smrg      addr = dwords[1];
24537ec681f3Smrg   }
24547ec681f3Smrg
24557ec681f3Smrg   printl(3, "%saddr: %016llx\n", levels[level], addr);
24567ec681f3Smrg   dump_gpuaddr_size(addr, level, 0x10, 2);
24577ec681f3Smrg
24587ec681f3Smrg   do_query("compute", 0);
24597ec681f3Smrg   dump_register_summary(level);
24607ec681f3Smrg}
24617ec681f3Smrg
24627ec681f3Smrgstatic void
24637ec681f3Smrgcp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)
24647ec681f3Smrg{
24657ec681f3Smrg   render_mode = rnn_enumname(rnn, "a6xx_marker", dwords[0] & 0xf);
24667ec681f3Smrg
24677ec681f3Smrg   if (!strcmp(render_mode, "RM6_BINNING")) {
24687ec681f3Smrg      enable_mask = MODE_BINNING;
24697ec681f3Smrg   } else if (!strcmp(render_mode, "RM6_GMEM")) {
24707ec681f3Smrg      enable_mask = MODE_GMEM;
24717ec681f3Smrg   } else if (!strcmp(render_mode, "RM6_BYPASS")) {
24727ec681f3Smrg      enable_mask = MODE_BYPASS;
24737ec681f3Smrg   }
24747ec681f3Smrg}
24757ec681f3Smrg
24767ec681f3Smrgstatic void
24777ec681f3Smrgcp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)
24787ec681f3Smrg{
24797ec681f3Smrg   uint64_t addr;
24807ec681f3Smrg   uint32_t *ptr, len;
24817ec681f3Smrg
24827ec681f3Smrg   assert(is_64b());
24837ec681f3Smrg
24847ec681f3Smrg   /* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..
24857ec681f3Smrg    * not sure if this can come in different sizes.
24867ec681f3Smrg    *
24877ec681f3Smrg    * First ptr doesn't seem to be cmdstream, second one does.
24887ec681f3Smrg    *
24897ec681f3Smrg    * Comment from downstream kernel:
24907ec681f3Smrg    *
24917ec681f3Smrg    * SRM -- set render mode (ex binning, direct render etc)
24927ec681f3Smrg    * SRM is set by UMD usually at start of IB to tell CP the type of
24937ec681f3Smrg    * preemption.
24947ec681f3Smrg    * KMD needs to set SRM to NULL to indicate CP that rendering is
24957ec681f3Smrg    * done by IB.
24967ec681f3Smrg    * ------------------------------------------------------------------
24977ec681f3Smrg    *
24987ec681f3Smrg    * Seems to always be one of these two:
24997ec681f3Smrg    * 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c2000
25007ec681f3Smrg    * 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d
25017ec681f3Smrg    * 001c2000 00000000
25027ec681f3Smrg    *
25037ec681f3Smrg    */
25047ec681f3Smrg
25057ec681f3Smrg   assert(options->gpu_id >= 500);
25067ec681f3Smrg
25077ec681f3Smrg   render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);
25087ec681f3Smrg
25097ec681f3Smrg   if (sizedwords == 1)
25107ec681f3Smrg      return;
25117ec681f3Smrg
25127ec681f3Smrg   addr = dwords[1];
25137ec681f3Smrg   addr |= ((uint64_t)dwords[2]) << 32;
25147ec681f3Smrg
25157ec681f3Smrg   mode = dwords[3];
25167ec681f3Smrg
25177ec681f3Smrg   dump_gpuaddr(addr, level + 1);
25187ec681f3Smrg
25197ec681f3Smrg   if (sizedwords == 5)
25207ec681f3Smrg      return;
25217ec681f3Smrg
25227ec681f3Smrg   assert(sizedwords == 8);
25237ec681f3Smrg
25247ec681f3Smrg   len = dwords[5];
25257ec681f3Smrg   addr = dwords[6];
25267ec681f3Smrg   addr |= ((uint64_t)dwords[7]) << 32;
25277ec681f3Smrg
25287ec681f3Smrg   printl(3, "%saddr: 0x%016lx\n", levels[level], addr);
25297ec681f3Smrg   printl(3, "%slen:  0x%x\n", levels[level], len);
25307ec681f3Smrg
25317ec681f3Smrg   ptr = hostptr(addr);
25327ec681f3Smrg
25337ec681f3Smrg   if (ptr) {
25347ec681f3Smrg      if (!quiet(2)) {
25357ec681f3Smrg         ib++;
25367ec681f3Smrg         dump_commands(ptr, len, level + 1);
25377ec681f3Smrg         ib--;
25387ec681f3Smrg         dump_hex(ptr, len, level + 1);
25397ec681f3Smrg      }
25407ec681f3Smrg   }
25417ec681f3Smrg}
25427ec681f3Smrg
25437ec681f3Smrgstatic void
25447ec681f3Smrgcp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)
25457ec681f3Smrg{
25467ec681f3Smrg   uint64_t addr;
25477ec681f3Smrg   uint32_t *ptr, len;
25487ec681f3Smrg
25497ec681f3Smrg   assert(is_64b());
25507ec681f3Smrg   assert(options->gpu_id >= 500);
25517ec681f3Smrg
25527ec681f3Smrg   assert(sizedwords == 8);
25537ec681f3Smrg
25547ec681f3Smrg   addr = dwords[5];
25557ec681f3Smrg   addr |= ((uint64_t)dwords[6]) << 32;
25567ec681f3Smrg   len = dwords[7];
25577ec681f3Smrg
25587ec681f3Smrg   printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);
25597ec681f3Smrg   printl(3, "%slen:  0x%x\n", levels[level], len);
25607ec681f3Smrg
25617ec681f3Smrg   ptr = hostptr(addr);
25627ec681f3Smrg
25637ec681f3Smrg   if (ptr) {
25647ec681f3Smrg      if (!quiet(2)) {
25657ec681f3Smrg         ib++;
25667ec681f3Smrg         dump_commands(ptr, len, level + 1);
25677ec681f3Smrg         ib--;
25687ec681f3Smrg         dump_hex(ptr, len, level + 1);
25697ec681f3Smrg      }
25707ec681f3Smrg   }
25717ec681f3Smrg}
25727ec681f3Smrg
25737ec681f3Smrgstatic void
25747ec681f3Smrgcp_blit(uint32_t *dwords, uint32_t sizedwords, int level)
25757ec681f3Smrg{
25767ec681f3Smrg   do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);
25777ec681f3Smrg   print_mode(level);
25787ec681f3Smrg   dump_register_summary(level);
25797ec681f3Smrg}
25807ec681f3Smrg
25817ec681f3Smrgstatic void
25827ec681f3Smrgcp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)
25837ec681f3Smrg{
25847ec681f3Smrg   int i;
25857ec681f3Smrg
25867ec681f3Smrg   /* NOTE: seems to write same reg multiple times.. not sure if different parts
25877ec681f3Smrg    * of these are triggered by the FLUSH_SO_n events?? (if that is what they
25887ec681f3Smrg    * actually are?)
25897ec681f3Smrg    */
25907ec681f3Smrg   bool saved_summary = summary;
25917ec681f3Smrg   summary = false;
25927ec681f3Smrg
25937ec681f3Smrg   for (i = 0; i < sizedwords; i += 2) {
25947ec681f3Smrg      dump_register(dwords[i + 0], dwords[i + 1], level + 1);
25957ec681f3Smrg      reg_set(dwords[i + 0], dwords[i + 1]);
25967ec681f3Smrg   }
25977ec681f3Smrg
25987ec681f3Smrg   summary = saved_summary;
25997ec681f3Smrg}
26007ec681f3Smrg
/**
 * CP_REG_WRITE handler: print and record a single register write.
 *
 * The register offset is the low 16 bits of dwords[1] and the value is
 * dwords[2]; dwords[0] is not examined here.
 *
 * @dwords:     packet payload
 * @sizedwords: payload length in dwords (unused here)
 * @level:      nesting level used for output indentation
 */
static void
cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)
{
   const uint32_t offset = dwords[1] & 0xffff;
   const uint32_t value = dwords[2];

   dump_register(offset, value, level + 1);
   reg_set(offset, value);
}
26097ec681f3Smrg
26107ec681f3Smrgstatic void
26117ec681f3Smrgcp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)
26127ec681f3Smrg{
26137ec681f3Smrg   uint64_t addr;
26147ec681f3Smrg   uint32_t size = dwords[2] & 0xffff;
26157ec681f3Smrg   void *ptr;
26167ec681f3Smrg
26177ec681f3Smrg   addr = dwords[0] | ((uint64_t)dwords[1] << 32);
26187ec681f3Smrg
26197ec681f3Smrg   if (!quiet(3)) {
26207ec681f3Smrg      printf("%saddr=%" PRIx64 "\n", levels[level], addr);
26217ec681f3Smrg   }
26227ec681f3Smrg
26237ec681f3Smrg   ptr = hostptr(addr);
26247ec681f3Smrg   if (ptr) {
26257ec681f3Smrg      dump_commands(ptr, size, level + 1);
26267ec681f3Smrg   }
26277ec681f3Smrg}
26287ec681f3Smrg
26297ec681f3Smrgstatic void
26307ec681f3Smrgcp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)
26317ec681f3Smrg{
26327ec681f3Smrg   skip_ib2_enable_global = dwords[0];
26337ec681f3Smrg}
26347ec681f3Smrg
26357ec681f3Smrgstatic void
26367ec681f3Smrgcp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)
26377ec681f3Smrg{
26387ec681f3Smrg   skip_ib2_enable_local = dwords[0];
26397ec681f3Smrg}
26407ec681f3Smrg
26417ec681f3Smrg#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }
26427ec681f3Smrgstatic const struct type3_op {
26437ec681f3Smrg   const char *name;
26447ec681f3Smrg   void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);
26457ec681f3Smrg   struct {
26467ec681f3Smrg      bool load_all_groups;
26477ec681f3Smrg   } options;
26487ec681f3Smrg} type3_op[] = {
26497ec681f3Smrg   CP(NOP, cp_nop),
26507ec681f3Smrg   CP(INDIRECT_BUFFER, cp_indirect),
26517ec681f3Smrg   CP(INDIRECT_BUFFER_PFD, cp_indirect),
26527ec681f3Smrg   CP(WAIT_FOR_IDLE, cp_wfi),
26537ec681f3Smrg   CP(REG_RMW, cp_rmw),
26547ec681f3Smrg   CP(REG_TO_MEM, cp_reg_mem),
26557ec681f3Smrg   CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */
26567ec681f3Smrg   CP(MEM_WRITE, cp_mem_write),
26577ec681f3Smrg   CP(EVENT_WRITE, cp_event_write),
26587ec681f3Smrg   CP(RUN_OPENCL, cp_run_cl),
26597ec681f3Smrg   CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),
26607ec681f3Smrg   CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),
26617ec681f3Smrg   CP(SET_CONSTANT, cp_set_const),
26627ec681f3Smrg   CP(IM_LOAD_IMMEDIATE, cp_im_loadi),
26637ec681f3Smrg   CP(WIDE_REG_WRITE, cp_wide_reg_write),
26647ec681f3Smrg
26657ec681f3Smrg   /* for a3xx */
26667ec681f3Smrg   CP(LOAD_STATE, cp_load_state),
26677ec681f3Smrg   CP(SET_BIN, cp_set_bin),
26687ec681f3Smrg
26697ec681f3Smrg   /* for a4xx */
26707ec681f3Smrg   CP(LOAD_STATE4, cp_load_state),
26717ec681f3Smrg   CP(SET_DRAW_STATE, cp_set_draw_state),
26727ec681f3Smrg   CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),
26737ec681f3Smrg   CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),
26747ec681f3Smrg   CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),
26757ec681f3Smrg
26767ec681f3Smrg   /* for a5xx */
26777ec681f3Smrg   CP(SET_RENDER_MODE, cp_set_render_mode),
26787ec681f3Smrg   CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),
26797ec681f3Smrg   CP(BLIT, cp_blit),
26807ec681f3Smrg   CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),
26817ec681f3Smrg   CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),
26827ec681f3Smrg   CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),
26837ec681f3Smrg   CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),
26847ec681f3Smrg   CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),
26857ec681f3Smrg   CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),
26867ec681f3Smrg
26877ec681f3Smrg   /* for a6xx */
26887ec681f3Smrg   CP(LOAD_STATE6_GEOM, cp_load_state),
26897ec681f3Smrg   CP(LOAD_STATE6_FRAG, cp_load_state),
26907ec681f3Smrg   CP(LOAD_STATE6, cp_load_state),
26917ec681f3Smrg   CP(SET_MODE, cp_set_mode),
26927ec681f3Smrg   CP(SET_MARKER, cp_set_marker),
26937ec681f3Smrg   CP(REG_WRITE, cp_reg_write),
26947ec681f3Smrg
26957ec681f3Smrg   CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),
26967ec681f3Smrg
26977ec681f3Smrg   CP(START_BIN, cp_start_bin),
26987ec681f3Smrg};
26997ec681f3Smrg
/**
 * Handler that does nothing; get_type3_op() uses it for packets that have
 * no entry in the type3_op table.  All parameters are ignored.
 */
static void
noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)
{
   /* intentionally empty */
}
27047ec681f3Smrg
27057ec681f3Smrgstatic const struct type3_op *
27067ec681f3Smrgget_type3_op(unsigned opc)
27077ec681f3Smrg{
27087ec681f3Smrg   static const struct type3_op dummy_op = {
27097ec681f3Smrg      .fxn = noop_fxn,
27107ec681f3Smrg   };
27117ec681f3Smrg   const char *name = pktname(opc);
27127ec681f3Smrg
27137ec681f3Smrg   if (!name)
27147ec681f3Smrg      return &dummy_op;
27157ec681f3Smrg
27167ec681f3Smrg   for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)
27177ec681f3Smrg      if (!strcmp(name, type3_op[i].name))
27187ec681f3Smrg         return &type3_op[i];
27197ec681f3Smrg
27207ec681f3Smrg   return &dummy_op;
27217ec681f3Smrg}
27227ec681f3Smrg
27237ec681f3Smrgvoid
27247ec681f3Smrgdump_commands(uint32_t *dwords, uint32_t sizedwords, int level)
27257ec681f3Smrg{
27267ec681f3Smrg   int dwords_left = sizedwords;
27277ec681f3Smrg   uint32_t count = 0; /* dword count including packet header */
27287ec681f3Smrg   uint32_t val;
27297ec681f3Smrg
27307ec681f3Smrg   //	assert(dwords);
27317ec681f3Smrg   if (!dwords) {
27327ec681f3Smrg      printf("NULL cmd buffer!\n");
27337ec681f3Smrg      return;
27347ec681f3Smrg   }
27357ec681f3Smrg
27367ec681f3Smrg   assert(ib < ARRAY_SIZE(draws));
27377ec681f3Smrg   draws[ib] = 0;
27387ec681f3Smrg
27397ec681f3Smrg   while (dwords_left > 0) {
27407ec681f3Smrg
27417ec681f3Smrg      current_draw_count = draw_count;
27427ec681f3Smrg
27437ec681f3Smrg      /* hack, this looks like a -1 underflow, in some versions
27447ec681f3Smrg       * when it tries to write zero registers via pkt0
27457ec681f3Smrg       */
27467ec681f3Smrg      //		if ((dwords[0] >> 16) == 0xffff)
27477ec681f3Smrg      //			goto skip;
27487ec681f3Smrg
27497ec681f3Smrg      if (pkt_is_type0(dwords[0])) {
27507ec681f3Smrg         printl(3, "t0");
27517ec681f3Smrg         count = type0_pkt_size(dwords[0]) + 1;
27527ec681f3Smrg         val = type0_pkt_offset(dwords[0]);
27537ec681f3Smrg         assert(val < regcnt());
27547ec681f3Smrg         printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1),
27557ec681f3Smrg                (dwords[0] & 0x8000) ? " (same register)" : "", val);
27567ec681f3Smrg         dump_registers(val, dwords + 1, count - 1, level + 2);
27577ec681f3Smrg         if (!quiet(3))
27587ec681f3Smrg            dump_hex(dwords, count, level + 1);
27597ec681f3Smrg      } else if (pkt_is_type4(dwords[0])) {
27607ec681f3Smrg         /* basically the same(ish) as type0 prior to a5xx */
27617ec681f3Smrg         printl(3, "t4");
27627ec681f3Smrg         count = type4_pkt_size(dwords[0]) + 1;
27637ec681f3Smrg         val = type4_pkt_offset(dwords[0]);
27647ec681f3Smrg         assert(val < regcnt());
27657ec681f3Smrg         printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),
27667ec681f3Smrg                val);
27677ec681f3Smrg         dump_registers(val, dwords + 1, count - 1, level + 2);
27687ec681f3Smrg         if (!quiet(3))
27697ec681f3Smrg            dump_hex(dwords, count, level + 1);
27707ec681f3Smrg#if 0
27717ec681f3Smrg      } else if (pkt_is_type1(dwords[0])) {
27727ec681f3Smrg         printl(3, "t1");
27737ec681f3Smrg         count = 3;
27747ec681f3Smrg         val = dwords[0] & 0xfff;
27757ec681f3Smrg         printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
27767ec681f3Smrg         dump_registers(val, dwords+1, 1, level+2);
27777ec681f3Smrg         val = (dwords[0] >> 12) & 0xfff;
27787ec681f3Smrg         printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));
27797ec681f3Smrg         dump_registers(val, dwords+2, 1, level+2);
27807ec681f3Smrg         if (!quiet(3))
27817ec681f3Smrg            dump_hex(dwords, count, level+1);
27827ec681f3Smrg      } else if (pkt_is_type2(dwords[0])) {
27837ec681f3Smrg         printl(3, "t2");
27847ec681f3Smrg         printf("%sNOP\n", levels[level+1]);
27857ec681f3Smrg         count = 1;
27867ec681f3Smrg         if (!quiet(3))
27877ec681f3Smrg            dump_hex(dwords, count, level+1);
27887ec681f3Smrg#endif
27897ec681f3Smrg      } else if (pkt_is_type3(dwords[0])) {
27907ec681f3Smrg         count = type3_pkt_size(dwords[0]) + 1;
27917ec681f3Smrg         val = cp_type3_opcode(dwords[0]);
27927ec681f3Smrg         const struct type3_op *op = get_type3_op(val);
27937ec681f3Smrg         if (op->options.load_all_groups)
27947ec681f3Smrg            load_all_groups(level + 1);
27957ec681f3Smrg         printl(3, "t3");
27967ec681f3Smrg         const char *name = pktname(val);
27977ec681f3Smrg         if (!quiet(2)) {
27987ec681f3Smrg            printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],
27997ec681f3Smrg                   rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
28007ec681f3Smrg                   count, (dwords[0] & 0x1) ? " (predicated)" : "");
28017ec681f3Smrg         }
28027ec681f3Smrg         if (name)
28037ec681f3Smrg            dump_domain(dwords + 1, count - 1, level + 2, name);
28047ec681f3Smrg         op->fxn(dwords + 1, count - 1, level + 1);
28057ec681f3Smrg         if (!quiet(2))
28067ec681f3Smrg            dump_hex(dwords, count, level + 1);
28077ec681f3Smrg      } else if (pkt_is_type7(dwords[0])) {
28087ec681f3Smrg         count = type7_pkt_size(dwords[0]) + 1;
28097ec681f3Smrg         val = cp_type7_opcode(dwords[0]);
28107ec681f3Smrg         const struct type3_op *op = get_type3_op(val);
28117ec681f3Smrg         if (op->options.load_all_groups)
28127ec681f3Smrg            load_all_groups(level + 1);
28137ec681f3Smrg         printl(3, "t7");
28147ec681f3Smrg         const char *name = pktname(val);
28157ec681f3Smrg         if (!quiet(2)) {
28167ec681f3Smrg            printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],
28177ec681f3Smrg                   rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,
28187ec681f3Smrg                   count);
28197ec681f3Smrg         }
28207ec681f3Smrg         if (name) {
28217ec681f3Smrg            /* special hack for two packets that decode the same way
28227ec681f3Smrg             * on a6xx:
28237ec681f3Smrg             */
28247ec681f3Smrg            if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||
28257ec681f3Smrg                !strcmp(name, "CP_LOAD_STATE6_GEOM"))
28267ec681f3Smrg               name = "CP_LOAD_STATE6";
28277ec681f3Smrg            dump_domain(dwords + 1, count - 1, level + 2, name);
28287ec681f3Smrg         }
28297ec681f3Smrg         op->fxn(dwords + 1, count - 1, level + 1);
28307ec681f3Smrg         if (!quiet(2))
28317ec681f3Smrg            dump_hex(dwords, count, level + 1);
28327ec681f3Smrg      } else if (pkt_is_type2(dwords[0])) {
28337ec681f3Smrg         printl(3, "t2");
28347ec681f3Smrg         printl(3, "%snop\n", levels[level + 1]);
28357ec681f3Smrg      } else {
28367ec681f3Smrg         /* for 5xx+ we can do a passable job of looking for start of next valid
28377ec681f3Smrg          * packet: */
28387ec681f3Smrg         if (options->gpu_id >= 500) {
28397ec681f3Smrg            while (dwords_left > 0) {
28407ec681f3Smrg               if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))
28417ec681f3Smrg                  break;
28427ec681f3Smrg               printf("bad type! %08x\n", dwords[0]);
28437ec681f3Smrg               dwords++;
28447ec681f3Smrg               dwords_left--;
28457ec681f3Smrg            }
28467ec681f3Smrg         } else {
28477ec681f3Smrg            printf("bad type! %08x\n", dwords[0]);
28487ec681f3Smrg            return;
28497ec681f3Smrg         }
28507ec681f3Smrg      }
28517ec681f3Smrg
28527ec681f3Smrg      dwords += count;
28537ec681f3Smrg      dwords_left -= count;
28547ec681f3Smrg   }
28557ec681f3Smrg
28567ec681f3Smrg   if (dwords_left < 0)
28577ec681f3Smrg      printf("**** this ain't right!! dwords_left=%d\n", dwords_left);
28587ec681f3Smrg}
2859