crashdec.c revision 7ec681f3
1/*
2 * Copyright © 2020 Google, Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24/*
25 * Decoder for devcoredump traces from drm/msm.  In case of a gpu crash/hang,
26 * the coredump should be found in:
27 *
28 *    /sys/class/devcoredump/devcd<n>/data
29 *
30 * The crashdump will hang around for 5min, it can be cleared by writing to
31 * the file, ie:
32 *
33 *    echo 1 > /sys/class/devcoredump/devcd<n>/data
34 *
35 * (the driver won't log any new crashdumps until the previous one is cleared
36 * or times out after 5min)
37 */
38
39#include <assert.h>
40#include <getopt.h>
41#include <inttypes.h>
42#include <stdarg.h>
43#include <stdbool.h>
44#include <stdint.h>
45#include <stdio.h>
46#include <stdlib.h>
47#include <string.h>
48#include <unistd.h>
49
50#include "freedreno_pm4.h"
51
52#include "ir3/instr-a3xx.h"
53#include "buffers.h"
54#include "cffdec.h"
55#include "disasm.h"
56#include "pager.h"
57#include "rnnutil.h"
58#include "util.h"
59
/* Input stream the crashdump is read from; popline() pulls lines from it. */
static FILE *in;
/* When set, the section decoders emit additional (mostly raw) detail. */
static bool verbose;

/* Register databases; decode() loads these when it sees the "revision:"
 * line, depending on the GPU generation (they stay NULL otherwise).
 */
static struct rnn *rnn_gmu;
static struct rnn *rnn_control;
static struct rnn *rnn_pipe;

/* Options passed to cffdec_init(); gpu_id is filled in from the dump. */
static struct cffdec_options options = {
   .draw_filter = -1,
};
70
71static inline bool
72is_a6xx(void)
73{
74   return (600 <= options.gpu_id) && (options.gpu_id < 700);
75}
76static inline bool
77is_a5xx(void)
78{
79   return (500 <= options.gpu_id) && (options.gpu_id < 600);
80}
81static inline bool
82is_64b(void)
83{
84   return options.gpu_id >= 500;
85}
86
87/*
88 * Helpers to read register values:
89 */
90
/*
 * Look up a register by name and return its recorded value.  On 64b GPUs
 * (see is_64b()) the following register supplies the upper 32 bits; on
 * older GPUs only the single 32b register is read.
 *
 * The register name must be known (asserted).
 */
static uint64_t
regval64(const char *name)
{
   const unsigned lo_reg = regbase(name);
   assert(lo_reg);

   const uint64_t lo = reg_val(lo_reg);
   if (!is_64b())
      return lo;

   const uint64_t hi = reg_val(lo_reg + 1);
   return lo | (hi << 32);
}
102
/*
 * Look up a 32b register by name and return its recorded value.  The
 * register name must be known (asserted).
 */
static uint32_t
regval(const char *name)
{
   const unsigned offset = regbase(name);
   assert(offset);

   const uint32_t value = reg_val(offset);
   return value;
}
110
111/*
112 * Line reading and string helpers:
113 */
114
/*
 * Replace the first occurrence of 'find' in 'line' with 'replace'.
 *
 * 'line' must be heap allocated.  If 'find' does not occur, 'line' itself
 * is returned untouched.  Otherwise a newly allocated string is returned
 * and 'line' is freed, so the caller must always continue with (and
 * eventually free) the returned pointer.
 *
 * Exits the program if the replacement string cannot be allocated.
 */
static char *
replacestr(char *line, const char *find, const char *replace)
{
   char *match = strstr(line, find);
   if (!match)
      return line;

   const char *tail = match + strlen(find);
   const size_t headlen = (size_t)(match - line);
   const size_t replen = strlen(replace);
   const size_t taillen = strlen(tail);

   char *newline = malloc(headlen + replen + taillen + 1);
   if (!newline) {
      fprintf(stderr, "out of memory\n");
      exit(1);
   }

   memcpy(newline, line, headlen);
   memcpy(newline + headlen, replace, replen);
   /* the +1 carries the terminating NUL over with the tail: */
   memcpy(newline + headlen + replen, tail, taillen + 1);

   free(line);

   return newline;
}
131
132static char *lastline;
133static char *pushedline;
134
135static const char *
136popline(void)
137{
138   char *r = pushedline;
139
140   if (r) {
141      pushedline = NULL;
142      return r;
143   }
144
145   free(lastline);
146
147   size_t n = 0;
148   if (getline(&r, &n, in) < 0)
149      exit(0);
150
151   /* Handle section name typo's from earlier kernels: */
152   r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL");
153   r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT");
154
155   lastline = r;
156   return r;
157}
158
159static void
160pushline(void)
161{
162   assert(!pushedline);
163   pushedline = lastline;
164}
165
/*
 * Read the next line and decode it as ascii85 data into a newly allocated,
 * zero-initialized buffer of 'sizedwords' dwords (at least one dword is
 * always allocated, so the result is never NULL).  The caller owns the
 * returned buffer.
 *
 * A 'z' decodes to a zero dword; otherwise up to five characters are
 * folded into one dword.  Data beyond 'sizedwords' dwords is reported on
 * stderr and dropped rather than written past the end of the buffer.
 *
 * Exits the program if the buffer cannot be allocated.
 */
static uint32_t *
popline_ascii85(uint32_t sizedwords)
{
   const char *line = popline();

   /* At this point we expect the ascii85 data to be indented *some*
    * amount, and to terminate at the end of the line.  So just eat
    * up the leading whitespace.
    */
   assert(*line == ' ');
   while (*line == ' ')
      line++;

   uint32_t *buf = calloc(sizedwords ? sizedwords : 1, sizeof(*buf));
   if (!buf) {
      fprintf(stderr, "out of memory decoding ascii85 data\n");
      exit(1);
   }

   uint32_t idx = 0;

   /* Also stop at NUL, in case the last line of the file has no newline: */
   while ((*line != '\n') && (*line != '\0')) {
      if (idx >= sizedwords) {
         fprintf(stderr,
                 "ascii85 data exceeds expected size of %" PRIu32 " dwords\n",
                 sizedwords);
         break;
      }

      if (*line == 'z') {
         buf[idx++] = 0;
         line++;
         continue;
      }

      uint32_t accum = 0;
      for (int i = 0; (i < 5) && (*line != '\n') && (*line != '\0'); i++) {
         accum *= 85;
         accum += *line - '!';
         line++;
      }

      buf[idx++] = accum;
   }

   return buf;
}
201
/*
 * Return true if 'line' begins with the string 'start' (an empty 'start'
 * matches any line).  Only the first strlen(start) characters are
 * inspected, rather than searching the whole line for a match.
 */
static bool
startswith(const char *line, const char *start)
{
   return strncmp(line, start, strlen(start)) == 0;
}
207
/*
 * sscanf() wrapper that insists on every conversion in 'fmt' being
 * matched: prints an error to stderr and exits with status 1 otherwise.
 *
 * The expected number of assignments is derived from 'fmt': each '%'
 * counts as one conversion, except for a literal "%%" and for
 * assignment-suppressed conversions ("%*..."), neither of which
 * contributes to the value returned by vsscanf().  Scansets containing a
 * '%' character are not handled.
 */
static void
parseline(const char *line, const char *fmt, ...)
{
   int n = 0;

   /* scan fmt string to extract expected # of conversions: */
   for (const char *p = fmt; *p; p++) {
      if (*p != '%')
         continue;

      if (p[1] == '%') {
         /* literal percent sign, skip both characters */
         p++;
         continue;
      }

      if (p[1] == '*') {
         /* assignment suppressed, not counted by vsscanf() */
         continue;
      }

      n++;
   }

   va_list ap;
   va_start(ap, fmt);
   int matched = vsscanf(line, fmt, ap);
   va_end(ap);

   if (matched != n) {
      fprintf(stderr, "parse error scanning: '%s'\n", fmt);
      exit(1);
   }
}
236
/*
 * Loop header that iterates over the lines of the current section, binding
 * each one to '_line'.  Usage:
 *
 *    foreach_line_in_section (line) { ...body... }
 *
 * The body becomes the 'else' branch of the embedded 'if': a line that does
 * not start with a space marks the start of the next section, so it is
 * handed back via pushline() and the loop terminates.  'continue' inside
 * the body advances to the next line.
 */
#define foreach_line_in_section(_line)                                         \
   for (const char *_line = popline(); _line; _line = popline())               \
      /* check for start of next section */                                    \
      if (_line[0] != ' ') {                                                   \
         pushline();                                                           \
         break;                                                                \
      } else
244
245/*
246 * Decode ringbuffer section:
247 */
248
/* Per-ring state filled in by decode_ringbuffer(), indexed by ring id. */
static struct {
   uint64_t iova;  /* "iova:" value; matched against CP_RB_BASE */
   uint32_t rptr;  /* "rptr:" value */
   uint32_t wptr;  /* "wptr:" value */
   uint32_t size;  /* "size:" value; divided by 4 to get the dword count */
   uint32_t *buf;  /* decoded ring contents, NULL until a data line is seen */
} ringbuffers[5];
256
257static void
258decode_ringbuffer(void)
259{
260   int id = 0;
261
262   foreach_line_in_section (line) {
263      if (startswith(line, "  - id:")) {
264         parseline(line, "  - id: %d", &id);
265         assert(id < ARRAY_SIZE(ringbuffers));
266      } else if (startswith(line, "    iova:")) {
267         parseline(line, "    iova: %" PRIx64, &ringbuffers[id].iova);
268      } else if (startswith(line, "    rptr:")) {
269         parseline(line, "    rptr: %d", &ringbuffers[id].rptr);
270      } else if (startswith(line, "    wptr:")) {
271         parseline(line, "    wptr: %d", &ringbuffers[id].wptr);
272      } else if (startswith(line, "    size:")) {
273         parseline(line, "    size: %d", &ringbuffers[id].size);
274      } else if (startswith(line, "    data: !!ascii85 |")) {
275         ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4);
276         add_buffer(ringbuffers[id].iova, ringbuffers[id].size,
277                    ringbuffers[id].buf);
278         continue;
279      }
280
281      printf("%s", line);
282   }
283}
284
285static bool
286valid_header(uint32_t pkt)
287{
288   if (options.gpu_id >= 500) {
289      return pkt_is_type4(pkt) || pkt_is_type7(pkt);
290   } else {
291      /* TODO maybe we can check validish looking pkt3 opc or pkt0
292       * register offset.. the cmds sent by kernel are usually
293       * fairly limited (other than initialization) which confines
294       * the search space a bit..
295       */
296      return true;
297   }
298}
299
300static void
301dump_cmdstream(void)
302{
303   uint64_t rb_base = regval64("CP_RB_BASE");
304
305   printf("got rb_base=%" PRIx64 "\n", rb_base);
306
307   options.ibs[1].base = regval64("CP_IB1_BASE");
308   options.ibs[1].rem = regval("CP_IB1_REM_SIZE");
309   options.ibs[2].base = regval64("CP_IB2_BASE");
310   options.ibs[2].rem = regval("CP_IB2_REM_SIZE");
311
312   /* Adjust remaining size to account for cmdstream slurped into ROQ
313    * but not yet consumed by SQE
314    *
315    * TODO add support for earlier GPUs once we tease out the needed
316    * registers.. see crashit.c in msmtest for hints.
317    *
318    * TODO it would be nice to be able to extract out register bitfields
319    * by name rather than hard-coding this.
320    */
321   if (is_a6xx()) {
322      options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16;
323      options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16;
324   }
325
326   printf("IB1: %" PRIx64 ", %u\n", options.ibs[1].base, options.ibs[1].rem);
327   printf("IB2: %" PRIx64 ", %u\n", options.ibs[2].base, options.ibs[2].rem);
328
329   /* now that we've got the regvals we want, reset register state
330    * so we aren't seeing values from decode_registers();
331    */
332   reset_regs();
333
334   for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) {
335      if (ringbuffers[id].iova != rb_base)
336         continue;
337      if (!ringbuffers[id].size)
338         continue;
339
340      printf("found ring!\n");
341
342      /* The kernel level ringbuffer (RB) wraps around, which
343       * cffdec doesn't really deal with.. so figure out how
344       * many dwords are unread
345       */
346      unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */
347
348      if (verbose) {
349         dump_commands(ringbuffers[id].buf, ringszdw, 0);
350         return;
351      }
352
353/* helper macro to deal with modulo size math: */
354#define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw)
355
356      /* The rptr will (most likely) have moved past the IB to
357       * userspace cmdstream, so back up a bit, and then advance
358       * until we find a valid start of a packet.. this is going
359       * to be less reliable on a4xx and before (pkt0/pkt3),
360       * compared to pkt4/pkt7 with parity bits
361       */
362      const int lookback = 12;
363      unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback);
364
365      for (int idx = 0; idx < lookback; idx++) {
366         if (valid_header(ringbuffers[id].buf[rptr]))
367            break;
368         rptr = mod_add(rptr, 1);
369      }
370
371      unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr);
372
373      printf("got cmdszdw=%d\n", cmdszdw);
374      uint32_t *buf = malloc(cmdszdw * 4);
375
376      for (int idx = 0; idx < cmdszdw; idx++) {
377         int p = mod_add(rptr, idx);
378         buf[idx] = ringbuffers[id].buf[p];
379      }
380
381      dump_commands(buf, cmdszdw, 0);
382      free(buf);
383   }
384}
385
386/*
387 * Decode 'bos' (buffers) section:
388 */
389
390static void
391decode_bos(void)
392{
393   uint32_t size = 0;
394   uint64_t iova = 0;
395
396   foreach_line_in_section (line) {
397      if (startswith(line, "  - iova:")) {
398         parseline(line, "  - iova: %" PRIx64, &iova);
399      } else if (startswith(line, "    size:")) {
400         parseline(line, "    size: %u", &size);
401      } else if (startswith(line, "    data: !!ascii85 |")) {
402         uint32_t *buf = popline_ascii85(size / 4);
403
404         if (verbose)
405            dump_hex_ascii(buf, size, 1);
406
407         add_buffer(iova, size, buf);
408
409         continue;
410      }
411
412      printf("%s", line);
413   }
414}
415
416/*
417 * Decode registers section:
418 */
419
420static void
421dump_register(struct rnn *rnn, uint32_t offset, uint32_t value)
422{
423   struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset);
424   if (info && info->typeinfo) {
425      char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);
426      printf("%s: %s\n", info->name, decoded);
427   } else if (info) {
428      printf("%s: %08x\n", info->name, value);
429   } else {
430      printf("<%04x>: %08x\n", offset, value);
431   }
432}
433
434static void
435decode_gmu_registers(void)
436{
437   foreach_line_in_section (line) {
438      uint32_t offset, value;
439      parseline(line, "  - { offset: %x, value: %x }", &offset, &value);
440
441      printf("\t%08x\t", value);
442      dump_register(rnn_gmu, offset / 4, value);
443   }
444}
445
446static void
447decode_registers(void)
448{
449   foreach_line_in_section (line) {
450      uint32_t offset, value;
451      parseline(line, "  - { offset: %x, value: %x }", &offset, &value);
452
453      reg_set(offset / 4, value);
454      printf("\t%08x", value);
455      dump_register_val(offset / 4, value, 0);
456   }
457}
458
459/* similar to registers section, but for banked context regs: */
460static void
461decode_clusters(void)
462{
463   foreach_line_in_section (line) {
464      if (startswith(line, "  - cluster-name:") ||
465          startswith(line, "    - context:")) {
466         printf("%s", line);
467         continue;
468      }
469
470      uint32_t offset, value;
471      parseline(line, "      - { offset: %x, value: %x }", &offset, &value);
472
473      printf("\t%08x", value);
474      dump_register_val(offset / 4, value, 0);
475   }
476}
477
478/*
479 * Decode indexed-registers.. these aren't like normal registers, but a
480 * sort of FIFO where successive reads pop out associated debug state.
481 */
482
/*
 * Pretty-print the CP_SQE_STAT indexed registers.  The first dword is
 * printed as the PC; the following 32 dwords are printed in two columns as
 * $01..$10 and $11..$20.  On a6xx, if the first of those dwords is a type7
 * packet header with a known opcode, the packet name is printed as well.
 *
 * Reads 33 dwords from 'stat'.
 */
static void
dump_cp_sqe_stat(uint32_t *stat)
{
   const uint32_t *regs = stat + 1;

   printf("\t PC: %04x\n", stat[0]);

   /* Not sure whether a valid non-type7 header can show up here; if it
    * does, nothing is printed for it.
    */
   if (is_a6xx() && valid_header(regs[0]) && pkt_is_type7(regs[0])) {
      const char *name = pktname(cp_type7_opcode(regs[0]));
      if (name)
         printf("\tPKT: %s\n", name);
   }

   for (int lo = 1; lo <= 16; lo++) {
      const int hi = lo + 16;
      printf("\t$%02x: %08x\t\t$%02x: %08x\n", lo, regs[lo - 1], hi,
             regs[hi - 1]);
   }
}
505
506static void
507dump_control_regs(uint32_t *regs)
508{
509   if (!rnn_control)
510      return;
511
512   /* Control regs 0x100-0x17f are a scratch space to be used by the
513    * firmware however it wants, unlike lower regs which involve some
514    * fixed-function units. Therefore only these registers get dumped
515    * directly.
516    */
517   for (uint32_t i = 0; i < 0x80; i++) {
518      printf("\t%08x\t", regs[i]);
519      dump_register(rnn_control, i + 0x100, regs[i]);
520   }
521}
522
523static void
524dump_cp_ucode_dbg(uint32_t *dbg)
525{
526   /* Notes on the data:
527    * There seems to be a section every 4096 DWORD's. The sections aren't
528    * all the same size, so the rest of the 4096 DWORD's are filled with
529    * mirrors of the actual data.
530    */
531
532   for (int section = 0; section < 6; section++, dbg += 0x1000) {
533      switch (section) {
534      case 0:
535         /* Contains scattered data from a630_sqe.fw: */
536         printf("\tSQE instruction cache:\n");
537         dump_hex_ascii(dbg, 4 * 0x400, 1);
538         break;
539      case 1:
540         printf("\tUnknown 1:\n");
541         dump_hex_ascii(dbg, 4 * 0x80, 1);
542         break;
543      case 2:
544         printf("\tUnknown 2:\n");
545         dump_hex_ascii(dbg, 4 * 0x200, 1);
546         break;
547      case 3:
548         printf("\tUnknown 3:\n");
549         dump_hex_ascii(dbg, 4 * 0x80, 1);
550         break;
551      case 4:
552         /* Don't bother printing this normally */
553         if (verbose) {
554            printf("\tSQE packet jumptable contents:\n");
555            dump_hex_ascii(dbg, 4 * 0x80, 1);
556         }
557         break;
558      case 5:
559         printf("\tSQE scratch control regs:\n");
560         dump_control_regs(dbg);
561         break;
562      }
563   }
564}
565
566static void
567dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context,
568                        bool pipe)
569{
570   if (pipe) {
571      struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg);
572      printf("\t\twrite %s (%02x) pipe\n", info->name, reg);
573
574      if (!strcmp(info->typeinfo->name, "void")) {
575         /* registers that ignore their payload */
576      } else {
577         printf("\t\t\t");
578         dump_register(rnn_pipe, reg, data);
579      }
580   } else {
581      printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context);
582      dump_register_val(reg, data, 2);
583   }
584}
585
/*
 * Decode one mem pool chunk (four dwords) and print the register writes it
 * holds.  A chunk carries up to two writes, each with its own enable bit,
 * register offset, data, pipe flag and context bit.
 */
static void
dump_mem_pool_chunk(const uint32_t *chunk)
{
   /* Overlay used to unpack the chunk; the field widths add up to 128
    * bits.  The unpacking relies on the compiler's packed bitfield layout.
    */
   struct __attribute__((packed)) {
      bool reg0_enabled : 1;
      bool reg1_enabled : 1;
      uint32_t data0 : 32;
      uint32_t data1 : 32;
      uint32_t reg0 : 18;
      uint32_t reg1 : 18;
      bool reg0_pipe : 1;
      bool reg1_pipe : 1;
      uint32_t reg0_context : 1;
      uint32_t reg1_context : 1;
      uint32_t padding : 22;
   } fields;

   /* copy rather than cast, to avoid aliasing/alignment problems: */
   memcpy(&fields, chunk, 4 * sizeof(uint32_t));

   if (fields.reg0_enabled) {
      dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context,
                              fields.reg0_pipe);
   }

   if (fields.reg1_enabled) {
      dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context,
                              fields.reg1_pipe);
   }
}
615
616static void
617dump_cp_mem_pool(uint32_t *mempool)
618{
619   /* The mem pool is a shared pool of memory used for storing in-flight
620    * register writes. There are 6 different queues, one for each
621    * cluster. Writing to $data (or for some special registers, $addr)
622    * pushes data onto the appropriate queue, and each queue is pulled
623    * from by the appropriate cluster. The queues are thus written to
624    * in-order, but may be read out-of-order.
625    *
626    * The queues are conceptually divided into 128-bit "chunks", and the
627    * read and write pointers are in units of chunks.  These chunks are
628    * organized internally into 8-chunk "blocks", and memory is allocated
629    * dynamically in terms of blocks. Each queue is represented as a
630    * singly-linked list of blocks, as well as 3-bit start/end chunk
631    * pointers that point within the first/last block.  The next pointers
632    * are located in a separate array, rather than inline.
633    */
634
635   /* TODO: The firmware CP_MEM_POOL save/restore routines do something
636    * like:
637    *
638    * cread $02, [ $00 + 0 ]
639    * and $02, $02, 0x118
640    * ...
641    * brne $02, 0, #label
642    * mov $03, 0x2000
643    * mov $03, 0x1000
644    * label:
645    * ...
646    *
647    * I think that control register 0 is the GPU version, and some
648    * versions have a smaller mem pool. It seems some models have a mem
649    * pool that's half the size, and a bunch of offsets are shifted
650    * accordingly. Unfortunately the kernel driver's dumping code doesn't
651    * seem to take this into account, even the downstream android driver,
652    * and we don't know which versions 0x8, 0x10, or 0x100 correspond
653    * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out?
654    */
655   bool small_mem_pool = false;
656
657   /* The array of next pointers for each block. */
658   const uint32_t *next_pointers =
659      small_mem_pool ? &mempool[0x800] : &mempool[0x1000];
660
661   /* Maximum number of blocks in the pool, also the size of the pointers
662    * array.
663    */
664   const int num_blocks = small_mem_pool ? 0x30 : 0x80;
665
666   /* Number of queues */
667   const unsigned num_queues = 6;
668
669   /* Unfortunately the per-queue state is a little more complicated than
670    * a simple pair of begin/end pointers. Instead of a single beginning
671    * block, there are *two*, with the property that either the two are
672    * equal or the second is the "next" of the first. Similarly there are
673    * two end blocks. Thus the queue either looks like this:
674    *
675    * A -> B -> ... -> C -> D
676    *
677    * Or like this, or some combination:
678    *
679    * A/B -> ... -> C/D
680    *
681    * However, there's only one beginning/end chunk offset. Now the
682    * question is, which of A or B is the actual start? I.e. is the chunk
683    * offset an offset inside A or B? It depends. I'll show a typical read
684    * cycle, starting here (read pointer marked with a *) with a chunk
685    * offset of 0:
686    *
687    *	  A                    B
688    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
689    * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_|
690    *
691    * Once the pointer advances far enough, the hardware decides to free
692    * A, after which the read-side state looks like:
693    *
694    *	(free)                A/B
695    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
696    * |_|_|_|_|_|_|_|_|    |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_|
697    *
698    * Then after advancing the pointer a bit more, the hardware fetches
699    * the "next" pointer for A and stores it in B:
700    *
701    *	(free)                 A                     B
702    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
703    * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_|
704    *
705    * Then the read pointer advances into B, at which point we've come
706    * back to the first state having advanced a whole block:
707    *
708    *	(free)                 A                     B
709    *  _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _      _ _ _ _ _ _ _ _
710    * |_|_|_|_|_|_|_|_|    |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_|
711    *
712    *
713    * There is a similar cycle for the write pointer. Now, the question
714    * is, how do we know which state we're in? We need to know this to
715    * know whether the pointer (*) is in A or B if they're different. It
716    * seems like there should be some bit somewhere describing this, but
717    * after lots of experimentation I've come up empty-handed. For now we
718    * assume that if the pointer is in the first half, then we're in
719    * either the first or second state and use B, and otherwise we're in
720    * the second or third state and use A. So far I haven't seen anything
721    * that violates this assumption.
722    */
723
724   struct {
725      uint32_t unk0;
726      uint32_t padding0[7]; /* Mirrors of unk0 */
727
728      struct {
729         uint32_t chunk : 3;
730         uint32_t first_block : 32 - 3;
731      } writer[6];
732      uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */
733
734      uint32_t unk1;
735      uint32_t padding2[7]; /* Mirrors of unk1 */
736
737      uint32_t writer_second_block[6];
738      uint32_t padding3[2];
739
740      uint32_t unk2[6];
741      uint32_t padding4[2];
742
743      struct {
744         uint32_t chunk : 3;
745         uint32_t first_block : 32 - 3;
746      } reader[6];
747      uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */
748
749      uint32_t unk3;
750      uint32_t padding6[7]; /* Mirrors of unk3 */
751
752      uint32_t reader_second_block[6];
753      uint32_t padding7[2];
754
755      uint32_t block_count[6];
756      uint32_t padding[2];
757
758      uint32_t unk4;
759      uint32_t padding9[7]; /* Mirrors of unk4 */
760   } data1;
761
762   const uint32_t *data1_ptr =
763      small_mem_pool ? &mempool[0xc00] : &mempool[0x1800];
764   memcpy(&data1, data1_ptr, sizeof(data1));
765
766   /* Based on the kernel, the first dword is the mem pool size (in
767    * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE.
768    */
769   const uint32_t *data2_ptr =
770      small_mem_pool ? &mempool[0x1000] : &mempool[0x2000];
771   const int data2_size = 0x60;
772
773   /* This seems to be the size of each queue in chunks. */
774   const uint32_t *queue_sizes = &data2_ptr[0x18];
775
776   printf("\tdata2:\n");
777   dump_hex_ascii(data2_ptr, 4 * data2_size, 1);
778
779   /* These seem to be some kind of counter of allocated/deallocated blocks */
780   if (verbose) {
781      printf("\tunk0: %x\n", data1.unk0);
782      printf("\tunk1: %x\n", data1.unk1);
783      printf("\tunk3: %x\n", data1.unk3);
784      printf("\tunk4: %x\n\n", data1.unk4);
785   }
786
787   for (int queue = 0; queue < num_queues; queue++) {
788      const char *cluster_names[6] = {"FE",   "SP_VS", "PC_VS",
789                                      "GRAS", "SP_PS", "PS"};
790      printf("\tCLUSTER_%s:\n\n", cluster_names[queue]);
791
792      if (verbose) {
793         printf("\t\twriter_first_block: 0x%x\n",
794                data1.writer[queue].first_block);
795         printf("\t\twriter_second_block: 0x%x\n",
796                data1.writer_second_block[queue]);
797         printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk);
798         printf("\t\treader_first_block: 0x%x\n",
799                data1.reader[queue].first_block);
800         printf("\t\treader_second_block: 0x%x\n",
801                data1.reader_second_block[queue]);
802         printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk);
803         printf("\t\tblock_count: %d\n", data1.block_count[queue]);
804         printf("\t\tunk2: 0x%x\n", data1.unk2[queue]);
805         printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]);
806      }
807
808      uint32_t cur_chunk = data1.reader[queue].chunk;
809      uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block
810                                         : data1.reader_second_block[queue];
811      uint32_t last_chunk = data1.writer[queue].chunk;
812      uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block
813                                           : data1.writer_second_block[queue];
814
815      if (verbose)
816         printf("\tblock %x\n", cur_block);
817      if (cur_block >= num_blocks) {
818         fprintf(stderr, "block %x too large\n", cur_block);
819         exit(1);
820      }
821      unsigned calculated_queue_size = 0;
822      while (cur_block != last_block || cur_chunk != last_chunk) {
823         calculated_queue_size++;
824         uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4];
825
826         dump_mem_pool_chunk(chunk_ptr);
827
828         printf("\t%05x: %08x %08x %08x %08x\n",
829                4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0],
830                chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]);
831
832         cur_chunk++;
833         if (cur_chunk == 8) {
834            cur_block = next_pointers[cur_block];
835            if (verbose)
836               printf("\tblock %x\n", cur_block);
837            if (cur_block >= num_blocks) {
838               fprintf(stderr, "block %x too large\n", cur_block);
839               exit(1);
840            }
841            cur_chunk = 0;
842         }
843      }
844      if (calculated_queue_size != queue_sizes[queue]) {
845         printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n",
846                calculated_queue_size);
847      }
848      printf("\n");
849   }
850}
851
852static void
853decode_indexed_registers(void)
854{
855   char *name = NULL;
856   uint32_t sizedwords = 0;
857
858   foreach_line_in_section (line) {
859      if (startswith(line, "  - regs-name:")) {
860         free(name);
861         parseline(line, "  - regs-name: %ms", &name);
862      } else if (startswith(line, "    dwords:")) {
863         parseline(line, "    dwords: %u", &sizedwords);
864      } else if (startswith(line, "    data: !!ascii85 |")) {
865         uint32_t *buf = popline_ascii85(sizedwords);
866
867         /* some of the sections are pretty large, and are (at least
868          * so far) not useful, so skip them if not in verbose mode:
869          */
870         bool dump = verbose || !strcmp(name, "CP_SQE_STAT") ||
871                     !strcmp(name, "CP_DRAW_STATE") ||
872                     !strcmp(name, "CP_ROQ") || 0;
873
874         if (!strcmp(name, "CP_SQE_STAT"))
875            dump_cp_sqe_stat(buf);
876
877         if (!strcmp(name, "CP_UCODE_DBG_DATA"))
878            dump_cp_ucode_dbg(buf);
879
880         if (!strcmp(name, "CP_MEMPOOL"))
881            dump_cp_mem_pool(buf);
882
883         if (dump)
884            dump_hex_ascii(buf, 4 * sizedwords, 1);
885
886         free(buf);
887
888         continue;
889      }
890
891      printf("%s", line);
892   }
893}
894
895/*
896 * Decode shader-blocks:
897 */
898
899static void
900decode_shader_blocks(void)
901{
902   char *type = NULL;
903   uint32_t sizedwords = 0;
904
905   foreach_line_in_section (line) {
906      if (startswith(line, "  - type:")) {
907         free(type);
908         parseline(line, "  - type: %ms", &type);
909      } else if (startswith(line, "      size:")) {
910         parseline(line, "      size: %u", &sizedwords);
911      } else if (startswith(line, "    data: !!ascii85 |")) {
912         uint32_t *buf = popline_ascii85(sizedwords);
913
914         /* some of the sections are pretty large, and are (at least
915          * so far) not useful, so skip them if not in verbose mode:
916          */
917         bool dump = verbose || !strcmp(type, "A6XX_SP_INST_DATA") ||
918                     !strcmp(type, "A6XX_HLSQ_INST_RAM") || 0;
919
920         if (!strcmp(type, "A6XX_SP_INST_DATA") ||
921             !strcmp(type, "A6XX_HLSQ_INST_RAM")) {
922            /* TODO this section actually contains multiple shaders
923             * (or parts of shaders?), so perhaps we should search
924             * for ends of shaders and decode each?
925             */
926            try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id);
927         }
928
929         if (dump)
930            dump_hex_ascii(buf, 4 * sizedwords, 1);
931
932         free(buf);
933
934         continue;
935      }
936
937      printf("%s", line);
938   }
939
940   free(type);
941}
942
943/*
944 * Decode debugbus section:
945 */
946
947static void
948decode_debugbus(void)
949{
950   char *block = NULL;
951   uint32_t sizedwords = 0;
952
953   foreach_line_in_section (line) {
954      if (startswith(line, "  - debugbus-block:")) {
955         free(block);
956         parseline(line, "  - debugbus-block: %ms", &block);
957      } else if (startswith(line, "    count:")) {
958         parseline(line, "    count: %u", &sizedwords);
959      } else if (startswith(line, "    data: !!ascii85 |")) {
960         uint32_t *buf = popline_ascii85(sizedwords);
961
962         /* some of the sections are pretty large, and are (at least
963          * so far) not useful, so skip them if not in verbose mode:
964          */
965         bool dump = verbose || 0;
966
967         if (dump)
968            dump_hex_ascii(buf, 4 * sizedwords, 1);
969
970         free(buf);
971
972         continue;
973      }
974
975      printf("%s", line);
976   }
977}
978
979/*
980 * Main crashdump decode loop:
981 */
982
983static void
984decode(void)
985{
986   const char *line;
987
988   while ((line = popline())) {
989      printf("%s", line);
990      if (startswith(line, "revision:")) {
991         parseline(line, "revision: %u", &options.gpu_id);
992         printf("Got gpu_id=%u\n", options.gpu_id);
993
994         cffdec_init(&options);
995
996         if (is_a6xx()) {
997            rnn_gmu = rnn_new(!options.color);
998            rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX");
999            rnn_control = rnn_new(!options.color);
1000            rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
1001                          "A6XX_CONTROL_REG");
1002            rnn_pipe = rnn_new(!options.color);
1003            rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml",
1004                          "A6XX_PIPE_REG");
1005         } else if (is_a5xx()) {
1006            rnn_control = rnn_new(!options.color);
1007            rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml",
1008                          "A5XX_CONTROL_REG");
1009         } else {
1010            rnn_control = NULL;
1011         }
1012      } else if (startswith(line, "bos:")) {
1013         decode_bos();
1014      } else if (startswith(line, "ringbuffer:")) {
1015         decode_ringbuffer();
1016      } else if (startswith(line, "registers:")) {
1017         decode_registers();
1018
1019         /* after we've recorded buffer contents, and CP register values,
1020          * we can take a stab at decoding the cmdstream:
1021          */
1022         dump_cmdstream();
1023      } else if (startswith(line, "registers-gmu:")) {
1024         decode_gmu_registers();
1025      } else if (startswith(line, "indexed-registers:")) {
1026         decode_indexed_registers();
1027      } else if (startswith(line, "shader-blocks:")) {
1028         decode_shader_blocks();
1029      } else if (startswith(line, "clusters:")) {
1030         decode_clusters();
1031      } else if (startswith(line, "debugbus:")) {
1032         decode_debugbus();
1033      }
1034   }
1035}
1036
/*
 * Usage and argument parsing:
 */

/* Write the help text to stderr and terminate with exit status 2. */
static void
usage(void)
{
   /* clang-format off */
   static const char help_text[] =
      "Usage:\n\n"
      "\tcrashdec [-achmsv] [-f FILE]\n\n"
      "Options:\n"
      "\t-a, --allregs   - show all registers (including ones not written since\n"
      "\t                  previous draw) at each draw\n"
      "\t-c, --color     - use colors\n"
      "\t-f, --file=FILE - read input from specified file (rather than stdin)\n"
      "\t-h, --help      - this usage message\n"
      "\t-m, --markers   - try to decode CP_NOP string markers\n"
      "\t-s, --summary   - don't show individual register writes, but just show\n"
      "\t                  register values on draws\n"
      "\t-v, --verbose   - dump more verbose output, including contents of\n"
      "\t                  less interesting buffers\n"
      "\n";
   /* clang-format on */

   fputs(help_text, stderr);
   exit(2);
}
1063
/* Long-option table for getopt_long(); each entry maps onto the short
 * option character given in .val.  The table ends with a sentinel entry
 * whose fields are all zero/NULL.
 */
/* clang-format off */
static const struct option opts[] = {
      { .name = "allregs", .has_arg = no_argument,       .flag = NULL, .val = 'a' },
      { .name = "color",   .has_arg = no_argument,       .flag = NULL, .val = 'c' },
      { .name = "file",    .has_arg = required_argument, .flag = NULL, .val = 'f' },
      { .name = "help",    .has_arg = no_argument,       .flag = NULL, .val = 'h' },
      { .name = "markers", .has_arg = no_argument,       .flag = NULL, .val = 'm' },
      { .name = "summary", .has_arg = no_argument,       .flag = NULL, .val = 's' },
      { .name = "verbose", .has_arg = no_argument,       .flag = NULL, .val = 'v' },
      { .name = NULL }
};
/* clang-format on */
1076
/* true when stdout is a terminal; main() sets it from isatty() */
static bool interactive;

/* Flush pending output and, if the pager is in use, close it.  main()
 * both registers this with atexit() and calls it directly.
 */
static void
cleanup(void)
{
   fflush(stdout);

   /* the pager is only opened for interactive sessions */
   if (!interactive)
      return;

   pager_close();
}
1088
1089int
1090main(int argc, char **argv)
1091{
1092   int c;
1093
1094   interactive = isatty(STDOUT_FILENO);
1095   options.color = interactive;
1096
1097   /* default to read from stdin: */
1098   in = stdin;
1099
1100   while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) {
1101      switch (c) {
1102      case 'a':
1103         options.allregs = true;
1104         break;
1105      case 'c':
1106         options.color = true;
1107         break;
1108      case 'f':
1109         in = fopen(optarg, "r");
1110         break;
1111      case 'm':
1112         options.decode_markers = true;
1113         break;
1114      case 's':
1115         options.summary = true;
1116         break;
1117      case 'v':
1118         verbose = true;
1119         break;
1120      case 'h':
1121      default:
1122         usage();
1123      }
1124   }
1125
1126   disasm_a3xx_set_debug(PRINT_RAW);
1127
1128   if (interactive) {
1129      pager_open();
1130   }
1131
1132   atexit(cleanup);
1133
1134   decode();
1135   cleanup();
1136}
1137