crashdec.c revision 7ec681f3
1/* 2 * Copyright © 2020 Google, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24/* 25 * Decoder for devcoredump traces from drm/msm. In case of a gpu crash/hang, 26 * the coredump should be found in: 27 * 28 * /sys/class/devcoredump/devcd<n>/data 29 * 30 * The crashdump will hang around for 5min, it can be cleared by writing to 31 * the file, ie: 32 * 33 * echo 1 > /sys/class/devcoredump/devcd<n>/data 34 * 35 * (the driver won't log any new crashdumps until the previous one is cleared 36 * or times out after 5min) 37 */ 38 39#include <assert.h> 40#include <getopt.h> 41#include <inttypes.h> 42#include <stdarg.h> 43#include <stdbool.h> 44#include <stdint.h> 45#include <stdio.h> 46#include <stdlib.h> 47#include <string.h> 48#include <unistd.h> 49 50#include "freedreno_pm4.h" 51 52#include "ir3/instr-a3xx.h" 53#include "buffers.h" 54#include "cffdec.h" 55#include "disasm.h" 56#include "pager.h" 57#include "rnnutil.h" 58#include "util.h" 59 60static FILE *in; 61static bool verbose; 62 63static struct rnn *rnn_gmu; 64static struct rnn *rnn_control; 65static struct rnn *rnn_pipe; 66 67static struct cffdec_options options = { 68 .draw_filter = -1, 69}; 70 71static inline bool 72is_a6xx(void) 73{ 74 return (600 <= options.gpu_id) && (options.gpu_id < 700); 75} 76static inline bool 77is_a5xx(void) 78{ 79 return (500 <= options.gpu_id) && (options.gpu_id < 600); 80} 81static inline bool 82is_64b(void) 83{ 84 return options.gpu_id >= 500; 85} 86 87/* 88 * Helpers to read register values: 89 */ 90 91/* read registers that are 64b on 64b GPUs (ie. a5xx+) */ 92static uint64_t 93regval64(const char *name) 94{ 95 unsigned reg = regbase(name); 96 assert(reg); 97 uint64_t val = reg_val(reg); 98 if (is_64b()) 99 val |= ((uint64_t)reg_val(reg + 1)) << 32; 100 return val; 101} 102 103static uint32_t 104regval(const char *name) 105{ 106 unsigned reg = regbase(name); 107 assert(reg); 108 return reg_val(reg); 109} 110 111/* 112 * Line reading and string helpers: 113 */ 114 115static char * 116replacestr(char *line, const char *find, const char *replace) 117{ 118 char *tail, *s; 119 120 if (!(s = strstr(line, find))) 121 return line; 122 123 tail = s + strlen(find); 124 125 char *newline; 126 asprintf(&newline, "%.*s%s%s", (int)(s - line), line, replace, tail); 127 free(line); 128 129 return newline; 130} 131 132static char *lastline; 133static char *pushedline; 134 135static const char * 136popline(void) 137{ 138 char *r = pushedline; 139 140 if (r) { 141 pushedline = NULL; 142 return r; 143 } 144 145 free(lastline); 146 147 size_t n = 0; 148 if (getline(&r, &n, in) < 0) 149 exit(0); 150 151 /* Handle section name typo's from earlier kernels: */ 152 r = replacestr(r, "CP_MEMPOOOL", "CP_MEMPOOL"); 153 r = replacestr(r, "CP_SEQ_STAT", "CP_SQE_STAT"); 154 155 lastline = r; 156 return r; 157} 158 159static void 160pushline(void) 161{ 162 assert(!pushedline); 163 pushedline = lastline; 164} 165 166static uint32_t * 167popline_ascii85(uint32_t sizedwords) 168{ 169 const char *line = popline(); 170 171 /* At this point we exepct the ascii85 data to be indented *some* 172 * amount, and to terminate at the end of the line. So just eat 173 * up the leading whitespace. 174 */ 175 assert(*line == ' '); 176 while (*line == ' ') 177 line++; 178 179 uint32_t *buf = calloc(1, 4 * sizedwords); 180 int idx = 0; 181 182 while (*line != '\n') { 183 if (*line == 'z') { 184 buf[idx++] = 0; 185 line++; 186 continue; 187 } 188 189 uint32_t accum = 0; 190 for (int i = 0; (i < 5) && (*line != '\n'); i++) { 191 accum *= 85; 192 accum += *line - '!'; 193 line++; 194 } 195 196 buf[idx++] = accum; 197 } 198 199 return buf; 200} 201 202static bool 203startswith(const char *line, const char *start) 204{ 205 return strstr(line, start) == line; 206} 207 208static void 209parseline(const char *line, const char *fmt, ...) 210{ 211 int fmtlen = strlen(fmt); 212 int n = 0; 213 int l = 0; 214 215 /* scan fmt string to extract expected # of conversions: */ 216 for (int i = 0; i < fmtlen; i++) { 217 if (fmt[i] == '%') { 218 if (i == (l - 1)) { /* prev char was %, ie. we have %% */ 219 n--; 220 l = 0; 221 } else { 222 n++; 223 l = i; 224 } 225 } 226 } 227 228 va_list ap; 229 va_start(ap, fmt); 230 if (vsscanf(line, fmt, ap) != n) { 231 fprintf(stderr, "parse error scanning: '%s'\n", fmt); 232 exit(1); 233 } 234 va_end(ap); 235} 236 237#define foreach_line_in_section(_line) \ 238 for (const char *_line = popline(); _line; _line = popline()) \ 239 /* check for start of next section */ \ 240 if (_line[0] != ' ') { \ 241 pushline(); \ 242 break; \ 243 } else 244 245/* 246 * Decode ringbuffer section: 247 */ 248 249static struct { 250 uint64_t iova; 251 uint32_t rptr; 252 uint32_t wptr; 253 uint32_t size; 254 uint32_t *buf; 255} ringbuffers[5]; 256 257static void 258decode_ringbuffer(void) 259{ 260 int id = 0; 261 262 foreach_line_in_section (line) { 263 if (startswith(line, " - id:")) { 264 parseline(line, " - id: %d", &id); 265 assert(id < ARRAY_SIZE(ringbuffers)); 266 } else if (startswith(line, " iova:")) { 267 parseline(line, " iova: %" PRIx64, &ringbuffers[id].iova); 268 } else if (startswith(line, " rptr:")) { 269 parseline(line, " rptr: %d", &ringbuffers[id].rptr); 270 } else if (startswith(line, " wptr:")) { 271 parseline(line, " wptr: %d", &ringbuffers[id].wptr); 272 } else if (startswith(line, " size:")) { 273 parseline(line, " size: %d", &ringbuffers[id].size); 274 } else if (startswith(line, " data: !!ascii85 |")) { 275 ringbuffers[id].buf = popline_ascii85(ringbuffers[id].size / 4); 276 add_buffer(ringbuffers[id].iova, ringbuffers[id].size, 277 ringbuffers[id].buf); 278 continue; 279 } 280 281 printf("%s", line); 282 } 283} 284 285static bool 286valid_header(uint32_t pkt) 287{ 288 if (options.gpu_id >= 500) { 289 return pkt_is_type4(pkt) || pkt_is_type7(pkt); 290 } else { 291 /* TODO maybe we can check validish looking pkt3 opc or pkt0 292 * register offset.. the cmds sent by kernel are usually 293 * fairly limited (other than initialization) which confines 294 * the search space a bit.. 295 */ 296 return true; 297 } 298} 299 300static void 301dump_cmdstream(void) 302{ 303 uint64_t rb_base = regval64("CP_RB_BASE"); 304 305 printf("got rb_base=%" PRIx64 "\n", rb_base); 306 307 options.ibs[1].base = regval64("CP_IB1_BASE"); 308 options.ibs[1].rem = regval("CP_IB1_REM_SIZE"); 309 options.ibs[2].base = regval64("CP_IB2_BASE"); 310 options.ibs[2].rem = regval("CP_IB2_REM_SIZE"); 311 312 /* Adjust remaining size to account for cmdstream slurped into ROQ 313 * but not yet consumed by SQE 314 * 315 * TODO add support for earlier GPUs once we tease out the needed 316 * registers.. see crashit.c in msmtest for hints. 317 * 318 * TODO it would be nice to be able to extract out register bitfields 319 * by name rather than hard-coding this. 320 */ 321 if (is_a6xx()) { 322 options.ibs[1].rem += regval("CP_CSQ_IB1_STAT") >> 16; 323 options.ibs[2].rem += regval("CP_CSQ_IB2_STAT") >> 16; 324 } 325 326 printf("IB1: %" PRIx64 ", %u\n", options.ibs[1].base, options.ibs[1].rem); 327 printf("IB2: %" PRIx64 ", %u\n", options.ibs[2].base, options.ibs[2].rem); 328 329 /* now that we've got the regvals we want, reset register state 330 * so we aren't seeing values from decode_registers(); 331 */ 332 reset_regs(); 333 334 for (int id = 0; id < ARRAY_SIZE(ringbuffers); id++) { 335 if (ringbuffers[id].iova != rb_base) 336 continue; 337 if (!ringbuffers[id].size) 338 continue; 339 340 printf("found ring!\n"); 341 342 /* The kernel level ringbuffer (RB) wraps around, which 343 * cffdec doesn't really deal with.. so figure out how 344 * many dwords are unread 345 */ 346 unsigned ringszdw = ringbuffers[id].size >> 2; /* in dwords */ 347 348 if (verbose) { 349 dump_commands(ringbuffers[id].buf, ringszdw, 0); 350 return; 351 } 352 353/* helper macro to deal with modulo size math: */ 354#define mod_add(b, v) ((ringszdw + (int)(b) + (int)(v)) % ringszdw) 355 356 /* The rptr will (most likely) have moved past the IB to 357 * userspace cmdstream, so back up a bit, and then advance 358 * until we find a valid start of a packet.. this is going 359 * to be less reliable on a4xx and before (pkt0/pkt3), 360 * compared to pkt4/pkt7 with parity bits 361 */ 362 const int lookback = 12; 363 unsigned rptr = mod_add(ringbuffers[id].rptr, -lookback); 364 365 for (int idx = 0; idx < lookback; idx++) { 366 if (valid_header(ringbuffers[id].buf[rptr])) 367 break; 368 rptr = mod_add(rptr, 1); 369 } 370 371 unsigned cmdszdw = mod_add(ringbuffers[id].wptr, -rptr); 372 373 printf("got cmdszdw=%d\n", cmdszdw); 374 uint32_t *buf = malloc(cmdszdw * 4); 375 376 for (int idx = 0; idx < cmdszdw; idx++) { 377 int p = mod_add(rptr, idx); 378 buf[idx] = ringbuffers[id].buf[p]; 379 } 380 381 dump_commands(buf, cmdszdw, 0); 382 free(buf); 383 } 384} 385 386/* 387 * Decode 'bos' (buffers) section: 388 */ 389 390static void 391decode_bos(void) 392{ 393 uint32_t size = 0; 394 uint64_t iova = 0; 395 396 foreach_line_in_section (line) { 397 if (startswith(line, " - iova:")) { 398 parseline(line, " - iova: %" PRIx64, &iova); 399 } else if (startswith(line, " size:")) { 400 parseline(line, " size: %u", &size); 401 } else if (startswith(line, " data: !!ascii85 |")) { 402 uint32_t *buf = popline_ascii85(size / 4); 403 404 if (verbose) 405 dump_hex_ascii(buf, size, 1); 406 407 add_buffer(iova, size, buf); 408 409 continue; 410 } 411 412 printf("%s", line); 413 } 414} 415 416/* 417 * Decode registers section: 418 */ 419 420static void 421dump_register(struct rnn *rnn, uint32_t offset, uint32_t value) 422{ 423 struct rnndecaddrinfo *info = rnn_reginfo(rnn, offset); 424 if (info && info->typeinfo) { 425 char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value); 426 printf("%s: %s\n", info->name, decoded); 427 } else if (info) { 428 printf("%s: %08x\n", info->name, value); 429 } else { 430 printf("<%04x>: %08x\n", offset, value); 431 } 432} 433 434static void 435decode_gmu_registers(void) 436{ 437 foreach_line_in_section (line) { 438 uint32_t offset, value; 439 parseline(line, " - { offset: %x, value: %x }", &offset, &value); 440 441 printf("\t%08x\t", value); 442 dump_register(rnn_gmu, offset / 4, value); 443 } 444} 445 446static void 447decode_registers(void) 448{ 449 foreach_line_in_section (line) { 450 uint32_t offset, value; 451 parseline(line, " - { offset: %x, value: %x }", &offset, &value); 452 453 reg_set(offset / 4, value); 454 printf("\t%08x", value); 455 dump_register_val(offset / 4, value, 0); 456 } 457} 458 459/* similar to registers section, but for banked context regs: */ 460static void 461decode_clusters(void) 462{ 463 foreach_line_in_section (line) { 464 if (startswith(line, " - cluster-name:") || 465 startswith(line, " - context:")) { 466 printf("%s", line); 467 continue; 468 } 469 470 uint32_t offset, value; 471 parseline(line, " - { offset: %x, value: %x }", &offset, &value); 472 473 printf("\t%08x", value); 474 dump_register_val(offset / 4, value, 0); 475 } 476} 477 478/* 479 * Decode indexed-registers.. these aren't like normal registers, but a 480 * sort of FIFO where successive reads pop out associated debug state. 481 */ 482 483static void 484dump_cp_sqe_stat(uint32_t *stat) 485{ 486 printf("\t PC: %04x\n", stat[0]); 487 stat++; 488 489 if (is_a6xx() && valid_header(stat[0])) { 490 if (pkt_is_type7(stat[0])) { 491 unsigned opc = cp_type7_opcode(stat[0]); 492 const char *name = pktname(opc); 493 if (name) 494 printf("\tPKT: %s\n", name); 495 } else { 496 /* Not sure if this case can happen: */ 497 } 498 } 499 500 for (int i = 0; i < 16; i++) { 501 printf("\t$%02x: %08x\t\t$%02x: %08x\n", i + 1, stat[i], i + 16 + 1, 502 stat[i + 16]); 503 } 504} 505 506static void 507dump_control_regs(uint32_t *regs) 508{ 509 if (!rnn_control) 510 return; 511 512 /* Control regs 0x100-0x17f are a scratch space to be used by the 513 * firmware however it wants, unlike lower regs which involve some 514 * fixed-function units. Therefore only these registers get dumped 515 * directly. 516 */ 517 for (uint32_t i = 0; i < 0x80; i++) { 518 printf("\t%08x\t", regs[i]); 519 dump_register(rnn_control, i + 0x100, regs[i]); 520 } 521} 522 523static void 524dump_cp_ucode_dbg(uint32_t *dbg) 525{ 526 /* Notes on the data: 527 * There seems to be a section every 4096 DWORD's. The sections aren't 528 * all the same size, so the rest of the 4096 DWORD's are filled with 529 * mirrors of the actual data. 530 */ 531 532 for (int section = 0; section < 6; section++, dbg += 0x1000) { 533 switch (section) { 534 case 0: 535 /* Contains scattered data from a630_sqe.fw: */ 536 printf("\tSQE instruction cache:\n"); 537 dump_hex_ascii(dbg, 4 * 0x400, 1); 538 break; 539 case 1: 540 printf("\tUnknown 1:\n"); 541 dump_hex_ascii(dbg, 4 * 0x80, 1); 542 break; 543 case 2: 544 printf("\tUnknown 2:\n"); 545 dump_hex_ascii(dbg, 4 * 0x200, 1); 546 break; 547 case 3: 548 printf("\tUnknown 3:\n"); 549 dump_hex_ascii(dbg, 4 * 0x80, 1); 550 break; 551 case 4: 552 /* Don't bother printing this normally */ 553 if (verbose) { 554 printf("\tSQE packet jumptable contents:\n"); 555 dump_hex_ascii(dbg, 4 * 0x80, 1); 556 } 557 break; 558 case 5: 559 printf("\tSQE scratch control regs:\n"); 560 dump_control_regs(dbg); 561 break; 562 } 563 } 564} 565 566static void 567dump_mem_pool_reg_write(unsigned reg, uint32_t data, unsigned context, 568 bool pipe) 569{ 570 if (pipe) { 571 struct rnndecaddrinfo *info = rnn_reginfo(rnn_pipe, reg); 572 printf("\t\twrite %s (%02x) pipe\n", info->name, reg); 573 574 if (!strcmp(info->typeinfo->name, "void")) { 575 /* registers that ignore their payload */ 576 } else { 577 printf("\t\t\t"); 578 dump_register(rnn_pipe, reg, data); 579 } 580 } else { 581 printf("\t\twrite %s (%05x) context %d\n", regname(reg, 1), reg, context); 582 dump_register_val(reg, data, 2); 583 } 584} 585 586static void 587dump_mem_pool_chunk(const uint32_t *chunk) 588{ 589 struct __attribute__((packed)) { 590 bool reg0_enabled : 1; 591 bool reg1_enabled : 1; 592 uint32_t data0 : 32; 593 uint32_t data1 : 32; 594 uint32_t reg0 : 18; 595 uint32_t reg1 : 18; 596 bool reg0_pipe : 1; 597 bool reg1_pipe : 1; 598 uint32_t reg0_context : 1; 599 uint32_t reg1_context : 1; 600 uint32_t padding : 22; 601 } fields; 602 603 memcpy(&fields, chunk, 4 * sizeof(uint32_t)); 604 605 if (fields.reg0_enabled) { 606 dump_mem_pool_reg_write(fields.reg0, fields.data0, fields.reg0_context, 607 fields.reg0_pipe); 608 } 609 610 if (fields.reg1_enabled) { 611 dump_mem_pool_reg_write(fields.reg1, fields.data1, fields.reg1_context, 612 fields.reg1_pipe); 613 } 614} 615 616static void 617dump_cp_mem_pool(uint32_t *mempool) 618{ 619 /* The mem pool is a shared pool of memory used for storing in-flight 620 * register writes. There are 6 different queues, one for each 621 * cluster. Writing to $data (or for some special registers, $addr) 622 * pushes data onto the appropriate queue, and each queue is pulled 623 * from by the appropriate cluster. The queues are thus written to 624 * in-order, but may be read out-of-order. 625 * 626 * The queues are conceptually divided into 128-bit "chunks", and the 627 * read and write pointers are in units of chunks. These chunks are 628 * organized internally into 8-chunk "blocks", and memory is allocated 629 * dynamically in terms of blocks. Each queue is represented as a 630 * singly-linked list of blocks, as well as 3-bit start/end chunk 631 * pointers that point within the first/last block. The next pointers 632 * are located in a separate array, rather than inline. 633 */ 634 635 /* TODO: The firmware CP_MEM_POOL save/restore routines do something 636 * like: 637 * 638 * cread $02, [ $00 + 0 ] 639 * and $02, $02, 0x118 640 * ... 641 * brne $02, 0, #label 642 * mov $03, 0x2000 643 * mov $03, 0x1000 644 * label: 645 * ... 646 * 647 * I think that control register 0 is the GPU version, and some 648 * versions have a smaller mem pool. It seems some models have a mem 649 * pool that's half the size, and a bunch of offsets are shifted 650 * accordingly. Unfortunately the kernel driver's dumping code doesn't 651 * seem to take this into account, even the downstream android driver, 652 * and we don't know which versions 0x8, 0x10, or 0x100 correspond 653 * to. Or maybe we can use CP_DBG_MEM_POOL_SIZE to figure this out? 654 */ 655 bool small_mem_pool = false; 656 657 /* The array of next pointers for each block. */ 658 const uint32_t *next_pointers = 659 small_mem_pool ? &mempool[0x800] : &mempool[0x1000]; 660 661 /* Maximum number of blocks in the pool, also the size of the pointers 662 * array. 663 */ 664 const int num_blocks = small_mem_pool ? 0x30 : 0x80; 665 666 /* Number of queues */ 667 const unsigned num_queues = 6; 668 669 /* Unfortunately the per-queue state is a little more complicated than 670 * a simple pair of begin/end pointers. Instead of a single beginning 671 * block, there are *two*, with the property that either the two are 672 * equal or the second is the "next" of the first. Similarly there are 673 * two end blocks. Thus the queue either looks like this: 674 * 675 * A -> B -> ... -> C -> D 676 * 677 * Or like this, or some combination: 678 * 679 * A/B -> ... -> C/D 680 * 681 * However, there's only one beginning/end chunk offset. Now the 682 * question is, which of A or B is the actual start? I.e. is the chunk 683 * offset an offset inside A or B? It depends. I'll show a typical read 684 * cycle, starting here (read pointer marked with a *) with a chunk 685 * offset of 0: 686 * 687 * A B 688 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 689 * |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| -> |_|_|_|_|_|_|_|_| 690 * 691 * Once the pointer advances far enough, the hardware decides to free 692 * A, after which the read-side state looks like: 693 * 694 * (free) A/B 695 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 696 * |_|_|_|_|_|_|_|_| |_|_|_|*|_|_|_|_| -> |_|_|_|_|_|_|_|_| 697 * 698 * Then after advancing the pointer a bit more, the hardware fetches 699 * the "next" pointer for A and stores it in B: 700 * 701 * (free) A B 702 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 703 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|*| -> |_|_|_|_|_|_|_|_| 704 * 705 * Then the read pointer advances into B, at which point we've come 706 * back to the first state having advanced a whole block: 707 * 708 * (free) A B 709 * _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ 710 * |_|_|_|_|_|_|_|_| |_|_|_|_|_|_|_|_| -> |*|_|_|_|_|_|_|_| 711 * 712 * 713 * There is a similar cycle for the write pointer. Now, the question 714 * is, how do we know which state we're in? We need to know this to 715 * know whether the pointer (*) is in A or B if they're different. It 716 * seems like there should be some bit somewhere describing this, but 717 * after lots of experimentation I've come up empty-handed. For now we 718 * assume that if the pointer is in the first half, then we're in 719 * either the first or second state and use B, and otherwise we're in 720 * the second or third state and use A. So far I haven't seen anything 721 * that violates this assumption. 722 */ 723 724 struct { 725 uint32_t unk0; 726 uint32_t padding0[7]; /* Mirrors of unk0 */ 727 728 struct { 729 uint32_t chunk : 3; 730 uint32_t first_block : 32 - 3; 731 } writer[6]; 732 uint32_t padding1[2]; /* Mirrors of writer[4], writer[5] */ 733 734 uint32_t unk1; 735 uint32_t padding2[7]; /* Mirrors of unk1 */ 736 737 uint32_t writer_second_block[6]; 738 uint32_t padding3[2]; 739 740 uint32_t unk2[6]; 741 uint32_t padding4[2]; 742 743 struct { 744 uint32_t chunk : 3; 745 uint32_t first_block : 32 - 3; 746 } reader[6]; 747 uint32_t padding5[2]; /* Mirrors of reader[4], reader[5] */ 748 749 uint32_t unk3; 750 uint32_t padding6[7]; /* Mirrors of unk3 */ 751 752 uint32_t reader_second_block[6]; 753 uint32_t padding7[2]; 754 755 uint32_t block_count[6]; 756 uint32_t padding[2]; 757 758 uint32_t unk4; 759 uint32_t padding9[7]; /* Mirrors of unk4 */ 760 } data1; 761 762 const uint32_t *data1_ptr = 763 small_mem_pool ? &mempool[0xc00] : &mempool[0x1800]; 764 memcpy(&data1, data1_ptr, sizeof(data1)); 765 766 /* Based on the kernel, the first dword is the mem pool size (in 767 * blocks?) and mirrors CP_MEM_POOL_DBG_SIZE. 768 */ 769 const uint32_t *data2_ptr = 770 small_mem_pool ? &mempool[0x1000] : &mempool[0x2000]; 771 const int data2_size = 0x60; 772 773 /* This seems to be the size of each queue in chunks. */ 774 const uint32_t *queue_sizes = &data2_ptr[0x18]; 775 776 printf("\tdata2:\n"); 777 dump_hex_ascii(data2_ptr, 4 * data2_size, 1); 778 779 /* These seem to be some kind of counter of allocated/deallocated blocks */ 780 if (verbose) { 781 printf("\tunk0: %x\n", data1.unk0); 782 printf("\tunk1: %x\n", data1.unk1); 783 printf("\tunk3: %x\n", data1.unk3); 784 printf("\tunk4: %x\n\n", data1.unk4); 785 } 786 787 for (int queue = 0; queue < num_queues; queue++) { 788 const char *cluster_names[6] = {"FE", "SP_VS", "PC_VS", 789 "GRAS", "SP_PS", "PS"}; 790 printf("\tCLUSTER_%s:\n\n", cluster_names[queue]); 791 792 if (verbose) { 793 printf("\t\twriter_first_block: 0x%x\n", 794 data1.writer[queue].first_block); 795 printf("\t\twriter_second_block: 0x%x\n", 796 data1.writer_second_block[queue]); 797 printf("\t\twriter_chunk: %d\n", data1.writer[queue].chunk); 798 printf("\t\treader_first_block: 0x%x\n", 799 data1.reader[queue].first_block); 800 printf("\t\treader_second_block: 0x%x\n", 801 data1.reader_second_block[queue]); 802 printf("\t\treader_chunk: %d\n", data1.reader[queue].chunk); 803 printf("\t\tblock_count: %d\n", data1.block_count[queue]); 804 printf("\t\tunk2: 0x%x\n", data1.unk2[queue]); 805 printf("\t\tqueue_size: %d\n\n", queue_sizes[queue]); 806 } 807 808 uint32_t cur_chunk = data1.reader[queue].chunk; 809 uint32_t cur_block = cur_chunk > 3 ? data1.reader[queue].first_block 810 : data1.reader_second_block[queue]; 811 uint32_t last_chunk = data1.writer[queue].chunk; 812 uint32_t last_block = last_chunk > 3 ? data1.writer[queue].first_block 813 : data1.writer_second_block[queue]; 814 815 if (verbose) 816 printf("\tblock %x\n", cur_block); 817 if (cur_block >= num_blocks) { 818 fprintf(stderr, "block %x too large\n", cur_block); 819 exit(1); 820 } 821 unsigned calculated_queue_size = 0; 822 while (cur_block != last_block || cur_chunk != last_chunk) { 823 calculated_queue_size++; 824 uint32_t *chunk_ptr = &mempool[cur_block * 0x20 + cur_chunk * 4]; 825 826 dump_mem_pool_chunk(chunk_ptr); 827 828 printf("\t%05x: %08x %08x %08x %08x\n", 829 4 * (cur_block * 0x20 + cur_chunk + 4), chunk_ptr[0], 830 chunk_ptr[1], chunk_ptr[2], chunk_ptr[3]); 831 832 cur_chunk++; 833 if (cur_chunk == 8) { 834 cur_block = next_pointers[cur_block]; 835 if (verbose) 836 printf("\tblock %x\n", cur_block); 837 if (cur_block >= num_blocks) { 838 fprintf(stderr, "block %x too large\n", cur_block); 839 exit(1); 840 } 841 cur_chunk = 0; 842 } 843 } 844 if (calculated_queue_size != queue_sizes[queue]) { 845 printf("\t\tCALCULATED SIZE %d DOES NOT MATCH!\n", 846 calculated_queue_size); 847 } 848 printf("\n"); 849 } 850} 851 852static void 853decode_indexed_registers(void) 854{ 855 char *name = NULL; 856 uint32_t sizedwords = 0; 857 858 foreach_line_in_section (line) { 859 if (startswith(line, " - regs-name:")) { 860 free(name); 861 parseline(line, " - regs-name: %ms", &name); 862 } else if (startswith(line, " dwords:")) { 863 parseline(line, " dwords: %u", &sizedwords); 864 } else if (startswith(line, " data: !!ascii85 |")) { 865 uint32_t *buf = popline_ascii85(sizedwords); 866 867 /* some of the sections are pretty large, and are (at least 868 * so far) not useful, so skip them if not in verbose mode: 869 */ 870 bool dump = verbose || !strcmp(name, "CP_SQE_STAT") || 871 !strcmp(name, "CP_DRAW_STATE") || 872 !strcmp(name, "CP_ROQ") || 0; 873 874 if (!strcmp(name, "CP_SQE_STAT")) 875 dump_cp_sqe_stat(buf); 876 877 if (!strcmp(name, "CP_UCODE_DBG_DATA")) 878 dump_cp_ucode_dbg(buf); 879 880 if (!strcmp(name, "CP_MEMPOOL")) 881 dump_cp_mem_pool(buf); 882 883 if (dump) 884 dump_hex_ascii(buf, 4 * sizedwords, 1); 885 886 free(buf); 887 888 continue; 889 } 890 891 printf("%s", line); 892 } 893} 894 895/* 896 * Decode shader-blocks: 897 */ 898 899static void 900decode_shader_blocks(void) 901{ 902 char *type = NULL; 903 uint32_t sizedwords = 0; 904 905 foreach_line_in_section (line) { 906 if (startswith(line, " - type:")) { 907 free(type); 908 parseline(line, " - type: %ms", &type); 909 } else if (startswith(line, " size:")) { 910 parseline(line, " size: %u", &sizedwords); 911 } else if (startswith(line, " data: !!ascii85 |")) { 912 uint32_t *buf = popline_ascii85(sizedwords); 913 914 /* some of the sections are pretty large, and are (at least 915 * so far) not useful, so skip them if not in verbose mode: 916 */ 917 bool dump = verbose || !strcmp(type, "A6XX_SP_INST_DATA") || 918 !strcmp(type, "A6XX_HLSQ_INST_RAM") || 0; 919 920 if (!strcmp(type, "A6XX_SP_INST_DATA") || 921 !strcmp(type, "A6XX_HLSQ_INST_RAM")) { 922 /* TODO this section actually contains multiple shaders 923 * (or parts of shaders?), so perhaps we should search 924 * for ends of shaders and decode each? 925 */ 926 try_disasm_a3xx(buf, sizedwords, 1, stdout, options.gpu_id); 927 } 928 929 if (dump) 930 dump_hex_ascii(buf, 4 * sizedwords, 1); 931 932 free(buf); 933 934 continue; 935 } 936 937 printf("%s", line); 938 } 939 940 free(type); 941} 942 943/* 944 * Decode debugbus section: 945 */ 946 947static void 948decode_debugbus(void) 949{ 950 char *block = NULL; 951 uint32_t sizedwords = 0; 952 953 foreach_line_in_section (line) { 954 if (startswith(line, " - debugbus-block:")) { 955 free(block); 956 parseline(line, " - debugbus-block: %ms", &block); 957 } else if (startswith(line, " count:")) { 958 parseline(line, " count: %u", &sizedwords); 959 } else if (startswith(line, " data: !!ascii85 |")) { 960 uint32_t *buf = popline_ascii85(sizedwords); 961 962 /* some of the sections are pretty large, and are (at least 963 * so far) not useful, so skip them if not in verbose mode: 964 */ 965 bool dump = verbose || 0; 966 967 if (dump) 968 dump_hex_ascii(buf, 4 * sizedwords, 1); 969 970 free(buf); 971 972 continue; 973 } 974 975 printf("%s", line); 976 } 977} 978 979/* 980 * Main crashdump decode loop: 981 */ 982 983static void 984decode(void) 985{ 986 const char *line; 987 988 while ((line = popline())) { 989 printf("%s", line); 990 if (startswith(line, "revision:")) { 991 parseline(line, "revision: %u", &options.gpu_id); 992 printf("Got gpu_id=%u\n", options.gpu_id); 993 994 cffdec_init(&options); 995 996 if (is_a6xx()) { 997 rnn_gmu = rnn_new(!options.color); 998 rnn_load_file(rnn_gmu, "adreno/a6xx_gmu.xml", "A6XX"); 999 rnn_control = rnn_new(!options.color); 1000 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", 1001 "A6XX_CONTROL_REG"); 1002 rnn_pipe = rnn_new(!options.color); 1003 rnn_load_file(rnn_pipe, "adreno/adreno_pipe_regs.xml", 1004 "A6XX_PIPE_REG"); 1005 } else if (is_a5xx()) { 1006 rnn_control = rnn_new(!options.color); 1007 rnn_load_file(rnn_control, "adreno/adreno_control_regs.xml", 1008 "A5XX_CONTROL_REG"); 1009 } else { 1010 rnn_control = NULL; 1011 } 1012 } else if (startswith(line, "bos:")) { 1013 decode_bos(); 1014 } else if (startswith(line, "ringbuffer:")) { 1015 decode_ringbuffer(); 1016 } else if (startswith(line, "registers:")) { 1017 decode_registers(); 1018 1019 /* after we've recorded buffer contents, and CP register values, 1020 * we can take a stab at decoding the cmdstream: 1021 */ 1022 dump_cmdstream(); 1023 } else if (startswith(line, "registers-gmu:")) { 1024 decode_gmu_registers(); 1025 } else if (startswith(line, "indexed-registers:")) { 1026 decode_indexed_registers(); 1027 } else if (startswith(line, "shader-blocks:")) { 1028 decode_shader_blocks(); 1029 } else if (startswith(line, "clusters:")) { 1030 decode_clusters(); 1031 } else if (startswith(line, "debugbus:")) { 1032 decode_debugbus(); 1033 } 1034 } 1035} 1036 1037/* 1038 * Usage and argument parsing: 1039 */ 1040 1041static void 1042usage(void) 1043{ 1044 /* clang-format off */ 1045 fprintf(stderr, "Usage:\n\n" 1046 "\tcrashdec [-achmsv] [-f FILE]\n\n" 1047 "Options:\n" 1048 "\t-a, --allregs - show all registers (including ones not written since\n" 1049 "\t previous draw) at each draw\n" 1050 "\t-c, --color - use colors\n" 1051 "\t-f, --file=FILE - read input from specified file (rather than stdin)\n" 1052 "\t-h, --help - this usage message\n" 1053 "\t-m, --markers - try to decode CP_NOP string markers\n" 1054 "\t-s, --summary - don't show individual register writes, but just show\n" 1055 "\t register values on draws\n" 1056 "\t-v, --verbose - dump more verbose output, including contents of\n" 1057 "\t less interesting buffers\n" 1058 "\n" 1059 ); 1060 /* clang-format on */ 1061 exit(2); 1062} 1063 1064/* clang-format off */ 1065static const struct option opts[] = { 1066 { .name = "allregs", .has_arg = 0, NULL, 'a' }, 1067 { .name = "color", .has_arg = 0, NULL, 'c' }, 1068 { .name = "file", .has_arg = 1, NULL, 'f' }, 1069 { .name = "help", .has_arg = 0, NULL, 'h' }, 1070 { .name = "markers", .has_arg = 0, NULL, 'm' }, 1071 { .name = "summary", .has_arg = 0, NULL, 's' }, 1072 { .name = "verbose", .has_arg = 0, NULL, 'v' }, 1073 {} 1074}; 1075/* clang-format on */ 1076 1077static bool interactive; 1078 1079static void 1080cleanup(void) 1081{ 1082 fflush(stdout); 1083 1084 if (interactive) { 1085 pager_close(); 1086 } 1087} 1088 1089int 1090main(int argc, char **argv) 1091{ 1092 int c; 1093 1094 interactive = isatty(STDOUT_FILENO); 1095 options.color = interactive; 1096 1097 /* default to read from stdin: */ 1098 in = stdin; 1099 1100 while ((c = getopt_long(argc, argv, "acf:hmsv", opts, NULL)) != -1) { 1101 switch (c) { 1102 case 'a': 1103 options.allregs = true; 1104 break; 1105 case 'c': 1106 options.color = true; 1107 break; 1108 case 'f': 1109 in = fopen(optarg, "r"); 1110 break; 1111 case 'm': 1112 options.decode_markers = true; 1113 break; 1114 case 's': 1115 options.summary = true; 1116 break; 1117 case 'v': 1118 verbose = true; 1119 break; 1120 case 'h': 1121 default: 1122 usage(); 1123 } 1124 } 1125 1126 disasm_a3xx_set_debug(PRINT_RAW); 1127 1128 if (interactive) { 1129 pager_open(); 1130 } 1131 1132 atexit(cleanup); 1133 1134 decode(); 1135 cleanup(); 1136} 1137