1/* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * All Rights Reserved. 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * on the rights to use, copy, modify, merge, publish, distribute, sub 9 * license, and/or sell copies of the Software, and to permit persons to whom 10 * the Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice (including the next 13 * paragraph) shall be included in all copies or substantial portions of the 14 * Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25#include "si_pipe.h" 26#include "si_compute.h" 27#include "sid.h" 28#include "gfx9d.h" 29#include "sid_tables.h" 30#include "driver_ddebug/dd_util.h" 31#include "util/u_dump.h" 32#include "util/u_log.h" 33#include "util/u_memory.h" 34#include "util/u_string.h" 35#include "ac_debug.h" 36 37static void si_dump_bo_list(struct si_context *sctx, 38 const struct radeon_saved_cs *saved, FILE *f); 39 40DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) 41 42/** 43 * Store a linearized copy of all chunks of \p cs together with the buffer 44 * list in \p saved. 45 */ 46void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, 47 struct radeon_saved_cs *saved, bool get_buffer_list) 48{ 49 uint32_t *buf; 50 unsigned i; 51 52 /* Save the IB chunks. */ 53 saved->num_dw = cs->prev_dw + cs->current.cdw; 54 saved->ib = MALLOC(4 * saved->num_dw); 55 if (!saved->ib) 56 goto oom; 57 58 buf = saved->ib; 59 for (i = 0; i < cs->num_prev; ++i) { 60 memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); 61 buf += cs->prev[i].cdw; 62 } 63 memcpy(buf, cs->current.buf, cs->current.cdw * 4); 64 65 if (!get_buffer_list) 66 return; 67 68 /* Save the buffer list. */ 69 saved->bo_count = ws->cs_get_buffer_list(cs, NULL); 70 saved->bo_list = CALLOC(saved->bo_count, 71 sizeof(saved->bo_list[0])); 72 if (!saved->bo_list) { 73 FREE(saved->ib); 74 goto oom; 75 } 76 ws->cs_get_buffer_list(cs, saved->bo_list); 77 78 return; 79 80oom: 81 fprintf(stderr, "%s: out of memory\n", __func__); 82 memset(saved, 0, sizeof(*saved)); 83} 84 85void si_clear_saved_cs(struct radeon_saved_cs *saved) 86{ 87 FREE(saved->ib); 88 FREE(saved->bo_list); 89 90 memset(saved, 0, sizeof(*saved)); 91} 92 93void si_destroy_saved_cs(struct si_saved_cs *scs) 94{ 95 si_clear_saved_cs(&scs->gfx); 96 si_resource_reference(&scs->trace_buf, NULL); 97 free(scs); 98} 99 100static void si_dump_shader(struct si_screen *sscreen, 101 enum pipe_shader_type processor, 102 const struct si_shader *shader, FILE *f) 103{ 104 if (shader->shader_log) 105 fwrite(shader->shader_log, shader->shader_log_size, 1, f); 106 else 107 si_shader_dump(sscreen, shader, NULL, processor, f, false); 108} 109 110struct si_log_chunk_shader { 111 /* The shader destroy code assumes a current context for unlinking of 112 * PM4 packets etc. 113 * 114 * While we should be able to destroy shaders without a context, doing 115 * so would happen only very rarely and be therefore likely to fail 116 * just when you're trying to debug something. Let's just remember the 117 * current context in the chunk. 118 */ 119 struct si_context *ctx; 120 struct si_shader *shader; 121 enum pipe_shader_type processor; 122 123 /* For keep-alive reference counts */ 124 struct si_shader_selector *sel; 125 struct si_compute *program; 126}; 127 128static void 129si_log_chunk_shader_destroy(void *data) 130{ 131 struct si_log_chunk_shader *chunk = data; 132 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL); 133 si_compute_reference(&chunk->program, NULL); 134 FREE(chunk); 135} 136 137static void 138si_log_chunk_shader_print(void *data, FILE *f) 139{ 140 struct si_log_chunk_shader *chunk = data; 141 struct si_screen *sscreen = chunk->ctx->screen; 142 si_dump_shader(sscreen, chunk->processor, 143 chunk->shader, f); 144} 145 146static struct u_log_chunk_type si_log_chunk_type_shader = { 147 .destroy = si_log_chunk_shader_destroy, 148 .print = si_log_chunk_shader_print, 149}; 150 151static void si_dump_gfx_shader(struct si_context *ctx, 152 const struct si_shader_ctx_state *state, 153 struct u_log_context *log) 154{ 155 struct si_shader *current = state->current; 156 157 if (!state->cso || !current) 158 return; 159 160 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 161 chunk->ctx = ctx; 162 chunk->processor = state->cso->info.processor; 163 chunk->shader = current; 164 si_shader_selector_reference(ctx, &chunk->sel, current->selector); 165 u_log_chunk(log, &si_log_chunk_type_shader, chunk); 166} 167 168static void si_dump_compute_shader(struct si_context *ctx, 169 struct u_log_context *log) 170{ 171 const struct si_cs_shader_state *state = &ctx->cs_shader_state; 172 173 if (!state->program) 174 return; 175 176 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 177 chunk->ctx = ctx; 178 chunk->processor = PIPE_SHADER_COMPUTE; 179 chunk->shader = &state->program->shader; 180 si_compute_reference(&chunk->program, state->program); 181 u_log_chunk(log, &si_log_chunk_type_shader, chunk); 182} 183 184/** 185 * Shader compiles can be overridden with arbitrary ELF objects by setting 186 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] 187 */ 188bool si_replace_shader(unsigned num, struct ac_shader_binary *binary) 189{ 190 const char *p = debug_get_option_replace_shaders(); 191 const char *semicolon; 192 char *copy = NULL; 193 FILE *f; 194 long filesize, nread; 195 char *buf = NULL; 196 bool replaced = false; 197 198 if (!p) 199 return false; 200 201 while (*p) { 202 unsigned long i; 203 char *endp; 204 i = strtoul(p, &endp, 0); 205 206 p = endp; 207 if (*p != ':') { 208 fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n"); 209 exit(1); 210 } 211 ++p; 212 213 if (i == num) 214 break; 215 216 p = strchr(p, ';'); 217 if (!p) 218 return false; 219 ++p; 220 } 221 if (!*p) 222 return false; 223 224 semicolon = strchr(p, ';'); 225 if (semicolon) { 226 p = copy = strndup(p, semicolon - p); 227 if (!copy) { 228 fprintf(stderr, "out of memory\n"); 229 return false; 230 } 231 } 232 233 fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p); 234 235 f = fopen(p, "r"); 236 if (!f) { 237 perror("radeonsi: failed to open file"); 238 goto out_free; 239 } 240 241 if (fseek(f, 0, SEEK_END) != 0) 242 goto file_error; 243 244 filesize = ftell(f); 245 if (filesize < 0) 246 goto file_error; 247 248 if (fseek(f, 0, SEEK_SET) != 0) 249 goto file_error; 250 251 buf = MALLOC(filesize); 252 if (!buf) { 253 fprintf(stderr, "out of memory\n"); 254 goto out_close; 255 } 256 257 nread = fread(buf, 1, filesize, f); 258 if (nread != filesize) 259 goto file_error; 260 261 ac_elf_read(buf, filesize, binary); 262 replaced = true; 263 264out_close: 265 fclose(f); 266out_free: 267 FREE(buf); 268 free(copy); 269 return replaced; 270 271file_error: 272 perror("radeonsi: reading shader"); 273 goto out_close; 274} 275 276/* Parsed IBs are difficult to read without colors. Use "less -R file" to 277 * read them, or use "aha -b -f file" to convert them to html. 278 */ 279#define COLOR_RESET "\033[0m" 280#define COLOR_RED "\033[31m" 281#define COLOR_GREEN "\033[1;32m" 282#define COLOR_YELLOW "\033[1;33m" 283#define COLOR_CYAN "\033[1;36m" 284 285static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f, 286 unsigned offset) 287{ 288 struct radeon_winsys *ws = sctx->ws; 289 uint32_t value; 290 291 if (ws->read_registers(ws, offset, 1, &value)) 292 ac_dump_reg(f, sctx->chip_class, offset, value, ~0); 293} 294 295static void si_dump_debug_registers(struct si_context *sctx, FILE *f) 296{ 297 if (!sctx->screen->info.has_read_registers_query) 298 return; 299 300 fprintf(f, "Memory-mapped registers:\n"); 301 si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS); 302 303 /* No other registers can be read on DRM < 3.1.0. */ 304 if (sctx->screen->info.drm_major < 3 || 305 sctx->screen->info.drm_minor < 1) { 306 fprintf(f, "\n"); 307 return; 308 } 309 310 si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2); 311 si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0); 312 si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1); 313 si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2); 314 si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); 315 si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); 316 si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); 317 if (sctx->chip_class <= VI) { 318 si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); 319 si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); 320 si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); 321 } 322 si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT); 323 si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1); 324 si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2); 325 si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3); 326 si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS); 327 si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT); 328 si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1); 329 si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS); 330 si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT); 331 si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1); 332 fprintf(f, "\n"); 333} 334 335struct si_log_chunk_cs { 336 struct si_context *ctx; 337 struct si_saved_cs *cs; 338 bool dump_bo_list; 339 unsigned gfx_begin, gfx_end; 340}; 341 342static void si_log_chunk_type_cs_destroy(void *data) 343{ 344 struct si_log_chunk_cs *chunk = data; 345 si_saved_cs_reference(&chunk->cs, NULL); 346 free(chunk); 347} 348 349static void si_parse_current_ib(FILE *f, struct radeon_cmdbuf *cs, 350 unsigned begin, unsigned end, 351 int *last_trace_id, unsigned trace_id_count, 352 const char *name, enum chip_class chip_class) 353{ 354 unsigned orig_end = end; 355 356 assert(begin <= end); 357 358 fprintf(f, "------------------ %s begin (dw = %u) ------------------\n", 359 name, begin); 360 361 for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) { 362 struct radeon_cmdbuf_chunk *chunk = &cs->prev[prev_idx]; 363 364 if (begin < chunk->cdw) { 365 ac_parse_ib_chunk(f, chunk->buf + begin, 366 MIN2(end, chunk->cdw) - begin, 367 last_trace_id, trace_id_count, 368 chip_class, NULL, NULL); 369 } 370 371 if (end <= chunk->cdw) 372 return; 373 374 if (begin < chunk->cdw) 375 fprintf(f, "\n---------- Next %s Chunk ----------\n\n", 376 name); 377 378 begin -= MIN2(begin, chunk->cdw); 379 end -= chunk->cdw; 380 } 381 382 assert(end <= cs->current.cdw); 383 384 ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id, 385 trace_id_count, chip_class, NULL, NULL); 386 387 fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n", 388 name, orig_end); 389} 390 391static void si_log_chunk_type_cs_print(void *data, FILE *f) 392{ 393 struct si_log_chunk_cs *chunk = data; 394 struct si_context *ctx = chunk->ctx; 395 struct si_saved_cs *scs = chunk->cs; 396 int last_trace_id = -1; 397 398 /* We are expecting that the ddebug pipe has already 399 * waited for the context, so this buffer should be idle. 400 * If the GPU is hung, there is no point in waiting for it. 401 */ 402 uint32_t *map = ctx->ws->buffer_map(scs->trace_buf->buf, 403 NULL, 404 PIPE_TRANSFER_UNSYNCHRONIZED | 405 PIPE_TRANSFER_READ); 406 if (map) 407 last_trace_id = map[0]; 408 409 if (chunk->gfx_end != chunk->gfx_begin) { 410 if (chunk->gfx_begin == 0) { 411 if (ctx->init_config) 412 ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw, 413 NULL, 0, "IB2: Init config", ctx->chip_class, 414 NULL, NULL); 415 416 if (ctx->init_config_gs_rings) 417 ac_parse_ib(f, ctx->init_config_gs_rings->pm4, 418 ctx->init_config_gs_rings->ndw, 419 NULL, 0, "IB2: Init GS rings", ctx->chip_class, 420 NULL, NULL); 421 } 422 423 if (scs->flushed) { 424 ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin, 425 chunk->gfx_end - chunk->gfx_begin, 426 &last_trace_id, map ? 1 : 0, "IB", ctx->chip_class, 427 NULL, NULL); 428 } else { 429 si_parse_current_ib(f, ctx->gfx_cs, chunk->gfx_begin, 430 chunk->gfx_end, &last_trace_id, map ? 1 : 0, 431 "IB", ctx->chip_class); 432 } 433 } 434 435 if (chunk->dump_bo_list) { 436 fprintf(f, "Flushing. Time: "); 437 util_dump_ns(f, scs->time_flush); 438 fprintf(f, "\n\n"); 439 si_dump_bo_list(ctx, &scs->gfx, f); 440 } 441} 442 443static const struct u_log_chunk_type si_log_chunk_type_cs = { 444 .destroy = si_log_chunk_type_cs_destroy, 445 .print = si_log_chunk_type_cs_print, 446}; 447 448static void si_log_cs(struct si_context *ctx, struct u_log_context *log, 449 bool dump_bo_list) 450{ 451 assert(ctx->current_saved_cs); 452 453 struct si_saved_cs *scs = ctx->current_saved_cs; 454 unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw; 455 456 if (!dump_bo_list && 457 gfx_cur == scs->gfx_last_dw) 458 return; 459 460 struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk)); 461 462 chunk->ctx = ctx; 463 si_saved_cs_reference(&chunk->cs, scs); 464 chunk->dump_bo_list = dump_bo_list; 465 466 chunk->gfx_begin = scs->gfx_last_dw; 467 chunk->gfx_end = gfx_cur; 468 scs->gfx_last_dw = gfx_cur; 469 470 u_log_chunk(log, &si_log_chunk_type_cs, chunk); 471} 472 473void si_auto_log_cs(void *data, struct u_log_context *log) 474{ 475 struct si_context *ctx = (struct si_context *)data; 476 si_log_cs(ctx, log, false); 477} 478 479void si_log_hw_flush(struct si_context *sctx) 480{ 481 if (!sctx->log) 482 return; 483 484 si_log_cs(sctx, sctx->log, true); 485 486 if (&sctx->b == sctx->screen->aux_context) { 487 /* The aux context isn't captured by the ddebug wrapper, 488 * so we dump it on a flush-by-flush basis here. 489 */ 490 FILE *f = dd_get_debug_file(false); 491 if (!f) { 492 fprintf(stderr, "radeonsi: error opening aux context dump file.\n"); 493 } else { 494 dd_write_header(f, &sctx->screen->b, 0); 495 496 fprintf(f, "Aux context dump:\n\n"); 497 u_log_new_page_print(sctx->log, f); 498 499 fclose(f); 500 } 501 } 502} 503 504static const char *priority_to_string(enum radeon_bo_priority priority) 505{ 506#define ITEM(x) [RADEON_PRIO_##x] = #x 507 static const char *table[64] = { 508 ITEM(FENCE), 509 ITEM(TRACE), 510 ITEM(SO_FILLED_SIZE), 511 ITEM(QUERY), 512 ITEM(IB1), 513 ITEM(IB2), 514 ITEM(DRAW_INDIRECT), 515 ITEM(INDEX_BUFFER), 516 ITEM(CP_DMA), 517 ITEM(CONST_BUFFER), 518 ITEM(DESCRIPTORS), 519 ITEM(BORDER_COLORS), 520 ITEM(SAMPLER_BUFFER), 521 ITEM(VERTEX_BUFFER), 522 ITEM(SHADER_RW_BUFFER), 523 ITEM(COMPUTE_GLOBAL), 524 ITEM(SAMPLER_TEXTURE), 525 ITEM(SHADER_RW_IMAGE), 526 ITEM(SAMPLER_TEXTURE_MSAA), 527 ITEM(COLOR_BUFFER), 528 ITEM(DEPTH_BUFFER), 529 ITEM(COLOR_BUFFER_MSAA), 530 ITEM(DEPTH_BUFFER_MSAA), 531 ITEM(SEPARATE_META), 532 ITEM(SHADER_BINARY), 533 ITEM(SHADER_RINGS), 534 ITEM(SCRATCH_BUFFER), 535 }; 536#undef ITEM 537 538 assert(priority < ARRAY_SIZE(table)); 539 return table[priority]; 540} 541 542static int bo_list_compare_va(const struct radeon_bo_list_item *a, 543 const struct radeon_bo_list_item *b) 544{ 545 return a->vm_address < b->vm_address ? -1 : 546 a->vm_address > b->vm_address ? 1 : 0; 547} 548 549static void si_dump_bo_list(struct si_context *sctx, 550 const struct radeon_saved_cs *saved, FILE *f) 551{ 552 unsigned i,j; 553 554 if (!saved->bo_list) 555 return; 556 557 /* Sort the list according to VM adddresses first. */ 558 qsort(saved->bo_list, saved->bo_count, 559 sizeof(saved->bo_list[0]), (void*)bo_list_compare_va); 560 561 fprintf(f, "Buffer list (in units of pages = 4kB):\n" 562 COLOR_YELLOW " Size VM start page " 563 "VM end page Usage" COLOR_RESET "\n"); 564 565 for (i = 0; i < saved->bo_count; i++) { 566 /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */ 567 const unsigned page_size = sctx->screen->info.gart_page_size; 568 uint64_t va = saved->bo_list[i].vm_address; 569 uint64_t size = saved->bo_list[i].bo_size; 570 bool hit = false; 571 572 /* If there's unused virtual memory between 2 buffers, print it. */ 573 if (i) { 574 uint64_t previous_va_end = saved->bo_list[i-1].vm_address + 575 saved->bo_list[i-1].bo_size; 576 577 if (va > previous_va_end) { 578 fprintf(f, " %10"PRIu64" -- hole --\n", 579 (va - previous_va_end) / page_size); 580 } 581 } 582 583 /* Print the buffer. */ 584 fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ", 585 size / page_size, va / page_size, (va + size) / page_size); 586 587 /* Print the usage. */ 588 for (j = 0; j < 32; j++) { 589 if (!(saved->bo_list[i].priority_usage & (1u << j))) 590 continue; 591 592 fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j)); 593 hit = true; 594 } 595 fprintf(f, "\n"); 596 } 597 fprintf(f, "\nNote: The holes represent memory not used by the IB.\n" 598 " Other buffers can still be allocated there.\n\n"); 599} 600 601static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log) 602{ 603 struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 604 struct si_texture *tex; 605 int i; 606 607 for (i = 0; i < state->nr_cbufs; i++) { 608 if (!state->cbufs[i]) 609 continue; 610 611 tex = (struct si_texture*)state->cbufs[i]->texture; 612 u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i); 613 si_print_texture_info(sctx->screen, tex, log); 614 u_log_printf(log, "\n"); 615 } 616 617 if (state->zsbuf) { 618 tex = (struct si_texture*)state->zsbuf->texture; 619 u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n"); 620 si_print_texture_info(sctx->screen, tex, log); 621 u_log_printf(log, "\n"); 622 } 623} 624 625typedef unsigned (*slot_remap_func)(unsigned); 626 627struct si_log_chunk_desc_list { 628 /** Pointer to memory map of buffer where the list is uploader */ 629 uint32_t *gpu_list; 630 /** Reference of buffer where the list is uploaded, so that gpu_list 631 * is kept live. */ 632 struct si_resource *buf; 633 634 const char *shader_name; 635 const char *elem_name; 636 slot_remap_func slot_remap; 637 enum chip_class chip_class; 638 unsigned element_dw_size; 639 unsigned num_elements; 640 641 uint32_t list[0]; 642}; 643 644static void 645si_log_chunk_desc_list_destroy(void *data) 646{ 647 struct si_log_chunk_desc_list *chunk = data; 648 si_resource_reference(&chunk->buf, NULL); 649 FREE(chunk); 650} 651 652static void 653si_log_chunk_desc_list_print(void *data, FILE *f) 654{ 655 struct si_log_chunk_desc_list *chunk = data; 656 657 for (unsigned i = 0; i < chunk->num_elements; i++) { 658 unsigned cpu_dw_offset = i * chunk->element_dw_size; 659 unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size; 660 const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list"; 661 uint32_t *cpu_list = chunk->list + cpu_dw_offset; 662 uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list; 663 664 fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n", 665 chunk->shader_name, chunk->elem_name, i, list_note); 666 667 switch (chunk->element_dw_size) { 668 case 4: 669 for (unsigned j = 0; j < 4; j++) 670 ac_dump_reg(f, chunk->chip_class, 671 R_008F00_SQ_BUF_RSRC_WORD0 + j*4, 672 gpu_list[j], 0xffffffff); 673 break; 674 case 8: 675 for (unsigned j = 0; j < 8; j++) 676 ac_dump_reg(f, chunk->chip_class, 677 R_008F10_SQ_IMG_RSRC_WORD0 + j*4, 678 gpu_list[j], 0xffffffff); 679 680 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 681 for (unsigned j = 0; j < 4; j++) 682 ac_dump_reg(f, chunk->chip_class, 683 R_008F00_SQ_BUF_RSRC_WORD0 + j*4, 684 gpu_list[4+j], 0xffffffff); 685 break; 686 case 16: 687 for (unsigned j = 0; j < 8; j++) 688 ac_dump_reg(f, chunk->chip_class, 689 R_008F10_SQ_IMG_RSRC_WORD0 + j*4, 690 gpu_list[j], 0xffffffff); 691 692 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 693 for (unsigned j = 0; j < 4; j++) 694 ac_dump_reg(f, chunk->chip_class, 695 R_008F00_SQ_BUF_RSRC_WORD0 + j*4, 696 gpu_list[4+j], 0xffffffff); 697 698 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); 699 for (unsigned j = 0; j < 8; j++) 700 ac_dump_reg(f, chunk->chip_class, 701 R_008F10_SQ_IMG_RSRC_WORD0 + j*4, 702 gpu_list[8+j], 0xffffffff); 703 704 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); 705 for (unsigned j = 0; j < 4; j++) 706 ac_dump_reg(f, chunk->chip_class, 707 R_008F30_SQ_IMG_SAMP_WORD0 + j*4, 708 gpu_list[12+j], 0xffffffff); 709 break; 710 } 711 712 if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) { 713 fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!" 714 COLOR_RESET "\n"); 715 } 716 717 fprintf(f, "\n"); 718 } 719 720} 721 722static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = { 723 .destroy = si_log_chunk_desc_list_destroy, 724 .print = si_log_chunk_desc_list_print, 725}; 726 727static void si_dump_descriptor_list(struct si_screen *screen, 728 struct si_descriptors *desc, 729 const char *shader_name, 730 const char *elem_name, 731 unsigned element_dw_size, 732 unsigned num_elements, 733 slot_remap_func slot_remap, 734 struct u_log_context *log) 735{ 736 if (!desc->list) 737 return; 738 739 /* In some cases, the caller doesn't know how many elements are really 740 * uploaded. Reduce num_elements to fit in the range of active slots. */ 741 unsigned active_range_dw_begin = 742 desc->first_active_slot * desc->element_dw_size; 743 unsigned active_range_dw_end = 744 active_range_dw_begin + desc->num_active_slots * desc->element_dw_size; 745 746 while (num_elements > 0) { 747 int i = slot_remap(num_elements - 1); 748 unsigned dw_begin = i * element_dw_size; 749 unsigned dw_end = dw_begin + element_dw_size; 750 751 if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end) 752 break; 753 754 num_elements--; 755 } 756 757 struct si_log_chunk_desc_list *chunk = 758 CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list, 759 4 * element_dw_size * num_elements); 760 chunk->shader_name = shader_name; 761 chunk->elem_name = elem_name; 762 chunk->element_dw_size = element_dw_size; 763 chunk->num_elements = num_elements; 764 chunk->slot_remap = slot_remap; 765 chunk->chip_class = screen->info.chip_class; 766 767 si_resource_reference(&chunk->buf, desc->buffer); 768 chunk->gpu_list = desc->gpu_list; 769 770 for (unsigned i = 0; i < num_elements; ++i) { 771 memcpy(&chunk->list[i * element_dw_size], 772 &desc->list[slot_remap(i) * element_dw_size], 773 4 * element_dw_size); 774 } 775 776 u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk); 777} 778 779static unsigned si_identity(unsigned slot) 780{ 781 return slot; 782} 783 784static void si_dump_descriptors(struct si_context *sctx, 785 enum pipe_shader_type processor, 786 const struct tgsi_shader_info *info, 787 struct u_log_context *log) 788{ 789 struct si_descriptors *descs = 790 &sctx->descriptors[SI_DESCS_FIRST_SHADER + 791 processor * SI_NUM_SHADER_DESCS]; 792 static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"}; 793 const char *name = shader_name[processor]; 794 unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers; 795 unsigned enabled_images; 796 797 if (info) { 798 enabled_constbuf = info->const_buffers_declared; 799 enabled_shaderbuf = info->shader_buffers_declared; 800 enabled_samplers = info->samplers_declared; 801 enabled_images = info->images_declared; 802 } else { 803 enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >> 804 SI_NUM_SHADER_BUFFERS; 805 enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask & 806 u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); 807 enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >> 808 (32 - SI_NUM_SHADER_BUFFERS); 809 enabled_samplers = sctx->samplers[processor].enabled_mask; 810 enabled_images = sctx->images[processor].enabled_mask; 811 } 812 813 if (processor == PIPE_SHADER_VERTEX && 814 sctx->vb_descriptors_buffer && 815 sctx->vb_descriptors_gpu_list && 816 sctx->vertex_elements) { 817 assert(info); /* only CS may not have an info struct */ 818 struct si_descriptors desc = {}; 819 820 desc.buffer = sctx->vb_descriptors_buffer; 821 desc.list = sctx->vb_descriptors_gpu_list; 822 desc.gpu_list = sctx->vb_descriptors_gpu_list; 823 desc.element_dw_size = 4; 824 desc.num_active_slots = sctx->vertex_elements->desc_list_byte_size / 16; 825 826 si_dump_descriptor_list(sctx->screen, &desc, name, 827 " - Vertex buffer", 4, info->num_inputs, 828 si_identity, log); 829 } 830 831 si_dump_descriptor_list(sctx->screen, 832 &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], 833 name, " - Constant buffer", 4, 834 util_last_bit(enabled_constbuf), 835 si_get_constbuf_slot, log); 836 si_dump_descriptor_list(sctx->screen, 837 &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], 838 name, " - Shader buffer", 4, 839 util_last_bit(enabled_shaderbuf), 840 si_get_shaderbuf_slot, log); 841 si_dump_descriptor_list(sctx->screen, 842 &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], 843 name, " - Sampler", 16, 844 util_last_bit(enabled_samplers), 845 si_get_sampler_slot, log); 846 si_dump_descriptor_list(sctx->screen, 847 &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], 848 name, " - Image", 8, 849 util_last_bit(enabled_images), 850 si_get_image_slot, log); 851} 852 853static void si_dump_gfx_descriptors(struct si_context *sctx, 854 const struct si_shader_ctx_state *state, 855 struct u_log_context *log) 856{ 857 if (!state->cso || !state->current) 858 return; 859 860 si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log); 861} 862 863static void si_dump_compute_descriptors(struct si_context *sctx, 864 struct u_log_context *log) 865{ 866 if (!sctx->cs_shader_state.program) 867 return; 868 869 si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log); 870} 871 872struct si_shader_inst { 873 const char *text; /* start of disassembly for this instruction */ 874 unsigned textlen; 875 unsigned size; /* instruction size = 4 or 8 */ 876 uint64_t addr; /* instruction address */ 877}; 878 879/** 880 * Split a disassembly string into instructions and add them to the array 881 * pointed to by \p instructions. 882 * 883 * Labels are considered to be part of the following instruction. 884 */ 885static void si_add_split_disasm(const char *disasm, 886 uint64_t *addr, 887 unsigned *num, 888 struct si_shader_inst *instructions) 889{ 890 const char *semicolon; 891 892 while ((semicolon = strchr(disasm, ';'))) { 893 struct si_shader_inst *inst = &instructions[(*num)++]; 894 const char *end = util_strchrnul(semicolon, '\n'); 895 896 inst->text = disasm; 897 inst->textlen = end - disasm; 898 899 inst->addr = *addr; 900 /* More than 16 chars after ";" means the instruction is 8 bytes long. */ 901 inst->size = end - semicolon > 16 ? 8 : 4; 902 *addr += inst->size; 903 904 if (!(*end)) 905 break; 906 disasm = end + 1; 907 } 908} 909 910/* If the shader is being executed, print its asm instructions, and annotate 911 * those that are being executed right now with information about waves that 912 * execute them. This is most useful during a GPU hang. 913 */ 914static void si_print_annotated_shader(struct si_shader *shader, 915 struct ac_wave_info *waves, 916 unsigned num_waves, 917 FILE *f) 918{ 919 if (!shader || !shader->binary.disasm_string) 920 return; 921 922 uint64_t start_addr = shader->bo->gpu_address; 923 uint64_t end_addr = start_addr + shader->bo->b.b.width0; 924 unsigned i; 925 926 /* See if any wave executes the shader. */ 927 for (i = 0; i < num_waves; i++) { 928 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) 929 break; 930 } 931 if (i == num_waves) 932 return; /* the shader is not being executed */ 933 934 /* Remember the first found wave. The waves are sorted according to PC. */ 935 waves = &waves[i]; 936 num_waves -= i; 937 938 /* Get the list of instructions. 939 * Buffer size / 4 is the upper bound of the instruction count. 940 */ 941 unsigned num_inst = 0; 942 uint64_t inst_addr = start_addr; 943 struct si_shader_inst *instructions = 944 calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst)); 945 946 if (shader->prolog) { 947 si_add_split_disasm(shader->prolog->binary.disasm_string, 948 &inst_addr, &num_inst, instructions); 949 } 950 if (shader->previous_stage) { 951 si_add_split_disasm(shader->previous_stage->binary.disasm_string, 952 &inst_addr, &num_inst, instructions); 953 } 954 if (shader->prolog2) { 955 si_add_split_disasm(shader->prolog2->binary.disasm_string, 956 &inst_addr, &num_inst, instructions); 957 } 958 si_add_split_disasm(shader->binary.disasm_string, 959 &inst_addr, &num_inst, instructions); 960 if (shader->epilog) { 961 si_add_split_disasm(shader->epilog->binary.disasm_string, 962 &inst_addr, &num_inst, instructions); 963 } 964 965 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", 966 si_get_shader_name(shader, shader->selector->type)); 967 968 /* Print instructions with annotations. */ 969 for (i = 0; i < num_inst; i++) { 970 struct si_shader_inst *inst = &instructions[i]; 971 972 fprintf(f, "%.*s [PC=0x%"PRIx64", size=%u]\n", 973 inst->textlen, inst->text, inst->addr, inst->size); 974 975 /* Print which waves execute the instruction right now. */ 976 while (num_waves && inst->addr == waves->pc) { 977 fprintf(f, 978 " " COLOR_GREEN "^ SE%u SH%u CU%u " 979 "SIMD%u WAVE%u EXEC=%016"PRIx64 " ", 980 waves->se, waves->sh, waves->cu, waves->simd, 981 waves->wave, waves->exec); 982 983 if (inst->size == 4) { 984 fprintf(f, "INST32=%08X" COLOR_RESET "\n", 985 waves->inst_dw0); 986 } else { 987 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", 988 waves->inst_dw0, waves->inst_dw1); 989 } 990 991 waves->matched = true; 992 waves = &waves[1]; 993 num_waves--; 994 } 995 } 996 997 fprintf(f, "\n\n"); 998 free(instructions); 999} 1000 1001static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f) 1002{ 1003 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; 1004 unsigned num_waves = ac_get_wave_info(waves); 1005 1006 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET 1007 "\n\n", num_waves); 1008 1009 si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f); 1010 si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f); 1011 si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f); 1012 si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f); 1013 si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f); 1014 1015 /* Print waves executing shaders that are not currently bound. */ 1016 unsigned i; 1017 bool found = false; 1018 for (i = 0; i < num_waves; i++) { 1019 if (waves[i].matched) 1020 continue; 1021 1022 if (!found) { 1023 fprintf(f, COLOR_CYAN 1024 "Waves not executing currently-bound shaders:" 1025 COLOR_RESET "\n"); 1026 found = true; 1027 } 1028 fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64 1029 " INST=%08X %08X PC=%"PRIx64"\n", 1030 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, 1031 waves[i].wave, waves[i].exec, waves[i].inst_dw0, 1032 waves[i].inst_dw1, waves[i].pc); 1033 } 1034 if (found) 1035 fprintf(f, "\n\n"); 1036} 1037 1038static void si_dump_command(const char *title, const char *command, FILE *f) 1039{ 1040 char line[2000]; 1041 1042 FILE *p = popen(command, "r"); 1043 if (!p) 1044 return; 1045 1046 fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title); 1047 while (fgets(line, sizeof(line), p)) 1048 fputs(line, f); 1049 fprintf(f, "\n\n"); 1050 pclose(p); 1051} 1052 1053static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, 1054 unsigned flags) 1055{ 1056 struct si_context *sctx = (struct si_context*)ctx; 1057 1058 if (sctx->log) 1059 u_log_flush(sctx->log); 1060 1061 if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) { 1062 si_dump_debug_registers(sctx, f); 1063 1064 si_dump_annotated_shaders(sctx, f); 1065 si_dump_command("Active waves (raw data)", "umr -O halt_waves -wa | column -t", f); 1066 si_dump_command("Wave information", "umr -O halt_waves,bits -wa", f); 1067 } 1068} 1069 1070void si_log_draw_state(struct si_context *sctx, struct u_log_context *log) 1071{ 1072 struct si_shader_ctx_state *tcs_shader; 1073 1074 if (!log) 1075 return; 1076 1077 tcs_shader = &sctx->tcs_shader; 1078 if (sctx->tes_shader.cso && !sctx->tcs_shader.cso) 1079 tcs_shader = &sctx->fixed_func_tcs_shader; 1080 1081 si_dump_framebuffer(sctx, log); 1082 1083 si_dump_gfx_shader(sctx, &sctx->vs_shader, log); 1084 si_dump_gfx_shader(sctx, tcs_shader, log); 1085 si_dump_gfx_shader(sctx, &sctx->tes_shader, log); 1086 si_dump_gfx_shader(sctx, &sctx->gs_shader, log); 1087 si_dump_gfx_shader(sctx, &sctx->ps_shader, log); 1088 1089 si_dump_descriptor_list(sctx->screen, 1090 &sctx->descriptors[SI_DESCS_RW_BUFFERS], 1091 "", "RW buffers", 4, 1092 sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots, 1093 si_identity, log); 1094 si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log); 1095 si_dump_gfx_descriptors(sctx, tcs_shader, log); 1096 si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log); 1097 si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log); 1098 si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log); 1099} 1100 1101void si_log_compute_state(struct si_context *sctx, struct u_log_context *log) 1102{ 1103 if (!log) 1104 return; 1105 1106 si_dump_compute_shader(sctx, log); 1107 si_dump_compute_descriptors(sctx, log); 1108} 1109 1110static void si_dump_dma(struct si_context *sctx, 1111 struct radeon_saved_cs *saved, FILE *f) 1112{ 1113 static const char ib_name[] = "sDMA IB"; 1114 unsigned i; 1115 1116 si_dump_bo_list(sctx, saved, f); 1117 1118 fprintf(f, "------------------ %s begin ------------------\n", ib_name); 1119 1120 for (i = 0; i < saved->num_dw; ++i) { 1121 fprintf(f, " %08x\n", saved->ib[i]); 1122 } 1123 1124 fprintf(f, "------------------- %s end -------------------\n", ib_name); 1125 fprintf(f, "\n"); 1126 1127 fprintf(f, "SDMA Dump Done.\n"); 1128} 1129 1130void si_check_vm_faults(struct si_context *sctx, 1131 struct radeon_saved_cs *saved, enum ring_type ring) 1132{ 1133 struct pipe_screen *screen = sctx->b.screen; 1134 FILE *f; 1135 uint64_t addr; 1136 char cmd_line[4096]; 1137 1138 if (!ac_vm_fault_occured(sctx->chip_class, 1139 &sctx->dmesg_timestamp, &addr)) 1140 return; 1141 1142 f = dd_get_debug_file(false); 1143 if (!f) 1144 return; 1145 1146 fprintf(f, "VM fault report.\n\n"); 1147 if (os_get_command_line(cmd_line, sizeof(cmd_line))) 1148 fprintf(f, "Command: %s\n", cmd_line); 1149 fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen)); 1150 fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen)); 1151 fprintf(f, "Device name: %s\n\n", screen->get_name(screen)); 1152 fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr); 1153 1154 if (sctx->apitrace_call_number) 1155 fprintf(f, "Last apitrace call: %u\n\n", 1156 sctx->apitrace_call_number); 1157 1158 switch (ring) { 1159 case RING_GFX: { 1160 struct u_log_context log; 1161 u_log_context_init(&log); 1162 1163 si_log_draw_state(sctx, &log); 1164 si_log_compute_state(sctx, &log); 1165 si_log_cs(sctx, &log, true); 1166 1167 u_log_new_page_print(&log, f); 1168 u_log_context_destroy(&log); 1169 break; 1170 } 1171 case RING_DMA: 1172 si_dump_dma(sctx, saved, f); 1173 break; 1174 1175 default: 1176 break; 1177 } 1178 1179 fclose(f); 1180 1181 fprintf(stderr, "Detected a VM fault, exiting...\n"); 1182 exit(0); 1183} 1184 1185void si_init_debug_functions(struct si_context *sctx) 1186{ 1187 sctx->b.dump_debug_state = si_dump_debug_state; 1188 1189 /* Set the initial dmesg timestamp for this context, so that 1190 * only new messages will be checked for VM faults. 1191 */ 1192 if (sctx->screen->debug_flags & DBG(CHECK_VM)) 1193 ac_vm_fault_occured(sctx->chip_class, 1194 &sctx->dmesg_timestamp, NULL); 1195} 1196