1b8e80941Smrg/* 2b8e80941Smrg * Copyright 2015 Advanced Micro Devices, Inc. 3b8e80941Smrg * All Rights Reserved. 4b8e80941Smrg * 5b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 6b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 7b8e80941Smrg * to deal in the Software without restriction, including without limitation 8b8e80941Smrg * on the rights to use, copy, modify, merge, publish, distribute, sub 9b8e80941Smrg * license, and/or sell copies of the Software, and to permit persons to whom 10b8e80941Smrg * the Software is furnished to do so, subject to the following conditions: 11b8e80941Smrg * 12b8e80941Smrg * The above copyright notice and this permission notice (including the next 13b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 14b8e80941Smrg * Software. 15b8e80941Smrg * 16b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19b8e80941Smrg * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 20b8e80941Smrg * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21b8e80941Smrg * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22b8e80941Smrg * USE OR OTHER DEALINGS IN THE SOFTWARE. 23b8e80941Smrg */ 24b8e80941Smrg 25b8e80941Smrg#include "si_pipe.h" 26b8e80941Smrg#include "si_compute.h" 27b8e80941Smrg#include "sid.h" 28b8e80941Smrg#include "gfx9d.h" 29b8e80941Smrg#include "sid_tables.h" 30b8e80941Smrg#include "driver_ddebug/dd_util.h" 31b8e80941Smrg#include "util/u_dump.h" 32b8e80941Smrg#include "util/u_log.h" 33b8e80941Smrg#include "util/u_memory.h" 34b8e80941Smrg#include "util/u_string.h" 35b8e80941Smrg#include "ac_debug.h" 36b8e80941Smrg 37b8e80941Smrgstatic void si_dump_bo_list(struct si_context *sctx, 38b8e80941Smrg const struct radeon_saved_cs *saved, FILE *f); 39b8e80941Smrg 40b8e80941SmrgDEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL) 41b8e80941Smrg 42b8e80941Smrg/** 43b8e80941Smrg * Store a linearized copy of all chunks of \p cs together with the buffer 44b8e80941Smrg * list in \p saved. 45b8e80941Smrg */ 46b8e80941Smrgvoid si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, 47b8e80941Smrg struct radeon_saved_cs *saved, bool get_buffer_list) 48b8e80941Smrg{ 49b8e80941Smrg uint32_t *buf; 50b8e80941Smrg unsigned i; 51b8e80941Smrg 52b8e80941Smrg /* Save the IB chunks. */ 53b8e80941Smrg saved->num_dw = cs->prev_dw + cs->current.cdw; 54b8e80941Smrg saved->ib = MALLOC(4 * saved->num_dw); 55b8e80941Smrg if (!saved->ib) 56b8e80941Smrg goto oom; 57b8e80941Smrg 58b8e80941Smrg buf = saved->ib; 59b8e80941Smrg for (i = 0; i < cs->num_prev; ++i) { 60b8e80941Smrg memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); 61b8e80941Smrg buf += cs->prev[i].cdw; 62b8e80941Smrg } 63b8e80941Smrg memcpy(buf, cs->current.buf, cs->current.cdw * 4); 64b8e80941Smrg 65b8e80941Smrg if (!get_buffer_list) 66b8e80941Smrg return; 67b8e80941Smrg 68b8e80941Smrg /* Save the buffer list. */ 69b8e80941Smrg saved->bo_count = ws->cs_get_buffer_list(cs, NULL); 70b8e80941Smrg saved->bo_list = CALLOC(saved->bo_count, 71b8e80941Smrg sizeof(saved->bo_list[0])); 72b8e80941Smrg if (!saved->bo_list) { 73b8e80941Smrg FREE(saved->ib); 74b8e80941Smrg goto oom; 75b8e80941Smrg } 76b8e80941Smrg ws->cs_get_buffer_list(cs, saved->bo_list); 77b8e80941Smrg 78b8e80941Smrg return; 79b8e80941Smrg 80b8e80941Smrgoom: 81b8e80941Smrg fprintf(stderr, "%s: out of memory\n", __func__); 82b8e80941Smrg memset(saved, 0, sizeof(*saved)); 83b8e80941Smrg} 84b8e80941Smrg 85b8e80941Smrgvoid si_clear_saved_cs(struct radeon_saved_cs *saved) 86b8e80941Smrg{ 87b8e80941Smrg FREE(saved->ib); 88b8e80941Smrg FREE(saved->bo_list); 89b8e80941Smrg 90b8e80941Smrg memset(saved, 0, sizeof(*saved)); 91b8e80941Smrg} 92b8e80941Smrg 93b8e80941Smrgvoid si_destroy_saved_cs(struct si_saved_cs *scs) 94b8e80941Smrg{ 95b8e80941Smrg si_clear_saved_cs(&scs->gfx); 96b8e80941Smrg si_resource_reference(&scs->trace_buf, NULL); 97b8e80941Smrg free(scs); 98b8e80941Smrg} 99b8e80941Smrg 100b8e80941Smrgstatic void si_dump_shader(struct si_screen *sscreen, 101b8e80941Smrg enum pipe_shader_type processor, 102b8e80941Smrg const struct si_shader *shader, FILE *f) 103b8e80941Smrg{ 104b8e80941Smrg if (shader->shader_log) 105b8e80941Smrg fwrite(shader->shader_log, shader->shader_log_size, 1, f); 106b8e80941Smrg else 107b8e80941Smrg si_shader_dump(sscreen, shader, NULL, processor, f, false); 108b8e80941Smrg} 109b8e80941Smrg 110b8e80941Smrgstruct si_log_chunk_shader { 111b8e80941Smrg /* The shader destroy code assumes a current context for unlinking of 112b8e80941Smrg * PM4 packets etc. 113b8e80941Smrg * 114b8e80941Smrg * While we should be able to destroy shaders without a context, doing 115b8e80941Smrg * so would happen only very rarely and be therefore likely to fail 116b8e80941Smrg * just when you're trying to debug something. Let's just remember the 117b8e80941Smrg * current context in the chunk. 118b8e80941Smrg */ 119b8e80941Smrg struct si_context *ctx; 120b8e80941Smrg struct si_shader *shader; 121b8e80941Smrg enum pipe_shader_type processor; 122b8e80941Smrg 123b8e80941Smrg /* For keep-alive reference counts */ 124b8e80941Smrg struct si_shader_selector *sel; 125b8e80941Smrg struct si_compute *program; 126b8e80941Smrg}; 127b8e80941Smrg 128b8e80941Smrgstatic void 129b8e80941Smrgsi_log_chunk_shader_destroy(void *data) 130b8e80941Smrg{ 131b8e80941Smrg struct si_log_chunk_shader *chunk = data; 132b8e80941Smrg si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL); 133b8e80941Smrg si_compute_reference(&chunk->program, NULL); 134b8e80941Smrg FREE(chunk); 135b8e80941Smrg} 136b8e80941Smrg 137b8e80941Smrgstatic void 138b8e80941Smrgsi_log_chunk_shader_print(void *data, FILE *f) 139b8e80941Smrg{ 140b8e80941Smrg struct si_log_chunk_shader *chunk = data; 141b8e80941Smrg struct si_screen *sscreen = chunk->ctx->screen; 142b8e80941Smrg si_dump_shader(sscreen, chunk->processor, 143b8e80941Smrg chunk->shader, f); 144b8e80941Smrg} 145b8e80941Smrg 146b8e80941Smrgstatic struct u_log_chunk_type si_log_chunk_type_shader = { 147b8e80941Smrg .destroy = si_log_chunk_shader_destroy, 148b8e80941Smrg .print = si_log_chunk_shader_print, 149b8e80941Smrg}; 150b8e80941Smrg 151b8e80941Smrgstatic void si_dump_gfx_shader(struct si_context *ctx, 152b8e80941Smrg const struct si_shader_ctx_state *state, 153b8e80941Smrg struct u_log_context *log) 154b8e80941Smrg{ 155b8e80941Smrg struct si_shader *current = state->current; 156b8e80941Smrg 157b8e80941Smrg if (!state->cso || !current) 158b8e80941Smrg return; 159b8e80941Smrg 160b8e80941Smrg struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 161b8e80941Smrg chunk->ctx = ctx; 162b8e80941Smrg chunk->processor = state->cso->info.processor; 163b8e80941Smrg chunk->shader = current; 164b8e80941Smrg si_shader_selector_reference(ctx, &chunk->sel, current->selector); 165b8e80941Smrg u_log_chunk(log, &si_log_chunk_type_shader, chunk); 166b8e80941Smrg} 167b8e80941Smrg 168b8e80941Smrgstatic void si_dump_compute_shader(struct si_context *ctx, 169b8e80941Smrg struct u_log_context *log) 170b8e80941Smrg{ 171b8e80941Smrg const struct si_cs_shader_state *state = &ctx->cs_shader_state; 172b8e80941Smrg 173b8e80941Smrg if (!state->program) 174b8e80941Smrg return; 175b8e80941Smrg 176b8e80941Smrg struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 177b8e80941Smrg chunk->ctx = ctx; 178b8e80941Smrg chunk->processor = PIPE_SHADER_COMPUTE; 179b8e80941Smrg chunk->shader = &state->program->shader; 180b8e80941Smrg si_compute_reference(&chunk->program, state->program); 181b8e80941Smrg u_log_chunk(log, &si_log_chunk_type_shader, chunk); 182b8e80941Smrg} 183b8e80941Smrg 184b8e80941Smrg/** 185b8e80941Smrg * Shader compiles can be overridden with arbitrary ELF objects by setting 186b8e80941Smrg * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] 187b8e80941Smrg */ 188b8e80941Smrgbool si_replace_shader(unsigned num, struct ac_shader_binary *binary) 189b8e80941Smrg{ 190b8e80941Smrg const char *p = debug_get_option_replace_shaders(); 191b8e80941Smrg const char *semicolon; 192b8e80941Smrg char *copy = NULL; 193b8e80941Smrg FILE *f; 194b8e80941Smrg long filesize, nread; 195b8e80941Smrg char *buf = NULL; 196b8e80941Smrg bool replaced = false; 197b8e80941Smrg 198b8e80941Smrg if (!p) 199b8e80941Smrg return false; 200b8e80941Smrg 201b8e80941Smrg while (*p) { 202b8e80941Smrg unsigned long i; 203b8e80941Smrg char *endp; 204b8e80941Smrg i = strtoul(p, &endp, 0); 205b8e80941Smrg 206b8e80941Smrg p = endp; 207b8e80941Smrg if (*p != ':') { 208b8e80941Smrg fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n"); 209b8e80941Smrg exit(1); 210b8e80941Smrg } 211b8e80941Smrg ++p; 212b8e80941Smrg 213b8e80941Smrg if (i == num) 214b8e80941Smrg break; 215b8e80941Smrg 216b8e80941Smrg p = strchr(p, ';'); 217b8e80941Smrg if (!p) 218b8e80941Smrg return false; 219b8e80941Smrg ++p; 220b8e80941Smrg } 221b8e80941Smrg if (!*p) 222b8e80941Smrg return false; 223b8e80941Smrg 224b8e80941Smrg semicolon = strchr(p, ';'); 225b8e80941Smrg if (semicolon) { 226b8e80941Smrg p = copy = strndup(p, semicolon - p); 227b8e80941Smrg if (!copy) { 228b8e80941Smrg fprintf(stderr, "out of memory\n"); 229b8e80941Smrg return false; 230b8e80941Smrg } 231b8e80941Smrg } 232b8e80941Smrg 233b8e80941Smrg fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p); 234b8e80941Smrg 235b8e80941Smrg f = fopen(p, "r"); 236b8e80941Smrg if (!f) { 237b8e80941Smrg perror("radeonsi: failed to open file"); 238b8e80941Smrg goto out_free; 239b8e80941Smrg } 240b8e80941Smrg 241b8e80941Smrg if (fseek(f, 0, SEEK_END) != 0) 242b8e80941Smrg goto file_error; 243b8e80941Smrg 244b8e80941Smrg filesize = ftell(f); 245b8e80941Smrg if (filesize < 0) 246b8e80941Smrg goto file_error; 247b8e80941Smrg 248b8e80941Smrg if (fseek(f, 0, SEEK_SET) != 0) 249b8e80941Smrg goto file_error; 250b8e80941Smrg 251b8e80941Smrg buf = MALLOC(filesize); 252b8e80941Smrg if (!buf) { 253b8e80941Smrg fprintf(stderr, "out of memory\n"); 254b8e80941Smrg goto out_close; 255b8e80941Smrg } 256b8e80941Smrg 257b8e80941Smrg nread = fread(buf, 1, filesize, f); 258b8e80941Smrg if (nread != filesize) 259b8e80941Smrg goto file_error; 260b8e80941Smrg 261b8e80941Smrg ac_elf_read(buf, filesize, binary); 262b8e80941Smrg replaced = true; 263b8e80941Smrg 264b8e80941Smrgout_close: 265b8e80941Smrg fclose(f); 266b8e80941Smrgout_free: 267b8e80941Smrg FREE(buf); 268b8e80941Smrg free(copy); 269b8e80941Smrg return replaced; 270b8e80941Smrg 271b8e80941Smrgfile_error: 272b8e80941Smrg perror("radeonsi: reading shader"); 273b8e80941Smrg goto out_close; 274b8e80941Smrg} 275b8e80941Smrg 276b8e80941Smrg/* Parsed IBs are difficult to read without colors. Use "less -R file" to 277b8e80941Smrg * read them, or use "aha -b -f file" to convert them to html. 278b8e80941Smrg */ 279b8e80941Smrg#define COLOR_RESET "\033[0m" 280b8e80941Smrg#define COLOR_RED "\033[31m" 281b8e80941Smrg#define COLOR_GREEN "\033[1;32m" 282b8e80941Smrg#define COLOR_YELLOW "\033[1;33m" 283b8e80941Smrg#define COLOR_CYAN "\033[1;36m" 284b8e80941Smrg 285b8e80941Smrgstatic void si_dump_mmapped_reg(struct si_context *sctx, FILE *f, 286b8e80941Smrg unsigned offset) 287b8e80941Smrg{ 288b8e80941Smrg struct radeon_winsys *ws = sctx->ws; 289b8e80941Smrg uint32_t value; 290b8e80941Smrg 291b8e80941Smrg if (ws->read_registers(ws, offset, 1, &value)) 292b8e80941Smrg ac_dump_reg(f, sctx->chip_class, offset, value, ~0); 293b8e80941Smrg} 294b8e80941Smrg 295b8e80941Smrgstatic void si_dump_debug_registers(struct si_context *sctx, FILE *f) 296b8e80941Smrg{ 297b8e80941Smrg if (!sctx->screen->info.has_read_registers_query) 298b8e80941Smrg return; 299b8e80941Smrg 300b8e80941Smrg fprintf(f, "Memory-mapped registers:\n"); 301b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS); 302b8e80941Smrg 303b8e80941Smrg /* No other registers can be read on DRM < 3.1.0. */ 304b8e80941Smrg if (sctx->screen->info.drm_major < 3 || 305b8e80941Smrg sctx->screen->info.drm_minor < 1) { 306b8e80941Smrg fprintf(f, "\n"); 307b8e80941Smrg return; 308b8e80941Smrg } 309b8e80941Smrg 310b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2); 311b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0); 312b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1); 313b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2); 314b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3); 315b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG); 316b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG); 317b8e80941Smrg if (sctx->chip_class <= VI) { 318b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS); 319b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2); 320b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3); 321b8e80941Smrg } 322b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT); 323b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1); 324b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2); 325b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3); 326b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS); 327b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT); 328b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1); 329b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS); 330b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT); 331b8e80941Smrg si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1); 332b8e80941Smrg fprintf(f, "\n"); 333b8e80941Smrg} 334b8e80941Smrg 335b8e80941Smrgstruct si_log_chunk_cs { 336b8e80941Smrg struct si_context *ctx; 337b8e80941Smrg struct si_saved_cs *cs; 338b8e80941Smrg bool dump_bo_list; 339b8e80941Smrg unsigned gfx_begin, gfx_end; 340b8e80941Smrg}; 341b8e80941Smrg 342b8e80941Smrgstatic void si_log_chunk_type_cs_destroy(void *data) 343b8e80941Smrg{ 344b8e80941Smrg struct si_log_chunk_cs *chunk = data; 345b8e80941Smrg si_saved_cs_reference(&chunk->cs, NULL); 346b8e80941Smrg free(chunk); 347b8e80941Smrg} 348b8e80941Smrg 349b8e80941Smrgstatic void si_parse_current_ib(FILE *f, struct radeon_cmdbuf *cs, 350b8e80941Smrg unsigned begin, unsigned end, 351b8e80941Smrg int *last_trace_id, unsigned trace_id_count, 352b8e80941Smrg const char *name, enum chip_class chip_class) 353b8e80941Smrg{ 354b8e80941Smrg unsigned orig_end = end; 355b8e80941Smrg 356b8e80941Smrg assert(begin <= end); 357b8e80941Smrg 358b8e80941Smrg fprintf(f, "------------------ %s begin (dw = %u) ------------------\n", 359b8e80941Smrg name, begin); 360b8e80941Smrg 361b8e80941Smrg for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) { 362b8e80941Smrg struct radeon_cmdbuf_chunk *chunk = &cs->prev[prev_idx]; 363b8e80941Smrg 364b8e80941Smrg if (begin < chunk->cdw) { 365b8e80941Smrg ac_parse_ib_chunk(f, chunk->buf + begin, 366b8e80941Smrg MIN2(end, chunk->cdw) - begin, 367b8e80941Smrg last_trace_id, trace_id_count, 368b8e80941Smrg chip_class, NULL, NULL); 369b8e80941Smrg } 370b8e80941Smrg 371b8e80941Smrg if (end <= chunk->cdw) 372b8e80941Smrg return; 373b8e80941Smrg 374b8e80941Smrg if (begin < chunk->cdw) 375b8e80941Smrg fprintf(f, "\n---------- Next %s Chunk ----------\n\n", 376b8e80941Smrg name); 377b8e80941Smrg 378b8e80941Smrg begin -= MIN2(begin, chunk->cdw); 379b8e80941Smrg end -= chunk->cdw; 380b8e80941Smrg } 381b8e80941Smrg 382b8e80941Smrg assert(end <= cs->current.cdw); 383b8e80941Smrg 384b8e80941Smrg ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id, 385b8e80941Smrg trace_id_count, chip_class, NULL, NULL); 386b8e80941Smrg 387b8e80941Smrg fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n", 388b8e80941Smrg name, orig_end); 389b8e80941Smrg} 390b8e80941Smrg 391b8e80941Smrgstatic void si_log_chunk_type_cs_print(void *data, FILE *f) 392b8e80941Smrg{ 393b8e80941Smrg struct si_log_chunk_cs *chunk = data; 394b8e80941Smrg struct si_context *ctx = chunk->ctx; 395b8e80941Smrg struct si_saved_cs *scs = chunk->cs; 396b8e80941Smrg int last_trace_id = -1; 397b8e80941Smrg 398b8e80941Smrg /* We are expecting that the ddebug pipe has already 399b8e80941Smrg * waited for the context, so this buffer should be idle. 400b8e80941Smrg * If the GPU is hung, there is no point in waiting for it. 401b8e80941Smrg */ 402b8e80941Smrg uint32_t *map = ctx->ws->buffer_map(scs->trace_buf->buf, 403b8e80941Smrg NULL, 404b8e80941Smrg PIPE_TRANSFER_UNSYNCHRONIZED | 405b8e80941Smrg PIPE_TRANSFER_READ); 406b8e80941Smrg if (map) 407b8e80941Smrg last_trace_id = map[0]; 408b8e80941Smrg 409b8e80941Smrg if (chunk->gfx_end != chunk->gfx_begin) { 410b8e80941Smrg if (chunk->gfx_begin == 0) { 411b8e80941Smrg if (ctx->init_config) 412b8e80941Smrg ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw, 413b8e80941Smrg NULL, 0, "IB2: Init config", ctx->chip_class, 414b8e80941Smrg NULL, NULL); 415b8e80941Smrg 416b8e80941Smrg if (ctx->init_config_gs_rings) 417b8e80941Smrg ac_parse_ib(f, ctx->init_config_gs_rings->pm4, 418b8e80941Smrg ctx->init_config_gs_rings->ndw, 419b8e80941Smrg NULL, 0, "IB2: Init GS rings", ctx->chip_class, 420b8e80941Smrg NULL, NULL); 421b8e80941Smrg } 422b8e80941Smrg 423b8e80941Smrg if (scs->flushed) { 424b8e80941Smrg ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin, 425b8e80941Smrg chunk->gfx_end - chunk->gfx_begin, 426b8e80941Smrg &last_trace_id, map ? 1 : 0, "IB", ctx->chip_class, 427b8e80941Smrg NULL, NULL); 428b8e80941Smrg } else { 429b8e80941Smrg si_parse_current_ib(f, ctx->gfx_cs, chunk->gfx_begin, 430b8e80941Smrg chunk->gfx_end, &last_trace_id, map ? 1 : 0, 431b8e80941Smrg "IB", ctx->chip_class); 432b8e80941Smrg } 433b8e80941Smrg } 434b8e80941Smrg 435b8e80941Smrg if (chunk->dump_bo_list) { 436b8e80941Smrg fprintf(f, "Flushing. Time: "); 437b8e80941Smrg util_dump_ns(f, scs->time_flush); 438b8e80941Smrg fprintf(f, "\n\n"); 439b8e80941Smrg si_dump_bo_list(ctx, &scs->gfx, f); 440b8e80941Smrg } 441b8e80941Smrg} 442b8e80941Smrg 443b8e80941Smrgstatic const struct u_log_chunk_type si_log_chunk_type_cs = { 444b8e80941Smrg .destroy = si_log_chunk_type_cs_destroy, 445b8e80941Smrg .print = si_log_chunk_type_cs_print, 446b8e80941Smrg}; 447b8e80941Smrg 448b8e80941Smrgstatic void si_log_cs(struct si_context *ctx, struct u_log_context *log, 449b8e80941Smrg bool dump_bo_list) 450b8e80941Smrg{ 451b8e80941Smrg assert(ctx->current_saved_cs); 452b8e80941Smrg 453b8e80941Smrg struct si_saved_cs *scs = ctx->current_saved_cs; 454b8e80941Smrg unsigned gfx_cur = ctx->gfx_cs->prev_dw + ctx->gfx_cs->current.cdw; 455b8e80941Smrg 456b8e80941Smrg if (!dump_bo_list && 457b8e80941Smrg gfx_cur == scs->gfx_last_dw) 458b8e80941Smrg return; 459b8e80941Smrg 460b8e80941Smrg struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk)); 461b8e80941Smrg 462b8e80941Smrg chunk->ctx = ctx; 463b8e80941Smrg si_saved_cs_reference(&chunk->cs, scs); 464b8e80941Smrg chunk->dump_bo_list = dump_bo_list; 465b8e80941Smrg 466b8e80941Smrg chunk->gfx_begin = scs->gfx_last_dw; 467b8e80941Smrg chunk->gfx_end = gfx_cur; 468b8e80941Smrg scs->gfx_last_dw = gfx_cur; 469b8e80941Smrg 470b8e80941Smrg u_log_chunk(log, &si_log_chunk_type_cs, chunk); 471b8e80941Smrg} 472b8e80941Smrg 473b8e80941Smrgvoid si_auto_log_cs(void *data, struct u_log_context *log) 474b8e80941Smrg{ 475b8e80941Smrg struct si_context *ctx = (struct si_context *)data; 476b8e80941Smrg si_log_cs(ctx, log, false); 477b8e80941Smrg} 478b8e80941Smrg 479b8e80941Smrgvoid si_log_hw_flush(struct si_context *sctx) 480b8e80941Smrg{ 481b8e80941Smrg if (!sctx->log) 482b8e80941Smrg return; 483b8e80941Smrg 484b8e80941Smrg si_log_cs(sctx, sctx->log, true); 485b8e80941Smrg 486b8e80941Smrg if (&sctx->b == sctx->screen->aux_context) { 487b8e80941Smrg /* The aux context isn't captured by the ddebug wrapper, 488b8e80941Smrg * so we dump it on a flush-by-flush basis here. 489b8e80941Smrg */ 490b8e80941Smrg FILE *f = dd_get_debug_file(false); 491b8e80941Smrg if (!f) { 492b8e80941Smrg fprintf(stderr, "radeonsi: error opening aux context dump file.\n"); 493b8e80941Smrg } else { 494b8e80941Smrg dd_write_header(f, &sctx->screen->b, 0); 495b8e80941Smrg 496b8e80941Smrg fprintf(f, "Aux context dump:\n\n"); 497b8e80941Smrg u_log_new_page_print(sctx->log, f); 498b8e80941Smrg 499b8e80941Smrg fclose(f); 500b8e80941Smrg } 501b8e80941Smrg } 502b8e80941Smrg} 503b8e80941Smrg 504b8e80941Smrgstatic const char *priority_to_string(enum radeon_bo_priority priority) 505b8e80941Smrg{ 506b8e80941Smrg#define ITEM(x) [RADEON_PRIO_##x] = #x 507b8e80941Smrg static const char *table[64] = { 508b8e80941Smrg ITEM(FENCE), 509b8e80941Smrg ITEM(TRACE), 510b8e80941Smrg ITEM(SO_FILLED_SIZE), 511b8e80941Smrg ITEM(QUERY), 512b8e80941Smrg ITEM(IB1), 513b8e80941Smrg ITEM(IB2), 514b8e80941Smrg ITEM(DRAW_INDIRECT), 515b8e80941Smrg ITEM(INDEX_BUFFER), 516b8e80941Smrg ITEM(CP_DMA), 517b8e80941Smrg ITEM(CONST_BUFFER), 518b8e80941Smrg ITEM(DESCRIPTORS), 519b8e80941Smrg ITEM(BORDER_COLORS), 520b8e80941Smrg ITEM(SAMPLER_BUFFER), 521b8e80941Smrg ITEM(VERTEX_BUFFER), 522b8e80941Smrg ITEM(SHADER_RW_BUFFER), 523b8e80941Smrg ITEM(COMPUTE_GLOBAL), 524b8e80941Smrg ITEM(SAMPLER_TEXTURE), 525b8e80941Smrg ITEM(SHADER_RW_IMAGE), 526b8e80941Smrg ITEM(SAMPLER_TEXTURE_MSAA), 527b8e80941Smrg ITEM(COLOR_BUFFER), 528b8e80941Smrg ITEM(DEPTH_BUFFER), 529b8e80941Smrg ITEM(COLOR_BUFFER_MSAA), 530b8e80941Smrg ITEM(DEPTH_BUFFER_MSAA), 531b8e80941Smrg ITEM(SEPARATE_META), 532b8e80941Smrg ITEM(SHADER_BINARY), 533b8e80941Smrg ITEM(SHADER_RINGS), 534b8e80941Smrg ITEM(SCRATCH_BUFFER), 535b8e80941Smrg }; 536b8e80941Smrg#undef ITEM 537b8e80941Smrg 538b8e80941Smrg assert(priority < ARRAY_SIZE(table)); 539b8e80941Smrg return table[priority]; 540b8e80941Smrg} 541b8e80941Smrg 542b8e80941Smrgstatic int bo_list_compare_va(const struct radeon_bo_list_item *a, 543b8e80941Smrg const struct radeon_bo_list_item *b) 544b8e80941Smrg{ 545b8e80941Smrg return a->vm_address < b->vm_address ? -1 : 546b8e80941Smrg a->vm_address > b->vm_address ? 1 : 0; 547b8e80941Smrg} 548b8e80941Smrg 549b8e80941Smrgstatic void si_dump_bo_list(struct si_context *sctx, 550b8e80941Smrg const struct radeon_saved_cs *saved, FILE *f) 551b8e80941Smrg{ 552b8e80941Smrg unsigned i,j; 553b8e80941Smrg 554b8e80941Smrg if (!saved->bo_list) 555b8e80941Smrg return; 556b8e80941Smrg 557b8e80941Smrg /* Sort the list according to VM adddresses first. */ 558b8e80941Smrg qsort(saved->bo_list, saved->bo_count, 559b8e80941Smrg sizeof(saved->bo_list[0]), (void*)bo_list_compare_va); 560b8e80941Smrg 561b8e80941Smrg fprintf(f, "Buffer list (in units of pages = 4kB):\n" 562b8e80941Smrg COLOR_YELLOW " Size VM start page " 563b8e80941Smrg "VM end page Usage" COLOR_RESET "\n"); 564b8e80941Smrg 565b8e80941Smrg for (i = 0; i < saved->bo_count; i++) { 566b8e80941Smrg /* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */ 567b8e80941Smrg const unsigned page_size = sctx->screen->info.gart_page_size; 568b8e80941Smrg uint64_t va = saved->bo_list[i].vm_address; 569b8e80941Smrg uint64_t size = saved->bo_list[i].bo_size; 570b8e80941Smrg bool hit = false; 571b8e80941Smrg 572b8e80941Smrg /* If there's unused virtual memory between 2 buffers, print it. */ 573b8e80941Smrg if (i) { 574b8e80941Smrg uint64_t previous_va_end = saved->bo_list[i-1].vm_address + 575b8e80941Smrg saved->bo_list[i-1].bo_size; 576b8e80941Smrg 577b8e80941Smrg if (va > previous_va_end) { 578b8e80941Smrg fprintf(f, " %10"PRIu64" -- hole --\n", 579b8e80941Smrg (va - previous_va_end) / page_size); 580b8e80941Smrg } 581b8e80941Smrg } 582b8e80941Smrg 583b8e80941Smrg /* Print the buffer. */ 584b8e80941Smrg fprintf(f, " %10"PRIu64" 0x%013"PRIX64" 0x%013"PRIX64" ", 585b8e80941Smrg size / page_size, va / page_size, (va + size) / page_size); 586b8e80941Smrg 587b8e80941Smrg /* Print the usage. */ 588b8e80941Smrg for (j = 0; j < 32; j++) { 589b8e80941Smrg if (!(saved->bo_list[i].priority_usage & (1u << j))) 590b8e80941Smrg continue; 591b8e80941Smrg 592b8e80941Smrg fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j)); 593b8e80941Smrg hit = true; 594b8e80941Smrg } 595b8e80941Smrg fprintf(f, "\n"); 596b8e80941Smrg } 597b8e80941Smrg fprintf(f, "\nNote: The holes represent memory not used by the IB.\n" 598b8e80941Smrg " Other buffers can still be allocated there.\n\n"); 599b8e80941Smrg} 600b8e80941Smrg 601b8e80941Smrgstatic void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log) 602b8e80941Smrg{ 603b8e80941Smrg struct pipe_framebuffer_state *state = &sctx->framebuffer.state; 604b8e80941Smrg struct si_texture *tex; 605b8e80941Smrg int i; 606b8e80941Smrg 607b8e80941Smrg for (i = 0; i < state->nr_cbufs; i++) { 608b8e80941Smrg if (!state->cbufs[i]) 609b8e80941Smrg continue; 610b8e80941Smrg 611b8e80941Smrg tex = (struct si_texture*)state->cbufs[i]->texture; 612b8e80941Smrg u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i); 613b8e80941Smrg si_print_texture_info(sctx->screen, tex, log); 614b8e80941Smrg u_log_printf(log, "\n"); 615b8e80941Smrg } 616b8e80941Smrg 617b8e80941Smrg if (state->zsbuf) { 618b8e80941Smrg tex = (struct si_texture*)state->zsbuf->texture; 619b8e80941Smrg u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n"); 620b8e80941Smrg si_print_texture_info(sctx->screen, tex, log); 621b8e80941Smrg u_log_printf(log, "\n"); 622b8e80941Smrg } 623b8e80941Smrg} 624b8e80941Smrg 625b8e80941Smrgtypedef unsigned (*slot_remap_func)(unsigned); 626b8e80941Smrg 627b8e80941Smrgstruct si_log_chunk_desc_list { 628b8e80941Smrg /** Pointer to memory map of buffer where the list is uploader */ 629b8e80941Smrg uint32_t *gpu_list; 630b8e80941Smrg /** Reference of buffer where the list is uploaded, so that gpu_list 631b8e80941Smrg * is kept live. */ 632b8e80941Smrg struct si_resource *buf; 633b8e80941Smrg 634b8e80941Smrg const char *shader_name; 635b8e80941Smrg const char *elem_name; 636b8e80941Smrg slot_remap_func slot_remap; 637b8e80941Smrg enum chip_class chip_class; 638b8e80941Smrg unsigned element_dw_size; 639b8e80941Smrg unsigned num_elements; 640b8e80941Smrg 641b8e80941Smrg uint32_t list[0]; 642b8e80941Smrg}; 643b8e80941Smrg 644b8e80941Smrgstatic void 645b8e80941Smrgsi_log_chunk_desc_list_destroy(void *data) 646b8e80941Smrg{ 647b8e80941Smrg struct si_log_chunk_desc_list *chunk = data; 648b8e80941Smrg si_resource_reference(&chunk->buf, NULL); 649b8e80941Smrg FREE(chunk); 650b8e80941Smrg} 651b8e80941Smrg 652b8e80941Smrgstatic void 653b8e80941Smrgsi_log_chunk_desc_list_print(void *data, FILE *f) 654b8e80941Smrg{ 655b8e80941Smrg struct si_log_chunk_desc_list *chunk = data; 656b8e80941Smrg 657b8e80941Smrg for (unsigned i = 0; i < chunk->num_elements; i++) { 658b8e80941Smrg unsigned cpu_dw_offset = i * chunk->element_dw_size; 659b8e80941Smrg unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size; 660b8e80941Smrg const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list"; 661b8e80941Smrg uint32_t *cpu_list = chunk->list + cpu_dw_offset; 662b8e80941Smrg uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list; 663b8e80941Smrg 664b8e80941Smrg fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n", 665b8e80941Smrg chunk->shader_name, chunk->elem_name, i, list_note); 666b8e80941Smrg 667b8e80941Smrg switch (chunk->element_dw_size) { 668b8e80941Smrg case 4: 669b8e80941Smrg for (unsigned j = 0; j < 4; j++) 670b8e80941Smrg ac_dump_reg(f, chunk->chip_class, 671b8e80941Smrg R_008F00_SQ_BUF_RSRC_WORD0 + j*4, 672b8e80941Smrg gpu_list[j], 0xffffffff); 673b8e80941Smrg break; 674b8e80941Smrg case 8: 675b8e80941Smrg for (unsigned j = 0; j < 8; j++) 676b8e80941Smrg ac_dump_reg(f, chunk->chip_class, 677b8e80941Smrg R_008F10_SQ_IMG_RSRC_WORD0 + j*4, 678b8e80941Smrg gpu_list[j], 0xffffffff); 679b8e80941Smrg 680b8e80941Smrg fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 681b8e80941Smrg for (unsigned j = 0; j < 4; j++) 682b8e80941Smrg ac_dump_reg(f, chunk->chip_class, 683b8e80941Smrg R_008F00_SQ_BUF_RSRC_WORD0 + j*4, 684b8e80941Smrg gpu_list[4+j], 0xffffffff); 685b8e80941Smrg break; 686b8e80941Smrg case 16: 687b8e80941Smrg for (unsigned j = 0; j < 8; j++) 688b8e80941Smrg ac_dump_reg(f, chunk->chip_class, 689b8e80941Smrg R_008F10_SQ_IMG_RSRC_WORD0 + j*4, 690b8e80941Smrg gpu_list[j], 0xffffffff); 691b8e80941Smrg 692b8e80941Smrg fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 693b8e80941Smrg for (unsigned j = 0; j < 4; j++) 694b8e80941Smrg ac_dump_reg(f, chunk->chip_class, 695b8e80941Smrg R_008F00_SQ_BUF_RSRC_WORD0 + j*4, 696b8e80941Smrg gpu_list[4+j], 0xffffffff); 697b8e80941Smrg 698b8e80941Smrg fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); 699b8e80941Smrg for (unsigned j = 0; j < 8; j++) 700b8e80941Smrg ac_dump_reg(f, chunk->chip_class, 701b8e80941Smrg R_008F10_SQ_IMG_RSRC_WORD0 + j*4, 702b8e80941Smrg gpu_list[8+j], 0xffffffff); 703b8e80941Smrg 704b8e80941Smrg fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); 705b8e80941Smrg for (unsigned j = 0; j < 4; j++) 706b8e80941Smrg ac_dump_reg(f, chunk->chip_class, 707b8e80941Smrg R_008F30_SQ_IMG_SAMP_WORD0 + j*4, 708b8e80941Smrg gpu_list[12+j], 0xffffffff); 709b8e80941Smrg break; 710b8e80941Smrg } 711b8e80941Smrg 712b8e80941Smrg if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) { 713b8e80941Smrg fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!" 714b8e80941Smrg COLOR_RESET "\n"); 715b8e80941Smrg } 716b8e80941Smrg 717b8e80941Smrg fprintf(f, "\n"); 718b8e80941Smrg } 719b8e80941Smrg 720b8e80941Smrg} 721b8e80941Smrg 722b8e80941Smrgstatic const struct u_log_chunk_type si_log_chunk_type_descriptor_list = { 723b8e80941Smrg .destroy = si_log_chunk_desc_list_destroy, 724b8e80941Smrg .print = si_log_chunk_desc_list_print, 725b8e80941Smrg}; 726b8e80941Smrg 727b8e80941Smrgstatic void si_dump_descriptor_list(struct si_screen *screen, 728b8e80941Smrg struct si_descriptors *desc, 729b8e80941Smrg const char *shader_name, 730b8e80941Smrg const char *elem_name, 731b8e80941Smrg unsigned element_dw_size, 732b8e80941Smrg unsigned num_elements, 733b8e80941Smrg slot_remap_func slot_remap, 734b8e80941Smrg struct u_log_context *log) 735b8e80941Smrg{ 736b8e80941Smrg if (!desc->list) 737b8e80941Smrg return; 738b8e80941Smrg 739b8e80941Smrg /* In some cases, the caller doesn't know how many elements are really 740b8e80941Smrg * uploaded. Reduce num_elements to fit in the range of active slots. */ 741b8e80941Smrg unsigned active_range_dw_begin = 742b8e80941Smrg desc->first_active_slot * desc->element_dw_size; 743b8e80941Smrg unsigned active_range_dw_end = 744b8e80941Smrg active_range_dw_begin + desc->num_active_slots * desc->element_dw_size; 745b8e80941Smrg 746b8e80941Smrg while (num_elements > 0) { 747b8e80941Smrg int i = slot_remap(num_elements - 1); 748b8e80941Smrg unsigned dw_begin = i * element_dw_size; 749b8e80941Smrg unsigned dw_end = dw_begin + element_dw_size; 750b8e80941Smrg 751b8e80941Smrg if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end) 752b8e80941Smrg break; 753b8e80941Smrg 754b8e80941Smrg num_elements--; 755b8e80941Smrg } 756b8e80941Smrg 757b8e80941Smrg struct si_log_chunk_desc_list *chunk = 758b8e80941Smrg CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list, 759b8e80941Smrg 4 * element_dw_size * num_elements); 760b8e80941Smrg chunk->shader_name = shader_name; 761b8e80941Smrg chunk->elem_name = elem_name; 762b8e80941Smrg chunk->element_dw_size = element_dw_size; 763b8e80941Smrg chunk->num_elements = num_elements; 764b8e80941Smrg chunk->slot_remap = slot_remap; 765b8e80941Smrg chunk->chip_class = screen->info.chip_class; 766b8e80941Smrg 767b8e80941Smrg si_resource_reference(&chunk->buf, desc->buffer); 768b8e80941Smrg chunk->gpu_list = desc->gpu_list; 769b8e80941Smrg 770b8e80941Smrg for (unsigned i = 0; i < num_elements; ++i) { 771b8e80941Smrg memcpy(&chunk->list[i * element_dw_size], 772b8e80941Smrg &desc->list[slot_remap(i) * element_dw_size], 773b8e80941Smrg 4 * element_dw_size); 774b8e80941Smrg } 775b8e80941Smrg 776b8e80941Smrg u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk); 777b8e80941Smrg} 778b8e80941Smrg 779b8e80941Smrgstatic unsigned si_identity(unsigned slot) 780b8e80941Smrg{ 781b8e80941Smrg return slot; 782b8e80941Smrg} 783b8e80941Smrg 784b8e80941Smrgstatic void si_dump_descriptors(struct si_context *sctx, 785b8e80941Smrg enum pipe_shader_type processor, 786b8e80941Smrg const struct tgsi_shader_info *info, 787b8e80941Smrg struct u_log_context *log) 788b8e80941Smrg{ 789b8e80941Smrg struct si_descriptors *descs = 790b8e80941Smrg &sctx->descriptors[SI_DESCS_FIRST_SHADER + 791b8e80941Smrg processor * SI_NUM_SHADER_DESCS]; 792b8e80941Smrg static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"}; 793b8e80941Smrg const char *name = shader_name[processor]; 794b8e80941Smrg unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers; 795b8e80941Smrg unsigned enabled_images; 796b8e80941Smrg 797b8e80941Smrg if (info) { 798b8e80941Smrg enabled_constbuf = info->const_buffers_declared; 799b8e80941Smrg enabled_shaderbuf = info->shader_buffers_declared; 800b8e80941Smrg enabled_samplers = info->samplers_declared; 801b8e80941Smrg enabled_images = info->images_declared; 802b8e80941Smrg } else { 803b8e80941Smrg enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >> 804b8e80941Smrg SI_NUM_SHADER_BUFFERS; 805b8e80941Smrg enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask & 806b8e80941Smrg u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS); 807b8e80941Smrg enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >> 808b8e80941Smrg (32 - SI_NUM_SHADER_BUFFERS); 809b8e80941Smrg enabled_samplers = sctx->samplers[processor].enabled_mask; 810b8e80941Smrg enabled_images = sctx->images[processor].enabled_mask; 811b8e80941Smrg } 812b8e80941Smrg 813b8e80941Smrg if (processor == PIPE_SHADER_VERTEX && 814b8e80941Smrg sctx->vb_descriptors_buffer && 815b8e80941Smrg sctx->vb_descriptors_gpu_list && 816b8e80941Smrg sctx->vertex_elements) { 817b8e80941Smrg assert(info); /* only CS may not have an info struct */ 818b8e80941Smrg struct si_descriptors desc = {}; 819b8e80941Smrg 820b8e80941Smrg desc.buffer = sctx->vb_descriptors_buffer; 821b8e80941Smrg desc.list = sctx->vb_descriptors_gpu_list; 822b8e80941Smrg desc.gpu_list = sctx->vb_descriptors_gpu_list; 823b8e80941Smrg desc.element_dw_size = 4; 824b8e80941Smrg desc.num_active_slots = sctx->vertex_elements->desc_list_byte_size / 16; 825b8e80941Smrg 826b8e80941Smrg si_dump_descriptor_list(sctx->screen, &desc, name, 827b8e80941Smrg " - Vertex buffer", 4, info->num_inputs, 828b8e80941Smrg si_identity, log); 829b8e80941Smrg } 830b8e80941Smrg 831b8e80941Smrg si_dump_descriptor_list(sctx->screen, 832b8e80941Smrg &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], 833b8e80941Smrg name, " - Constant buffer", 4, 834b8e80941Smrg util_last_bit(enabled_constbuf), 835b8e80941Smrg si_get_constbuf_slot, log); 836b8e80941Smrg si_dump_descriptor_list(sctx->screen, 837b8e80941Smrg &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], 838b8e80941Smrg name, " - Shader buffer", 4, 839b8e80941Smrg util_last_bit(enabled_shaderbuf), 840b8e80941Smrg si_get_shaderbuf_slot, log); 841b8e80941Smrg si_dump_descriptor_list(sctx->screen, 842b8e80941Smrg &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], 843b8e80941Smrg name, " - Sampler", 16, 844b8e80941Smrg util_last_bit(enabled_samplers), 845b8e80941Smrg si_get_sampler_slot, log); 846b8e80941Smrg si_dump_descriptor_list(sctx->screen, 847b8e80941Smrg &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], 848b8e80941Smrg name, " - Image", 8, 849b8e80941Smrg util_last_bit(enabled_images), 850b8e80941Smrg si_get_image_slot, log); 851b8e80941Smrg} 852b8e80941Smrg 853b8e80941Smrgstatic void si_dump_gfx_descriptors(struct si_context *sctx, 854b8e80941Smrg const struct si_shader_ctx_state *state, 855b8e80941Smrg struct u_log_context *log) 856b8e80941Smrg{ 857b8e80941Smrg if (!state->cso || !state->current) 858b8e80941Smrg return; 859b8e80941Smrg 860b8e80941Smrg si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log); 861b8e80941Smrg} 862b8e80941Smrg 863b8e80941Smrgstatic void si_dump_compute_descriptors(struct si_context *sctx, 864b8e80941Smrg struct u_log_context *log) 865b8e80941Smrg{ 866b8e80941Smrg if (!sctx->cs_shader_state.program) 867b8e80941Smrg return; 868b8e80941Smrg 869b8e80941Smrg si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log); 870b8e80941Smrg} 871b8e80941Smrg 872b8e80941Smrgstruct si_shader_inst { 873b8e80941Smrg const char *text; /* start of disassembly for this instruction */ 874b8e80941Smrg unsigned textlen; 875b8e80941Smrg unsigned size; /* instruction size = 4 or 8 */ 876b8e80941Smrg uint64_t addr; /* instruction address */ 877b8e80941Smrg}; 878b8e80941Smrg 879b8e80941Smrg/** 880b8e80941Smrg * Split a disassembly string into instructions and add them to the array 881b8e80941Smrg * pointed to by \p instructions. 882b8e80941Smrg * 883b8e80941Smrg * Labels are considered to be part of the following instruction. 884b8e80941Smrg */ 885b8e80941Smrgstatic void si_add_split_disasm(const char *disasm, 886b8e80941Smrg uint64_t *addr, 887b8e80941Smrg unsigned *num, 888b8e80941Smrg struct si_shader_inst *instructions) 889b8e80941Smrg{ 890b8e80941Smrg const char *semicolon; 891b8e80941Smrg 892b8e80941Smrg while ((semicolon = strchr(disasm, ';'))) { 893b8e80941Smrg struct si_shader_inst *inst = &instructions[(*num)++]; 894b8e80941Smrg const char *end = util_strchrnul(semicolon, '\n'); 895b8e80941Smrg 896b8e80941Smrg inst->text = disasm; 897b8e80941Smrg inst->textlen = end - disasm; 898b8e80941Smrg 899b8e80941Smrg inst->addr = *addr; 900b8e80941Smrg /* More than 16 chars after ";" means the instruction is 8 bytes long. */ 901b8e80941Smrg inst->size = end - semicolon > 16 ? 8 : 4; 902b8e80941Smrg *addr += inst->size; 903b8e80941Smrg 904b8e80941Smrg if (!(*end)) 905b8e80941Smrg break; 906b8e80941Smrg disasm = end + 1; 907b8e80941Smrg } 908b8e80941Smrg} 909b8e80941Smrg 910b8e80941Smrg/* If the shader is being executed, print its asm instructions, and annotate 911b8e80941Smrg * those that are being executed right now with information about waves that 912b8e80941Smrg * execute them. This is most useful during a GPU hang. 913b8e80941Smrg */ 914b8e80941Smrgstatic void si_print_annotated_shader(struct si_shader *shader, 915b8e80941Smrg struct ac_wave_info *waves, 916b8e80941Smrg unsigned num_waves, 917b8e80941Smrg FILE *f) 918b8e80941Smrg{ 919b8e80941Smrg if (!shader || !shader->binary.disasm_string) 920b8e80941Smrg return; 921b8e80941Smrg 922b8e80941Smrg uint64_t start_addr = shader->bo->gpu_address; 923b8e80941Smrg uint64_t end_addr = start_addr + shader->bo->b.b.width0; 924b8e80941Smrg unsigned i; 925b8e80941Smrg 926b8e80941Smrg /* See if any wave executes the shader. */ 927b8e80941Smrg for (i = 0; i < num_waves; i++) { 928b8e80941Smrg if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) 929b8e80941Smrg break; 930b8e80941Smrg } 931b8e80941Smrg if (i == num_waves) 932b8e80941Smrg return; /* the shader is not being executed */ 933b8e80941Smrg 934b8e80941Smrg /* Remember the first found wave. The waves are sorted according to PC. */ 935b8e80941Smrg waves = &waves[i]; 936b8e80941Smrg num_waves -= i; 937b8e80941Smrg 938b8e80941Smrg /* Get the list of instructions. 939b8e80941Smrg * Buffer size / 4 is the upper bound of the instruction count. 940b8e80941Smrg */ 941b8e80941Smrg unsigned num_inst = 0; 942b8e80941Smrg uint64_t inst_addr = start_addr; 943b8e80941Smrg struct si_shader_inst *instructions = 944b8e80941Smrg calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst)); 945b8e80941Smrg 946b8e80941Smrg if (shader->prolog) { 947b8e80941Smrg si_add_split_disasm(shader->prolog->binary.disasm_string, 948b8e80941Smrg &inst_addr, &num_inst, instructions); 949b8e80941Smrg } 950b8e80941Smrg if (shader->previous_stage) { 951b8e80941Smrg si_add_split_disasm(shader->previous_stage->binary.disasm_string, 952b8e80941Smrg &inst_addr, &num_inst, instructions); 953b8e80941Smrg } 954b8e80941Smrg if (shader->prolog2) { 955b8e80941Smrg si_add_split_disasm(shader->prolog2->binary.disasm_string, 956b8e80941Smrg &inst_addr, &num_inst, instructions); 957b8e80941Smrg } 958b8e80941Smrg si_add_split_disasm(shader->binary.disasm_string, 959b8e80941Smrg &inst_addr, &num_inst, instructions); 960b8e80941Smrg if (shader->epilog) { 961b8e80941Smrg si_add_split_disasm(shader->epilog->binary.disasm_string, 962b8e80941Smrg &inst_addr, &num_inst, instructions); 963b8e80941Smrg } 964b8e80941Smrg 965b8e80941Smrg fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", 966b8e80941Smrg si_get_shader_name(shader, shader->selector->type)); 967b8e80941Smrg 968b8e80941Smrg /* Print instructions with annotations. */ 969b8e80941Smrg for (i = 0; i < num_inst; i++) { 970b8e80941Smrg struct si_shader_inst *inst = &instructions[i]; 971b8e80941Smrg 972b8e80941Smrg fprintf(f, "%.*s [PC=0x%"PRIx64", size=%u]\n", 973b8e80941Smrg inst->textlen, inst->text, inst->addr, inst->size); 974b8e80941Smrg 975b8e80941Smrg /* Print which waves execute the instruction right now. */ 976b8e80941Smrg while (num_waves && inst->addr == waves->pc) { 977b8e80941Smrg fprintf(f, 978b8e80941Smrg " " COLOR_GREEN "^ SE%u SH%u CU%u " 979b8e80941Smrg "SIMD%u WAVE%u EXEC=%016"PRIx64 " ", 980b8e80941Smrg waves->se, waves->sh, waves->cu, waves->simd, 981b8e80941Smrg waves->wave, waves->exec); 982b8e80941Smrg 983b8e80941Smrg if (inst->size == 4) { 984b8e80941Smrg fprintf(f, "INST32=%08X" COLOR_RESET "\n", 985b8e80941Smrg waves->inst_dw0); 986b8e80941Smrg } else { 987b8e80941Smrg fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", 988b8e80941Smrg waves->inst_dw0, waves->inst_dw1); 989b8e80941Smrg } 990b8e80941Smrg 991b8e80941Smrg waves->matched = true; 992b8e80941Smrg waves = &waves[1]; 993b8e80941Smrg num_waves--; 994b8e80941Smrg } 995b8e80941Smrg } 996b8e80941Smrg 997b8e80941Smrg fprintf(f, "\n\n"); 998b8e80941Smrg free(instructions); 999b8e80941Smrg} 1000b8e80941Smrg 1001b8e80941Smrgstatic void si_dump_annotated_shaders(struct si_context *sctx, FILE *f) 1002b8e80941Smrg{ 1003b8e80941Smrg struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; 1004b8e80941Smrg unsigned num_waves = ac_get_wave_info(waves); 1005b8e80941Smrg 1006b8e80941Smrg fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET 1007b8e80941Smrg "\n\n", num_waves); 1008b8e80941Smrg 1009b8e80941Smrg si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f); 1010b8e80941Smrg si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f); 1011b8e80941Smrg si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f); 1012b8e80941Smrg si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f); 1013b8e80941Smrg si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f); 1014b8e80941Smrg 1015b8e80941Smrg /* Print waves executing shaders that are not currently bound. */ 1016b8e80941Smrg unsigned i; 1017b8e80941Smrg bool found = false; 1018b8e80941Smrg for (i = 0; i < num_waves; i++) { 1019b8e80941Smrg if (waves[i].matched) 1020b8e80941Smrg continue; 1021b8e80941Smrg 1022b8e80941Smrg if (!found) { 1023b8e80941Smrg fprintf(f, COLOR_CYAN 1024b8e80941Smrg "Waves not executing currently-bound shaders:" 1025b8e80941Smrg COLOR_RESET "\n"); 1026b8e80941Smrg found = true; 1027b8e80941Smrg } 1028b8e80941Smrg fprintf(f, " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016"PRIx64 1029b8e80941Smrg " INST=%08X %08X PC=%"PRIx64"\n", 1030b8e80941Smrg waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, 1031b8e80941Smrg waves[i].wave, waves[i].exec, waves[i].inst_dw0, 1032b8e80941Smrg waves[i].inst_dw1, waves[i].pc); 1033b8e80941Smrg } 1034b8e80941Smrg if (found) 1035b8e80941Smrg fprintf(f, "\n\n"); 1036b8e80941Smrg} 1037b8e80941Smrg 1038b8e80941Smrgstatic void si_dump_command(const char *title, const char *command, FILE *f) 1039b8e80941Smrg{ 1040b8e80941Smrg char line[2000]; 1041b8e80941Smrg 1042b8e80941Smrg FILE *p = popen(command, "r"); 1043b8e80941Smrg if (!p) 1044b8e80941Smrg return; 1045b8e80941Smrg 1046b8e80941Smrg fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title); 1047b8e80941Smrg while (fgets(line, sizeof(line), p)) 1048b8e80941Smrg fputs(line, f); 1049b8e80941Smrg fprintf(f, "\n\n"); 1050b8e80941Smrg pclose(p); 1051b8e80941Smrg} 1052b8e80941Smrg 1053b8e80941Smrgstatic void si_dump_debug_state(struct pipe_context *ctx, FILE *f, 1054b8e80941Smrg unsigned flags) 1055b8e80941Smrg{ 1056b8e80941Smrg struct si_context *sctx = (struct si_context*)ctx; 1057b8e80941Smrg 1058b8e80941Smrg if (sctx->log) 1059b8e80941Smrg u_log_flush(sctx->log); 1060b8e80941Smrg 1061b8e80941Smrg if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) { 1062b8e80941Smrg si_dump_debug_registers(sctx, f); 1063b8e80941Smrg 1064b8e80941Smrg si_dump_annotated_shaders(sctx, f); 1065b8e80941Smrg si_dump_command("Active waves (raw data)", "umr -O halt_waves -wa | column -t", f); 1066b8e80941Smrg si_dump_command("Wave information", "umr -O halt_waves,bits -wa", f); 1067b8e80941Smrg } 1068b8e80941Smrg} 1069b8e80941Smrg 1070b8e80941Smrgvoid si_log_draw_state(struct si_context *sctx, struct u_log_context *log) 1071b8e80941Smrg{ 1072b8e80941Smrg struct si_shader_ctx_state *tcs_shader; 1073b8e80941Smrg 1074b8e80941Smrg if (!log) 1075b8e80941Smrg return; 1076b8e80941Smrg 1077b8e80941Smrg tcs_shader = &sctx->tcs_shader; 1078b8e80941Smrg if (sctx->tes_shader.cso && !sctx->tcs_shader.cso) 1079b8e80941Smrg tcs_shader = &sctx->fixed_func_tcs_shader; 1080b8e80941Smrg 1081b8e80941Smrg si_dump_framebuffer(sctx, log); 1082b8e80941Smrg 1083b8e80941Smrg si_dump_gfx_shader(sctx, &sctx->vs_shader, log); 1084b8e80941Smrg si_dump_gfx_shader(sctx, tcs_shader, log); 1085b8e80941Smrg si_dump_gfx_shader(sctx, &sctx->tes_shader, log); 1086b8e80941Smrg si_dump_gfx_shader(sctx, &sctx->gs_shader, log); 1087b8e80941Smrg si_dump_gfx_shader(sctx, &sctx->ps_shader, log); 1088b8e80941Smrg 1089b8e80941Smrg si_dump_descriptor_list(sctx->screen, 1090b8e80941Smrg &sctx->descriptors[SI_DESCS_RW_BUFFERS], 1091b8e80941Smrg "", "RW buffers", 4, 1092b8e80941Smrg sctx->descriptors[SI_DESCS_RW_BUFFERS].num_active_slots, 1093b8e80941Smrg si_identity, log); 1094b8e80941Smrg si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log); 1095b8e80941Smrg si_dump_gfx_descriptors(sctx, tcs_shader, log); 1096b8e80941Smrg si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log); 1097b8e80941Smrg si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log); 1098b8e80941Smrg si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log); 1099b8e80941Smrg} 1100b8e80941Smrg 1101b8e80941Smrgvoid si_log_compute_state(struct si_context *sctx, struct u_log_context *log) 1102b8e80941Smrg{ 1103b8e80941Smrg if (!log) 1104b8e80941Smrg return; 1105b8e80941Smrg 1106b8e80941Smrg si_dump_compute_shader(sctx, log); 1107b8e80941Smrg si_dump_compute_descriptors(sctx, log); 1108b8e80941Smrg} 1109b8e80941Smrg 1110b8e80941Smrgstatic void si_dump_dma(struct si_context *sctx, 1111b8e80941Smrg struct radeon_saved_cs *saved, FILE *f) 1112b8e80941Smrg{ 1113b8e80941Smrg static const char ib_name[] = "sDMA IB"; 1114b8e80941Smrg unsigned i; 1115b8e80941Smrg 1116b8e80941Smrg si_dump_bo_list(sctx, saved, f); 1117b8e80941Smrg 1118b8e80941Smrg fprintf(f, "------------------ %s begin ------------------\n", ib_name); 1119b8e80941Smrg 1120b8e80941Smrg for (i = 0; i < saved->num_dw; ++i) { 1121b8e80941Smrg fprintf(f, " %08x\n", saved->ib[i]); 1122b8e80941Smrg } 1123b8e80941Smrg 1124b8e80941Smrg fprintf(f, "------------------- %s end -------------------\n", ib_name); 1125b8e80941Smrg fprintf(f, "\n"); 1126b8e80941Smrg 1127b8e80941Smrg fprintf(f, "SDMA Dump Done.\n"); 1128b8e80941Smrg} 1129b8e80941Smrg 1130b8e80941Smrgvoid si_check_vm_faults(struct si_context *sctx, 1131b8e80941Smrg struct radeon_saved_cs *saved, enum ring_type ring) 1132b8e80941Smrg{ 1133b8e80941Smrg struct pipe_screen *screen = sctx->b.screen; 1134b8e80941Smrg FILE *f; 1135b8e80941Smrg uint64_t addr; 1136b8e80941Smrg char cmd_line[4096]; 1137b8e80941Smrg 1138b8e80941Smrg if (!ac_vm_fault_occured(sctx->chip_class, 1139b8e80941Smrg &sctx->dmesg_timestamp, &addr)) 1140b8e80941Smrg return; 1141b8e80941Smrg 1142b8e80941Smrg f = dd_get_debug_file(false); 1143b8e80941Smrg if (!f) 1144b8e80941Smrg return; 1145b8e80941Smrg 1146b8e80941Smrg fprintf(f, "VM fault report.\n\n"); 1147b8e80941Smrg if (os_get_command_line(cmd_line, sizeof(cmd_line))) 1148b8e80941Smrg fprintf(f, "Command: %s\n", cmd_line); 1149b8e80941Smrg fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen)); 1150b8e80941Smrg fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen)); 1151b8e80941Smrg fprintf(f, "Device name: %s\n\n", screen->get_name(screen)); 1152b8e80941Smrg fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr); 1153b8e80941Smrg 1154b8e80941Smrg if (sctx->apitrace_call_number) 1155b8e80941Smrg fprintf(f, "Last apitrace call: %u\n\n", 1156b8e80941Smrg sctx->apitrace_call_number); 1157b8e80941Smrg 1158b8e80941Smrg switch (ring) { 1159b8e80941Smrg case RING_GFX: { 1160b8e80941Smrg struct u_log_context log; 1161b8e80941Smrg u_log_context_init(&log); 1162b8e80941Smrg 1163b8e80941Smrg si_log_draw_state(sctx, &log); 1164b8e80941Smrg si_log_compute_state(sctx, &log); 1165b8e80941Smrg si_log_cs(sctx, &log, true); 1166b8e80941Smrg 1167b8e80941Smrg u_log_new_page_print(&log, f); 1168b8e80941Smrg u_log_context_destroy(&log); 1169b8e80941Smrg break; 1170b8e80941Smrg } 1171b8e80941Smrg case RING_DMA: 1172b8e80941Smrg si_dump_dma(sctx, saved, f); 1173b8e80941Smrg break; 1174b8e80941Smrg 1175b8e80941Smrg default: 1176b8e80941Smrg break; 1177b8e80941Smrg } 1178b8e80941Smrg 1179b8e80941Smrg fclose(f); 1180b8e80941Smrg 1181b8e80941Smrg fprintf(stderr, "Detected a VM fault, exiting...\n"); 1182b8e80941Smrg exit(0); 1183b8e80941Smrg} 1184b8e80941Smrg 1185b8e80941Smrgvoid si_init_debug_functions(struct si_context *sctx) 1186b8e80941Smrg{ 1187b8e80941Smrg sctx->b.dump_debug_state = si_dump_debug_state; 1188b8e80941Smrg 1189b8e80941Smrg /* Set the initial dmesg timestamp for this context, so that 1190b8e80941Smrg * only new messages will be checked for VM faults. 1191b8e80941Smrg */ 1192b8e80941Smrg if (sctx->screen->debug_flags & DBG(CHECK_VM)) 1193b8e80941Smrg ac_vm_fault_occured(sctx->chip_class, 1194b8e80941Smrg &sctx->dmesg_timestamp, NULL); 1195b8e80941Smrg} 1196