1/* 2 * Copyright © 2016 Red Hat. 3 * Copyright © 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright © 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 26 */ 27 28#include <stdio.h> 29#include <stdlib.h> 30#ifndef _WIN32 31#include <sys/utsname.h> 32#endif 33#include <sys/stat.h> 34 35#include "util/mesa-sha1.h" 36#include "ac_debug.h" 37#include "radv_debug.h" 38#include "radv_shader.h" 39#include "sid.h" 40 41#define TRACE_BO_SIZE 4096 42#define TMA_BO_SIZE 4096 43 44#define COLOR_RESET "\033[0m" 45#define COLOR_RED "\033[31m" 46#define COLOR_GREEN "\033[1;32m" 47#define COLOR_YELLOW "\033[1;33m" 48#define COLOR_CYAN "\033[1;36m" 49 50#define RADV_DUMP_DIR "radv_dumps" 51 52/* Trace BO layout (offsets are 4 bytes): 53 * 54 * [0]: primary trace ID 55 * [1]: secondary trace ID 56 * [2-3]: 64-bit GFX ring pipeline pointer 57 * [4-5]: 64-bit COMPUTE ring pipeline pointer 58 * [6-7]: Vertex descriptors pointer 59 * [8-9]: 64-bit descriptor set #0 pointer 60 * ... 61 * [68-69]: 64-bit descriptor set #31 pointer 62 */ 63 64bool 65radv_init_trace(struct radv_device *device) 66{ 67 struct radeon_winsys *ws = device->ws; 68 VkResult result; 69 70 result = ws->buffer_create( 71 ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, 72 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, 73 RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo); 74 if (result != VK_SUCCESS) 75 return false; 76 77 result = ws->buffer_make_resident(ws, device->trace_bo, true); 78 if (result != VK_SUCCESS) 79 return false; 80 81 device->trace_id_ptr = ws->buffer_map(device->trace_bo); 82 if (!device->trace_id_ptr) 83 return false; 84 85 ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp, 86 NULL); 87 88 return true; 89} 90 91void 92radv_finish_trace(struct radv_device *device) 93{ 94 struct radeon_winsys *ws = device->ws; 95 96 if (unlikely(device->trace_bo)) { 97 ws->buffer_make_resident(ws, device->trace_bo, false); 98 ws->buffer_destroy(ws, device->trace_bo); 99 } 100} 101 102static void 103radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f) 104{ 105 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); 106 device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2); 107} 108 109static void 110radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset) 111{ 112 struct radeon_winsys *ws = device->ws; 113 uint32_t value; 114 115 if (ws->read_registers(ws, offset, 1, &value)) 116 ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0); 117} 118 119static void 120radv_dump_debug_registers(struct radv_device *device, FILE *f) 121{ 122 struct radeon_info *info = &device->physical_device->rad_info; 123 124 fprintf(f, "Memory-mapped registers:\n"); 125 radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); 126 127 radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2); 128 radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0); 129 radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1); 130 radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2); 131 radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); 132 radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); 133 radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); 134 if (info->chip_class <= GFX8) { 135 radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); 136 radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); 137 radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); 138 } 139 radv_dump_mmapped_reg(device, f, R_008680_CP_STAT); 140 radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1); 141 radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2); 142 radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3); 143 radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS); 144 radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT); 145 radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1); 146 radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS); 147 radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT); 148 radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1); 149 fprintf(f, "\n"); 150} 151 152static void 153radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) 154{ 155 fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 156 for (unsigned j = 0; j < 4; j++) 157 ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff); 158} 159 160static void 161radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) 162{ 163 unsigned sq_img_rsrc_word0 = 164 chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; 165 166 fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n"); 167 for (unsigned j = 0; j < 8; j++) 168 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff); 169 170 fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); 171 for (unsigned j = 0; j < 8; j++) 172 ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff); 173} 174 175static void 176radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) 177{ 178 fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); 179 for (unsigned j = 0; j < 4; j++) { 180 ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff); 181 } 182} 183 184static void 185radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, 186 FILE *f) 187{ 188 radv_dump_image_descriptor(chip_class, desc, f); 189 radv_dump_sampler_descriptor(chip_class, desc + 16, f); 190} 191 192static void 193radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id, 194 FILE *f) 195{ 196 enum chip_class chip_class = device->physical_device->rad_info.chip_class; 197 const struct radv_descriptor_set_layout *layout; 198 int i; 199 200 if (!set) 201 return; 202 layout = set->header.layout; 203 204 for (i = 0; i < set->header.layout->binding_count; i++) { 205 uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4; 206 207 switch (layout->binding[i].type) { 208 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 209 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 210 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 211 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 212 radv_dump_buffer_descriptor(chip_class, desc, f); 213 break; 214 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 215 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 216 case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 217 radv_dump_image_descriptor(chip_class, desc, f); 218 break; 219 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 220 radv_dump_combined_image_sampler_descriptor(chip_class, desc, f); 221 break; 222 case VK_DESCRIPTOR_TYPE_SAMPLER: 223 radv_dump_sampler_descriptor(chip_class, desc, f); 224 break; 225 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 226 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 227 case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: 228 case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: 229 /* todo */ 230 break; 231 default: 232 assert(!"unknown descriptor type"); 233 break; 234 } 235 fprintf(f, "\n"); 236 } 237 fprintf(f, "\n\n"); 238} 239 240static void 241radv_dump_descriptors(struct radv_device *device, FILE *f) 242{ 243 uint64_t *ptr = (uint64_t *)device->trace_id_ptr; 244 int i; 245 246 fprintf(f, "Descriptors:\n"); 247 for (i = 0; i < MAX_SETS; i++) { 248 struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4); 249 250 radv_dump_descriptor_set(device, set, i, f); 251 } 252} 253 254struct radv_shader_inst { 255 char text[160]; /* one disasm line */ 256 unsigned offset; /* instruction offset */ 257 unsigned size; /* instruction size = 4 or 8 */ 258}; 259 260/* Split a disassembly string into lines and add them to the array pointed 261 * to by "instructions". */ 262static void 263si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, 264 struct radv_shader_inst *instructions) 265{ 266 struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL; 267 char *next; 268 269 while ((next = strchr(disasm, '\n'))) { 270 struct radv_shader_inst *inst = &instructions[*num]; 271 unsigned len = next - disasm; 272 273 if (!memchr(disasm, ';', len)) { 274 /* Ignore everything that is not an instruction. */ 275 disasm = next + 1; 276 continue; 277 } 278 279 assert(len < ARRAY_SIZE(inst->text)); 280 memcpy(inst->text, disasm, len); 281 inst->text[len] = 0; 282 inst->offset = last_inst ? last_inst->offset + last_inst->size : 0; 283 284 const char *semicolon = strchr(disasm, ';'); 285 assert(semicolon); 286 /* More than 16 chars after ";" means the instruction is 8 bytes long. */ 287 inst->size = next - semicolon > 16 ? 8 : 4; 288 289 snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, 290 " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset, 291 inst->size); 292 293 last_inst = inst; 294 (*num)++; 295 disasm = next + 1; 296 } 297} 298 299static void 300radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage, 301 struct ac_wave_info *waves, unsigned num_waves, FILE *f) 302{ 303 uint64_t start_addr, end_addr; 304 unsigned i; 305 306 if (!shader) 307 return; 308 309 start_addr = radv_shader_variant_get_va(shader); 310 end_addr = start_addr + shader->code_size; 311 312 /* See if any wave executes the shader. */ 313 for (i = 0; i < num_waves; i++) { 314 if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) 315 break; 316 } 317 318 if (i == num_waves) 319 return; /* the shader is not being executed */ 320 321 /* Remember the first found wave. The waves are sorted according to PC. */ 322 waves = &waves[i]; 323 num_waves -= i; 324 325 /* Get the list of instructions. 326 * Buffer size / 4 is the upper bound of the instruction count. 327 */ 328 unsigned num_inst = 0; 329 struct radv_shader_inst *instructions = 330 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); 331 332 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); 333 334 fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", 335 radv_get_shader_name(&shader->info, stage)); 336 337 /* Print instructions with annotations. */ 338 for (i = 0; i < num_inst; i++) { 339 struct radv_shader_inst *inst = &instructions[i]; 340 341 fprintf(f, "%s\n", inst->text); 342 343 /* Print which waves execute the instruction right now. */ 344 while (num_waves && start_addr + inst->offset == waves->pc) { 345 fprintf(f, 346 " " COLOR_GREEN "^ SE%u SH%u CU%u " 347 "SIMD%u WAVE%u EXEC=%016" PRIx64 " ", 348 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); 349 350 if (inst->size == 4) { 351 fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); 352 } else { 353 fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); 354 } 355 356 waves->matched = true; 357 waves = &waves[1]; 358 num_waves--; 359 } 360 } 361 362 fprintf(f, "\n\n"); 363 free(instructions); 364} 365 366static void 367radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, 368 FILE *f) 369{ 370 struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; 371 enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class; 372 unsigned num_waves = ac_get_wave_info(chip_class, waves); 373 374 fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); 375 376 /* Dump annotated active graphics shaders. */ 377 unsigned stages = active_stages; 378 while (stages) { 379 int stage = u_bit_scan(&stages); 380 381 radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f); 382 } 383 384 /* Print waves executing shaders that are not currently bound. */ 385 unsigned i; 386 bool found = false; 387 for (i = 0; i < num_waves; i++) { 388 if (waves[i].matched) 389 continue; 390 391 if (!found) { 392 fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); 393 found = true; 394 } 395 fprintf(f, 396 " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 397 "\n", 398 waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, 399 waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); 400 } 401 if (found) 402 fprintf(f, "\n\n"); 403} 404 405static void 406radv_dump_spirv(struct radv_shader_variant *shader, const char *sha1, const char *dump_dir) 407{ 408 char dump_path[512]; 409 FILE *f; 410 411 snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1); 412 413 f = fopen(dump_path, "w+"); 414 if (f) { 415 fwrite(shader->spirv, shader->spirv_size, 1, f); 416 fclose(f); 417 } 418} 419 420static void 421radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader, 422 gl_shader_stage stage, const char *dump_dir, FILE *f) 423{ 424 if (!shader) 425 return; 426 427 fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage)); 428 429 if (shader->spirv) { 430 unsigned char sha1[21]; 431 char sha1buf[41]; 432 433 _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1); 434 _mesa_sha1_format(sha1buf, sha1); 435 436 fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf); 437 radv_dump_spirv(shader, sha1buf, dump_dir); 438 } 439 440 if (shader->nir_string) { 441 fprintf(f, "NIR:\n%s\n", shader->nir_string); 442 } 443 444 fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO", 445 shader->ir_string); 446 fprintf(f, "DISASM:\n%s\n", shader->disasm_string); 447 448 radv_dump_shader_stats(pipeline->device, pipeline, stage, f); 449} 450 451static void 452radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, 453 const char *dump_dir, FILE *f) 454{ 455 /* Dump active graphics shaders. */ 456 unsigned stages = active_stages; 457 while (stages) { 458 int stage = u_bit_scan(&stages); 459 460 radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f); 461 } 462} 463 464static void 465radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f) 466{ 467 void *ptr = (uint64_t *)pipeline->device->trace_id_ptr; 468 uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask); 469 uint32_t *vb_ptr = &((uint32_t *)ptr)[3]; 470 471 if (!count) 472 return; 473 474 fprintf(f, "Num vertex %s: %d\n", 475 pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count); 476 for (uint32_t i = 0; i < count; i++) { 477 uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; 478 uint64_t va = 0; 479 480 va |= desc[0]; 481 va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32; 482 483 fprintf(f, "VBO#%d:\n", i); 484 fprintf(f, "\tVA: 0x%" PRIx64 "\n", va); 485 fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1])); 486 fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]); 487 } 488} 489 490static struct radv_pipeline * 491radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring) 492{ 493 uint64_t *ptr = (uint64_t *)device->trace_id_ptr; 494 int offset = ring == RING_GFX ? 1 : 2; 495 496 return *(struct radv_pipeline **)(ptr + offset); 497} 498 499static void 500radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) 501{ 502 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 503 struct radv_pipeline *pipeline; 504 505 fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE"); 506 507 pipeline = radv_get_saved_pipeline(queue->device, ring); 508 if (pipeline) { 509 radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f); 510 if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) 511 radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f); 512 radv_dump_vertex_descriptors(pipeline, f); 513 radv_dump_descriptors(queue->device, f); 514 } 515} 516 517static void 518radv_dump_cmd(const char *cmd, FILE *f) 519{ 520#ifndef _WIN32 521 char line[2048]; 522 FILE *p; 523 524 p = popen(cmd, "r"); 525 if (p) { 526 while (fgets(line, sizeof(line), p)) 527 fputs(line, f); 528 fprintf(f, "\n"); 529 pclose(p); 530 } 531#endif 532} 533 534static void 535radv_dump_dmesg(FILE *f) 536{ 537 fprintf(f, "\nLast 60 lines of dmesg:\n\n"); 538 radv_dump_cmd("dmesg | tail -n60", f); 539} 540 541void 542radv_dump_enabled_options(struct radv_device *device, FILE *f) 543{ 544 uint64_t mask; 545 546 if (device->instance->debug_flags) { 547 fprintf(f, "Enabled debug options: "); 548 549 mask = device->instance->debug_flags; 550 while (mask) { 551 int i = u_bit_scan64(&mask); 552 fprintf(f, "%s, ", radv_get_debug_option_name(i)); 553 } 554 fprintf(f, "\n"); 555 } 556 557 if (device->instance->perftest_flags) { 558 fprintf(f, "Enabled perftest options: "); 559 560 mask = device->instance->perftest_flags; 561 while (mask) { 562 int i = u_bit_scan64(&mask); 563 fprintf(f, "%s, ", radv_get_perftest_option_name(i)); 564 } 565 fprintf(f, "\n"); 566 } 567} 568 569static void 570radv_dump_app_info(struct radv_device *device, FILE *f) 571{ 572 struct radv_instance *instance = device->instance; 573 574 fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name); 575 fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version); 576 fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name); 577 fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version); 578 fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version), 579 VK_VERSION_MINOR(instance->vk.app_info.api_version), 580 VK_VERSION_PATCH(instance->vk.app_info.api_version)); 581 582 radv_dump_enabled_options(device, f); 583} 584 585static void 586radv_dump_device_name(struct radv_device *device, FILE *f) 587{ 588 struct radeon_info *info = &device->physical_device->rad_info; 589#ifndef _WIN32 590 char kernel_version[128] = {0}; 591 struct utsname uname_data; 592#endif 593 const char *chip_name; 594 595 chip_name = device->ws->get_chip_name(device->ws); 596 597#ifdef _WIN32 598 fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name, 599 info->drm_major, info->drm_minor, info->drm_patchlevel); 600#else 601 if (uname(&uname_data) == 0) 602 snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); 603 604 fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name, 605 info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version); 606#endif 607} 608 609static void 610radv_dump_umr_ring(struct radv_queue *queue, FILE *f) 611{ 612 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 613 struct radv_device *device = queue->device; 614 char cmd[128]; 615 616 /* TODO: Dump compute ring. */ 617 if (ring != RING_GFX) 618 return; 619 620 sprintf(cmd, "umr -R %s 2>&1", 621 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); 622 623 fprintf(f, "\nUMR GFX ring:\n\n"); 624 radv_dump_cmd(cmd, f); 625} 626 627static void 628radv_dump_umr_waves(struct radv_queue *queue, FILE *f) 629{ 630 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 631 struct radv_device *device = queue->device; 632 char cmd[128]; 633 634 /* TODO: Dump compute ring. */ 635 if (ring != RING_GFX) 636 return; 637 638 sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1", 639 device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); 640 641 fprintf(f, "\nUMR GFX waves:\n\n"); 642 radv_dump_cmd(cmd, f); 643} 644 645static bool 646radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring) 647{ 648 struct radeon_winsys *ws = queue->device->ws; 649 650 if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family)) 651 return true; 652 653 return false; 654} 655 656void 657radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) 658{ 659 struct radv_device *device = queue->device; 660 enum ring_type ring; 661 uint64_t addr; 662 663 ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 664 665 bool hang_occurred = radv_gpu_hang_occured(queue, ring); 666 bool vm_fault_occurred = false; 667 if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) 668 vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class, 669 &device->dmesg_timestamp, &addr); 670 if (!hang_occurred && !vm_fault_occurred) 671 return; 672 673 fprintf(stderr, "radv: GPU hang detected...\n"); 674 675#ifndef _WIN32 676 /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save 677 * various debugging info about that GPU hang. 678 */ 679 struct tm *timep, result; 680 time_t raw_time; 681 FILE *f; 682 char dump_dir[256], dump_path[512], buf_time[128]; 683 684 time(&raw_time); 685 timep = os_localtime(&raw_time, &result); 686 strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep); 687 688 snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), 689 getpid(), buf_time); 690 if (mkdir(dump_dir, 0774) && errno != EEXIST) { 691 fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno); 692 abort(); 693 } 694 695 fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir); 696 697 /* Dump trace file. */ 698 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log"); 699 f = fopen(dump_path, "w+"); 700 if (f) { 701 radv_dump_trace(queue->device, cs, f); 702 fclose(f); 703 } 704 705 /* Dump pipeline state. */ 706 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log"); 707 f = fopen(dump_path, "w+"); 708 if (f) { 709 radv_dump_queue_state(queue, dump_dir, f); 710 fclose(f); 711 } 712 713 if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) { 714 /* Dump UMR ring. */ 715 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log"); 716 f = fopen(dump_path, "w+"); 717 if (f) { 718 radv_dump_umr_ring(queue, f); 719 fclose(f); 720 } 721 722 /* Dump UMR waves. */ 723 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log"); 724 f = fopen(dump_path, "w+"); 725 if (f) { 726 radv_dump_umr_waves(queue, f); 727 fclose(f); 728 } 729 } 730 731 /* Dump debug registers. */ 732 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log"); 733 f = fopen(dump_path, "w+"); 734 if (f) { 735 radv_dump_debug_registers(device, f); 736 fclose(f); 737 } 738 739 /* Dump BO ranges. */ 740 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log"); 741 f = fopen(dump_path, "w+"); 742 if (f) { 743 device->ws->dump_bo_ranges(device->ws, f); 744 fclose(f); 745 } 746 747 /* Dump BO log. */ 748 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log"); 749 f = fopen(dump_path, "w+"); 750 if (f) { 751 device->ws->dump_bo_log(device->ws, f); 752 fclose(f); 753 } 754 755 /* Dump VM fault info. */ 756 if (vm_fault_occurred) { 757 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log"); 758 f = fopen(dump_path, "w+"); 759 if (f) { 760 fprintf(f, "VM fault report.\n\n"); 761 fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr); 762 fclose(f); 763 } 764 } 765 766 /* Dump app info. */ 767 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log"); 768 f = fopen(dump_path, "w+"); 769 if (f) { 770 radv_dump_app_info(device, f); 771 fclose(f); 772 } 773 774 /* Dump GPU info. */ 775 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log"); 776 f = fopen(dump_path, "w+"); 777 if (f) { 778 radv_dump_device_name(device, f); 779 ac_print_gpu_info(&device->physical_device->rad_info, f); 780 fclose(f); 781 } 782 783 /* Dump dmesg. */ 784 snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log"); 785 f = fopen(dump_path, "w+"); 786 if (f) { 787 radv_dump_dmesg(f); 788 fclose(f); 789 } 790#endif 791 792 fprintf(stderr, "radv: GPU hang report saved successfully!\n"); 793 abort(); 794} 795 796void 797radv_print_spirv(const char *data, uint32_t size, FILE *fp) 798{ 799#ifndef _WIN32 800 char path[] = "/tmp/fileXXXXXX"; 801 char command[128]; 802 int fd; 803 804 /* Dump the binary into a temporary file. */ 805 fd = mkstemp(path); 806 if (fd < 0) 807 return; 808 809 if (write(fd, data, size) == -1) 810 goto fail; 811 812 /* Disassemble using spirv-dis if installed. */ 813 sprintf(command, "spirv-dis %s", path); 814 radv_dump_cmd(command, fp); 815 816fail: 817 close(fd); 818 unlink(path); 819#endif 820} 821 822bool 823radv_trap_handler_init(struct radv_device *device) 824{ 825 struct radeon_winsys *ws = device->ws; 826 VkResult result; 827 828 /* Create the trap handler shader and upload it like other shaders. */ 829 device->trap_handler_shader = radv_create_trap_handler_shader(device); 830 if (!device->trap_handler_shader) { 831 fprintf(stderr, "radv: failed to create the trap handler shader.\n"); 832 return false; 833 } 834 835 result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true); 836 if (result != VK_SUCCESS) 837 return false; 838 839 result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM, 840 RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | 841 RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT, 842 RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo); 843 if (result != VK_SUCCESS) 844 return false; 845 846 result = ws->buffer_make_resident(ws, device->tma_bo, true); 847 if (result != VK_SUCCESS) 848 return false; 849 850 device->tma_ptr = ws->buffer_map(device->tma_bo); 851 if (!device->tma_ptr) 852 return false; 853 854 /* Upload a buffer descriptor to store various info from the trap. */ 855 uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16; 856 uint32_t desc[4]; 857 858 desc[0] = tma_va; 859 desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32); 860 desc[2] = TMA_BO_SIZE; 861 desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 862 S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 863 S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 864 865 memcpy(device->tma_ptr, desc, sizeof(desc)); 866 867 return true; 868} 869 870void 871radv_trap_handler_finish(struct radv_device *device) 872{ 873 struct radeon_winsys *ws = device->ws; 874 875 if (unlikely(device->trap_handler_shader)) { 876 ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false); 877 radv_shader_variant_destroy(device, device->trap_handler_shader); 878 } 879 880 if (unlikely(device->tma_bo)) { 881 ws->buffer_make_resident(ws, device->tma_bo, false); 882 ws->buffer_destroy(ws, device->tma_bo); 883 } 884} 885 886static void 887radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc) 888{ 889 struct radv_shader_variant *shader; 890 uint64_t start_addr, end_addr; 891 uint32_t instr_offset; 892 893 shader = radv_find_shader_variant(device, faulty_pc); 894 if (!shader) 895 return; 896 897 start_addr = radv_shader_variant_get_va(shader); 898 end_addr = start_addr + shader->code_size; 899 instr_offset = faulty_pc - start_addr; 900 901 fprintf(stderr, 902 "Faulty shader found " 903 "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n", 904 start_addr, end_addr, instr_offset); 905 906 /* Get the list of instructions. 907 * Buffer size / 4 is the upper bound of the instruction count. 908 */ 909 unsigned num_inst = 0; 910 struct radv_shader_inst *instructions = 911 calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); 912 913 /* Split the disassembly string into instructions. */ 914 si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); 915 916 /* Print instructions with annotations. */ 917 for (unsigned i = 0; i < num_inst; i++) { 918 struct radv_shader_inst *inst = &instructions[i]; 919 920 if (start_addr + inst->offset == faulty_pc) { 921 fprintf(stderr, "\n!!! Faulty instruction below !!!\n"); 922 fprintf(stderr, "%s\n", inst->text); 923 fprintf(stderr, "\n"); 924 } else { 925 fprintf(stderr, "%s\n", inst->text); 926 } 927 } 928 929 free(instructions); 930} 931 932struct radv_sq_hw_reg { 933 uint32_t status; 934 uint32_t trap_sts; 935 uint32_t hw_id; 936 uint32_t ib_sts; 937}; 938 939static void 940radv_dump_sq_hw_regs(struct radv_device *device) 941{ 942 struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6]; 943 944 fprintf(stderr, "\nHardware registers:\n"); 945 if (device->physical_device->rad_info.chip_class >= GFX10) { 946 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS, 947 regs->status, ~0); 948 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS, 949 regs->trap_sts, ~0); 950 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1, 951 regs->hw_id, ~0); 952 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS, 953 regs->ib_sts, ~0); 954 } else { 955 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS, 956 regs->status, ~0); 957 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS, 958 regs->trap_sts, ~0); 959 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID, 960 regs->hw_id, ~0); 961 ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS, 962 regs->ib_sts, ~0); 963 } 964 fprintf(stderr, "\n\n"); 965} 966 967void 968radv_check_trap_handler(struct radv_queue *queue) 969{ 970 enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 971 struct radv_device *device = queue->device; 972 struct radeon_winsys *ws = device->ws; 973 974 /* Wait for the context to be idle in a finite time. */ 975 ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family); 976 977 /* Try to detect if the trap handler has been reached by the hw by 978 * looking at ttmp0 which should be non-zero if a shader exception 979 * happened. 980 */ 981 if (!device->tma_ptr[4]) 982 return; 983 984#if 0 985 fprintf(stderr, "tma_ptr:\n"); 986 for (unsigned i = 0; i < 10; i++) 987 fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]); 988#endif 989 990 radv_dump_sq_hw_regs(device); 991 992 uint32_t ttmp0 = device->tma_ptr[4]; 993 uint32_t ttmp1 = device->tma_ptr[5]; 994 995 /* According to the ISA docs, 3.10 Trap and Exception Registers: 996 * 997 * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}" 998 * 999 * "When the trap handler is entered, the PC of the faulting 1000 * instruction is: (PC - PC_rewind * 4)." 1001 * */ 1002 uint8_t trap_id = (ttmp1 >> 16) & 0xff; 1003 uint8_t ht = (ttmp1 >> 24) & 0x1; 1004 uint8_t pc_rewind = (ttmp1 >> 25) & 0xf; 1005 uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4); 1006 1007 fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, 1008 pc_rewind); 1009 1010 radv_dump_faulty_shader(device, pc); 1011 1012 abort(); 1013} 1014