101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2016 Red Hat. 301e04c3fSmrg * Copyright © 2016 Bas Nieuwenhuizen 401e04c3fSmrg * 501e04c3fSmrg * based in part on anv driver which is: 601e04c3fSmrg * Copyright © 2015 Intel Corporation 701e04c3fSmrg * 801e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 901e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 1001e04c3fSmrg * to deal in the Software without restriction, including without limitation 1101e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 1201e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1301e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1401e04c3fSmrg * 1501e04c3fSmrg * The above copyright notice and this permission notice (including the next 1601e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1701e04c3fSmrg * Software. 1801e04c3fSmrg * 1901e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 2001e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 2101e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 2201e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2301e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2401e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2501e04c3fSmrg * IN THE SOFTWARE. 2601e04c3fSmrg */ 2701e04c3fSmrg 2801e04c3fSmrg#include <stdio.h> 297ec681f3Smrg#include <stdlib.h> 307ec681f3Smrg#ifndef _WIN32 3101e04c3fSmrg#include <sys/utsname.h> 327ec681f3Smrg#endif 337ec681f3Smrg#include <sys/stat.h> 3401e04c3fSmrg 3501e04c3fSmrg#include "util/mesa-sha1.h" 3601e04c3fSmrg#include "ac_debug.h" 3701e04c3fSmrg#include "radv_debug.h" 3801e04c3fSmrg#include "radv_shader.h" 397ec681f3Smrg#include "sid.h" 4001e04c3fSmrg 4101e04c3fSmrg#define TRACE_BO_SIZE 4096 427ec681f3Smrg#define TMA_BO_SIZE 4096 437ec681f3Smrg 447ec681f3Smrg#define COLOR_RESET "\033[0m" 457ec681f3Smrg#define COLOR_RED "\033[31m" 467ec681f3Smrg#define COLOR_GREEN "\033[1;32m" 477ec681f3Smrg#define COLOR_YELLOW "\033[1;33m" 487ec681f3Smrg#define COLOR_CYAN "\033[1;36m" 4901e04c3fSmrg 507ec681f3Smrg#define RADV_DUMP_DIR "radv_dumps" 5101e04c3fSmrg 5201e04c3fSmrg/* Trace BO layout (offsets are 4 bytes): 5301e04c3fSmrg * 5401e04c3fSmrg * [0]: primary trace ID 5501e04c3fSmrg * [1]: secondary trace ID 567ec681f3Smrg * [2-3]: 64-bit GFX ring pipeline pointer 577ec681f3Smrg * [4-5]: 64-bit COMPUTE ring pipeline pointer 587ec681f3Smrg * [6-7]: Vertex descriptors pointer 597ec681f3Smrg * [8-9]: 64-bit descriptor set #0 pointer 6001e04c3fSmrg * ... 6101e04c3fSmrg * [68-69]: 64-bit descriptor set #31 pointer 6201e04c3fSmrg */ 6301e04c3fSmrg 6401e04c3fSmrgbool 6501e04c3fSmrgradv_init_trace(struct radv_device *device) 6601e04c3fSmrg{ 677ec681f3Smrg struct radeon_winsys *ws = device->ws; 687ec681f3Smrg VkResult result; 6901e04c3fSmrg 707ec681f3Smrg result = ws->buffer_create( 717ec681f3Smrg ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM, 727ec681f3Smrg RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM, 737ec681f3Smrg RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo); 747ec681f3Smrg if (result != VK_SUCCESS) 757ec681f3Smrg return false; 7601e04c3fSmrg 777ec681f3Smrg result = ws->buffer_make_resident(ws, device->trace_bo, true); 787ec681f3Smrg if (result != VK_SUCCESS) 797ec681f3Smrg return false; 8001e04c3fSmrg 817ec681f3Smrg device->trace_id_ptr = ws->buffer_map(device->trace_bo); 827ec681f3Smrg if (!device->trace_id_ptr) 837ec681f3Smrg return false; 8401e04c3fSmrg 857ec681f3Smrg ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp, 867ec681f3Smrg NULL); 8701e04c3fSmrg 887ec681f3Smrg return true; 8901e04c3fSmrg} 9001e04c3fSmrg 917ec681f3Smrgvoid 927ec681f3Smrgradv_finish_trace(struct radv_device *device) 9301e04c3fSmrg{ 947ec681f3Smrg struct radeon_winsys *ws = device->ws; 9501e04c3fSmrg 967ec681f3Smrg if (unlikely(device->trace_bo)) { 977ec681f3Smrg ws->buffer_make_resident(ws, device->trace_bo, false); 987ec681f3Smrg ws->buffer_destroy(ws, device->trace_bo); 997ec681f3Smrg } 1007ec681f3Smrg} 10101e04c3fSmrg 1027ec681f3Smrgstatic void 1037ec681f3Smrgradv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f) 1047ec681f3Smrg{ 1057ec681f3Smrg fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); 1067ec681f3Smrg device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2); 10701e04c3fSmrg} 10801e04c3fSmrg 10901e04c3fSmrgstatic void 11001e04c3fSmrgradv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset) 11101e04c3fSmrg{ 1127ec681f3Smrg struct radeon_winsys *ws = device->ws; 1137ec681f3Smrg uint32_t value; 11401e04c3fSmrg 1157ec681f3Smrg if (ws->read_registers(ws, offset, 1, &value)) 1167ec681f3Smrg ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0); 11701e04c3fSmrg} 11801e04c3fSmrg 11901e04c3fSmrgstatic void 12001e04c3fSmrgradv_dump_debug_registers(struct radv_device *device, FILE *f) 12101e04c3fSmrg{ 1227ec681f3Smrg struct radeon_info *info = &device->physical_device->rad_info; 1237ec681f3Smrg 1247ec681f3Smrg fprintf(f, "Memory-mapped registers:\n"); 1257ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS); 1267ec681f3Smrg 1277ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2); 1287ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0); 1297ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1); 1307ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2); 1317ec681f3Smrg radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3); 1327ec681f3Smrg radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG); 1337ec681f3Smrg radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG); 1347ec681f3Smrg if (info->chip_class <= GFX8) { 1357ec681f3Smrg radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS); 1367ec681f3Smrg radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2); 1377ec681f3Smrg radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3); 1387ec681f3Smrg } 1397ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008680_CP_STAT); 1407ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1); 1417ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2); 1427ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3); 1437ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS); 1447ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT); 1457ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1); 1467ec681f3Smrg radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS); 1477ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT); 1487ec681f3Smrg radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1); 1497ec681f3Smrg fprintf(f, "\n"); 15001e04c3fSmrg} 15101e04c3fSmrg 15201e04c3fSmrgstatic void 1537ec681f3Smrgradv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) 15401e04c3fSmrg{ 1557ec681f3Smrg fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n"); 1567ec681f3Smrg for (unsigned j = 0; j < 4; j++) 1577ec681f3Smrg ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff); 15801e04c3fSmrg} 15901e04c3fSmrg 16001e04c3fSmrgstatic void 1617ec681f3Smrgradv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) 16201e04c3fSmrg{ 1637ec681f3Smrg unsigned sq_img_rsrc_word0 = 1647ec681f3Smrg chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0; 1657ec681f3Smrg 1667ec681f3Smrg fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n"); 1677ec681f3Smrg for (unsigned j = 0; j < 8; j++) 1687ec681f3Smrg ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff); 16901e04c3fSmrg 1707ec681f3Smrg fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n"); 1717ec681f3Smrg for (unsigned j = 0; j < 8; j++) 1727ec681f3Smrg ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff); 17301e04c3fSmrg} 17401e04c3fSmrg 17501e04c3fSmrgstatic void 1767ec681f3Smrgradv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f) 17701e04c3fSmrg{ 1787ec681f3Smrg fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n"); 1797ec681f3Smrg for (unsigned j = 0; j < 4; j++) { 1807ec681f3Smrg ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff); 1817ec681f3Smrg } 18201e04c3fSmrg} 18301e04c3fSmrg 18401e04c3fSmrgstatic void 1857ec681f3Smrgradv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, 1867ec681f3Smrg FILE *f) 18701e04c3fSmrg{ 1887ec681f3Smrg radv_dump_image_descriptor(chip_class, desc, f); 1897ec681f3Smrg radv_dump_sampler_descriptor(chip_class, desc + 16, f); 19001e04c3fSmrg} 19101e04c3fSmrg 19201e04c3fSmrgstatic void 1937ec681f3Smrgradv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id, 1947ec681f3Smrg FILE *f) 1957ec681f3Smrg{ 1967ec681f3Smrg enum chip_class chip_class = device->physical_device->rad_info.chip_class; 1977ec681f3Smrg const struct radv_descriptor_set_layout *layout; 1987ec681f3Smrg int i; 1997ec681f3Smrg 2007ec681f3Smrg if (!set) 2017ec681f3Smrg return; 2027ec681f3Smrg layout = set->header.layout; 2037ec681f3Smrg 2047ec681f3Smrg for (i = 0; i < set->header.layout->binding_count; i++) { 2057ec681f3Smrg uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4; 2067ec681f3Smrg 2077ec681f3Smrg switch (layout->binding[i].type) { 2087ec681f3Smrg case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER: 2097ec681f3Smrg case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: 2107ec681f3Smrg case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER: 2117ec681f3Smrg case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER: 2127ec681f3Smrg radv_dump_buffer_descriptor(chip_class, desc, f); 2137ec681f3Smrg break; 2147ec681f3Smrg case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE: 2157ec681f3Smrg case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE: 2167ec681f3Smrg case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT: 2177ec681f3Smrg radv_dump_image_descriptor(chip_class, desc, f); 2187ec681f3Smrg break; 2197ec681f3Smrg case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER: 2207ec681f3Smrg radv_dump_combined_image_sampler_descriptor(chip_class, desc, f); 2217ec681f3Smrg break; 2227ec681f3Smrg case VK_DESCRIPTOR_TYPE_SAMPLER: 2237ec681f3Smrg radv_dump_sampler_descriptor(chip_class, desc, f); 2247ec681f3Smrg break; 2257ec681f3Smrg case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC: 2267ec681f3Smrg case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC: 2277ec681f3Smrg case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE: 2287ec681f3Smrg case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR: 2297ec681f3Smrg /* todo */ 2307ec681f3Smrg break; 2317ec681f3Smrg default: 2327ec681f3Smrg assert(!"unknown descriptor type"); 2337ec681f3Smrg break; 2347ec681f3Smrg } 2357ec681f3Smrg fprintf(f, "\n"); 2367ec681f3Smrg } 2377ec681f3Smrg fprintf(f, "\n\n"); 23801e04c3fSmrg} 23901e04c3fSmrg 24001e04c3fSmrgstatic void 2417ec681f3Smrgradv_dump_descriptors(struct radv_device *device, FILE *f) 24201e04c3fSmrg{ 2437ec681f3Smrg uint64_t *ptr = (uint64_t *)device->trace_id_ptr; 2447ec681f3Smrg int i; 24501e04c3fSmrg 2467ec681f3Smrg fprintf(f, "Descriptors:\n"); 2477ec681f3Smrg for (i = 0; i < MAX_SETS; i++) { 2487ec681f3Smrg struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4); 24901e04c3fSmrg 2507ec681f3Smrg radv_dump_descriptor_set(device, set, i, f); 2517ec681f3Smrg } 25201e04c3fSmrg} 25301e04c3fSmrg 25401e04c3fSmrgstruct radv_shader_inst { 2557ec681f3Smrg char text[160]; /* one disasm line */ 2567ec681f3Smrg unsigned offset; /* instruction offset */ 2577ec681f3Smrg unsigned size; /* instruction size = 4 or 8 */ 25801e04c3fSmrg}; 25901e04c3fSmrg 26001e04c3fSmrg/* Split a disassembly string into lines and add them to the array pointed 26101e04c3fSmrg * to by "instructions". */ 2627ec681f3Smrgstatic void 2637ec681f3Smrgsi_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num, 2647ec681f3Smrg struct radv_shader_inst *instructions) 26501e04c3fSmrg{ 2667ec681f3Smrg struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL; 2677ec681f3Smrg char *next; 2687ec681f3Smrg 2697ec681f3Smrg while ((next = strchr(disasm, '\n'))) { 2707ec681f3Smrg struct radv_shader_inst *inst = &instructions[*num]; 2717ec681f3Smrg unsigned len = next - disasm; 2727ec681f3Smrg 2737ec681f3Smrg if (!memchr(disasm, ';', len)) { 2747ec681f3Smrg /* Ignore everything that is not an instruction. */ 2757ec681f3Smrg disasm = next + 1; 2767ec681f3Smrg continue; 2777ec681f3Smrg } 2787ec681f3Smrg 2797ec681f3Smrg assert(len < ARRAY_SIZE(inst->text)); 2807ec681f3Smrg memcpy(inst->text, disasm, len); 2817ec681f3Smrg inst->text[len] = 0; 2827ec681f3Smrg inst->offset = last_inst ? last_inst->offset + last_inst->size : 0; 2837ec681f3Smrg 2847ec681f3Smrg const char *semicolon = strchr(disasm, ';'); 2857ec681f3Smrg assert(semicolon); 2867ec681f3Smrg /* More than 16 chars after ";" means the instruction is 8 bytes long. */ 2877ec681f3Smrg inst->size = next - semicolon > 16 ? 8 : 4; 2887ec681f3Smrg 2897ec681f3Smrg snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len, 2907ec681f3Smrg " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset, 2917ec681f3Smrg inst->size); 2927ec681f3Smrg 2937ec681f3Smrg last_inst = inst; 2947ec681f3Smrg (*num)++; 2957ec681f3Smrg disasm = next + 1; 2967ec681f3Smrg } 29701e04c3fSmrg} 29801e04c3fSmrg 29901e04c3fSmrgstatic void 3007ec681f3Smrgradv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage, 3017ec681f3Smrg struct ac_wave_info *waves, unsigned num_waves, FILE *f) 30201e04c3fSmrg{ 3037ec681f3Smrg uint64_t start_addr, end_addr; 3047ec681f3Smrg unsigned i; 3057ec681f3Smrg 3067ec681f3Smrg if (!shader) 3077ec681f3Smrg return; 3087ec681f3Smrg 3097ec681f3Smrg start_addr = radv_shader_variant_get_va(shader); 3107ec681f3Smrg end_addr = start_addr + shader->code_size; 3117ec681f3Smrg 3127ec681f3Smrg /* See if any wave executes the shader. */ 3137ec681f3Smrg for (i = 0; i < num_waves; i++) { 3147ec681f3Smrg if (start_addr <= waves[i].pc && waves[i].pc <= end_addr) 3157ec681f3Smrg break; 3167ec681f3Smrg } 3177ec681f3Smrg 3187ec681f3Smrg if (i == num_waves) 3197ec681f3Smrg return; /* the shader is not being executed */ 3207ec681f3Smrg 3217ec681f3Smrg /* Remember the first found wave. The waves are sorted according to PC. */ 3227ec681f3Smrg waves = &waves[i]; 3237ec681f3Smrg num_waves -= i; 3247ec681f3Smrg 3257ec681f3Smrg /* Get the list of instructions. 3267ec681f3Smrg * Buffer size / 4 is the upper bound of the instruction count. 3277ec681f3Smrg */ 3287ec681f3Smrg unsigned num_inst = 0; 3297ec681f3Smrg struct radv_shader_inst *instructions = 3307ec681f3Smrg calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); 3317ec681f3Smrg 3327ec681f3Smrg si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); 3337ec681f3Smrg 3347ec681f3Smrg fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n", 3357ec681f3Smrg radv_get_shader_name(&shader->info, stage)); 3367ec681f3Smrg 3377ec681f3Smrg /* Print instructions with annotations. */ 3387ec681f3Smrg for (i = 0; i < num_inst; i++) { 3397ec681f3Smrg struct radv_shader_inst *inst = &instructions[i]; 3407ec681f3Smrg 3417ec681f3Smrg fprintf(f, "%s\n", inst->text); 3427ec681f3Smrg 3437ec681f3Smrg /* Print which waves execute the instruction right now. */ 3447ec681f3Smrg while (num_waves && start_addr + inst->offset == waves->pc) { 3457ec681f3Smrg fprintf(f, 3467ec681f3Smrg " " COLOR_GREEN "^ SE%u SH%u CU%u " 3477ec681f3Smrg "SIMD%u WAVE%u EXEC=%016" PRIx64 " ", 3487ec681f3Smrg waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec); 3497ec681f3Smrg 3507ec681f3Smrg if (inst->size == 4) { 3517ec681f3Smrg fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0); 3527ec681f3Smrg } else { 3537ec681f3Smrg fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1); 3547ec681f3Smrg } 3557ec681f3Smrg 3567ec681f3Smrg waves->matched = true; 3577ec681f3Smrg waves = &waves[1]; 3587ec681f3Smrg num_waves--; 3597ec681f3Smrg } 3607ec681f3Smrg } 3617ec681f3Smrg 3627ec681f3Smrg fprintf(f, "\n\n"); 3637ec681f3Smrg free(instructions); 3647ec681f3Smrg} 36501e04c3fSmrg 3667ec681f3Smrgstatic void 3677ec681f3Smrgradv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, 3687ec681f3Smrg FILE *f) 3697ec681f3Smrg{ 3707ec681f3Smrg struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]; 3717ec681f3Smrg enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class; 3727ec681f3Smrg unsigned num_waves = ac_get_wave_info(chip_class, waves); 3737ec681f3Smrg 3747ec681f3Smrg fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves); 3757ec681f3Smrg 3767ec681f3Smrg /* Dump annotated active graphics shaders. */ 3777ec681f3Smrg unsigned stages = active_stages; 3787ec681f3Smrg while (stages) { 3797ec681f3Smrg int stage = u_bit_scan(&stages); 3807ec681f3Smrg 3817ec681f3Smrg radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f); 3827ec681f3Smrg } 3837ec681f3Smrg 3847ec681f3Smrg /* Print waves executing shaders that are not currently bound. */ 3857ec681f3Smrg unsigned i; 3867ec681f3Smrg bool found = false; 3877ec681f3Smrg for (i = 0; i < num_waves; i++) { 3887ec681f3Smrg if (waves[i].matched) 3897ec681f3Smrg continue; 3907ec681f3Smrg 3917ec681f3Smrg if (!found) { 3927ec681f3Smrg fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n"); 3937ec681f3Smrg found = true; 3947ec681f3Smrg } 3957ec681f3Smrg fprintf(f, 3967ec681f3Smrg " SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64 3977ec681f3Smrg "\n", 3987ec681f3Smrg waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec, 3997ec681f3Smrg waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc); 4007ec681f3Smrg } 4017ec681f3Smrg if (found) 4027ec681f3Smrg fprintf(f, "\n\n"); 40301e04c3fSmrg} 40401e04c3fSmrg 40501e04c3fSmrgstatic void 4067ec681f3Smrgradv_dump_spirv(struct radv_shader_variant *shader, const char *sha1, const char *dump_dir) 4077ec681f3Smrg{ 4087ec681f3Smrg char dump_path[512]; 4097ec681f3Smrg FILE *f; 4107ec681f3Smrg 4117ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1); 4127ec681f3Smrg 4137ec681f3Smrg f = fopen(dump_path, "w+"); 4147ec681f3Smrg if (f) { 4157ec681f3Smrg fwrite(shader->spirv, shader->spirv_size, 1, f); 4167ec681f3Smrg fclose(f); 4177ec681f3Smrg } 41801e04c3fSmrg} 41901e04c3fSmrg 42001e04c3fSmrgstatic void 4217ec681f3Smrgradv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader, 4227ec681f3Smrg gl_shader_stage stage, const char *dump_dir, FILE *f) 42301e04c3fSmrg{ 4247ec681f3Smrg if (!shader) 4257ec681f3Smrg return; 42601e04c3fSmrg 4277ec681f3Smrg fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage)); 42801e04c3fSmrg 4297ec681f3Smrg if (shader->spirv) { 4307ec681f3Smrg unsigned char sha1[21]; 4317ec681f3Smrg char sha1buf[41]; 43201e04c3fSmrg 4337ec681f3Smrg _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1); 4347ec681f3Smrg _mesa_sha1_format(sha1buf, sha1); 43501e04c3fSmrg 4367ec681f3Smrg fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf); 4377ec681f3Smrg radv_dump_spirv(shader, sha1buf, dump_dir); 4387ec681f3Smrg } 43901e04c3fSmrg 4407ec681f3Smrg if (shader->nir_string) { 4417ec681f3Smrg fprintf(f, "NIR:\n%s\n", shader->nir_string); 4427ec681f3Smrg } 44301e04c3fSmrg 4447ec681f3Smrg fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO", 4457ec681f3Smrg shader->ir_string); 4467ec681f3Smrg fprintf(f, "DISASM:\n%s\n", shader->disasm_string); 44701e04c3fSmrg 4487ec681f3Smrg radv_dump_shader_stats(pipeline->device, pipeline, stage, f); 44901e04c3fSmrg} 45001e04c3fSmrg 45101e04c3fSmrgstatic void 4527ec681f3Smrgradv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages, 4537ec681f3Smrg const char *dump_dir, FILE *f) 45401e04c3fSmrg{ 4557ec681f3Smrg /* Dump active graphics shaders. */ 4567ec681f3Smrg unsigned stages = active_stages; 4577ec681f3Smrg while (stages) { 4587ec681f3Smrg int stage = u_bit_scan(&stages); 45901e04c3fSmrg 4607ec681f3Smrg radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f); 4617ec681f3Smrg } 46201e04c3fSmrg} 46301e04c3fSmrg 46401e04c3fSmrgstatic void 4657ec681f3Smrgradv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f) 46601e04c3fSmrg{ 4677ec681f3Smrg void *ptr = (uint64_t *)pipeline->device->trace_id_ptr; 4687ec681f3Smrg uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask); 4697ec681f3Smrg uint32_t *vb_ptr = &((uint32_t *)ptr)[3]; 4707ec681f3Smrg 4717ec681f3Smrg if (!count) 4727ec681f3Smrg return; 4737ec681f3Smrg 4747ec681f3Smrg fprintf(f, "Num vertex %s: %d\n", 4757ec681f3Smrg pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count); 4767ec681f3Smrg for (uint32_t i = 0; i < count; i++) { 4777ec681f3Smrg uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4]; 4787ec681f3Smrg uint64_t va = 0; 4797ec681f3Smrg 4807ec681f3Smrg va |= desc[0]; 4817ec681f3Smrg va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32; 4827ec681f3Smrg 4837ec681f3Smrg fprintf(f, "VBO#%d:\n", i); 4847ec681f3Smrg fprintf(f, "\tVA: 0x%" PRIx64 "\n", va); 4857ec681f3Smrg fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1])); 4867ec681f3Smrg fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]); 4877ec681f3Smrg } 48801e04c3fSmrg} 48901e04c3fSmrg 4907ec681f3Smrgstatic struct radv_pipeline * 4917ec681f3Smrgradv_get_saved_pipeline(struct radv_device *device, enum ring_type ring) 49201e04c3fSmrg{ 4937ec681f3Smrg uint64_t *ptr = (uint64_t *)device->trace_id_ptr; 4947ec681f3Smrg int offset = ring == RING_GFX ? 1 : 2; 49501e04c3fSmrg 4967ec681f3Smrg return *(struct radv_pipeline **)(ptr + offset); 49701e04c3fSmrg} 49801e04c3fSmrg 49901e04c3fSmrgstatic void 5007ec681f3Smrgradv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f) 50101e04c3fSmrg{ 5027ec681f3Smrg enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 5037ec681f3Smrg struct radv_pipeline *pipeline; 5047ec681f3Smrg 5057ec681f3Smrg fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE"); 5067ec681f3Smrg 5077ec681f3Smrg pipeline = radv_get_saved_pipeline(queue->device, ring); 5087ec681f3Smrg if (pipeline) { 5097ec681f3Smrg radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f); 5107ec681f3Smrg if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR)) 5117ec681f3Smrg radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f); 5127ec681f3Smrg radv_dump_vertex_descriptors(pipeline, f); 5137ec681f3Smrg radv_dump_descriptors(queue->device, f); 5147ec681f3Smrg } 51501e04c3fSmrg} 51601e04c3fSmrg 5177ec681f3Smrgstatic void 5187ec681f3Smrgradv_dump_cmd(const char *cmd, FILE *f) 51901e04c3fSmrg{ 5207ec681f3Smrg#ifndef _WIN32 5217ec681f3Smrg char line[2048]; 5227ec681f3Smrg FILE *p; 5237ec681f3Smrg 5247ec681f3Smrg p = popen(cmd, "r"); 5257ec681f3Smrg if (p) { 5267ec681f3Smrg while (fgets(line, sizeof(line), p)) 5277ec681f3Smrg fputs(line, f); 5287ec681f3Smrg fprintf(f, "\n"); 5297ec681f3Smrg pclose(p); 5307ec681f3Smrg } 5317ec681f3Smrg#endif 53201e04c3fSmrg} 53301e04c3fSmrg 5347ec681f3Smrgstatic void 5357ec681f3Smrgradv_dump_dmesg(FILE *f) 53601e04c3fSmrg{ 5377ec681f3Smrg fprintf(f, "\nLast 60 lines of dmesg:\n\n"); 5387ec681f3Smrg radv_dump_cmd("dmesg | tail -n60", f); 5397ec681f3Smrg} 54001e04c3fSmrg 5417ec681f3Smrgvoid 5427ec681f3Smrgradv_dump_enabled_options(struct radv_device *device, FILE *f) 5437ec681f3Smrg{ 5447ec681f3Smrg uint64_t mask; 5457ec681f3Smrg 5467ec681f3Smrg if (device->instance->debug_flags) { 5477ec681f3Smrg fprintf(f, "Enabled debug options: "); 5487ec681f3Smrg 5497ec681f3Smrg mask = device->instance->debug_flags; 5507ec681f3Smrg while (mask) { 5517ec681f3Smrg int i = u_bit_scan64(&mask); 5527ec681f3Smrg fprintf(f, "%s, ", radv_get_debug_option_name(i)); 5537ec681f3Smrg } 5547ec681f3Smrg fprintf(f, "\n"); 5557ec681f3Smrg } 5567ec681f3Smrg 5577ec681f3Smrg if (device->instance->perftest_flags) { 5587ec681f3Smrg fprintf(f, "Enabled perftest options: "); 5597ec681f3Smrg 5607ec681f3Smrg mask = device->instance->perftest_flags; 5617ec681f3Smrg while (mask) { 5627ec681f3Smrg int i = u_bit_scan64(&mask); 5637ec681f3Smrg fprintf(f, "%s, ", radv_get_perftest_option_name(i)); 5647ec681f3Smrg } 5657ec681f3Smrg fprintf(f, "\n"); 5667ec681f3Smrg } 56701e04c3fSmrg} 56801e04c3fSmrg 56901e04c3fSmrgstatic void 5707ec681f3Smrgradv_dump_app_info(struct radv_device *device, FILE *f) 57101e04c3fSmrg{ 5727ec681f3Smrg struct radv_instance *instance = device->instance; 57301e04c3fSmrg 5747ec681f3Smrg fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name); 5757ec681f3Smrg fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version); 5767ec681f3Smrg fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name); 5777ec681f3Smrg fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version); 5787ec681f3Smrg fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version), 5797ec681f3Smrg VK_VERSION_MINOR(instance->vk.app_info.api_version), 5807ec681f3Smrg VK_VERSION_PATCH(instance->vk.app_info.api_version)); 58101e04c3fSmrg 5827ec681f3Smrg radv_dump_enabled_options(device, f); 58301e04c3fSmrg} 58401e04c3fSmrg 5857ec681f3Smrgstatic void 5867ec681f3Smrgradv_dump_device_name(struct radv_device *device, FILE *f) 58701e04c3fSmrg{ 5887ec681f3Smrg struct radeon_info *info = &device->physical_device->rad_info; 5897ec681f3Smrg#ifndef _WIN32 5907ec681f3Smrg char kernel_version[128] = {0}; 5917ec681f3Smrg struct utsname uname_data; 5927ec681f3Smrg#endif 5937ec681f3Smrg const char *chip_name; 5947ec681f3Smrg 5957ec681f3Smrg chip_name = device->ws->get_chip_name(device->ws); 5967ec681f3Smrg 5977ec681f3Smrg#ifdef _WIN32 5987ec681f3Smrg fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name, 5997ec681f3Smrg info->drm_major, info->drm_minor, info->drm_patchlevel); 6007ec681f3Smrg#else 6017ec681f3Smrg if (uname(&uname_data) == 0) 6027ec681f3Smrg snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release); 6037ec681f3Smrg 6047ec681f3Smrg fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name, 6057ec681f3Smrg info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version); 6067ec681f3Smrg#endif 6077ec681f3Smrg} 60801e04c3fSmrg 6097ec681f3Smrgstatic void 6107ec681f3Smrgradv_dump_umr_ring(struct radv_queue *queue, FILE *f) 6117ec681f3Smrg{ 6127ec681f3Smrg enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 6137ec681f3Smrg struct radv_device *device = queue->device; 6147ec681f3Smrg char cmd[128]; 61501e04c3fSmrg 6167ec681f3Smrg /* TODO: Dump compute ring. */ 6177ec681f3Smrg if (ring != RING_GFX) 6187ec681f3Smrg return; 61901e04c3fSmrg 6207ec681f3Smrg sprintf(cmd, "umr -R %s 2>&1", 6217ec681f3Smrg device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); 62201e04c3fSmrg 6237ec681f3Smrg fprintf(f, "\nUMR GFX ring:\n\n"); 6247ec681f3Smrg radv_dump_cmd(cmd, f); 62501e04c3fSmrg} 62601e04c3fSmrg 62701e04c3fSmrgstatic void 6287ec681f3Smrgradv_dump_umr_waves(struct radv_queue *queue, FILE *f) 62901e04c3fSmrg{ 6307ec681f3Smrg enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 6317ec681f3Smrg struct radv_device *device = queue->device; 6327ec681f3Smrg char cmd[128]; 63301e04c3fSmrg 6347ec681f3Smrg /* TODO: Dump compute ring. */ 6357ec681f3Smrg if (ring != RING_GFX) 6367ec681f3Smrg return; 63701e04c3fSmrg 6387ec681f3Smrg sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1", 6397ec681f3Smrg device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx"); 64001e04c3fSmrg 6417ec681f3Smrg fprintf(f, "\nUMR GFX waves:\n\n"); 6427ec681f3Smrg radv_dump_cmd(cmd, f); 64301e04c3fSmrg} 64401e04c3fSmrg 64501e04c3fSmrgstatic bool 64601e04c3fSmrgradv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring) 64701e04c3fSmrg{ 6487ec681f3Smrg struct radeon_winsys *ws = queue->device->ws; 64901e04c3fSmrg 6507ec681f3Smrg if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family)) 6517ec681f3Smrg return true; 65201e04c3fSmrg 6537ec681f3Smrg return false; 65401e04c3fSmrg} 65501e04c3fSmrg 65601e04c3fSmrgvoid 65701e04c3fSmrgradv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs) 65801e04c3fSmrg{ 6597ec681f3Smrg struct radv_device *device = queue->device; 6607ec681f3Smrg enum ring_type ring; 6617ec681f3Smrg uint64_t addr; 6627ec681f3Smrg 6637ec681f3Smrg ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 6647ec681f3Smrg 6657ec681f3Smrg bool hang_occurred = radv_gpu_hang_occured(queue, ring); 6667ec681f3Smrg bool vm_fault_occurred = false; 6677ec681f3Smrg if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS) 6687ec681f3Smrg vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class, 6697ec681f3Smrg &device->dmesg_timestamp, &addr); 6707ec681f3Smrg if (!hang_occurred && !vm_fault_occurred) 6717ec681f3Smrg return; 6727ec681f3Smrg 6737ec681f3Smrg fprintf(stderr, "radv: GPU hang detected...\n"); 6747ec681f3Smrg 6757ec681f3Smrg#ifndef _WIN32 6767ec681f3Smrg /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save 6777ec681f3Smrg * various debugging info about that GPU hang. 6787ec681f3Smrg */ 6797ec681f3Smrg struct tm *timep, result; 6807ec681f3Smrg time_t raw_time; 6817ec681f3Smrg FILE *f; 6827ec681f3Smrg char dump_dir[256], dump_path[512], buf_time[128]; 6837ec681f3Smrg 6847ec681f3Smrg time(&raw_time); 6857ec681f3Smrg timep = os_localtime(&raw_time, &result); 6867ec681f3Smrg strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep); 6877ec681f3Smrg 6887ec681f3Smrg snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."), 6897ec681f3Smrg getpid(), buf_time); 6907ec681f3Smrg if (mkdir(dump_dir, 0774) && errno != EEXIST) { 6917ec681f3Smrg fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno); 6927ec681f3Smrg abort(); 6937ec681f3Smrg } 6947ec681f3Smrg 6957ec681f3Smrg fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir); 6967ec681f3Smrg 6977ec681f3Smrg /* Dump trace file. */ 6987ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log"); 6997ec681f3Smrg f = fopen(dump_path, "w+"); 7007ec681f3Smrg if (f) { 7017ec681f3Smrg radv_dump_trace(queue->device, cs, f); 7027ec681f3Smrg fclose(f); 7037ec681f3Smrg } 7047ec681f3Smrg 7057ec681f3Smrg /* Dump pipeline state. */ 7067ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log"); 7077ec681f3Smrg f = fopen(dump_path, "w+"); 7087ec681f3Smrg if (f) { 7097ec681f3Smrg radv_dump_queue_state(queue, dump_dir, f); 7107ec681f3Smrg fclose(f); 7117ec681f3Smrg } 7127ec681f3Smrg 7137ec681f3Smrg if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) { 7147ec681f3Smrg /* Dump UMR ring. */ 7157ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log"); 7167ec681f3Smrg f = fopen(dump_path, "w+"); 7177ec681f3Smrg if (f) { 7187ec681f3Smrg radv_dump_umr_ring(queue, f); 7197ec681f3Smrg fclose(f); 7207ec681f3Smrg } 7217ec681f3Smrg 7227ec681f3Smrg /* Dump UMR waves. */ 7237ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log"); 7247ec681f3Smrg f = fopen(dump_path, "w+"); 7257ec681f3Smrg if (f) { 7267ec681f3Smrg radv_dump_umr_waves(queue, f); 7277ec681f3Smrg fclose(f); 7287ec681f3Smrg } 7297ec681f3Smrg } 7307ec681f3Smrg 7317ec681f3Smrg /* Dump debug registers. */ 7327ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log"); 7337ec681f3Smrg f = fopen(dump_path, "w+"); 7347ec681f3Smrg if (f) { 7357ec681f3Smrg radv_dump_debug_registers(device, f); 7367ec681f3Smrg fclose(f); 7377ec681f3Smrg } 7387ec681f3Smrg 7397ec681f3Smrg /* Dump BO ranges. */ 7407ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log"); 7417ec681f3Smrg f = fopen(dump_path, "w+"); 7427ec681f3Smrg if (f) { 7437ec681f3Smrg device->ws->dump_bo_ranges(device->ws, f); 7447ec681f3Smrg fclose(f); 7457ec681f3Smrg } 7467ec681f3Smrg 7477ec681f3Smrg /* Dump BO log. */ 7487ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log"); 7497ec681f3Smrg f = fopen(dump_path, "w+"); 7507ec681f3Smrg if (f) { 7517ec681f3Smrg device->ws->dump_bo_log(device->ws, f); 7527ec681f3Smrg fclose(f); 7537ec681f3Smrg } 7547ec681f3Smrg 7557ec681f3Smrg /* Dump VM fault info. */ 7567ec681f3Smrg if (vm_fault_occurred) { 7577ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log"); 7587ec681f3Smrg f = fopen(dump_path, "w+"); 7597ec681f3Smrg if (f) { 7607ec681f3Smrg fprintf(f, "VM fault report.\n\n"); 7617ec681f3Smrg fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr); 7627ec681f3Smrg fclose(f); 7637ec681f3Smrg } 7647ec681f3Smrg } 7657ec681f3Smrg 7667ec681f3Smrg /* Dump app info. */ 7677ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log"); 7687ec681f3Smrg f = fopen(dump_path, "w+"); 7697ec681f3Smrg if (f) { 7707ec681f3Smrg radv_dump_app_info(device, f); 7717ec681f3Smrg fclose(f); 7727ec681f3Smrg } 7737ec681f3Smrg 7747ec681f3Smrg /* Dump GPU info. */ 7757ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log"); 7767ec681f3Smrg f = fopen(dump_path, "w+"); 7777ec681f3Smrg if (f) { 7787ec681f3Smrg radv_dump_device_name(device, f); 7797ec681f3Smrg ac_print_gpu_info(&device->physical_device->rad_info, f); 7807ec681f3Smrg fclose(f); 7817ec681f3Smrg } 7827ec681f3Smrg 7837ec681f3Smrg /* Dump dmesg. */ 7847ec681f3Smrg snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log"); 7857ec681f3Smrg f = fopen(dump_path, "w+"); 7867ec681f3Smrg if (f) { 7877ec681f3Smrg radv_dump_dmesg(f); 7887ec681f3Smrg fclose(f); 7897ec681f3Smrg } 7907ec681f3Smrg#endif 7917ec681f3Smrg 7927ec681f3Smrg fprintf(stderr, "radv: GPU hang report saved successfully!\n"); 7937ec681f3Smrg abort(); 7947ec681f3Smrg} 79501e04c3fSmrg 7967ec681f3Smrgvoid 7977ec681f3Smrgradv_print_spirv(const char *data, uint32_t size, FILE *fp) 7987ec681f3Smrg{ 7997ec681f3Smrg#ifndef _WIN32 8007ec681f3Smrg char path[] = "/tmp/fileXXXXXX"; 8017ec681f3Smrg char command[128]; 8027ec681f3Smrg int fd; 80301e04c3fSmrg 8047ec681f3Smrg /* Dump the binary into a temporary file. */ 8057ec681f3Smrg fd = mkstemp(path); 8067ec681f3Smrg if (fd < 0) 8077ec681f3Smrg return; 80801e04c3fSmrg 8097ec681f3Smrg if (write(fd, data, size) == -1) 8107ec681f3Smrg goto fail; 81101e04c3fSmrg 8127ec681f3Smrg /* Disassemble using spirv-dis if installed. */ 8137ec681f3Smrg sprintf(command, "spirv-dis %s", path); 8147ec681f3Smrg radv_dump_cmd(command, fp); 81501e04c3fSmrg 8167ec681f3Smrgfail: 8177ec681f3Smrg close(fd); 8187ec681f3Smrg unlink(path); 8197ec681f3Smrg#endif 8207ec681f3Smrg} 82101e04c3fSmrg 8227ec681f3Smrgbool 8237ec681f3Smrgradv_trap_handler_init(struct radv_device *device) 8247ec681f3Smrg{ 8257ec681f3Smrg struct radeon_winsys *ws = device->ws; 8267ec681f3Smrg VkResult result; 8277ec681f3Smrg 8287ec681f3Smrg /* Create the trap handler shader and upload it like other shaders. */ 8297ec681f3Smrg device->trap_handler_shader = radv_create_trap_handler_shader(device); 8307ec681f3Smrg if (!device->trap_handler_shader) { 8317ec681f3Smrg fprintf(stderr, "radv: failed to create the trap handler shader.\n"); 8327ec681f3Smrg return false; 8337ec681f3Smrg } 8347ec681f3Smrg 8357ec681f3Smrg result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true); 8367ec681f3Smrg if (result != VK_SUCCESS) 8377ec681f3Smrg return false; 8387ec681f3Smrg 8397ec681f3Smrg result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM, 8407ec681f3Smrg RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | 8417ec681f3Smrg RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT, 8427ec681f3Smrg RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo); 8437ec681f3Smrg if (result != VK_SUCCESS) 8447ec681f3Smrg return false; 8457ec681f3Smrg 8467ec681f3Smrg result = ws->buffer_make_resident(ws, device->tma_bo, true); 8477ec681f3Smrg if (result != VK_SUCCESS) 8487ec681f3Smrg return false; 8497ec681f3Smrg 8507ec681f3Smrg device->tma_ptr = ws->buffer_map(device->tma_bo); 8517ec681f3Smrg if (!device->tma_ptr) 8527ec681f3Smrg return false; 8537ec681f3Smrg 8547ec681f3Smrg /* Upload a buffer descriptor to store various info from the trap. */ 8557ec681f3Smrg uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16; 8567ec681f3Smrg uint32_t desc[4]; 8577ec681f3Smrg 8587ec681f3Smrg desc[0] = tma_va; 8597ec681f3Smrg desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32); 8607ec681f3Smrg desc[2] = TMA_BO_SIZE; 8617ec681f3Smrg desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) | 8627ec681f3Smrg S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) | 8637ec681f3Smrg S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32); 8647ec681f3Smrg 8657ec681f3Smrg memcpy(device->tma_ptr, desc, sizeof(desc)); 8667ec681f3Smrg 8677ec681f3Smrg return true; 86801e04c3fSmrg} 86901e04c3fSmrg 87001e04c3fSmrgvoid 8717ec681f3Smrgradv_trap_handler_finish(struct radv_device *device) 87201e04c3fSmrg{ 8737ec681f3Smrg struct radeon_winsys *ws = device->ws; 87401e04c3fSmrg 8757ec681f3Smrg if (unlikely(device->trap_handler_shader)) { 8767ec681f3Smrg ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false); 8777ec681f3Smrg radv_shader_variant_destroy(device, device->trap_handler_shader); 8787ec681f3Smrg } 87901e04c3fSmrg 8807ec681f3Smrg if (unlikely(device->tma_bo)) { 8817ec681f3Smrg ws->buffer_make_resident(ws, device->tma_bo, false); 8827ec681f3Smrg ws->buffer_destroy(ws, device->tma_bo); 8837ec681f3Smrg } 8847ec681f3Smrg} 88501e04c3fSmrg 8867ec681f3Smrgstatic void 8877ec681f3Smrgradv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc) 8887ec681f3Smrg{ 8897ec681f3Smrg struct radv_shader_variant *shader; 8907ec681f3Smrg uint64_t start_addr, end_addr; 8917ec681f3Smrg uint32_t instr_offset; 8927ec681f3Smrg 8937ec681f3Smrg shader = radv_find_shader_variant(device, faulty_pc); 8947ec681f3Smrg if (!shader) 8957ec681f3Smrg return; 8967ec681f3Smrg 8977ec681f3Smrg start_addr = radv_shader_variant_get_va(shader); 8987ec681f3Smrg end_addr = start_addr + shader->code_size; 8997ec681f3Smrg instr_offset = faulty_pc - start_addr; 9007ec681f3Smrg 9017ec681f3Smrg fprintf(stderr, 9027ec681f3Smrg "Faulty shader found " 9037ec681f3Smrg "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n", 9047ec681f3Smrg start_addr, end_addr, instr_offset); 9057ec681f3Smrg 9067ec681f3Smrg /* Get the list of instructions. 9077ec681f3Smrg * Buffer size / 4 is the upper bound of the instruction count. 9087ec681f3Smrg */ 9097ec681f3Smrg unsigned num_inst = 0; 9107ec681f3Smrg struct radv_shader_inst *instructions = 9117ec681f3Smrg calloc(shader->code_size / 4, sizeof(struct radv_shader_inst)); 9127ec681f3Smrg 9137ec681f3Smrg /* Split the disassembly string into instructions. */ 9147ec681f3Smrg si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions); 9157ec681f3Smrg 9167ec681f3Smrg /* Print instructions with annotations. */ 9177ec681f3Smrg for (unsigned i = 0; i < num_inst; i++) { 9187ec681f3Smrg struct radv_shader_inst *inst = &instructions[i]; 9197ec681f3Smrg 9207ec681f3Smrg if (start_addr + inst->offset == faulty_pc) { 9217ec681f3Smrg fprintf(stderr, "\n!!! Faulty instruction below !!!\n"); 9227ec681f3Smrg fprintf(stderr, "%s\n", inst->text); 9237ec681f3Smrg fprintf(stderr, "\n"); 9247ec681f3Smrg } else { 9257ec681f3Smrg fprintf(stderr, "%s\n", inst->text); 9267ec681f3Smrg } 9277ec681f3Smrg } 9287ec681f3Smrg 9297ec681f3Smrg free(instructions); 9307ec681f3Smrg} 93101e04c3fSmrg 9327ec681f3Smrgstruct radv_sq_hw_reg { 9337ec681f3Smrg uint32_t status; 9347ec681f3Smrg uint32_t trap_sts; 9357ec681f3Smrg uint32_t hw_id; 9367ec681f3Smrg uint32_t ib_sts; 9377ec681f3Smrg}; 93801e04c3fSmrg 9397ec681f3Smrgstatic void 9407ec681f3Smrgradv_dump_sq_hw_regs(struct radv_device *device) 9417ec681f3Smrg{ 9427ec681f3Smrg struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6]; 9437ec681f3Smrg 9447ec681f3Smrg fprintf(stderr, "\nHardware registers:\n"); 9457ec681f3Smrg if (device->physical_device->rad_info.chip_class >= GFX10) { 9467ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS, 9477ec681f3Smrg regs->status, ~0); 9487ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS, 9497ec681f3Smrg regs->trap_sts, ~0); 9507ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1, 9517ec681f3Smrg regs->hw_id, ~0); 9527ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS, 9537ec681f3Smrg regs->ib_sts, ~0); 9547ec681f3Smrg } else { 9557ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS, 9567ec681f3Smrg regs->status, ~0); 9577ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS, 9587ec681f3Smrg regs->trap_sts, ~0); 9597ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID, 9607ec681f3Smrg regs->hw_id, ~0); 9617ec681f3Smrg ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS, 9627ec681f3Smrg regs->ib_sts, ~0); 9637ec681f3Smrg } 9647ec681f3Smrg fprintf(stderr, "\n\n"); 9657ec681f3Smrg} 9667ec681f3Smrg 9677ec681f3Smrgvoid 9687ec681f3Smrgradv_check_trap_handler(struct radv_queue *queue) 9697ec681f3Smrg{ 9707ec681f3Smrg enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index); 9717ec681f3Smrg struct radv_device *device = queue->device; 9727ec681f3Smrg struct radeon_winsys *ws = device->ws; 9737ec681f3Smrg 9747ec681f3Smrg /* Wait for the context to be idle in a finite time. */ 9757ec681f3Smrg ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family); 9767ec681f3Smrg 9777ec681f3Smrg /* Try to detect if the trap handler has been reached by the hw by 9787ec681f3Smrg * looking at ttmp0 which should be non-zero if a shader exception 9797ec681f3Smrg * happened. 9807ec681f3Smrg */ 9817ec681f3Smrg if (!device->tma_ptr[4]) 9827ec681f3Smrg return; 9837ec681f3Smrg 9847ec681f3Smrg#if 0 9857ec681f3Smrg fprintf(stderr, "tma_ptr:\n"); 9867ec681f3Smrg for (unsigned i = 0; i < 10; i++) 9877ec681f3Smrg fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]); 9887ec681f3Smrg#endif 9897ec681f3Smrg 9907ec681f3Smrg radv_dump_sq_hw_regs(device); 9917ec681f3Smrg 9927ec681f3Smrg uint32_t ttmp0 = device->tma_ptr[4]; 9937ec681f3Smrg uint32_t ttmp1 = device->tma_ptr[5]; 9947ec681f3Smrg 9957ec681f3Smrg /* According to the ISA docs, 3.10 Trap and Exception Registers: 9967ec681f3Smrg * 9977ec681f3Smrg * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}" 9987ec681f3Smrg * 9997ec681f3Smrg * "When the trap handler is entered, the PC of the faulting 10007ec681f3Smrg * instruction is: (PC - PC_rewind * 4)." 10017ec681f3Smrg * */ 10027ec681f3Smrg uint8_t trap_id = (ttmp1 >> 16) & 0xff; 10037ec681f3Smrg uint8_t ht = (ttmp1 >> 24) & 0x1; 10047ec681f3Smrg uint8_t pc_rewind = (ttmp1 >> 25) & 0xf; 10057ec681f3Smrg uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4); 10067ec681f3Smrg 10077ec681f3Smrg fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht, 10087ec681f3Smrg pc_rewind); 10097ec681f3Smrg 10107ec681f3Smrg radv_dump_faulty_shader(device, pc); 10117ec681f3Smrg 10127ec681f3Smrg abort(); 101301e04c3fSmrg} 1014