101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2016 Red Hat.
301e04c3fSmrg * Copyright © 2016 Bas Nieuwenhuizen
401e04c3fSmrg *
501e04c3fSmrg * based in part on anv driver which is:
601e04c3fSmrg * Copyright © 2015 Intel Corporation
701e04c3fSmrg *
801e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
901e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
1001e04c3fSmrg * to deal in the Software without restriction, including without limitation
1101e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
1201e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
1301e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1401e04c3fSmrg *
1501e04c3fSmrg * The above copyright notice and this permission notice (including the next
1601e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1701e04c3fSmrg * Software.
1801e04c3fSmrg *
1901e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
2001e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
2101e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
2201e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2301e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2401e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2501e04c3fSmrg * IN THE SOFTWARE.
2601e04c3fSmrg */
2701e04c3fSmrg
2801e04c3fSmrg#include <stdio.h>
297ec681f3Smrg#include <stdlib.h>
307ec681f3Smrg#ifndef _WIN32
3101e04c3fSmrg#include <sys/utsname.h>
327ec681f3Smrg#endif
337ec681f3Smrg#include <sys/stat.h>
3401e04c3fSmrg
3501e04c3fSmrg#include "util/mesa-sha1.h"
3601e04c3fSmrg#include "ac_debug.h"
3701e04c3fSmrg#include "radv_debug.h"
3801e04c3fSmrg#include "radv_shader.h"
397ec681f3Smrg#include "sid.h"
4001e04c3fSmrg
4101e04c3fSmrg#define TRACE_BO_SIZE 4096
427ec681f3Smrg#define TMA_BO_SIZE   4096
437ec681f3Smrg
447ec681f3Smrg#define COLOR_RESET  "\033[0m"
457ec681f3Smrg#define COLOR_RED    "\033[31m"
467ec681f3Smrg#define COLOR_GREEN  "\033[1;32m"
477ec681f3Smrg#define COLOR_YELLOW "\033[1;33m"
487ec681f3Smrg#define COLOR_CYAN   "\033[1;36m"
4901e04c3fSmrg
507ec681f3Smrg#define RADV_DUMP_DIR "radv_dumps"
5101e04c3fSmrg
5201e04c3fSmrg/* Trace BO layout (offsets are 4 bytes):
5301e04c3fSmrg *
5401e04c3fSmrg * [0]: primary trace ID
5501e04c3fSmrg * [1]: secondary trace ID
567ec681f3Smrg * [2-3]: 64-bit GFX ring pipeline pointer
577ec681f3Smrg * [4-5]: 64-bit COMPUTE ring pipeline pointer
587ec681f3Smrg * [6-7]: Vertex descriptors pointer
597ec681f3Smrg * [8-9]: 64-bit descriptor set #0 pointer
6001e04c3fSmrg * ...
 * [70-71]: 64-bit descriptor set #31 pointer
6201e04c3fSmrg */
6301e04c3fSmrg
6401e04c3fSmrgbool
6501e04c3fSmrgradv_init_trace(struct radv_device *device)
6601e04c3fSmrg{
677ec681f3Smrg   struct radeon_winsys *ws = device->ws;
687ec681f3Smrg   VkResult result;
6901e04c3fSmrg
707ec681f3Smrg   result = ws->buffer_create(
717ec681f3Smrg      ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
727ec681f3Smrg      RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
737ec681f3Smrg      RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
747ec681f3Smrg   if (result != VK_SUCCESS)
757ec681f3Smrg      return false;
7601e04c3fSmrg
777ec681f3Smrg   result = ws->buffer_make_resident(ws, device->trace_bo, true);
787ec681f3Smrg   if (result != VK_SUCCESS)
797ec681f3Smrg      return false;
8001e04c3fSmrg
817ec681f3Smrg   device->trace_id_ptr = ws->buffer_map(device->trace_bo);
827ec681f3Smrg   if (!device->trace_id_ptr)
837ec681f3Smrg      return false;
8401e04c3fSmrg
857ec681f3Smrg   ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
867ec681f3Smrg                       NULL);
8701e04c3fSmrg
887ec681f3Smrg   return true;
8901e04c3fSmrg}
9001e04c3fSmrg
917ec681f3Smrgvoid
927ec681f3Smrgradv_finish_trace(struct radv_device *device)
9301e04c3fSmrg{
947ec681f3Smrg   struct radeon_winsys *ws = device->ws;
9501e04c3fSmrg
967ec681f3Smrg   if (unlikely(device->trace_bo)) {
977ec681f3Smrg      ws->buffer_make_resident(ws, device->trace_bo, false);
987ec681f3Smrg      ws->buffer_destroy(ws, device->trace_bo);
997ec681f3Smrg   }
1007ec681f3Smrg}
10101e04c3fSmrg
1027ec681f3Smrgstatic void
1037ec681f3Smrgradv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
1047ec681f3Smrg{
1057ec681f3Smrg   fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
1067ec681f3Smrg   device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
10701e04c3fSmrg}
10801e04c3fSmrg
10901e04c3fSmrgstatic void
11001e04c3fSmrgradv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
11101e04c3fSmrg{
1127ec681f3Smrg   struct radeon_winsys *ws = device->ws;
1137ec681f3Smrg   uint32_t value;
11401e04c3fSmrg
1157ec681f3Smrg   if (ws->read_registers(ws, offset, 1, &value))
1167ec681f3Smrg      ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
11701e04c3fSmrg}
11801e04c3fSmrg
11901e04c3fSmrgstatic void
12001e04c3fSmrgradv_dump_debug_registers(struct radv_device *device, FILE *f)
12101e04c3fSmrg{
1227ec681f3Smrg   struct radeon_info *info = &device->physical_device->rad_info;
1237ec681f3Smrg
1247ec681f3Smrg   fprintf(f, "Memory-mapped registers:\n");
1257ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
1267ec681f3Smrg
1277ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
1287ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
1297ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
1307ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
1317ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
1327ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
1337ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
1347ec681f3Smrg   if (info->chip_class <= GFX8) {
1357ec681f3Smrg      radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
1367ec681f3Smrg      radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
1377ec681f3Smrg      radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
1387ec681f3Smrg   }
1397ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
1407ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
1417ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
1427ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
1437ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
1447ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
1457ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
1467ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
1477ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
1487ec681f3Smrg   radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
1497ec681f3Smrg   fprintf(f, "\n");
15001e04c3fSmrg}
15101e04c3fSmrg
15201e04c3fSmrgstatic void
1537ec681f3Smrgradv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
15401e04c3fSmrg{
1557ec681f3Smrg   fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
1567ec681f3Smrg   for (unsigned j = 0; j < 4; j++)
1577ec681f3Smrg      ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
15801e04c3fSmrg}
15901e04c3fSmrg
16001e04c3fSmrgstatic void
1617ec681f3Smrgradv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
16201e04c3fSmrg{
1637ec681f3Smrg   unsigned sq_img_rsrc_word0 =
1647ec681f3Smrg      chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
1657ec681f3Smrg
1667ec681f3Smrg   fprintf(f, COLOR_CYAN "    Image:" COLOR_RESET "\n");
1677ec681f3Smrg   for (unsigned j = 0; j < 8; j++)
1687ec681f3Smrg      ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
16901e04c3fSmrg
1707ec681f3Smrg   fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
1717ec681f3Smrg   for (unsigned j = 0; j < 8; j++)
1727ec681f3Smrg      ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
17301e04c3fSmrg}
17401e04c3fSmrg
17501e04c3fSmrgstatic void
1767ec681f3Smrgradv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
17701e04c3fSmrg{
1787ec681f3Smrg   fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
1797ec681f3Smrg   for (unsigned j = 0; j < 4; j++) {
1807ec681f3Smrg      ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
1817ec681f3Smrg   }
18201e04c3fSmrg}
18301e04c3fSmrg
18401e04c3fSmrgstatic void
1857ec681f3Smrgradv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
1867ec681f3Smrg                                            FILE *f)
18701e04c3fSmrg{
1887ec681f3Smrg   radv_dump_image_descriptor(chip_class, desc, f);
1897ec681f3Smrg   radv_dump_sampler_descriptor(chip_class, desc + 16, f);
19001e04c3fSmrg}
19101e04c3fSmrg
19201e04c3fSmrgstatic void
1937ec681f3Smrgradv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
1947ec681f3Smrg                         FILE *f)
1957ec681f3Smrg{
1967ec681f3Smrg   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
1977ec681f3Smrg   const struct radv_descriptor_set_layout *layout;
1987ec681f3Smrg   int i;
1997ec681f3Smrg
2007ec681f3Smrg   if (!set)
2017ec681f3Smrg      return;
2027ec681f3Smrg   layout = set->header.layout;
2037ec681f3Smrg
2047ec681f3Smrg   for (i = 0; i < set->header.layout->binding_count; i++) {
2057ec681f3Smrg      uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
2067ec681f3Smrg
2077ec681f3Smrg      switch (layout->binding[i].type) {
2087ec681f3Smrg      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
2097ec681f3Smrg      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
2107ec681f3Smrg      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
2117ec681f3Smrg      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
2127ec681f3Smrg         radv_dump_buffer_descriptor(chip_class, desc, f);
2137ec681f3Smrg         break;
2147ec681f3Smrg      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
2157ec681f3Smrg      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
2167ec681f3Smrg      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
2177ec681f3Smrg         radv_dump_image_descriptor(chip_class, desc, f);
2187ec681f3Smrg         break;
2197ec681f3Smrg      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
2207ec681f3Smrg         radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
2217ec681f3Smrg         break;
2227ec681f3Smrg      case VK_DESCRIPTOR_TYPE_SAMPLER:
2237ec681f3Smrg         radv_dump_sampler_descriptor(chip_class, desc, f);
2247ec681f3Smrg         break;
2257ec681f3Smrg      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
2267ec681f3Smrg      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
2277ec681f3Smrg      case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
2287ec681f3Smrg      case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
2297ec681f3Smrg         /* todo */
2307ec681f3Smrg         break;
2317ec681f3Smrg      default:
2327ec681f3Smrg         assert(!"unknown descriptor type");
2337ec681f3Smrg         break;
2347ec681f3Smrg      }
2357ec681f3Smrg      fprintf(f, "\n");
2367ec681f3Smrg   }
2377ec681f3Smrg   fprintf(f, "\n\n");
23801e04c3fSmrg}
23901e04c3fSmrg
24001e04c3fSmrgstatic void
2417ec681f3Smrgradv_dump_descriptors(struct radv_device *device, FILE *f)
24201e04c3fSmrg{
2437ec681f3Smrg   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
2447ec681f3Smrg   int i;
24501e04c3fSmrg
2467ec681f3Smrg   fprintf(f, "Descriptors:\n");
2477ec681f3Smrg   for (i = 0; i < MAX_SETS; i++) {
2487ec681f3Smrg      struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);
24901e04c3fSmrg
2507ec681f3Smrg      radv_dump_descriptor_set(device, set, i, f);
2517ec681f3Smrg   }
25201e04c3fSmrg}
25301e04c3fSmrg
/* One instruction line of shader disassembly together with the position of
 * that instruction inside the shader binary. */
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * Only newline-terminated lines containing a ';' are treated as
 * instructions; every other line is skipped. For each instruction the byte
 * offset is the running sum of the sizes of the instructions before it
 * (continuing from entry *num - 1 when *num is non-zero), and the size is
 * guessed from the number of characters following the ';'.
 *
 * disasm:       NUL-terminated disassembly text.
 * start_addr:   address of the first instruction, used for the PC annotation.
 * num:          in/out count of valid entries in "instructions".
 * instructions: output array; the caller must provide enough entries, the
 *               capacity is not checked here.
 *
 * Lines that do not fit into radv_shader_inst::text are truncated instead of
 * overflowing the buffer (previously only guarded by an assert, which is
 * compiled out in release builds).
 */
static void
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
                    struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      struct radv_shader_inst *inst = &instructions[*num];
      size_t len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', len);

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      /* Never copy more than the text buffer can hold (NUL included). */
      size_t copy_len = len < sizeof(inst->text) ? len : sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, copy_len);
      inst->text[copy_len] = 0;
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* Append the annotation; snprintf truncates it if the line is long. */
      snprintf(inst->text + copy_len, sizeof(inst->text) - copy_len,
               " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
               inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
29801e04c3fSmrg
29901e04c3fSmrgstatic void
3007ec681f3Smrgradv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage,
3017ec681f3Smrg                           struct ac_wave_info *waves, unsigned num_waves, FILE *f)
30201e04c3fSmrg{
3037ec681f3Smrg   uint64_t start_addr, end_addr;
3047ec681f3Smrg   unsigned i;
3057ec681f3Smrg
3067ec681f3Smrg   if (!shader)
3077ec681f3Smrg      return;
3087ec681f3Smrg
3097ec681f3Smrg   start_addr = radv_shader_variant_get_va(shader);
3107ec681f3Smrg   end_addr = start_addr + shader->code_size;
3117ec681f3Smrg
3127ec681f3Smrg   /* See if any wave executes the shader. */
3137ec681f3Smrg   for (i = 0; i < num_waves; i++) {
3147ec681f3Smrg      if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
3157ec681f3Smrg         break;
3167ec681f3Smrg   }
3177ec681f3Smrg
3187ec681f3Smrg   if (i == num_waves)
3197ec681f3Smrg      return; /* the shader is not being executed */
3207ec681f3Smrg
3217ec681f3Smrg   /* Remember the first found wave. The waves are sorted according to PC. */
3227ec681f3Smrg   waves = &waves[i];
3237ec681f3Smrg   num_waves -= i;
3247ec681f3Smrg
3257ec681f3Smrg   /* Get the list of instructions.
3267ec681f3Smrg    * Buffer size / 4 is the upper bound of the instruction count.
3277ec681f3Smrg    */
3287ec681f3Smrg   unsigned num_inst = 0;
3297ec681f3Smrg   struct radv_shader_inst *instructions =
3307ec681f3Smrg      calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
3317ec681f3Smrg
3327ec681f3Smrg   si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
3337ec681f3Smrg
3347ec681f3Smrg   fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
3357ec681f3Smrg           radv_get_shader_name(&shader->info, stage));
3367ec681f3Smrg
3377ec681f3Smrg   /* Print instructions with annotations. */
3387ec681f3Smrg   for (i = 0; i < num_inst; i++) {
3397ec681f3Smrg      struct radv_shader_inst *inst = &instructions[i];
3407ec681f3Smrg
3417ec681f3Smrg      fprintf(f, "%s\n", inst->text);
3427ec681f3Smrg
3437ec681f3Smrg      /* Print which waves execute the instruction right now. */
3447ec681f3Smrg      while (num_waves && start_addr + inst->offset == waves->pc) {
3457ec681f3Smrg         fprintf(f,
3467ec681f3Smrg                 "          " COLOR_GREEN "^ SE%u SH%u CU%u "
3477ec681f3Smrg                 "SIMD%u WAVE%u  EXEC=%016" PRIx64 "  ",
3487ec681f3Smrg                 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
3497ec681f3Smrg
3507ec681f3Smrg         if (inst->size == 4) {
3517ec681f3Smrg            fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
3527ec681f3Smrg         } else {
3537ec681f3Smrg            fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
3547ec681f3Smrg         }
3557ec681f3Smrg
3567ec681f3Smrg         waves->matched = true;
3577ec681f3Smrg         waves = &waves[1];
3587ec681f3Smrg         num_waves--;
3597ec681f3Smrg      }
3607ec681f3Smrg   }
3617ec681f3Smrg
3627ec681f3Smrg   fprintf(f, "\n\n");
3637ec681f3Smrg   free(instructions);
3647ec681f3Smrg}
36501e04c3fSmrg
3667ec681f3Smrgstatic void
3677ec681f3Smrgradv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
3687ec681f3Smrg                            FILE *f)
3697ec681f3Smrg{
3707ec681f3Smrg   struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
3717ec681f3Smrg   enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
3727ec681f3Smrg   unsigned num_waves = ac_get_wave_info(chip_class, waves);
3737ec681f3Smrg
3747ec681f3Smrg   fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
3757ec681f3Smrg
3767ec681f3Smrg   /* Dump annotated active graphics shaders. */
3777ec681f3Smrg   unsigned stages = active_stages;
3787ec681f3Smrg   while (stages) {
3797ec681f3Smrg      int stage = u_bit_scan(&stages);
3807ec681f3Smrg
3817ec681f3Smrg      radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
3827ec681f3Smrg   }
3837ec681f3Smrg
3847ec681f3Smrg   /* Print waves executing shaders that are not currently bound. */
3857ec681f3Smrg   unsigned i;
3867ec681f3Smrg   bool found = false;
3877ec681f3Smrg   for (i = 0; i < num_waves; i++) {
3887ec681f3Smrg      if (waves[i].matched)
3897ec681f3Smrg         continue;
3907ec681f3Smrg
3917ec681f3Smrg      if (!found) {
3927ec681f3Smrg         fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
3937ec681f3Smrg         found = true;
3947ec681f3Smrg      }
3957ec681f3Smrg      fprintf(f,
3967ec681f3Smrg              "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016" PRIx64 "  INST=%08X %08X  PC=%" PRIx64
3977ec681f3Smrg              "\n",
3987ec681f3Smrg              waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
3997ec681f3Smrg              waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
4007ec681f3Smrg   }
4017ec681f3Smrg   if (found)
4027ec681f3Smrg      fprintf(f, "\n\n");
40301e04c3fSmrg}
40401e04c3fSmrg
40501e04c3fSmrgstatic void
4067ec681f3Smrgradv_dump_spirv(struct radv_shader_variant *shader, const char *sha1, const char *dump_dir)
4077ec681f3Smrg{
4087ec681f3Smrg   char dump_path[512];
4097ec681f3Smrg   FILE *f;
4107ec681f3Smrg
4117ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
4127ec681f3Smrg
4137ec681f3Smrg   f = fopen(dump_path, "w+");
4147ec681f3Smrg   if (f) {
4157ec681f3Smrg      fwrite(shader->spirv, shader->spirv_size, 1, f);
4167ec681f3Smrg      fclose(f);
4177ec681f3Smrg   }
41801e04c3fSmrg}
41901e04c3fSmrg
42001e04c3fSmrgstatic void
4217ec681f3Smrgradv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader,
4227ec681f3Smrg                 gl_shader_stage stage, const char *dump_dir, FILE *f)
42301e04c3fSmrg{
4247ec681f3Smrg   if (!shader)
4257ec681f3Smrg      return;
42601e04c3fSmrg
4277ec681f3Smrg   fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
42801e04c3fSmrg
4297ec681f3Smrg   if (shader->spirv) {
4307ec681f3Smrg      unsigned char sha1[21];
4317ec681f3Smrg      char sha1buf[41];
43201e04c3fSmrg
4337ec681f3Smrg      _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
4347ec681f3Smrg      _mesa_sha1_format(sha1buf, sha1);
43501e04c3fSmrg
4367ec681f3Smrg      fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
4377ec681f3Smrg      radv_dump_spirv(shader, sha1buf, dump_dir);
4387ec681f3Smrg   }
43901e04c3fSmrg
4407ec681f3Smrg   if (shader->nir_string) {
4417ec681f3Smrg      fprintf(f, "NIR:\n%s\n", shader->nir_string);
4427ec681f3Smrg   }
44301e04c3fSmrg
4447ec681f3Smrg   fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
4457ec681f3Smrg           shader->ir_string);
4467ec681f3Smrg   fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
44701e04c3fSmrg
4487ec681f3Smrg   radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
44901e04c3fSmrg}
45001e04c3fSmrg
45101e04c3fSmrgstatic void
4527ec681f3Smrgradv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
4537ec681f3Smrg                  const char *dump_dir, FILE *f)
45401e04c3fSmrg{
4557ec681f3Smrg   /* Dump active graphics shaders. */
4567ec681f3Smrg   unsigned stages = active_stages;
4577ec681f3Smrg   while (stages) {
4587ec681f3Smrg      int stage = u_bit_scan(&stages);
45901e04c3fSmrg
4607ec681f3Smrg      radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
4617ec681f3Smrg   }
46201e04c3fSmrg}
46301e04c3fSmrg
46401e04c3fSmrgstatic void
4657ec681f3Smrgradv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
46601e04c3fSmrg{
4677ec681f3Smrg   void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
4687ec681f3Smrg   uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
4697ec681f3Smrg   uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
4707ec681f3Smrg
4717ec681f3Smrg   if (!count)
4727ec681f3Smrg      return;
4737ec681f3Smrg
4747ec681f3Smrg   fprintf(f, "Num vertex %s: %d\n",
4757ec681f3Smrg           pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
4767ec681f3Smrg   for (uint32_t i = 0; i < count; i++) {
4777ec681f3Smrg      uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
4787ec681f3Smrg      uint64_t va = 0;
4797ec681f3Smrg
4807ec681f3Smrg      va |= desc[0];
4817ec681f3Smrg      va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
4827ec681f3Smrg
4837ec681f3Smrg      fprintf(f, "VBO#%d:\n", i);
4847ec681f3Smrg      fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
4857ec681f3Smrg      fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
4867ec681f3Smrg      fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
4877ec681f3Smrg   }
48801e04c3fSmrg}
48901e04c3fSmrg
4907ec681f3Smrgstatic struct radv_pipeline *
4917ec681f3Smrgradv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
49201e04c3fSmrg{
4937ec681f3Smrg   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
4947ec681f3Smrg   int offset = ring == RING_GFX ? 1 : 2;
49501e04c3fSmrg
4967ec681f3Smrg   return *(struct radv_pipeline **)(ptr + offset);
49701e04c3fSmrg}
49801e04c3fSmrg
49901e04c3fSmrgstatic void
5007ec681f3Smrgradv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
50101e04c3fSmrg{
5027ec681f3Smrg   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
5037ec681f3Smrg   struct radv_pipeline *pipeline;
5047ec681f3Smrg
5057ec681f3Smrg   fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
5067ec681f3Smrg
5077ec681f3Smrg   pipeline = radv_get_saved_pipeline(queue->device, ring);
5087ec681f3Smrg   if (pipeline) {
5097ec681f3Smrg      radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f);
5107ec681f3Smrg      if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
5117ec681f3Smrg         radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
5127ec681f3Smrg      radv_dump_vertex_descriptors(pipeline, f);
5137ec681f3Smrg      radv_dump_descriptors(queue->device, f);
5147ec681f3Smrg   }
51501e04c3fSmrg}
51601e04c3fSmrg
/* Run the shell command "cmd" and copy its standard output to "f", followed
 * by one newline. Does nothing when the command cannot be started, and is a
 * no-op on Windows.
 */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *pipe_in = popen(cmd, "r");

   if (pipe_in == NULL)
      return;

   while (fgets(chunk, sizeof(chunk), pipe_in) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(pipe_in);
#endif
}
53301e04c3fSmrg
/* Append the last 60 lines of the kernel log to "f". */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fputs("\nLast 60 lines of dmesg:\n\n", f);
   radv_dump_cmd(dmesg_cmd, f);
}
54001e04c3fSmrg
5417ec681f3Smrgvoid
5427ec681f3Smrgradv_dump_enabled_options(struct radv_device *device, FILE *f)
5437ec681f3Smrg{
5447ec681f3Smrg   uint64_t mask;
5457ec681f3Smrg
5467ec681f3Smrg   if (device->instance->debug_flags) {
5477ec681f3Smrg      fprintf(f, "Enabled debug options: ");
5487ec681f3Smrg
5497ec681f3Smrg      mask = device->instance->debug_flags;
5507ec681f3Smrg      while (mask) {
5517ec681f3Smrg         int i = u_bit_scan64(&mask);
5527ec681f3Smrg         fprintf(f, "%s, ", radv_get_debug_option_name(i));
5537ec681f3Smrg      }
5547ec681f3Smrg      fprintf(f, "\n");
5557ec681f3Smrg   }
5567ec681f3Smrg
5577ec681f3Smrg   if (device->instance->perftest_flags) {
5587ec681f3Smrg      fprintf(f, "Enabled perftest options: ");
5597ec681f3Smrg
5607ec681f3Smrg      mask = device->instance->perftest_flags;
5617ec681f3Smrg      while (mask) {
5627ec681f3Smrg         int i = u_bit_scan64(&mask);
5637ec681f3Smrg         fprintf(f, "%s, ", radv_get_perftest_option_name(i));
5647ec681f3Smrg      }
5657ec681f3Smrg      fprintf(f, "\n");
5667ec681f3Smrg   }
56701e04c3fSmrg}
56801e04c3fSmrg
56901e04c3fSmrgstatic void
5707ec681f3Smrgradv_dump_app_info(struct radv_device *device, FILE *f)
57101e04c3fSmrg{
5727ec681f3Smrg   struct radv_instance *instance = device->instance;
57301e04c3fSmrg
5747ec681f3Smrg   fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
5757ec681f3Smrg   fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
5767ec681f3Smrg   fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
5777ec681f3Smrg   fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
5787ec681f3Smrg   fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
5797ec681f3Smrg           VK_VERSION_MINOR(instance->vk.app_info.api_version),
5807ec681f3Smrg           VK_VERSION_PATCH(instance->vk.app_info.api_version));
58101e04c3fSmrg
5827ec681f3Smrg   radv_dump_enabled_options(device, f);
58301e04c3fSmrg}
58401e04c3fSmrg
5857ec681f3Smrgstatic void
5867ec681f3Smrgradv_dump_device_name(struct radv_device *device, FILE *f)
58701e04c3fSmrg{
5887ec681f3Smrg   struct radeon_info *info = &device->physical_device->rad_info;
5897ec681f3Smrg#ifndef _WIN32
5907ec681f3Smrg   char kernel_version[128] = {0};
5917ec681f3Smrg   struct utsname uname_data;
5927ec681f3Smrg#endif
5937ec681f3Smrg   const char *chip_name;
5947ec681f3Smrg
5957ec681f3Smrg   chip_name = device->ws->get_chip_name(device->ws);
5967ec681f3Smrg
5977ec681f3Smrg#ifdef _WIN32
5987ec681f3Smrg   fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
5997ec681f3Smrg           info->drm_major, info->drm_minor, info->drm_patchlevel);
6007ec681f3Smrg#else
6017ec681f3Smrg   if (uname(&uname_data) == 0)
6027ec681f3Smrg      snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
6037ec681f3Smrg
6047ec681f3Smrg   fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
6057ec681f3Smrg           info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
6067ec681f3Smrg#endif
6077ec681f3Smrg}
60801e04c3fSmrg
6097ec681f3Smrgstatic void
6107ec681f3Smrgradv_dump_umr_ring(struct radv_queue *queue, FILE *f)
6117ec681f3Smrg{
6127ec681f3Smrg   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
6137ec681f3Smrg   struct radv_device *device = queue->device;
6147ec681f3Smrg   char cmd[128];
61501e04c3fSmrg
6167ec681f3Smrg   /* TODO: Dump compute ring. */
6177ec681f3Smrg   if (ring != RING_GFX)
6187ec681f3Smrg      return;
61901e04c3fSmrg
6207ec681f3Smrg   sprintf(cmd, "umr -R %s 2>&1",
6217ec681f3Smrg           device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
62201e04c3fSmrg
6237ec681f3Smrg   fprintf(f, "\nUMR GFX ring:\n\n");
6247ec681f3Smrg   radv_dump_cmd(cmd, f);
62501e04c3fSmrg}
62601e04c3fSmrg
62701e04c3fSmrgstatic void
6287ec681f3Smrgradv_dump_umr_waves(struct radv_queue *queue, FILE *f)
62901e04c3fSmrg{
6307ec681f3Smrg   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
6317ec681f3Smrg   struct radv_device *device = queue->device;
6327ec681f3Smrg   char cmd[128];
63301e04c3fSmrg
6347ec681f3Smrg   /* TODO: Dump compute ring. */
6357ec681f3Smrg   if (ring != RING_GFX)
6367ec681f3Smrg      return;
63701e04c3fSmrg
6387ec681f3Smrg   sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
6397ec681f3Smrg           device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
64001e04c3fSmrg
6417ec681f3Smrg   fprintf(f, "\nUMR GFX waves:\n\n");
6427ec681f3Smrg   radv_dump_cmd(cmd, f);
64301e04c3fSmrg}
64401e04c3fSmrg
64501e04c3fSmrgstatic bool
64601e04c3fSmrgradv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
64701e04c3fSmrg{
6487ec681f3Smrg   struct radeon_winsys *ws = queue->device->ws;
64901e04c3fSmrg
6507ec681f3Smrg   if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
6517ec681f3Smrg      return true;
65201e04c3fSmrg
6537ec681f3Smrg   return false;
65401e04c3fSmrg}
65501e04c3fSmrg
65601e04c3fSmrgvoid
65701e04c3fSmrgradv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
65801e04c3fSmrg{
6597ec681f3Smrg   struct radv_device *device = queue->device;
6607ec681f3Smrg   enum ring_type ring;
6617ec681f3Smrg   uint64_t addr;
6627ec681f3Smrg
6637ec681f3Smrg   ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
6647ec681f3Smrg
6657ec681f3Smrg   bool hang_occurred = radv_gpu_hang_occured(queue, ring);
6667ec681f3Smrg   bool vm_fault_occurred = false;
6677ec681f3Smrg   if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
6687ec681f3Smrg      vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
6697ec681f3Smrg                                              &device->dmesg_timestamp, &addr);
6707ec681f3Smrg   if (!hang_occurred && !vm_fault_occurred)
6717ec681f3Smrg      return;
6727ec681f3Smrg
6737ec681f3Smrg   fprintf(stderr, "radv: GPU hang detected...\n");
6747ec681f3Smrg
6757ec681f3Smrg#ifndef _WIN32
6767ec681f3Smrg   /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
6777ec681f3Smrg    * various debugging info about that GPU hang.
6787ec681f3Smrg    */
6797ec681f3Smrg   struct tm *timep, result;
6807ec681f3Smrg   time_t raw_time;
6817ec681f3Smrg   FILE *f;
6827ec681f3Smrg   char dump_dir[256], dump_path[512], buf_time[128];
6837ec681f3Smrg
6847ec681f3Smrg   time(&raw_time);
6857ec681f3Smrg   timep = os_localtime(&raw_time, &result);
6867ec681f3Smrg   strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
6877ec681f3Smrg
6887ec681f3Smrg   snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
6897ec681f3Smrg            getpid(), buf_time);
6907ec681f3Smrg   if (mkdir(dump_dir, 0774) && errno != EEXIST) {
6917ec681f3Smrg      fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
6927ec681f3Smrg      abort();
6937ec681f3Smrg   }
6947ec681f3Smrg
6957ec681f3Smrg   fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
6967ec681f3Smrg
6977ec681f3Smrg   /* Dump trace file. */
6987ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
6997ec681f3Smrg   f = fopen(dump_path, "w+");
7007ec681f3Smrg   if (f) {
7017ec681f3Smrg      radv_dump_trace(queue->device, cs, f);
7027ec681f3Smrg      fclose(f);
7037ec681f3Smrg   }
7047ec681f3Smrg
7057ec681f3Smrg   /* Dump pipeline state. */
7067ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
7077ec681f3Smrg   f = fopen(dump_path, "w+");
7087ec681f3Smrg   if (f) {
7097ec681f3Smrg      radv_dump_queue_state(queue, dump_dir, f);
7107ec681f3Smrg      fclose(f);
7117ec681f3Smrg   }
7127ec681f3Smrg
7137ec681f3Smrg   if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
7147ec681f3Smrg      /* Dump UMR ring. */
7157ec681f3Smrg      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
7167ec681f3Smrg      f = fopen(dump_path, "w+");
7177ec681f3Smrg      if (f) {
7187ec681f3Smrg         radv_dump_umr_ring(queue, f);
7197ec681f3Smrg         fclose(f);
7207ec681f3Smrg      }
7217ec681f3Smrg
7227ec681f3Smrg      /* Dump UMR waves. */
7237ec681f3Smrg      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
7247ec681f3Smrg      f = fopen(dump_path, "w+");
7257ec681f3Smrg      if (f) {
7267ec681f3Smrg         radv_dump_umr_waves(queue, f);
7277ec681f3Smrg         fclose(f);
7287ec681f3Smrg      }
7297ec681f3Smrg   }
7307ec681f3Smrg
7317ec681f3Smrg   /* Dump debug registers. */
7327ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
7337ec681f3Smrg   f = fopen(dump_path, "w+");
7347ec681f3Smrg   if (f) {
7357ec681f3Smrg      radv_dump_debug_registers(device, f);
7367ec681f3Smrg      fclose(f);
7377ec681f3Smrg   }
7387ec681f3Smrg
7397ec681f3Smrg   /* Dump BO ranges. */
7407ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
7417ec681f3Smrg   f = fopen(dump_path, "w+");
7427ec681f3Smrg   if (f) {
7437ec681f3Smrg      device->ws->dump_bo_ranges(device->ws, f);
7447ec681f3Smrg      fclose(f);
7457ec681f3Smrg   }
7467ec681f3Smrg
7477ec681f3Smrg   /* Dump BO log. */
7487ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
7497ec681f3Smrg   f = fopen(dump_path, "w+");
7507ec681f3Smrg   if (f) {
7517ec681f3Smrg      device->ws->dump_bo_log(device->ws, f);
7527ec681f3Smrg      fclose(f);
7537ec681f3Smrg   }
7547ec681f3Smrg
7557ec681f3Smrg   /* Dump VM fault info. */
7567ec681f3Smrg   if (vm_fault_occurred) {
7577ec681f3Smrg      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
7587ec681f3Smrg      f = fopen(dump_path, "w+");
7597ec681f3Smrg      if (f) {
7607ec681f3Smrg         fprintf(f, "VM fault report.\n\n");
7617ec681f3Smrg         fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
7627ec681f3Smrg         fclose(f);
7637ec681f3Smrg      }
7647ec681f3Smrg   }
7657ec681f3Smrg
7667ec681f3Smrg   /* Dump app info. */
7677ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
7687ec681f3Smrg   f = fopen(dump_path, "w+");
7697ec681f3Smrg   if (f) {
7707ec681f3Smrg      radv_dump_app_info(device, f);
7717ec681f3Smrg      fclose(f);
7727ec681f3Smrg   }
7737ec681f3Smrg
7747ec681f3Smrg   /* Dump GPU info. */
7757ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
7767ec681f3Smrg   f = fopen(dump_path, "w+");
7777ec681f3Smrg   if (f) {
7787ec681f3Smrg      radv_dump_device_name(device, f);
7797ec681f3Smrg      ac_print_gpu_info(&device->physical_device->rad_info, f);
7807ec681f3Smrg      fclose(f);
7817ec681f3Smrg   }
7827ec681f3Smrg
7837ec681f3Smrg   /* Dump dmesg. */
7847ec681f3Smrg   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
7857ec681f3Smrg   f = fopen(dump_path, "w+");
7867ec681f3Smrg   if (f) {
7877ec681f3Smrg      radv_dump_dmesg(f);
7887ec681f3Smrg      fclose(f);
7897ec681f3Smrg   }
7907ec681f3Smrg#endif
7917ec681f3Smrg
7927ec681f3Smrg   fprintf(stderr, "radv: GPU hang report saved successfully!\n");
7937ec681f3Smrg   abort();
7947ec681f3Smrg}
79501e04c3fSmrg
/**
 * Disassemble a SPIR-V binary with the external "spirv-dis" tool.
 *
 * The binary is written to a temporary file created with mkstemp(), the
 * command "spirv-dis <file>" is handed to radv_dump_cmd() together with
 * \p fp, and the temporary file is removed again. Nothing is done on
 * Windows builds, or when the temporary file cannot be created or fully
 * written.
 *
 * \param data  SPIR-V binary
 * \param size  size of \p data in bytes
 * \param fp    stream passed on to radv_dump_cmd()
 */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char path[] = "/tmp/fileXXXXXX";
   char command[128];
   const char *cursor = data;
   size_t remaining = size;
   int fd;

   /* Dump the binary into a temporary file. */
   fd = mkstemp(path);
   if (fd < 0)
      return;

   /* write() may store fewer bytes than requested or be interrupted by a
    * signal, so keep going until the whole binary is on disk; otherwise
    * spirv-dis would be fed a truncated module.
    */
   while (remaining > 0) {
      ssize_t written = write(fd, cursor, remaining);

      if (written < 0 && errno == EINTR)
         continue;
      if (written <= 0)
         goto fail;

      cursor += written;
      remaining -= (size_t)written;
   }

   /* Disassemble using spirv-dis if installed. */
   snprintf(command, sizeof(command), "spirv-dis %s", path);
   radv_dump_cmd(command, fp);

fail:
   close(fd);
   unlink(path);
#endif
}
82101e04c3fSmrg
8227ec681f3Smrgbool
8237ec681f3Smrgradv_trap_handler_init(struct radv_device *device)
8247ec681f3Smrg{
8257ec681f3Smrg   struct radeon_winsys *ws = device->ws;
8267ec681f3Smrg   VkResult result;
8277ec681f3Smrg
8287ec681f3Smrg   /* Create the trap handler shader and upload it like other shaders. */
8297ec681f3Smrg   device->trap_handler_shader = radv_create_trap_handler_shader(device);
8307ec681f3Smrg   if (!device->trap_handler_shader) {
8317ec681f3Smrg      fprintf(stderr, "radv: failed to create the trap handler shader.\n");
8327ec681f3Smrg      return false;
8337ec681f3Smrg   }
8347ec681f3Smrg
8357ec681f3Smrg   result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
8367ec681f3Smrg   if (result != VK_SUCCESS)
8377ec681f3Smrg      return false;
8387ec681f3Smrg
8397ec681f3Smrg   result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
8407ec681f3Smrg                              RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
8417ec681f3Smrg                                 RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
8427ec681f3Smrg                              RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
8437ec681f3Smrg   if (result != VK_SUCCESS)
8447ec681f3Smrg      return false;
8457ec681f3Smrg
8467ec681f3Smrg   result = ws->buffer_make_resident(ws, device->tma_bo, true);
8477ec681f3Smrg   if (result != VK_SUCCESS)
8487ec681f3Smrg      return false;
8497ec681f3Smrg
8507ec681f3Smrg   device->tma_ptr = ws->buffer_map(device->tma_bo);
8517ec681f3Smrg   if (!device->tma_ptr)
8527ec681f3Smrg      return false;
8537ec681f3Smrg
8547ec681f3Smrg   /* Upload a buffer descriptor to store various info from the trap. */
8557ec681f3Smrg   uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
8567ec681f3Smrg   uint32_t desc[4];
8577ec681f3Smrg
8587ec681f3Smrg   desc[0] = tma_va;
8597ec681f3Smrg   desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
8607ec681f3Smrg   desc[2] = TMA_BO_SIZE;
8617ec681f3Smrg   desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
8627ec681f3Smrg             S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
8637ec681f3Smrg             S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
8647ec681f3Smrg
8657ec681f3Smrg   memcpy(device->tma_ptr, desc, sizeof(desc));
8667ec681f3Smrg
8677ec681f3Smrg   return true;
86801e04c3fSmrg}
86901e04c3fSmrg
87001e04c3fSmrgvoid
8717ec681f3Smrgradv_trap_handler_finish(struct radv_device *device)
87201e04c3fSmrg{
8737ec681f3Smrg   struct radeon_winsys *ws = device->ws;
87401e04c3fSmrg
8757ec681f3Smrg   if (unlikely(device->trap_handler_shader)) {
8767ec681f3Smrg      ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
8777ec681f3Smrg      radv_shader_variant_destroy(device, device->trap_handler_shader);
8787ec681f3Smrg   }
87901e04c3fSmrg
8807ec681f3Smrg   if (unlikely(device->tma_bo)) {
8817ec681f3Smrg      ws->buffer_make_resident(ws, device->tma_bo, false);
8827ec681f3Smrg      ws->buffer_destroy(ws, device->tma_bo);
8837ec681f3Smrg   }
8847ec681f3Smrg}
88501e04c3fSmrg
8867ec681f3Smrgstatic void
8877ec681f3Smrgradv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
8887ec681f3Smrg{
8897ec681f3Smrg   struct radv_shader_variant *shader;
8907ec681f3Smrg   uint64_t start_addr, end_addr;
8917ec681f3Smrg   uint32_t instr_offset;
8927ec681f3Smrg
8937ec681f3Smrg   shader = radv_find_shader_variant(device, faulty_pc);
8947ec681f3Smrg   if (!shader)
8957ec681f3Smrg      return;
8967ec681f3Smrg
8977ec681f3Smrg   start_addr = radv_shader_variant_get_va(shader);
8987ec681f3Smrg   end_addr = start_addr + shader->code_size;
8997ec681f3Smrg   instr_offset = faulty_pc - start_addr;
9007ec681f3Smrg
9017ec681f3Smrg   fprintf(stderr,
9027ec681f3Smrg           "Faulty shader found "
9037ec681f3Smrg           "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
9047ec681f3Smrg           start_addr, end_addr, instr_offset);
9057ec681f3Smrg
9067ec681f3Smrg   /* Get the list of instructions.
9077ec681f3Smrg    * Buffer size / 4 is the upper bound of the instruction count.
9087ec681f3Smrg    */
9097ec681f3Smrg   unsigned num_inst = 0;
9107ec681f3Smrg   struct radv_shader_inst *instructions =
9117ec681f3Smrg      calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
9127ec681f3Smrg
9137ec681f3Smrg   /* Split the disassembly string into instructions. */
9147ec681f3Smrg   si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
9157ec681f3Smrg
9167ec681f3Smrg   /* Print instructions with annotations. */
9177ec681f3Smrg   for (unsigned i = 0; i < num_inst; i++) {
9187ec681f3Smrg      struct radv_shader_inst *inst = &instructions[i];
9197ec681f3Smrg
9207ec681f3Smrg      if (start_addr + inst->offset == faulty_pc) {
9217ec681f3Smrg         fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
9227ec681f3Smrg         fprintf(stderr, "%s\n", inst->text);
9237ec681f3Smrg         fprintf(stderr, "\n");
9247ec681f3Smrg      } else {
9257ec681f3Smrg         fprintf(stderr, "%s\n", inst->text);
9267ec681f3Smrg      }
9277ec681f3Smrg   }
9287ec681f3Smrg
9297ec681f3Smrg   free(instructions);
9307ec681f3Smrg}
93101e04c3fSmrg
/**
 * SQ wave hardware register values captured for a trap.
 *
 * radv_dump_sq_hw_regs() overlays this structure on the TMA mapping starting
 * at dword 6, so the member order and sizes must not change.
 */
struct radv_sq_hw_reg {
   uint32_t status;   /* dumped as SQ_WAVE_STATUS */
   uint32_t trap_sts; /* dumped as SQ_WAVE_TRAPSTS */
   uint32_t hw_id;    /* dumped as SQ_WAVE_HW_ID (SQ_WAVE_HW_ID1 on GFX10+) */
   uint32_t ib_sts;   /* dumped as SQ_WAVE_IB_STS */
};
93801e04c3fSmrg
9397ec681f3Smrgstatic void
9407ec681f3Smrgradv_dump_sq_hw_regs(struct radv_device *device)
9417ec681f3Smrg{
9427ec681f3Smrg   struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
9437ec681f3Smrg
9447ec681f3Smrg   fprintf(stderr, "\nHardware registers:\n");
9457ec681f3Smrg   if (device->physical_device->rad_info.chip_class >= GFX10) {
9467ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
9477ec681f3Smrg                  regs->status, ~0);
9487ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
9497ec681f3Smrg                  regs->trap_sts, ~0);
9507ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
9517ec681f3Smrg                  regs->hw_id, ~0);
9527ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
9537ec681f3Smrg                  regs->ib_sts, ~0);
9547ec681f3Smrg   } else {
9557ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
9567ec681f3Smrg                  regs->status, ~0);
9577ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
9587ec681f3Smrg                  regs->trap_sts, ~0);
9597ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
9607ec681f3Smrg                  regs->hw_id, ~0);
9617ec681f3Smrg      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
9627ec681f3Smrg                  regs->ib_sts, ~0);
9637ec681f3Smrg   }
9647ec681f3Smrg   fprintf(stderr, "\n\n");
9657ec681f3Smrg}
9667ec681f3Smrg
9677ec681f3Smrgvoid
9687ec681f3Smrgradv_check_trap_handler(struct radv_queue *queue)
9697ec681f3Smrg{
9707ec681f3Smrg   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
9717ec681f3Smrg   struct radv_device *device = queue->device;
9727ec681f3Smrg   struct radeon_winsys *ws = device->ws;
9737ec681f3Smrg
9747ec681f3Smrg   /* Wait for the context to be idle in a finite time. */
9757ec681f3Smrg   ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
9767ec681f3Smrg
9777ec681f3Smrg   /* Try to detect if the trap handler has been reached by the hw by
9787ec681f3Smrg    * looking at ttmp0 which should be non-zero if a shader exception
9797ec681f3Smrg    * happened.
9807ec681f3Smrg    */
9817ec681f3Smrg   if (!device->tma_ptr[4])
9827ec681f3Smrg      return;
9837ec681f3Smrg
9847ec681f3Smrg#if 0
9857ec681f3Smrg	fprintf(stderr, "tma_ptr:\n");
9867ec681f3Smrg	for (unsigned i = 0; i < 10; i++)
9877ec681f3Smrg		fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
9887ec681f3Smrg#endif
9897ec681f3Smrg
9907ec681f3Smrg   radv_dump_sq_hw_regs(device);
9917ec681f3Smrg
9927ec681f3Smrg   uint32_t ttmp0 = device->tma_ptr[4];
9937ec681f3Smrg   uint32_t ttmp1 = device->tma_ptr[5];
9947ec681f3Smrg
9957ec681f3Smrg   /* According to the ISA docs, 3.10 Trap and Exception Registers:
9967ec681f3Smrg    *
9977ec681f3Smrg    * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
9987ec681f3Smrg    *
9997ec681f3Smrg    * "When the trap handler is entered, the PC of the faulting
10007ec681f3Smrg    *  instruction is: (PC - PC_rewind * 4)."
10017ec681f3Smrg    * */
10027ec681f3Smrg   uint8_t trap_id = (ttmp1 >> 16) & 0xff;
10037ec681f3Smrg   uint8_t ht = (ttmp1 >> 24) & 0x1;
10047ec681f3Smrg   uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
10057ec681f3Smrg   uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
10067ec681f3Smrg
10077ec681f3Smrg   fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
10087ec681f3Smrg           pc_rewind);
10097ec681f3Smrg
10107ec681f3Smrg   radv_dump_faulty_shader(device, pc);
10117ec681f3Smrg
10127ec681f3Smrg   abort();
101301e04c3fSmrg}
1014