1/*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * based in part on anv driver which is:
6 * Copyright © 2015 Intel Corporation
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the "Software"),
10 * to deal in the Software without restriction, including without limitation
11 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12 * and/or sell copies of the Software, and to permit persons to whom the
13 * Software is furnished to do so, subject to the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the next
16 * paragraph) shall be included in all copies or substantial portions of the
17 * Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25 * IN THE SOFTWARE.
26 */
27
28#include <stdio.h>
29#include <stdlib.h>
30#ifndef _WIN32
31#include <sys/utsname.h>
32#endif
33#include <sys/stat.h>
34
35#include "util/mesa-sha1.h"
36#include "ac_debug.h"
37#include "radv_debug.h"
38#include "radv_shader.h"
39#include "sid.h"
40
/* Size in bytes requested when creating the trace buffer object. */
#define TRACE_BO_SIZE 4096
/* Size in bytes requested when creating the TMA buffer object; also written
 * into the buffer descriptor uploaded by radv_trap_handler_init(). */
#define TMA_BO_SIZE   4096

/* ANSI terminal escape sequences used to colorize the textual dumps. */
#define COLOR_RESET  "\033[0m"
#define COLOR_RED    "\033[31m"
#define COLOR_GREEN  "\033[1;32m"
#define COLOR_YELLOW "\033[1;33m"
#define COLOR_CYAN   "\033[1;36m"

/* Prefix of the per-hang report directory name (suffixed with pid and time). */
#define RADV_DUMP_DIR "radv_dumps"
51
52/* Trace BO layout (offsets are 4 bytes):
53 *
54 * [0]: primary trace ID
55 * [1]: secondary trace ID
56 * [2-3]: 64-bit GFX ring pipeline pointer
57 * [4-5]: 64-bit COMPUTE ring pipeline pointer
58 * [6-7]: Vertex descriptors pointer
59 * [8-9]: 64-bit descriptor set #0 pointer
60 * ...
61 * [68-69]: 64-bit descriptor set #31 pointer
62 */
63
64bool
65radv_init_trace(struct radv_device *device)
66{
67   struct radeon_winsys *ws = device->ws;
68   VkResult result;
69
70   result = ws->buffer_create(
71      ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,
72      RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,
73      RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);
74   if (result != VK_SUCCESS)
75      return false;
76
77   result = ws->buffer_make_resident(ws, device->trace_bo, true);
78   if (result != VK_SUCCESS)
79      return false;
80
81   device->trace_id_ptr = ws->buffer_map(device->trace_bo);
82   if (!device->trace_id_ptr)
83      return false;
84
85   ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,
86                       NULL);
87
88   return true;
89}
90
91void
92radv_finish_trace(struct radv_device *device)
93{
94   struct radeon_winsys *ws = device->ws;
95
96   if (unlikely(device->trace_bo)) {
97      ws->buffer_make_resident(ws, device->trace_bo, false);
98      ws->buffer_destroy(ws, device->trace_bo);
99   }
100}
101
102static void
103radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)
104{
105   fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);
106   device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);
107}
108
109static void
110radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)
111{
112   struct radeon_winsys *ws = device->ws;
113   uint32_t value;
114
115   if (ws->read_registers(ws, offset, 1, &value))
116      ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);
117}
118
119static void
120radv_dump_debug_registers(struct radv_device *device, FILE *f)
121{
122   struct radeon_info *info = &device->physical_device->rad_info;
123
124   fprintf(f, "Memory-mapped registers:\n");
125   radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);
126
127   radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);
128   radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);
129   radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);
130   radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);
131   radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);
132   radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);
133   radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);
134   if (info->chip_class <= GFX8) {
135      radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);
136      radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);
137      radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);
138   }
139   radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);
140   radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);
141   radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);
142   radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);
143   radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);
144   radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);
145   radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);
146   radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);
147   radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);
148   radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);
149   fprintf(f, "\n");
150}
151
152static void
153radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
154{
155   fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
156   for (unsigned j = 0; j < 4; j++)
157      ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);
158}
159
160static void
161radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
162{
163   unsigned sq_img_rsrc_word0 =
164      chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;
165
166   fprintf(f, COLOR_CYAN "    Image:" COLOR_RESET "\n");
167   for (unsigned j = 0; j < 8; j++)
168      ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);
169
170   fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
171   for (unsigned j = 0; j < 8; j++)
172      ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);
173}
174
175static void
176radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)
177{
178   fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
179   for (unsigned j = 0; j < 4; j++) {
180      ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);
181   }
182}
183
184static void
185radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,
186                                            FILE *f)
187{
188   radv_dump_image_descriptor(chip_class, desc, f);
189   radv_dump_sampler_descriptor(chip_class, desc + 16, f);
190}
191
192static void
193radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,
194                         FILE *f)
195{
196   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
197   const struct radv_descriptor_set_layout *layout;
198   int i;
199
200   if (!set)
201      return;
202   layout = set->header.layout;
203
204   for (i = 0; i < set->header.layout->binding_count; i++) {
205      uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;
206
207      switch (layout->binding[i].type) {
208      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
209      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
210      case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
211      case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
212         radv_dump_buffer_descriptor(chip_class, desc, f);
213         break;
214      case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
215      case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
216      case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
217         radv_dump_image_descriptor(chip_class, desc, f);
218         break;
219      case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
220         radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);
221         break;
222      case VK_DESCRIPTOR_TYPE_SAMPLER:
223         radv_dump_sampler_descriptor(chip_class, desc, f);
224         break;
225      case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
226      case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
227      case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:
228      case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:
229         /* todo */
230         break;
231      default:
232         assert(!"unknown descriptor type");
233         break;
234      }
235      fprintf(f, "\n");
236   }
237   fprintf(f, "\n\n");
238}
239
240static void
241radv_dump_descriptors(struct radv_device *device, FILE *f)
242{
243   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
244   int i;
245
246   fprintf(f, "Descriptors:\n");
247   for (i = 0; i < MAX_SETS; i++) {
248      struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);
249
250      radv_dump_descriptor_set(device, set, i, f);
251   }
252}
253
/* One instruction extracted from a shader disassembly string. */
struct radv_shader_inst {
   char text[160];  /* one disasm line */
   unsigned offset; /* instruction offset */
   unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions".
 *
 * Only newline-terminated lines containing a ';' are treated as instructions;
 * everything else is skipped. Each stored line gets a
 * " [PC=..., off=..., size=...]" suffix appended when there is room.
 *
 * disasm:       NUL-terminated disassembly text.
 * start_addr:   address of the first instruction, used for the PC annotation.
 * num:          in/out count of valid entries in "instructions"; new entries
 *               are appended after the existing ones and their offsets
 *               continue from the last existing entry.
 * instructions: output array; the caller must provide enough room.
 *
 * Lines longer than the text buffer are truncated instead of overflowing it
 * (previously this was only guarded by an assert, i.e. not at all in release
 * builds).
 */
static void
si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,
                    struct radv_shader_inst *instructions)
{
   struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
   const char *next;

   while ((next = strchr(disasm, '\n'))) {
      struct radv_shader_inst *inst = &instructions[*num];
      const size_t line_len = (size_t)(next - disasm);
      const char *semicolon = memchr(disasm, ';', line_len);

      if (!semicolon) {
         /* Ignore everything that is not an instruction. */
         disasm = next + 1;
         continue;
      }

      /* Never copy more than the buffer can hold (keeping room for the NUL). */
      size_t len = line_len;
      if (len > sizeof(inst->text) - 1)
         len = sizeof(inst->text) - 1;

      memcpy(inst->text, disasm, len);
      inst->text[len] = 0;
      inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

      /* More than 16 chars after ";" means the instruction is 8 bytes long.
       * This uses the untruncated line so sizes stay correct for long lines. */
      inst->size = next - semicolon > 16 ? 8 : 4;

      /* snprintf truncates the annotation if the line nearly fills the buffer. */
      snprintf(inst->text + len, sizeof(inst->text) - len,
               " [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,
               inst->size);

      last_inst = inst;
      (*num)++;
      disasm = next + 1;
   }
}
298
299static void
300radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage,
301                           struct ac_wave_info *waves, unsigned num_waves, FILE *f)
302{
303   uint64_t start_addr, end_addr;
304   unsigned i;
305
306   if (!shader)
307      return;
308
309   start_addr = radv_shader_variant_get_va(shader);
310   end_addr = start_addr + shader->code_size;
311
312   /* See if any wave executes the shader. */
313   for (i = 0; i < num_waves; i++) {
314      if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
315         break;
316   }
317
318   if (i == num_waves)
319      return; /* the shader is not being executed */
320
321   /* Remember the first found wave. The waves are sorted according to PC. */
322   waves = &waves[i];
323   num_waves -= i;
324
325   /* Get the list of instructions.
326    * Buffer size / 4 is the upper bound of the instruction count.
327    */
328   unsigned num_inst = 0;
329   struct radv_shader_inst *instructions =
330      calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
331
332   si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
333
334   fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
335           radv_get_shader_name(&shader->info, stage));
336
337   /* Print instructions with annotations. */
338   for (i = 0; i < num_inst; i++) {
339      struct radv_shader_inst *inst = &instructions[i];
340
341      fprintf(f, "%s\n", inst->text);
342
343      /* Print which waves execute the instruction right now. */
344      while (num_waves && start_addr + inst->offset == waves->pc) {
345         fprintf(f,
346                 "          " COLOR_GREEN "^ SE%u SH%u CU%u "
347                 "SIMD%u WAVE%u  EXEC=%016" PRIx64 "  ",
348                 waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);
349
350         if (inst->size == 4) {
351            fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);
352         } else {
353            fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);
354         }
355
356         waves->matched = true;
357         waves = &waves[1];
358         num_waves--;
359      }
360   }
361
362   fprintf(f, "\n\n");
363   free(instructions);
364}
365
366static void
367radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
368                            FILE *f)
369{
370   struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
371   enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;
372   unsigned num_waves = ac_get_wave_info(chip_class, waves);
373
374   fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);
375
376   /* Dump annotated active graphics shaders. */
377   unsigned stages = active_stages;
378   while (stages) {
379      int stage = u_bit_scan(&stages);
380
381      radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);
382   }
383
384   /* Print waves executing shaders that are not currently bound. */
385   unsigned i;
386   bool found = false;
387   for (i = 0; i < num_waves; i++) {
388      if (waves[i].matched)
389         continue;
390
391      if (!found) {
392         fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");
393         found = true;
394      }
395      fprintf(f,
396              "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016" PRIx64 "  INST=%08X %08X  PC=%" PRIx64
397              "\n",
398              waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,
399              waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);
400   }
401   if (found)
402      fprintf(f, "\n\n");
403}
404
405static void
406radv_dump_spirv(struct radv_shader_variant *shader, const char *sha1, const char *dump_dir)
407{
408   char dump_path[512];
409   FILE *f;
410
411   snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);
412
413   f = fopen(dump_path, "w+");
414   if (f) {
415      fwrite(shader->spirv, shader->spirv_size, 1, f);
416      fclose(f);
417   }
418}
419
420static void
421radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader,
422                 gl_shader_stage stage, const char *dump_dir, FILE *f)
423{
424   if (!shader)
425      return;
426
427   fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));
428
429   if (shader->spirv) {
430      unsigned char sha1[21];
431      char sha1buf[41];
432
433      _mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);
434      _mesa_sha1_format(sha1buf, sha1);
435
436      fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);
437      radv_dump_spirv(shader, sha1buf, dump_dir);
438   }
439
440   if (shader->nir_string) {
441      fprintf(f, "NIR:\n%s\n", shader->nir_string);
442   }
443
444   fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",
445           shader->ir_string);
446   fprintf(f, "DISASM:\n%s\n", shader->disasm_string);
447
448   radv_dump_shader_stats(pipeline->device, pipeline, stage, f);
449}
450
451static void
452radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,
453                  const char *dump_dir, FILE *f)
454{
455   /* Dump active graphics shaders. */
456   unsigned stages = active_stages;
457   while (stages) {
458      int stage = u_bit_scan(&stages);
459
460      radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);
461   }
462}
463
464static void
465radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)
466{
467   void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;
468   uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);
469   uint32_t *vb_ptr = &((uint32_t *)ptr)[3];
470
471   if (!count)
472      return;
473
474   fprintf(f, "Num vertex %s: %d\n",
475           pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);
476   for (uint32_t i = 0; i < count; i++) {
477      uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];
478      uint64_t va = 0;
479
480      va |= desc[0];
481      va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;
482
483      fprintf(f, "VBO#%d:\n", i);
484      fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);
485      fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));
486      fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);
487   }
488}
489
490static struct radv_pipeline *
491radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)
492{
493   uint64_t *ptr = (uint64_t *)device->trace_id_ptr;
494   int offset = ring == RING_GFX ? 1 : 2;
495
496   return *(struct radv_pipeline **)(ptr + offset);
497}
498
499static void
500radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)
501{
502   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
503   struct radv_pipeline *pipeline;
504
505   fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");
506
507   pipeline = radv_get_saved_pipeline(queue->device, ring);
508   if (pipeline) {
509      radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f);
510      if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))
511         radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);
512      radv_dump_vertex_descriptors(pipeline, f);
513      radv_dump_descriptors(queue->device, f);
514   }
515}
516
/* Run a shell command and copy its standard output to f, followed by one
 * newline. Does nothing on Windows or when the command cannot be started. */
static void
radv_dump_cmd(const char *cmd, FILE *f)
{
#ifndef _WIN32
   char chunk[2048];
   FILE *child = popen(cmd, "r");

   if (child == NULL)
      return;

   while (fgets(chunk, sizeof(chunk), child) != NULL)
      fputs(chunk, f);

   fprintf(f, "\n");
   pclose(child);
#endif
}
533
/* Append the tail of the kernel log to f by running "dmesg | tail -n60". */
static void
radv_dump_dmesg(FILE *f)
{
   static const char dmesg_cmd[] = "dmesg | tail -n60";

   fprintf(f, "\nLast 60 lines of dmesg:\n\n");
   radv_dump_cmd(dmesg_cmd, f);
}
540
541void
542radv_dump_enabled_options(struct radv_device *device, FILE *f)
543{
544   uint64_t mask;
545
546   if (device->instance->debug_flags) {
547      fprintf(f, "Enabled debug options: ");
548
549      mask = device->instance->debug_flags;
550      while (mask) {
551         int i = u_bit_scan64(&mask);
552         fprintf(f, "%s, ", radv_get_debug_option_name(i));
553      }
554      fprintf(f, "\n");
555   }
556
557   if (device->instance->perftest_flags) {
558      fprintf(f, "Enabled perftest options: ");
559
560      mask = device->instance->perftest_flags;
561      while (mask) {
562         int i = u_bit_scan64(&mask);
563         fprintf(f, "%s, ", radv_get_perftest_option_name(i));
564      }
565      fprintf(f, "\n");
566   }
567}
568
569static void
570radv_dump_app_info(struct radv_device *device, FILE *f)
571{
572   struct radv_instance *instance = device->instance;
573
574   fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);
575   fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);
576   fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);
577   fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);
578   fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),
579           VK_VERSION_MINOR(instance->vk.app_info.api_version),
580           VK_VERSION_PATCH(instance->vk.app_info.api_version));
581
582   radv_dump_enabled_options(device, f);
583}
584
585static void
586radv_dump_device_name(struct radv_device *device, FILE *f)
587{
588   struct radeon_info *info = &device->physical_device->rad_info;
589#ifndef _WIN32
590   char kernel_version[128] = {0};
591   struct utsname uname_data;
592#endif
593   const char *chip_name;
594
595   chip_name = device->ws->get_chip_name(device->ws);
596
597#ifdef _WIN32
598   fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,
599           info->drm_major, info->drm_minor, info->drm_patchlevel);
600#else
601   if (uname(&uname_data) == 0)
602      snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);
603
604   fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,
605           info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);
606#endif
607}
608
609static void
610radv_dump_umr_ring(struct radv_queue *queue, FILE *f)
611{
612   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
613   struct radv_device *device = queue->device;
614   char cmd[128];
615
616   /* TODO: Dump compute ring. */
617   if (ring != RING_GFX)
618      return;
619
620   sprintf(cmd, "umr -R %s 2>&1",
621           device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
622
623   fprintf(f, "\nUMR GFX ring:\n\n");
624   radv_dump_cmd(cmd, f);
625}
626
627static void
628radv_dump_umr_waves(struct radv_queue *queue, FILE *f)
629{
630   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
631   struct radv_device *device = queue->device;
632   char cmd[128];
633
634   /* TODO: Dump compute ring. */
635   if (ring != RING_GFX)
636      return;
637
638   sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",
639           device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");
640
641   fprintf(f, "\nUMR GFX waves:\n\n");
642   radv_dump_cmd(cmd, f);
643}
644
645static bool
646radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)
647{
648   struct radeon_winsys *ws = queue->device->ws;
649
650   if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family))
651      return true;
652
653   return false;
654}
655
656void
657radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)
658{
659   struct radv_device *device = queue->device;
660   enum ring_type ring;
661   uint64_t addr;
662
663   ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
664
665   bool hang_occurred = radv_gpu_hang_occured(queue, ring);
666   bool vm_fault_occurred = false;
667   if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)
668      vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,
669                                              &device->dmesg_timestamp, &addr);
670   if (!hang_occurred && !vm_fault_occurred)
671      return;
672
673   fprintf(stderr, "radv: GPU hang detected...\n");
674
675#ifndef _WIN32
676   /* Create a directory into $HOME/radv_dumps_<pid>_<time> to save
677    * various debugging info about that GPU hang.
678    */
679   struct tm *timep, result;
680   time_t raw_time;
681   FILE *f;
682   char dump_dir[256], dump_path[512], buf_time[128];
683
684   time(&raw_time);
685   timep = os_localtime(&raw_time, &result);
686   strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);
687
688   snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),
689            getpid(), buf_time);
690   if (mkdir(dump_dir, 0774) && errno != EEXIST) {
691      fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);
692      abort();
693   }
694
695   fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);
696
697   /* Dump trace file. */
698   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");
699   f = fopen(dump_path, "w+");
700   if (f) {
701      radv_dump_trace(queue->device, cs, f);
702      fclose(f);
703   }
704
705   /* Dump pipeline state. */
706   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");
707   f = fopen(dump_path, "w+");
708   if (f) {
709      radv_dump_queue_state(queue, dump_dir, f);
710      fclose(f);
711   }
712
713   if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {
714      /* Dump UMR ring. */
715      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");
716      f = fopen(dump_path, "w+");
717      if (f) {
718         radv_dump_umr_ring(queue, f);
719         fclose(f);
720      }
721
722      /* Dump UMR waves. */
723      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");
724      f = fopen(dump_path, "w+");
725      if (f) {
726         radv_dump_umr_waves(queue, f);
727         fclose(f);
728      }
729   }
730
731   /* Dump debug registers. */
732   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");
733   f = fopen(dump_path, "w+");
734   if (f) {
735      radv_dump_debug_registers(device, f);
736      fclose(f);
737   }
738
739   /* Dump BO ranges. */
740   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");
741   f = fopen(dump_path, "w+");
742   if (f) {
743      device->ws->dump_bo_ranges(device->ws, f);
744      fclose(f);
745   }
746
747   /* Dump BO log. */
748   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");
749   f = fopen(dump_path, "w+");
750   if (f) {
751      device->ws->dump_bo_log(device->ws, f);
752      fclose(f);
753   }
754
755   /* Dump VM fault info. */
756   if (vm_fault_occurred) {
757      snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");
758      f = fopen(dump_path, "w+");
759      if (f) {
760         fprintf(f, "VM fault report.\n\n");
761         fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);
762         fclose(f);
763      }
764   }
765
766   /* Dump app info. */
767   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");
768   f = fopen(dump_path, "w+");
769   if (f) {
770      radv_dump_app_info(device, f);
771      fclose(f);
772   }
773
774   /* Dump GPU info. */
775   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");
776   f = fopen(dump_path, "w+");
777   if (f) {
778      radv_dump_device_name(device, f);
779      ac_print_gpu_info(&device->physical_device->rad_info, f);
780      fclose(f);
781   }
782
783   /* Dump dmesg. */
784   snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");
785   f = fopen(dump_path, "w+");
786   if (f) {
787      radv_dump_dmesg(f);
788      fclose(f);
789   }
790#endif
791
792   fprintf(stderr, "radv: GPU hang report saved successfully!\n");
793   abort();
794}
795
/* Disassemble a SPIR-V binary with the external "spirv-dis" tool and copy
 * its output to fp. The binary is first written to a temporary file under
 * /tmp, which is removed afterwards. Does nothing on Windows. */
void
radv_print_spirv(const char *data, uint32_t size, FILE *fp)
{
#ifndef _WIN32
   char tmp_path[] = "/tmp/fileXXXXXX";
   char dis_cmd[128];

   /* Dump the binary into a temporary file. */
   int tmp_fd = mkstemp(tmp_path);
   if (tmp_fd < 0)
      return;

   if (write(tmp_fd, data, size) != -1) {
      /* Disassemble using spirv-dis if installed. */
      sprintf(dis_cmd, "spirv-dis %s", tmp_path);
      radv_dump_cmd(dis_cmd, fp);
   }

   close(tmp_fd);
   unlink(tmp_path);
#endif
}
821
822bool
823radv_trap_handler_init(struct radv_device *device)
824{
825   struct radeon_winsys *ws = device->ws;
826   VkResult result;
827
828   /* Create the trap handler shader and upload it like other shaders. */
829   device->trap_handler_shader = radv_create_trap_handler_shader(device);
830   if (!device->trap_handler_shader) {
831      fprintf(stderr, "radv: failed to create the trap handler shader.\n");
832      return false;
833   }
834
835   result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);
836   if (result != VK_SUCCESS)
837      return false;
838
839   result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,
840                              RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
841                                 RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,
842                              RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);
843   if (result != VK_SUCCESS)
844      return false;
845
846   result = ws->buffer_make_resident(ws, device->tma_bo, true);
847   if (result != VK_SUCCESS)
848      return false;
849
850   device->tma_ptr = ws->buffer_map(device->tma_bo);
851   if (!device->tma_ptr)
852      return false;
853
854   /* Upload a buffer descriptor to store various info from the trap. */
855   uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;
856   uint32_t desc[4];
857
858   desc[0] = tma_va;
859   desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);
860   desc[2] = TMA_BO_SIZE;
861   desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
862             S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
863             S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
864
865   memcpy(device->tma_ptr, desc, sizeof(desc));
866
867   return true;
868}
869
870void
871radv_trap_handler_finish(struct radv_device *device)
872{
873   struct radeon_winsys *ws = device->ws;
874
875   if (unlikely(device->trap_handler_shader)) {
876      ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);
877      radv_shader_variant_destroy(device, device->trap_handler_shader);
878   }
879
880   if (unlikely(device->tma_bo)) {
881      ws->buffer_make_resident(ws, device->tma_bo, false);
882      ws->buffer_destroy(ws, device->tma_bo);
883   }
884}
885
886static void
887radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)
888{
889   struct radv_shader_variant *shader;
890   uint64_t start_addr, end_addr;
891   uint32_t instr_offset;
892
893   shader = radv_find_shader_variant(device, faulty_pc);
894   if (!shader)
895      return;
896
897   start_addr = radv_shader_variant_get_va(shader);
898   end_addr = start_addr + shader->code_size;
899   instr_offset = faulty_pc - start_addr;
900
901   fprintf(stderr,
902           "Faulty shader found "
903           "VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",
904           start_addr, end_addr, instr_offset);
905
906   /* Get the list of instructions.
907    * Buffer size / 4 is the upper bound of the instruction count.
908    */
909   unsigned num_inst = 0;
910   struct radv_shader_inst *instructions =
911      calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));
912
913   /* Split the disassembly string into instructions. */
914   si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);
915
916   /* Print instructions with annotations. */
917   for (unsigned i = 0; i < num_inst; i++) {
918      struct radv_shader_inst *inst = &instructions[i];
919
920      if (start_addr + inst->offset == faulty_pc) {
921         fprintf(stderr, "\n!!! Faulty instruction below !!!\n");
922         fprintf(stderr, "%s\n", inst->text);
923         fprintf(stderr, "\n");
924      } else {
925         fprintf(stderr, "%s\n", inst->text);
926      }
927   }
928
929   free(instructions);
930}
931
932struct radv_sq_hw_reg {
933   uint32_t status;
934   uint32_t trap_sts;
935   uint32_t hw_id;
936   uint32_t ib_sts;
937};
938
939static void
940radv_dump_sq_hw_regs(struct radv_device *device)
941{
942   struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];
943
944   fprintf(stderr, "\nHardware registers:\n");
945   if (device->physical_device->rad_info.chip_class >= GFX10) {
946      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,
947                  regs->status, ~0);
948      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,
949                  regs->trap_sts, ~0);
950      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,
951                  regs->hw_id, ~0);
952      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,
953                  regs->ib_sts, ~0);
954   } else {
955      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,
956                  regs->status, ~0);
957      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,
958                  regs->trap_sts, ~0);
959      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,
960                  regs->hw_id, ~0);
961      ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,
962                  regs->ib_sts, ~0);
963   }
964   fprintf(stderr, "\n\n");
965}
966
967void
968radv_check_trap_handler(struct radv_queue *queue)
969{
970   enum ring_type ring = radv_queue_family_to_ring(queue->vk.queue_family_index);
971   struct radv_device *device = queue->device;
972   struct radeon_winsys *ws = device->ws;
973
974   /* Wait for the context to be idle in a finite time. */
975   ws->ctx_wait_idle(queue->hw_ctx, ring, queue->vk.index_in_family);
976
977   /* Try to detect if the trap handler has been reached by the hw by
978    * looking at ttmp0 which should be non-zero if a shader exception
979    * happened.
980    */
981   if (!device->tma_ptr[4])
982      return;
983
984#if 0
985	fprintf(stderr, "tma_ptr:\n");
986	for (unsigned i = 0; i < 10; i++)
987		fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);
988#endif
989
990   radv_dump_sq_hw_regs(device);
991
992   uint32_t ttmp0 = device->tma_ptr[4];
993   uint32_t ttmp1 = device->tma_ptr[5];
994
995   /* According to the ISA docs, 3.10 Trap and Exception Registers:
996    *
997    * "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"
998    *
999    * "When the trap handler is entered, the PC of the faulting
1000    *  instruction is: (PC - PC_rewind * 4)."
1001    * */
1002   uint8_t trap_id = (ttmp1 >> 16) & 0xff;
1003   uint8_t ht = (ttmp1 >> 24) & 0x1;
1004   uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;
1005   uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);
1006
1007   fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,
1008           pc_rewind);
1009
1010   radv_dump_faulty_shader(device, pc);
1011
1012   abort();
1013}
1014