1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "common/intel_decoder.h"
25#include "intel_disasm.h"
26#include "util/macros.h"
27#include "main/macros.h" /* Needed for ROUND_DOWN_TO */
28
29#include <string.h>
30
31void
32intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx,
33                            const struct intel_device_info *devinfo,
34                            FILE *fp, enum intel_batch_decode_flags flags,
35                            const char *xml_path,
36                            struct intel_batch_decode_bo (*get_bo)(void *,
37                                                                   bool,
38                                                                   uint64_t),
39                            unsigned (*get_state_size)(void *, uint64_t,
40                                                       uint64_t),
41                            void *user_data)
42{
43   memset(ctx, 0, sizeof(*ctx));
44
45   ctx->devinfo = *devinfo;
46   ctx->get_bo = get_bo;
47   ctx->get_state_size = get_state_size;
48   ctx->user_data = user_data;
49   ctx->fp = fp;
50   ctx->flags = flags;
51   ctx->max_vbo_decoded_lines = -1; /* No limit! */
52   ctx->engine = I915_ENGINE_CLASS_RENDER;
53
54   if (xml_path == NULL)
55      ctx->spec = intel_spec_load(devinfo);
56   else
57      ctx->spec = intel_spec_load_from_path(devinfo, xml_path);
58}
59
60void
61intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx)
62{
63   intel_spec_destroy(ctx->spec);
64}
65
66#define CSI "\e["
67#define RED_COLOR    CSI "31m"
68#define BLUE_HEADER  CSI "0;44m" CSI "1;37m"
69#define GREEN_HEADER CSI "1;42m"
70#define NORMAL       CSI "0m"
71
72static void
73ctx_print_group(struct intel_batch_decode_ctx *ctx,
74                struct intel_group *group,
75                uint64_t address, const void *map)
76{
77   intel_print_group(ctx->fp, group, address, map, 0,
78                   (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0);
79}
80
/* Resolve the BO containing 'addr' via the driver callback and rebase the
 * result so that bo.map/bo.addr point exactly at 'addr'.  Returns
 * bo.map == NULL when the address cannot be resolved.
 */
static struct intel_batch_decode_bo
ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr)
{
   if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) {
      /* On Broadwell and above, we have 48-bit addresses which consume two
       * dwords.  Some packets require that these get stored in a "canonical
       * form" which means that bit 47 is sign-extended through the upper
       * bits. In order to correctly handle those aub dumps, we need to mask
       * off the top 16 bits.
       */
      addr &= (~0ull >> 16);
   }

   struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr);

   /* Strip the same canonical-form bits from the BO's reported address so
    * the offset computation below compares like with like.
    */
   if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0))
      bo.addr &= (~0ull >> 16);

   /* We may actually have an offset into the bo */
   if (bo.map != NULL) {
      assert(bo.addr <= addr);
      uint64_t offset = addr - bo.addr;
      /* Rebase map/addr/size so the returned BO starts at 'addr'. */
      bo.map += offset;
      bo.addr += offset;
      bo.size -= offset;
   }

   return bo;
}
110
111static int
112update_count(struct intel_batch_decode_ctx *ctx,
113             uint64_t address,
114             uint64_t base_address,
115             unsigned element_dwords,
116             unsigned guess)
117{
118   unsigned size = 0;
119
120   if (ctx->get_state_size)
121      size = ctx->get_state_size(ctx->user_data, address, base_address);
122
123   if (size > 0)
124      return size / (sizeof(uint32_t) * element_dwords);
125
126   /* In the absence of any information, just guess arbitrarily. */
127   return guess;
128}
129
130static void
131ctx_disassemble_program(struct intel_batch_decode_ctx *ctx,
132                        uint32_t ksp, const char *type)
133{
134   uint64_t addr = ctx->instruction_base + ksp;
135   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
136   if (!bo.map)
137      return;
138
139   fprintf(ctx->fp, "\nReferenced %s:\n", type);
140   intel_disassemble(&ctx->devinfo, bo.map, 0, ctx->fp);
141}
142
/* Heuristic to determine whether a uint32_t is probably actually a float
 * (http://stackoverflow.com/a/2953466)
 */
static bool
probably_float(uint32_t bits)
{
   const int biased_exp = (int)((bits >> 23) & 0xff) - 127;
   const uint32_t mantissa = bits & 0x007fffffu;

   /* +- 0.0 */
   if (biased_exp == -127 && mantissa == 0)
      return true;

   /* +- 1 billionth to 1 billion */
   if (biased_exp >= -30 && biased_exp <= 30)
      return true;

   /* some value with only a few binary digits */
   return (mantissa & 0x0000ffffu) == 0;
}
167
/* Hex/float dump of a buffer's contents.
 *
 * Prints up to 'read_length' bytes from 'bo' (clamped to bo.size, rounded
 * down to whole dwords), breaking the line every 'pitch' bytes or every 8
 * dwords, whichever comes first.  When 'max_lines' is non-negative the dump
 * stops after that many completed lines.  With INTEL_BATCH_DECODE_FLOATS
 * set, dwords that look like floats are printed as floats.
 */
static void
ctx_print_buffer(struct intel_batch_decode_ctx *ctx,
                 struct intel_batch_decode_bo bo,
                 uint32_t read_length,
                 uint32_t pitch,
                 int max_lines)
{
   const uint32_t *dw_end =
         bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4);

   /* line_count starts at -1 to absorb the newline emitted before the
    * very first row of output.
    */
   int column_count = 0, pitch_col_count = 0, line_count = -1;
   for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
      if (pitch_col_count * 4 == pitch || column_count == 8) {
         fprintf(ctx->fp, "\n");
         column_count = 0;
         /* Only a pitch boundary resets the pitch counter; a plain
          * 8-column wrap keeps counting toward the next pitch boundary.
          */
         if (pitch_col_count * 4 == pitch)
            pitch_col_count = 0;
         line_count++;

         if (max_lines >= 0 && line_count >= max_lines)
            break;
      }
      fprintf(ctx->fp, column_count == 0 ? "  " : " ");

      if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw))
         fprintf(ctx->fp, "  %8.2f", *(float *) dw);
      else
         fprintf(ctx->fp, "  0x%08x", *dw);

      column_count++;
      pitch_col_count++;
   }
   fprintf(ctx->fp, "\n");
}
202
203static struct intel_group *
204intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
205{
206   return intel_spec_find_instruction(ctx->spec, ctx->engine, p);
207}
208
209static void
210handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
211{
212   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
213
214   struct intel_field_iterator iter;
215   intel_field_iterator_init(&iter, inst, p, 0, false);
216
217   uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0;
218   bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0;
219
220   while (intel_field_iterator_next(&iter)) {
221      if (strcmp(iter.name, "Surface State Base Address") == 0) {
222         surface_base = iter.raw_value;
223      } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
224         dynamic_base = iter.raw_value;
225      } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
226         instruction_base = iter.raw_value;
227      } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) {
228         surface_modify = iter.raw_value;
229      } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) {
230         dynamic_modify = iter.raw_value;
231      } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) {
232         instruction_modify = iter.raw_value;
233      }
234   }
235
236   if (dynamic_modify)
237      ctx->dynamic_base = dynamic_base;
238
239   if (surface_modify)
240      ctx->surface_base = surface_base;
241
242   if (instruction_modify)
243      ctx->instruction_base = instruction_base;
244}
245
246static void
247handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx,
248                                const uint32_t *p)
249{
250   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
251
252   struct intel_field_iterator iter;
253   intel_field_iterator_init(&iter, inst, p, 0, false);
254
255   uint64_t bt_pool_base = 0;
256   bool bt_pool_enable = false;
257
258   while (intel_field_iterator_next(&iter)) {
259      if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) {
260         bt_pool_base = iter.raw_value;
261      } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) {
262         bt_pool_enable = iter.raw_value;
263      }
264   }
265
266   if (bt_pool_enable) {
267      ctx->bt_pool_base = bt_pool_base;
268   } else {
269      ctx->bt_pool_base = 0;
270   }
271}
272
/* Dump the binding table at 'offset' (relative to the binding table pool
 * base, or the surface state base when no pool is set) as a series of
 * RENDER_SURFACE_STATE structs.  A negative 'count' means "unknown": ask
 * the driver via update_count() with a fallback guess of 8 entries.
 */
static void
dump_binding_table(struct intel_batch_decode_ctx *ctx,
                   uint32_t offset, int count)
{
   struct intel_group *strct =
      intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
   if (strct == NULL) {
      fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
      return;
   }

   /* When 256B binding tables are enabled, we have to shift the offset */
   if (ctx->use_256B_binding_tables)
      offset <<= 3;

   const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base :
                                                     ctx->surface_base;

   if (count < 0) {
      count = update_count(ctx, bt_pool_base + offset,
                           bt_pool_base, 1, 8);
   }

   /* Binding table pointers must be 32-byte aligned and fit in 16 bits. */
   if (offset % 32 != 0 || offset >= UINT16_MAX) {
      fprintf(ctx->fp, "  invalid binding table pointer\n");
      return;
   }

   struct intel_batch_decode_bo bind_bo =
      ctx_get_bo(ctx, true, bt_pool_base + offset);

   if (bind_bo.map == NULL) {
      fprintf(ctx->fp, "  binding table unavailable\n");
      return;
   }

   /* Each entry is a 32-bit offset from the surface state base address;
    * zero entries are treated as unused slots.
    */
   const uint32_t *pointers = bind_bo.map;
   for (int i = 0; i < count; i++) {
      if (pointers[i] == 0)
         continue;

      uint64_t addr = ctx->surface_base + pointers[i];
      struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr);
      uint32_t size = strct->dw_length * 4;

      /* Skip entries that are misaligned or fall outside the resolved BO. */
      if (pointers[i] % 32 != 0 ||
          addr < bo.addr || addr + size >= bo.addr + bo.size) {
         fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]);
         continue;
      }

      fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]);
      ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr));
   }
}
328
/* Dump 'count' SAMPLER_STATE structs located at dynamic_base + offset.
 * 'count' must be positive; callers pass 1 when the real count is unknown.
 */
static void
dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count)
{
   struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE");
   uint64_t state_addr = ctx->dynamic_base + offset;

   assert(count > 0);

   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
   const void *state_map = bo.map;

   if (state_map == NULL) {
      fprintf(ctx->fp, "  samplers unavailable\n");
      return;
   }

   /* Sampler state pointers are 32-byte aligned. */
   if (offset % 32 != 0) {
      fprintf(ctx->fp, "  invalid sampler state pointer\n");
      return;
   }

   const unsigned sampler_state_size = strct->dw_length * 4;

   /* Refuse to walk past the end of the buffer object. */
   if (count * sampler_state_size >= bo.size) {
      fprintf(ctx->fp, "  sampler state ends after bo ends\n");
      assert(!"sampler state ends after bo ends");
      return;
   }

   for (int i = 0; i < count; i++) {
      fprintf(ctx->fp, "sampler state %d\n", i);
      ctx_print_group(ctx, strct, state_addr, state_map);
      state_addr += sampler_state_size;
      state_map += sampler_state_size;
   }
}
365
/* Decode one INTERFACE_DESCRIPTOR_DATA struct: disassemble its compute
 * kernel, then dump any referenced samplers and binding table entries.
 * Field values are recovered by parsing the iterator's formatted string
 * (iter.value) rather than raw_value.
 */
static void
handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx,
                                 struct intel_group *desc, const uint32_t *p)
{
   uint64_t ksp = 0;
   uint32_t sampler_offset = 0, sampler_count = 0;
   uint32_t binding_table_offset = 0, binding_entry_count = 0;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, desc, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = strtoll(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
         sampler_offset = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "Sampler Count") == 0) {
         sampler_count = strtol(iter.value, NULL, 10);
      } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
         binding_table_offset = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
         binding_entry_count = strtol(iter.value, NULL, 10);
      }
   }

   ctx_disassemble_program(ctx, ksp, "compute shader");
   fprintf(ctx->fp, "\n");

   if (sampler_count)
      dump_samplers(ctx, sampler_offset, sampler_count);
   if (binding_entry_count)
      dump_binding_table(ctx, binding_table_offset, binding_entry_count);
}
398
399static void
400handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx,
401                                       const uint32_t *p)
402{
403   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
404   struct intel_group *desc =
405      intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
406
407   struct intel_field_iterator iter;
408   intel_field_iterator_init(&iter, inst, p, 0, false);
409   uint32_t descriptor_offset = 0;
410   int descriptor_count = 0;
411   while (intel_field_iterator_next(&iter)) {
412      if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
413         descriptor_offset = strtol(iter.value, NULL, 16);
414      } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
415         descriptor_count =
416            strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
417      }
418   }
419
420   uint64_t desc_addr = ctx->dynamic_base + descriptor_offset;
421   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr);
422   const void *desc_map = bo.map;
423
424   if (desc_map == NULL) {
425      fprintf(ctx->fp, "  interface descriptors unavailable\n");
426      return;
427   }
428
429   for (int i = 0; i < descriptor_count; i++) {
430      fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
431
432      ctx_print_group(ctx, desc, desc_addr, desc_map);
433
434      handle_interface_descriptor_data(ctx, desc, desc_map);
435
436      desc_map += desc->dw_length;
437      desc_addr += desc->dw_length * 4;
438   }
439}
440
441static void
442handle_compute_walker(struct intel_batch_decode_ctx *ctx,
443                      const uint32_t *p)
444{
445   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
446
447   struct intel_field_iterator iter;
448   intel_field_iterator_init(&iter, inst, p, 0, false);
449   while (intel_field_iterator_next(&iter)) {
450      if (strcmp(iter.name, "Interface Descriptor") == 0) {
451         handle_interface_descriptor_data(ctx, iter.struct_desc,
452                                          &iter.p[iter.start_bit / 32]);
453      }
454   }
455}
456
/* Decode 3DSTATE_VERTEX_BUFFERS: for every VERTEX_BUFFER_STATE element,
 * gather the buffer index, pitch and extent, then hex-dump the buffer
 * contents (bounded by ctx->max_vbo_decoded_lines).
 */
static void
handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx,
                              const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE");

   struct intel_batch_decode_bo vb = {};
   uint32_t vb_size = 0;
   int index = -1;
   int pitch = -1;
   /* 'ready' flips once a size field ("Buffer Size" or "End Address") is
    * seen — the last piece needed before the buffer can be printed.
    */
   bool ready = false;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (iter.struct_desc != vbs)
         continue;

      struct intel_field_iterator vbs_iter;
      intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false);
      while (intel_field_iterator_next(&vbs_iter)) {
         if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) {
            index = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) {
            pitch = vbs_iter.raw_value;
         } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) {
            vb = ctx_get_bo(ctx, true, vbs_iter.raw_value);
         } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) {
            vb_size = vbs_iter.raw_value;
            ready = true;
         } else if (strcmp(vbs_iter.name, "End Address") == 0) {
            /* End address is inclusive; derive the size from it. */
            if (vb.map && vbs_iter.raw_value >= vb.addr)
               vb_size = (vbs_iter.raw_value + 1) - vb.addr;
            else
               vb_size = 0;
            ready = true;
         }

         if (!ready)
            continue;

         fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);

         if (vb.map == NULL) {
            fprintf(ctx->fp, "  buffer contents unavailable\n");
            continue;
         }

         if (vb.map == 0 || vb_size == 0)
            continue;

         ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines);

         /* Reset the per-buffer state for the next VERTEX_BUFFER_STATE. */
         vb.map = NULL;
         vb_size = 0;
         index = -1;
         pitch = -1;
         ready = false;
      }
   }
}
519
/* Decode 3DSTATE_INDEX_BUFFER: print the first few indices from the bound
 * index buffer, formatted according to the index size.
 */
static void
handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx,
                            const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   struct intel_batch_decode_bo ib = {};
   uint32_t ib_size = 0;
   uint32_t format = 0;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Index Format") == 0) {
         format = iter.raw_value;
      } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
         ib = ctx_get_bo(ctx, true, iter.raw_value);
      } else if (strcmp(iter.name, "Buffer Size") == 0) {
         ib_size = iter.raw_value;
      }
   }

   if (ib.map == NULL) {
      fprintf(ctx->fp, "  buffer contents unavailable\n");
      return;
   }

   /* Print at most 10 indices; format 0/1/2 selects byte/word/dword. */
   const void *m = ib.map;
   const void *ib_end = ib.map + MIN2(ib.size, ib_size);
   for (int i = 0; m < ib_end && i < 10; i++) {
      switch (format) {
      case 0:
         fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
         m += 1;
         break;
      case 1:
         fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
         m += 2;
         break;
      case 2:
         fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
         m += 4;
         break;
      }
   }

   /* Indicate there were more indices than we printed. */
   if (m < ib_end)
      fprintf(ctx->fp, "...");
   fprintf(ctx->fp, "\n");
}
570
/* Decode a packet referencing a single shader kernel (VS/GS/HS/DS and the
 * legacy fixed-function state packets) and disassemble it when enabled.
 */
static void
decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);

   uint64_t ksp = 0;
   /* Default when the packet carries no dispatch-mode field; overridden
    * below whenever one is present.  NOTE(review): the ver >= 11 cutoff
    * suggests the SIMD8 fields only exist on earlier gens and SIMD8 is
    * implicit afterwards — confirm against the genxml before relying on
    * the "Gfx8+" wording.
    */
   bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */
   bool is_enabled = true;

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
         ksp = iter.raw_value;
      } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
         is_simd8 = iter.raw_value;
      } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
         is_simd8 = strcmp(iter.value, "SIMD8") == 0;
      } else if (strcmp(iter.name, "Enable") == 0) {
         is_enabled = iter.raw_value;
      }
   }

   /* Choose a human-readable label from the packet name. */
   const char *type =
      strcmp(inst->name,   "VS_STATE") == 0 ? "vertex shader" :
      strcmp(inst->name,   "GS_STATE") == 0 ? "geometry shader" :
      strcmp(inst->name,   "SF_STATE") == 0 ? "strips and fans shader" :
      strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
      strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" :
      strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" :
      strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
      strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
      NULL;

   if (is_enabled) {
      ctx_disassemble_program(ctx, ksp, type);
      fprintf(ctx->fp, "\n");
   }
}
612
/* Disassemble the pixel-shader kernels referenced by a PS packet.
 *
 * The hardware exposes up to three kernel start pointers whose meaning
 * depends on which dispatch widths are enabled; this reorders them into
 * a fixed [SIMD8, SIMD16, SIMD32] layout before disassembling.
 */
static void
decode_ps_kern(struct intel_batch_decode_ctx *ctx,
               struct intel_group *inst, const uint32_t *p)
{
   /* Gfx4 shares a single kernel pointer across all dispatch widths. */
   bool single_ksp = ctx->devinfo.ver == 4;
   uint64_t ksp[3] = {0, 0, 0};
   bool enabled[3] = {false, false, false};

   struct intel_field_iterator iter;
   intel_field_iterator_init(&iter, inst, p, 0, false);
   while (intel_field_iterator_next(&iter)) {
      /* Fields are named "Kernel Start Pointer 0/1/2". */
      if (strncmp(iter.name, "Kernel Start Pointer ",
                  strlen("Kernel Start Pointer ")) == 0) {
         int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
         ksp[idx] = strtol(iter.value, NULL, 16);
      } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
         enabled[0] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
         enabled[1] = strcmp(iter.value, "true") == 0;
      } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
         enabled[2] = strcmp(iter.value, "true") == 0;
      }
   }

   if (single_ksp)
      ksp[1] = ksp[2] = ksp[0];

   /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
   if (enabled[0] + enabled[1] + enabled[2] == 1) {
      /* Exactly one width enabled: KSP 0 holds its kernel; move it to
       * the slot matching that width.
       */
      if (enabled[1]) {
         ksp[1] = ksp[0];
         ksp[0] = 0;
      } else if (enabled[2]) {
         ksp[2] = ksp[0];
         ksp[0] = 0;
      }
   } else {
      /* Multiple widths: swap slots 1 and 2 so 1=SIMD16, 2=SIMD32. */
      uint64_t tmp = ksp[1];
      ksp[1] = ksp[2];
      ksp[2] = tmp;
   }

   if (enabled[0])
      ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
   if (enabled[1])
      ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
   if (enabled[2])
      ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");

   if (enabled[0] || enabled[1] || enabled[2])
      fprintf(ctx->fp, "\n");
}
665
/* Decode a pixel-shader packet: look up its instruction descriptor and
 * hand off to the shared PS kernel decoder.
 */
static void
decode_ps_kernels(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *p)
{
   decode_ps_kern(ctx, intel_ctx_find_instruction(ctx, p), p);
}
673
674static void
675decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
676{
677   struct intel_group *inst =
678      intel_spec_find_instruction(ctx->spec, ctx->engine, p);
679   struct intel_group *body =
680      intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA");
681
682   uint32_t read_length[4];
683   struct intel_batch_decode_bo buffer[4];
684   memset(buffer, 0, sizeof(buffer));
685
686   struct intel_field_iterator outer;
687   intel_field_iterator_init(&outer, inst, p, 0, false);
688   int idx = 0;
689   while (intel_field_iterator_next(&outer)) {
690      if (outer.struct_desc != body)
691         continue;
692
693      struct intel_field_iterator iter;
694      intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
695                              0, false);
696      while (intel_field_iterator_next(&iter)) {
697         if (!strcmp(iter.name, "Pointer To Constant Buffer")) {
698            buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value);
699         } else if (!strcmp(iter.name, "Constant Buffer Read Length")) {
700            read_length[idx] = iter.raw_value;
701         }
702      }
703      idx++;
704   }
705
706   for (int i = 0; i < 4; i++) {
707      if (read_length[i] == 0 || buffer[i].map == NULL)
708         continue;
709
710      unsigned size = read_length[i] * 32;
711      fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
712
713      ctx_print_buffer(ctx, buffer[i], size, 0, -1);
714   }
715}
716
/* Decode 3DSTATE_CONSTANT_XS: for each 3DSTATE_CONSTANT_BODY embedded in
 * the packet, dump the up-to-four constant buffers it references.
 */
static void
decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
   struct intel_group *body =
      intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY");

   uint32_t read_length[4] = {0};
   uint64_t read_addr[4];

   struct intel_field_iterator outer;
   intel_field_iterator_init(&outer, inst, p, 0, false);
   while (intel_field_iterator_next(&outer)) {
      if (outer.struct_desc != body)
         continue;

      struct intel_field_iterator iter;
      intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32],
                              0, false);

      /* Field names carry the buffer slot, e.g. "Read Length[2]". */
      while (intel_field_iterator_next(&iter)) {
         int idx;
         if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) {
            read_length[idx] = iter.raw_value;
         } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) {
            read_addr[idx] = iter.raw_value;
         }
      }

      for (int i = 0; i < 4; i++) {
         /* A zero read length marks an unused slot; read_addr[i] is only
          * meaningful when the corresponding length is non-zero.
          */
         if (read_length[i] == 0)
            continue;

         struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]);
         if (!buffer.map) {
            fprintf(ctx->fp, "constant buffer %d unavailable\n", i);
            continue;
         }

         /* Read length is in 256-bit (32-byte) units. */
         unsigned size = read_length[i] * 32;
         fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);

         ctx_print_buffer(ctx, buffer, size, 0, -1);
      }
   }
}
763
764static void
765decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
766{
767   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
768   uint64_t read_length = 0, read_addr = 0, valid = 0;
769   struct intel_field_iterator iter;
770   intel_field_iterator_init(&iter, inst, p, 0, false);
771
772   while (intel_field_iterator_next(&iter)) {
773      if (!strcmp(iter.name, "Buffer Length")) {
774         read_length = iter.raw_value;
775      } else if (!strcmp(iter.name, "Valid")) {
776         valid = iter.raw_value;
777      } else if (!strcmp(iter.name, "Buffer Starting Address")) {
778         read_addr = iter.raw_value;
779      }
780   }
781
782   if (!valid)
783      return;
784
785   struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr);
786   if (!buffer.map) {
787      fprintf(ctx->fp, "constant buffer unavailable\n");
788      return;
789   }
790   unsigned size = (read_length + 1) * 16 * sizeof(float);
791   fprintf(ctx->fp, "constant buffer size %u\n", size);
792
793   ctx_print_buffer(ctx, buffer, size, 0, -1);
794}
795
796
797static void
798decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
799                                           const uint32_t *p)
800{
801   fprintf(ctx->fp, "VS Binding Table:\n");
802   dump_binding_table(ctx, p[1], -1);
803
804   fprintf(ctx->fp, "GS Binding Table:\n");
805   dump_binding_table(ctx, p[2], -1);
806
807   if (ctx->devinfo.ver < 6) {
808      fprintf(ctx->fp, "CLIP Binding Table:\n");
809      dump_binding_table(ctx, p[3], -1);
810      fprintf(ctx->fp, "SF Binding Table:\n");
811      dump_binding_table(ctx, p[4], -1);
812      fprintf(ctx->fp, "PS Binding Table:\n");
813      dump_binding_table(ctx, p[5], -1);
814   } else {
815      fprintf(ctx->fp, "PS Binding Table:\n");
816      dump_binding_table(ctx, p[3], -1);
817   }
818}
819
/* 3DSTATE_BINDING_TABLE_POINTERS_XS: dword 1 holds the table offset. */
static void
decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_binding_table(ctx, p[1], -1);
}
826
/* 3DSTATE_SAMPLER_STATE_POINTERS_XS: dword 1 is the sampler offset. */
static void
decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   dump_samplers(ctx, p[1], 1);
}
833
/* Gfx6 packs three sampler state pointers into dwords 1-3. */
static void
decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx,
                                           const uint32_t *p)
{
   for (int i = 1; i <= 3; i++)
      dump_samplers(ctx, p[i], 1);
}
842
/* Return true when 'str' ends with the suffix 'end'.  An empty suffix
 * matches any string.
 */
static bool
str_ends_with(const char *str, const char *end)
{
   const size_t str_len = strlen(str);
   const size_t end_len = strlen(end);

   /* Compare the lengths as size_t: the old "strlen(str) - strlen(end)"
    * wrapped around in unsigned arithmetic when the suffix was longer,
    * relying on an implementation-defined conversion to int.
    */
   if (end_len > str_len)
      return false;

   return strcmp(str + (str_len - end_len), end) == 0;
}
852
/* Dump 'count' structs of 'struct_type' located at dynamic_base +
 * state_offset.  BLEND_STATE is special-cased: its header struct is
 * printed first, then the entries are decoded as BLEND_STATE_ENTRY.
 * 'count' may be refined via the driver's get_state_size callback.
 */
static void
decode_dynamic_state(struct intel_batch_decode_ctx *ctx,
                       const char *struct_type, uint32_t state_offset,
                       int count)
{
   uint64_t state_addr = ctx->dynamic_base + state_offset;
   struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr);
   const void *state_map = bo.map;

   if (state_map == NULL) {
      fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
      return;
   }

   struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type);
   if (strcmp(struct_type, "BLEND_STATE") == 0) {
      /* Blend states are different from the others because they have a header
       * struct called BLEND_STATE which is followed by a variable number of
       * BLEND_STATE_ENTRY structs.
       */
      fprintf(ctx->fp, "%s\n", struct_type);
      ctx_print_group(ctx, state, state_addr, state_map);

      state_addr += state->dw_length * 4;
      state_map += state->dw_length * 4;

      struct_type = "BLEND_STATE_ENTRY";
      state = intel_spec_find_struct(ctx->spec, struct_type);
   }

   count = update_count(ctx, ctx->dynamic_base + state_offset,
                        ctx->dynamic_base, state->dw_length, count);

   for (int i = 0; i < count; i++) {
      fprintf(ctx->fp, "%s %d\n", struct_type, i);
      ctx_print_group(ctx, state, state_addr, state_map);

      state_addr += state->dw_length * 4;
      state_map += state->dw_length * 4;
   }
}
894
895static void
896decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx,
897                              const char *struct_type, const uint32_t *p,
898                              int count)
899{
900   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
901
902   uint32_t state_offset = 0;
903
904   struct intel_field_iterator iter;
905   intel_field_iterator_init(&iter, inst, p, 0, false);
906   while (intel_field_iterator_next(&iter)) {
907      if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) {
908         state_offset = iter.raw_value;
909         break;
910      }
911   }
912   decode_dynamic_state(ctx, struct_type, state_offset, count);
913}
914
915static void
916decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx,
917                                       const uint32_t *p)
918{
919   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
920   uint32_t state_offset = 0;
921   bool clip = false, sf = false, cc = false;
922   struct intel_field_iterator iter;
923   intel_field_iterator_init(&iter, inst, p, 0, false);
924   while (intel_field_iterator_next(&iter)) {
925      if (!strcmp(iter.name, "CLIP Viewport State Change"))
926         clip = iter.raw_value;
927      if (!strcmp(iter.name, "SF Viewport State Change"))
928         sf = iter.raw_value;
929      if (!strcmp(iter.name, "CC Viewport State Change"))
930         cc = iter.raw_value;
931      else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) {
932         state_offset = iter.raw_value;
933         decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1);
934      }
935      else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) {
936         state_offset = iter.raw_value;
937         decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1);
938      }
939      else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) {
940         state_offset = iter.raw_value;
941         decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1);
942      }
943   }
944}
945
/* 3DSTATE_VIEWPORT_STATE_POINTERS_CC: decode the 4 CC_VIEWPORT elements the
 * packet's pointer refers to in dynamic state. */
static void
decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
}
952
/* 3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP: decode the 4 SF_CLIP_VIEWPORT
 * elements the packet's pointer refers to in dynamic state. */
static void
decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx,
                                               const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
}
959
/* 3DSTATE_BLEND_STATE_POINTERS: decode the BLEND_STATE the packet points at.
 * The BLEND_STATE header plus its per-RT entries are handled downstream by
 * decode_dynamic_state(). */
static void
decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx,
                                    const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1);
}
966
967static void
968decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx,
969                                 const uint32_t *p)
970{
971   if (ctx->devinfo.ver != 6) {
972      decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
973      return;
974   }
975
976   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
977
978   uint32_t state_offset = 0;
979   bool blend_change = false, ds_change = false, cc_change = false;
980   struct intel_field_iterator iter;
981   intel_field_iterator_init(&iter, inst, p, 0, false);
982   while (intel_field_iterator_next(&iter)) {
983      if (!strcmp(iter.name, "BLEND_STATE Change"))
984         blend_change = iter.raw_value;
985      else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change"))
986         ds_change = iter.raw_value;
987      else if (!strcmp(iter.name, "Color Calc State Pointer Valid"))
988         cc_change = iter.raw_value;
989      else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) {
990         state_offset = iter.raw_value;
991         decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1);
992      }
993      else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) {
994         state_offset = iter.raw_value;
995         decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1);
996      }
997      else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) {
998         state_offset = iter.raw_value;
999         decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1);
1000      }
1001   }
1002}
1003
/* 3DSTATE_DEPTH_STENCIL_STATE_POINTERS: decode the DEPTH_STENCIL_STATE the
 * packet points at in dynamic state. */
static void
decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx,
                                 const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1);
}
1010
/* 3DSTATE_SCISSOR_STATE_POINTERS: decode the SCISSOR_RECT the packet points
 * at in dynamic state. */
static void
decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx,
                                      const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
}
1017
/* 3DSTATE_SLICE_TABLE_STATE_POINTERS: decode the SLICE_HASH_TABLE the packet
 * points at in dynamic state. */
static void
decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx,
                                          const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1);
}
1024
1025static void
1026handle_gt_mode(struct intel_batch_decode_ctx *ctx,
1027               uint32_t reg_addr, uint32_t val)
1028{
1029   struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr);
1030
1031   assert(intel_group_get_length(reg, &val) == 1);
1032
1033   struct intel_field_iterator iter;
1034   intel_field_iterator_init(&iter, reg, &val, 0, false);
1035
1036   uint32_t bt_alignment;
1037   bool bt_alignment_mask = 0;
1038
1039   while (intel_field_iterator_next(&iter)) {
1040      if (strcmp(iter.name, "Binding Table Alignment") == 0) {
1041         bt_alignment = iter.raw_value;
1042      } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) {
1043         bt_alignment_mask = iter.raw_value;
1044      }
1045   }
1046
1047   if (bt_alignment_mask)
1048      ctx->use_256B_binding_tables = bt_alignment;
1049}
1050
1051struct reg_handler {
1052   const char *name;
1053   void (*handler)(struct intel_batch_decode_ctx *ctx,
1054                   uint32_t reg_addr, uint32_t val);
1055} reg_handlers[] = {
1056   { "GT_MODE", handle_gt_mode }
1057};
1058
1059static void
1060decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
1061{
1062   struct intel_group *inst = intel_ctx_find_instruction(ctx, p);
1063   const unsigned length = intel_group_get_length(inst, p);
1064   assert(length & 1);
1065   const unsigned nr_regs = (length - 1) / 2;
1066
1067   for (unsigned i = 0; i < nr_regs; i++) {
1068      struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]);
1069      if (reg != NULL) {
1070         fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
1071                 reg->name, reg->register_offset, p[2]);
1072         ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
1073
1074         for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) {
1075            if (strcmp(reg->name, reg_handlers[i].name) == 0)
1076               reg_handlers[i].handler(ctx, p[1], p[2]);
1077         }
1078      }
1079   }
1080}
1081
1082static void
1083decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1084{
1085   struct intel_group *strct =
1086      intel_spec_find_struct(ctx->spec, "VS_STATE");
1087   if (strct == NULL) {
1088      fprintf(ctx->fp, "did not find VS_STATE info\n");
1089      return;
1090   }
1091
1092   struct intel_batch_decode_bo bind_bo =
1093      ctx_get_bo(ctx, true, offset);
1094
1095   if (bind_bo.map == NULL) {
1096      fprintf(ctx->fp, " vs state unavailable\n");
1097      return;
1098   }
1099
1100   ctx_print_group(ctx, strct, offset, bind_bo.map);
1101
1102   uint64_t ksp = 0;
1103   bool is_enabled = true;
1104   struct intel_field_iterator iter;
1105   intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false);
1106   while (intel_field_iterator_next(&iter)) {
1107      if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
1108         ksp = iter.raw_value;
1109      } else if (strcmp(iter.name, "Enable") == 0) {
1110	is_enabled = iter.raw_value;
1111      }
1112   }
1113   if (is_enabled) {
1114      ctx_disassemble_program(ctx, ksp, "vertex shader");
1115      fprintf(ctx->fp, "\n");
1116   }
1117}
1118
1119static void
1120decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1121{
1122   struct intel_group *strct =
1123      intel_spec_find_struct(ctx->spec, "GS_STATE");
1124   if (strct == NULL) {
1125      fprintf(ctx->fp, "did not find GS_STATE info\n");
1126      return;
1127   }
1128
1129   struct intel_batch_decode_bo bind_bo =
1130      ctx_get_bo(ctx, true, offset);
1131
1132   if (bind_bo.map == NULL) {
1133      fprintf(ctx->fp, " gs state unavailable\n");
1134      return;
1135   }
1136
1137   ctx_print_group(ctx, strct, offset, bind_bo.map);
1138}
1139
1140static void
1141decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1142{
1143   struct intel_group *strct =
1144      intel_spec_find_struct(ctx->spec, "CLIP_STATE");
1145   if (strct == NULL) {
1146      fprintf(ctx->fp, "did not find CLIP_STATE info\n");
1147      return;
1148   }
1149
1150   struct intel_batch_decode_bo bind_bo =
1151      ctx_get_bo(ctx, true, offset);
1152
1153   if (bind_bo.map == NULL) {
1154      fprintf(ctx->fp, " clip state unavailable\n");
1155      return;
1156   }
1157
1158   ctx_print_group(ctx, strct, offset, bind_bo.map);
1159
1160   struct intel_group *vp_strct =
1161      intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT");
1162   if (vp_strct == NULL) {
1163      fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n");
1164      return;
1165   }
1166   uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3;
1167   struct intel_batch_decode_bo vp_bo =
1168      ctx_get_bo(ctx, true, clip_vp_offset);
1169   if (vp_bo.map == NULL) {
1170      fprintf(ctx->fp, " clip vp state unavailable\n");
1171      return;
1172   }
1173   ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map);
1174}
1175
1176static void
1177decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1178{
1179   struct intel_group *strct =
1180      intel_spec_find_struct(ctx->spec, "SF_STATE");
1181   if (strct == NULL) {
1182      fprintf(ctx->fp, "did not find SF_STATE info\n");
1183      return;
1184   }
1185
1186   struct intel_batch_decode_bo bind_bo =
1187      ctx_get_bo(ctx, true, offset);
1188
1189   if (bind_bo.map == NULL) {
1190      fprintf(ctx->fp, " sf state unavailable\n");
1191      return;
1192   }
1193
1194   ctx_print_group(ctx, strct, offset, bind_bo.map);
1195
1196   struct intel_group *vp_strct =
1197      intel_spec_find_struct(ctx->spec, "SF_VIEWPORT");
1198   if (vp_strct == NULL) {
1199      fprintf(ctx->fp, "did not find SF_VIEWPORT info\n");
1200      return;
1201   }
1202
1203   uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3;
1204   struct intel_batch_decode_bo vp_bo =
1205      ctx_get_bo(ctx, true, sf_vp_offset);
1206   if (vp_bo.map == NULL) {
1207      fprintf(ctx->fp, " sf vp state unavailable\n");
1208      return;
1209   }
1210   ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map);
1211}
1212
1213static void
1214decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1215{
1216   struct intel_group *strct =
1217      intel_spec_find_struct(ctx->spec, "WM_STATE");
1218   if (strct == NULL) {
1219      fprintf(ctx->fp, "did not find WM_STATE info\n");
1220      return;
1221   }
1222
1223   struct intel_batch_decode_bo bind_bo =
1224      ctx_get_bo(ctx, true, offset);
1225
1226   if (bind_bo.map == NULL) {
1227      fprintf(ctx->fp, " wm state unavailable\n");
1228      return;
1229   }
1230
1231   ctx_print_group(ctx, strct, offset, bind_bo.map);
1232
1233   decode_ps_kern(ctx, strct, bind_bo.map);
1234}
1235
1236static void
1237decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset)
1238{
1239   struct intel_group *strct =
1240      intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE");
1241   if (strct == NULL) {
1242      fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n");
1243      return;
1244   }
1245
1246   struct intel_batch_decode_bo bind_bo =
1247      ctx_get_bo(ctx, true, offset);
1248
1249   if (bind_bo.map == NULL) {
1250      fprintf(ctx->fp, " cc state unavailable\n");
1251      return;
1252   }
1253
1254   ctx_print_group(ctx, strct, offset, bind_bo.map);
1255
1256   struct intel_group *vp_strct =
1257      intel_spec_find_struct(ctx->spec, "CC_VIEWPORT");
1258   if (vp_strct == NULL) {
1259      fprintf(ctx->fp, "did not find CC_VIEWPORT info\n");
1260      return;
1261   }
1262   uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3;
1263   struct intel_batch_decode_bo vp_bo =
1264      ctx_get_bo(ctx, true, cc_vp_offset);
1265   if (vp_bo.map == NULL) {
1266      fprintf(ctx->fp, " cc vp state unavailable\n");
1267      return;
1268   }
1269   ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map);
1270}
/* Decode 3DSTATE_PIPELINED_POINTERS (gfx4/5): dwords 1..6 point at the
 * per-stage fixed-function state tables, which are printed in pipeline
 * order below. */
static void
decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   fprintf(ctx->fp, "VS State Table:\n");
   decode_vs_state(ctx, p[1]);
   /* Bit 0 of the GS dword gates whether a GS table is present (presumably
    * the GS-enable bit — confirm against the gfx4 PRM); it is masked off
    * before being used as a pointer. */
   if (p[2] & 1) {
      fprintf(ctx->fp, "GS State Table:\n");
      decode_gs_state(ctx, p[2] & ~1);
   }
   fprintf(ctx->fp, "Clip State Table:\n");
   /* Bit 0 of the clip dword is likewise a flag, not part of the address. */
   decode_clip_state(ctx, p[3] & ~1);
   fprintf(ctx->fp, "SF State Table:\n");
   decode_sf_state(ctx, p[4]);
   fprintf(ctx->fp, "WM State Table:\n");
   decode_wm_state(ctx, p[5]);
   fprintf(ctx->fp, "CC State Table:\n");
   decode_cc_state(ctx, p[6]);
}
1289
/* 3DSTATE_CPS_POINTERS: decode the CPS_STATE (coarse pixel shading) the
 * packet points at in dynamic state. */
static void
decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p)
{
   decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1);
}
1295
1296struct custom_decoder {
1297   const char *cmd_name;
1298   void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p);
1299} custom_decoders[] = {
1300   { "STATE_BASE_ADDRESS", handle_state_base_address },
1301   { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc },
1302   { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
1303   { "COMPUTE_WALKER", handle_compute_walker },
1304   { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
1305   { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
1306   { "3DSTATE_VS", decode_single_ksp },
1307   { "3DSTATE_GS", decode_single_ksp },
1308   { "3DSTATE_DS", decode_single_ksp },
1309   { "3DSTATE_HS", decode_single_ksp },
1310   { "3DSTATE_PS", decode_ps_kernels },
1311   { "3DSTATE_WM", decode_ps_kernels },
1312   { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
1313   { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
1314   { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
1315   { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
1316   { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },
1317   { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all },
1318
1319   { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers },
1320   { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
1321   { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
1322   { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
1323   { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
1324   { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },
1325
1326   { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
1327   { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
1328   { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
1329   { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
1330   { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
1331   { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 },
1332
1333   { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers },
1334   { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
1335   { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
1336   { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
1337   { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
1338   { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers },
1339   { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
1340   { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers },
1341   { "MI_LOAD_REGISTER_IMM", decode_load_register_imm },
1342   { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers },
1343   { "3DSTATE_CPS_POINTERS", decode_cps_pointers },
1344   { "CONSTANT_BUFFER", decode_gfx4_constant_buffer },
1345};
1346
/* Print (and recursively follow) a batch buffer.
 *
 * Walks the command stream at batch/batch_addr, printing each instruction
 * and — when INTEL_BATCH_DECODE_FULL is set — its full field dump plus any
 * custom indirect-state decoding.  MI_BATCH_BUFFER_START is followed into
 * the secondary batch via recursion; from_ring tells us whether this batch
 * was submitted directly from the ring (which changes chaining semantics,
 * see below).
 */
void
intel_print_batch(struct intel_batch_decode_ctx *ctx,
                  const uint32_t *batch, uint32_t batch_size,
                  uint64_t batch_addr, bool from_ring)
{
   const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t);
   int length;
   struct intel_group *inst;
   const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : "";

   /* Guard against self-referencing or maliciously deep batch chains:
    * n_batch_buffer_start counts the current recursion depth. */
   if (ctx->n_batch_buffer_start >= 100) {
      fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n",
              (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
              (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0,
              reset_color);
      return;
   }

   ctx->n_batch_buffer_start++;

   for (p = batch; p < end; p += length) {
      inst = intel_ctx_find_instruction(ctx, p);
      length = intel_group_get_length(inst, p);
      assert(inst == NULL || length > 0);
      /* Advance at least one dword even for unknown instructions so the
       * loop always makes progress. */
      length = MAX2(1, length);

      uint64_t offset;
      if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS)
         offset = batch_addr + ((char *)p - (char *)batch);
      else
         offset = 0;

      /* Unrecognized opcode: dump the raw dwords in red and keep going. */
      if (inst == NULL) {
         fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset, p[0], reset_color);

         for (int i=1; i < length; i++) {
            fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n",
                 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "",
                 offset + i * 4, p[i], reset_color);
         }

         continue;
      }

      /* Pick header colors: batch-control instructions get green, everything
       * else blue (only when full decode and color are both enabled). */
      const char *color;
      const char *inst_name = intel_group_get_name(inst);
      if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) {
         reset_color = NORMAL;
         if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
            if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
                strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
               color = GREEN_HEADER;
            else
               color = BLUE_HEADER;
         } else {
            color = NORMAL;
         }
      } else {
         color = "";
         reset_color = "";
      }

      /* Tag the line with "(ACTHD)" if this is where the hardware's active
       * head pointer stopped — useful when decoding a hang. */
      fprintf(ctx->fp, "%s0x%08"PRIx64"%s:  0x%08x:  %-80s%s\n", color, offset,
              ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0],
              inst_name, reset_color);

      if (ctx->flags & INTEL_BATCH_DECODE_FULL) {
         ctx_print_group(ctx, inst, offset, p);

         /* Run the instruction-specific decoder (indirect state, kernels,
          * register side effects, ...) if one is registered. */
         for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) {
            if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
               custom_decoders[i].decode(ctx, p);
               break;
            }
         }
      }

      if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
         /* Pull out the target address and the chaining/predication mode. */
         uint64_t next_batch_addr = 0;
         bool ppgtt = false;
         bool second_level = false;
         bool predicate = false;
         struct intel_field_iterator iter;
         intel_field_iterator_init(&iter, inst, p, 0, false);
         while (intel_field_iterator_next(&iter)) {
            if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
               next_batch_addr = iter.raw_value;
            } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
               second_level = iter.raw_value;
            } else if (strcmp(iter.name, "Address Space Indicator") == 0) {
               ppgtt = iter.raw_value;
            } else if (strcmp(iter.name, "Predication Enable") == 0) {
               predicate = iter.raw_value;
            }
         }

         /* A predicated start may not execute at all, so don't follow it. */
         if (!predicate) {
            struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr);

            if (next_batch.map == NULL) {
               fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n",
                       next_batch_addr);
            } else {
               intel_print_batch(ctx, next_batch.map, next_batch.size,
                                 next_batch.addr, false);
            }
            if (second_level) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
                * like a subroutine call.  Commands that come afterwards get
                * processed once the 2nd level batch buffer returns with
                * MI_BATCH_BUFFER_END.
                */
               continue;
            } else if (!from_ring) {
               /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
                * like a goto.  Nothing after it will ever get processed.  In
                * order to prevent the recursion from growing, we just reset the
                * loop and continue;
                */
               break;
            }
         }
      } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
         break;
      }
   }

   ctx->n_batch_buffer_start--;
}
1478