1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "common/intel_decoder.h" 25#include "intel_disasm.h" 26#include "util/macros.h" 27#include "main/macros.h" /* Needed for ROUND_DOWN_TO */ 28 29#include <string.h> 30 31void 32intel_batch_decode_ctx_init(struct intel_batch_decode_ctx *ctx, 33 const struct intel_device_info *devinfo, 34 FILE *fp, enum intel_batch_decode_flags flags, 35 const char *xml_path, 36 struct intel_batch_decode_bo (*get_bo)(void *, 37 bool, 38 uint64_t), 39 unsigned (*get_state_size)(void *, uint64_t, 40 uint64_t), 41 void *user_data) 42{ 43 memset(ctx, 0, sizeof(*ctx)); 44 45 ctx->devinfo = *devinfo; 46 ctx->get_bo = get_bo; 47 ctx->get_state_size = get_state_size; 48 ctx->user_data = user_data; 49 ctx->fp = fp; 50 ctx->flags = flags; 51 ctx->max_vbo_decoded_lines = -1; /* No limit! */ 52 ctx->engine = I915_ENGINE_CLASS_RENDER; 53 54 if (xml_path == NULL) 55 ctx->spec = intel_spec_load(devinfo); 56 else 57 ctx->spec = intel_spec_load_from_path(devinfo, xml_path); 58} 59 60void 61intel_batch_decode_ctx_finish(struct intel_batch_decode_ctx *ctx) 62{ 63 intel_spec_destroy(ctx->spec); 64} 65 66#define CSI "\e[" 67#define RED_COLOR CSI "31m" 68#define BLUE_HEADER CSI "0;44m" CSI "1;37m" 69#define GREEN_HEADER CSI "1;42m" 70#define NORMAL CSI "0m" 71 72static void 73ctx_print_group(struct intel_batch_decode_ctx *ctx, 74 struct intel_group *group, 75 uint64_t address, const void *map) 76{ 77 intel_print_group(ctx->fp, group, address, map, 0, 78 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) != 0); 79} 80 81static struct intel_batch_decode_bo 82ctx_get_bo(struct intel_batch_decode_ctx *ctx, bool ppgtt, uint64_t addr) 83{ 84 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) { 85 /* On Broadwell and above, we have 48-bit addresses which consume two 86 * dwords. Some packets require that these get stored in a "canonical 87 * form" which means that bit 47 is sign-extended through the upper 88 * bits. In order to correctly handle those aub dumps, we need to mask 89 * off the top 16 bits. 90 */ 91 addr &= (~0ull >> 16); 92 } 93 94 struct intel_batch_decode_bo bo = ctx->get_bo(ctx->user_data, ppgtt, addr); 95 96 if (intel_spec_get_gen(ctx->spec) >= intel_make_gen(8,0)) 97 bo.addr &= (~0ull >> 16); 98 99 /* We may actually have an offset into the bo */ 100 if (bo.map != NULL) { 101 assert(bo.addr <= addr); 102 uint64_t offset = addr - bo.addr; 103 bo.map += offset; 104 bo.addr += offset; 105 bo.size -= offset; 106 } 107 108 return bo; 109} 110 111static int 112update_count(struct intel_batch_decode_ctx *ctx, 113 uint64_t address, 114 uint64_t base_address, 115 unsigned element_dwords, 116 unsigned guess) 117{ 118 unsigned size = 0; 119 120 if (ctx->get_state_size) 121 size = ctx->get_state_size(ctx->user_data, address, base_address); 122 123 if (size > 0) 124 return size / (sizeof(uint32_t) * element_dwords); 125 126 /* In the absence of any information, just guess arbitrarily. */ 127 return guess; 128} 129 130static void 131ctx_disassemble_program(struct intel_batch_decode_ctx *ctx, 132 uint32_t ksp, const char *type) 133{ 134 uint64_t addr = ctx->instruction_base + ksp; 135 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr); 136 if (!bo.map) 137 return; 138 139 fprintf(ctx->fp, "\nReferenced %s:\n", type); 140 intel_disassemble(&ctx->devinfo, bo.map, 0, ctx->fp); 141} 142 143/* Heuristic to determine whether a uint32_t is probably actually a float 144 * (http://stackoverflow.com/a/2953466) 145 */ 146 147static bool 148probably_float(uint32_t bits) 149{ 150 int exp = ((bits & 0x7f800000U) >> 23) - 127; 151 uint32_t mant = bits & 0x007fffff; 152 153 /* +- 0.0 */ 154 if (exp == -127 && mant == 0) 155 return true; 156 157 /* +- 1 billionth to 1 billion */ 158 if (-30 <= exp && exp <= 30) 159 return true; 160 161 /* some value with only a few binary digits */ 162 if ((mant & 0x0000ffff) == 0) 163 return true; 164 165 return false; 166} 167 168static void 169ctx_print_buffer(struct intel_batch_decode_ctx *ctx, 170 struct intel_batch_decode_bo bo, 171 uint32_t read_length, 172 uint32_t pitch, 173 int max_lines) 174{ 175 const uint32_t *dw_end = 176 bo.map + ROUND_DOWN_TO(MIN2(bo.size, read_length), 4); 177 178 int column_count = 0, pitch_col_count = 0, line_count = -1; 179 for (const uint32_t *dw = bo.map; dw < dw_end; dw++) { 180 if (pitch_col_count * 4 == pitch || column_count == 8) { 181 fprintf(ctx->fp, "\n"); 182 column_count = 0; 183 if (pitch_col_count * 4 == pitch) 184 pitch_col_count = 0; 185 line_count++; 186 187 if (max_lines >= 0 && line_count >= max_lines) 188 break; 189 } 190 fprintf(ctx->fp, column_count == 0 ? " " : " "); 191 192 if ((ctx->flags & INTEL_BATCH_DECODE_FLOATS) && probably_float(*dw)) 193 fprintf(ctx->fp, " %8.2f", *(float *) dw); 194 else 195 fprintf(ctx->fp, " 0x%08x", *dw); 196 197 column_count++; 198 pitch_col_count++; 199 } 200 fprintf(ctx->fp, "\n"); 201} 202 203static struct intel_group * 204intel_ctx_find_instruction(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 205{ 206 return intel_spec_find_instruction(ctx->spec, ctx->engine, p); 207} 208 209static void 210handle_state_base_address(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 211{ 212 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 213 214 struct intel_field_iterator iter; 215 intel_field_iterator_init(&iter, inst, p, 0, false); 216 217 uint64_t surface_base = 0, dynamic_base = 0, instruction_base = 0; 218 bool surface_modify = 0, dynamic_modify = 0, instruction_modify = 0; 219 220 while (intel_field_iterator_next(&iter)) { 221 if (strcmp(iter.name, "Surface State Base Address") == 0) { 222 surface_base = iter.raw_value; 223 } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) { 224 dynamic_base = iter.raw_value; 225 } else if (strcmp(iter.name, "Instruction Base Address") == 0) { 226 instruction_base = iter.raw_value; 227 } else if (strcmp(iter.name, "Surface State Base Address Modify Enable") == 0) { 228 surface_modify = iter.raw_value; 229 } else if (strcmp(iter.name, "Dynamic State Base Address Modify Enable") == 0) { 230 dynamic_modify = iter.raw_value; 231 } else if (strcmp(iter.name, "Instruction Base Address Modify Enable") == 0) { 232 instruction_modify = iter.raw_value; 233 } 234 } 235 236 if (dynamic_modify) 237 ctx->dynamic_base = dynamic_base; 238 239 if (surface_modify) 240 ctx->surface_base = surface_base; 241 242 if (instruction_modify) 243 ctx->instruction_base = instruction_base; 244} 245 246static void 247handle_binding_table_pool_alloc(struct intel_batch_decode_ctx *ctx, 248 const uint32_t *p) 249{ 250 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 251 252 struct intel_field_iterator iter; 253 intel_field_iterator_init(&iter, inst, p, 0, false); 254 255 uint64_t bt_pool_base = 0; 256 bool bt_pool_enable = false; 257 258 while (intel_field_iterator_next(&iter)) { 259 if (strcmp(iter.name, "Binding Table Pool Base Address") == 0) { 260 bt_pool_base = iter.raw_value; 261 } else if (strcmp(iter.name, "Binding Table Pool Enable") == 0) { 262 bt_pool_enable = iter.raw_value; 263 } 264 } 265 266 if (bt_pool_enable) { 267 ctx->bt_pool_base = bt_pool_base; 268 } else { 269 ctx->bt_pool_base = 0; 270 } 271} 272 273static void 274dump_binding_table(struct intel_batch_decode_ctx *ctx, 275 uint32_t offset, int count) 276{ 277 struct intel_group *strct = 278 intel_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE"); 279 if (strct == NULL) { 280 fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n"); 281 return; 282 } 283 284 /* When 256B binding tables are enabled, we have to shift the offset */ 285 if (ctx->use_256B_binding_tables) 286 offset <<= 3; 287 288 const uint64_t bt_pool_base = ctx->bt_pool_base ? ctx->bt_pool_base : 289 ctx->surface_base; 290 291 if (count < 0) { 292 count = update_count(ctx, bt_pool_base + offset, 293 bt_pool_base, 1, 8); 294 } 295 296 if (offset % 32 != 0 || offset >= UINT16_MAX) { 297 fprintf(ctx->fp, " invalid binding table pointer\n"); 298 return; 299 } 300 301 struct intel_batch_decode_bo bind_bo = 302 ctx_get_bo(ctx, true, bt_pool_base + offset); 303 304 if (bind_bo.map == NULL) { 305 fprintf(ctx->fp, " binding table unavailable\n"); 306 return; 307 } 308 309 const uint32_t *pointers = bind_bo.map; 310 for (int i = 0; i < count; i++) { 311 if (pointers[i] == 0) 312 continue; 313 314 uint64_t addr = ctx->surface_base + pointers[i]; 315 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, addr); 316 uint32_t size = strct->dw_length * 4; 317 318 if (pointers[i] % 32 != 0 || 319 addr < bo.addr || addr + size >= bo.addr + bo.size) { 320 fprintf(ctx->fp, "pointer %u: 0x%08x <not valid>\n", i, pointers[i]); 321 continue; 322 } 323 324 fprintf(ctx->fp, "pointer %u: 0x%08x\n", i, pointers[i]); 325 ctx_print_group(ctx, strct, addr, bo.map + (addr - bo.addr)); 326 } 327} 328 329static void 330dump_samplers(struct intel_batch_decode_ctx *ctx, uint32_t offset, int count) 331{ 332 struct intel_group *strct = intel_spec_find_struct(ctx->spec, "SAMPLER_STATE"); 333 uint64_t state_addr = ctx->dynamic_base + offset; 334 335 assert(count > 0); 336 337 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr); 338 const void *state_map = bo.map; 339 340 if (state_map == NULL) { 341 fprintf(ctx->fp, " samplers unavailable\n"); 342 return; 343 } 344 345 if (offset % 32 != 0) { 346 fprintf(ctx->fp, " invalid sampler state pointer\n"); 347 return; 348 } 349 350 const unsigned sampler_state_size = strct->dw_length * 4; 351 352 if (count * sampler_state_size >= bo.size) { 353 fprintf(ctx->fp, " sampler state ends after bo ends\n"); 354 assert(!"sampler state ends after bo ends"); 355 return; 356 } 357 358 for (int i = 0; i < count; i++) { 359 fprintf(ctx->fp, "sampler state %d\n", i); 360 ctx_print_group(ctx, strct, state_addr, state_map); 361 state_addr += sampler_state_size; 362 state_map += sampler_state_size; 363 } 364} 365 366static void 367handle_interface_descriptor_data(struct intel_batch_decode_ctx *ctx, 368 struct intel_group *desc, const uint32_t *p) 369{ 370 uint64_t ksp = 0; 371 uint32_t sampler_offset = 0, sampler_count = 0; 372 uint32_t binding_table_offset = 0, binding_entry_count = 0; 373 374 struct intel_field_iterator iter; 375 intel_field_iterator_init(&iter, desc, p, 0, false); 376 while (intel_field_iterator_next(&iter)) { 377 if (strcmp(iter.name, "Kernel Start Pointer") == 0) { 378 ksp = strtoll(iter.value, NULL, 16); 379 } else if (strcmp(iter.name, "Sampler State Pointer") == 0) { 380 sampler_offset = strtol(iter.value, NULL, 16); 381 } else if (strcmp(iter.name, "Sampler Count") == 0) { 382 sampler_count = strtol(iter.value, NULL, 10); 383 } else if (strcmp(iter.name, "Binding Table Pointer") == 0) { 384 binding_table_offset = strtol(iter.value, NULL, 16); 385 } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) { 386 binding_entry_count = strtol(iter.value, NULL, 10); 387 } 388 } 389 390 ctx_disassemble_program(ctx, ksp, "compute shader"); 391 fprintf(ctx->fp, "\n"); 392 393 if (sampler_count) 394 dump_samplers(ctx, sampler_offset, sampler_count); 395 if (binding_entry_count) 396 dump_binding_table(ctx, binding_table_offset, binding_entry_count); 397} 398 399static void 400handle_media_interface_descriptor_load(struct intel_batch_decode_ctx *ctx, 401 const uint32_t *p) 402{ 403 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 404 struct intel_group *desc = 405 intel_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA"); 406 407 struct intel_field_iterator iter; 408 intel_field_iterator_init(&iter, inst, p, 0, false); 409 uint32_t descriptor_offset = 0; 410 int descriptor_count = 0; 411 while (intel_field_iterator_next(&iter)) { 412 if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) { 413 descriptor_offset = strtol(iter.value, NULL, 16); 414 } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) { 415 descriptor_count = 416 strtol(iter.value, NULL, 16) / (desc->dw_length * 4); 417 } 418 } 419 420 uint64_t desc_addr = ctx->dynamic_base + descriptor_offset; 421 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, desc_addr); 422 const void *desc_map = bo.map; 423 424 if (desc_map == NULL) { 425 fprintf(ctx->fp, " interface descriptors unavailable\n"); 426 return; 427 } 428 429 for (int i = 0; i < descriptor_count; i++) { 430 fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset); 431 432 ctx_print_group(ctx, desc, desc_addr, desc_map); 433 434 handle_interface_descriptor_data(ctx, desc, desc_map); 435 436 desc_map += desc->dw_length; 437 desc_addr += desc->dw_length * 4; 438 } 439} 440 441static void 442handle_compute_walker(struct intel_batch_decode_ctx *ctx, 443 const uint32_t *p) 444{ 445 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 446 447 struct intel_field_iterator iter; 448 intel_field_iterator_init(&iter, inst, p, 0, false); 449 while (intel_field_iterator_next(&iter)) { 450 if (strcmp(iter.name, "Interface Descriptor") == 0) { 451 handle_interface_descriptor_data(ctx, iter.struct_desc, 452 &iter.p[iter.start_bit / 32]); 453 } 454 } 455} 456 457static void 458handle_3dstate_vertex_buffers(struct intel_batch_decode_ctx *ctx, 459 const uint32_t *p) 460{ 461 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 462 struct intel_group *vbs = intel_spec_find_struct(ctx->spec, "VERTEX_BUFFER_STATE"); 463 464 struct intel_batch_decode_bo vb = {}; 465 uint32_t vb_size = 0; 466 int index = -1; 467 int pitch = -1; 468 bool ready = false; 469 470 struct intel_field_iterator iter; 471 intel_field_iterator_init(&iter, inst, p, 0, false); 472 while (intel_field_iterator_next(&iter)) { 473 if (iter.struct_desc != vbs) 474 continue; 475 476 struct intel_field_iterator vbs_iter; 477 intel_field_iterator_init(&vbs_iter, vbs, &iter.p[iter.start_bit / 32], 0, false); 478 while (intel_field_iterator_next(&vbs_iter)) { 479 if (strcmp(vbs_iter.name, "Vertex Buffer Index") == 0) { 480 index = vbs_iter.raw_value; 481 } else if (strcmp(vbs_iter.name, "Buffer Pitch") == 0) { 482 pitch = vbs_iter.raw_value; 483 } else if (strcmp(vbs_iter.name, "Buffer Starting Address") == 0) { 484 vb = ctx_get_bo(ctx, true, vbs_iter.raw_value); 485 } else if (strcmp(vbs_iter.name, "Buffer Size") == 0) { 486 vb_size = vbs_iter.raw_value; 487 ready = true; 488 } else if (strcmp(vbs_iter.name, "End Address") == 0) { 489 if (vb.map && vbs_iter.raw_value >= vb.addr) 490 vb_size = (vbs_iter.raw_value + 1) - vb.addr; 491 else 492 vb_size = 0; 493 ready = true; 494 } 495 496 if (!ready) 497 continue; 498 499 fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size); 500 501 if (vb.map == NULL) { 502 fprintf(ctx->fp, " buffer contents unavailable\n"); 503 continue; 504 } 505 506 if (vb.map == 0 || vb_size == 0) 507 continue; 508 509 ctx_print_buffer(ctx, vb, vb_size, pitch, ctx->max_vbo_decoded_lines); 510 511 vb.map = NULL; 512 vb_size = 0; 513 index = -1; 514 pitch = -1; 515 ready = false; 516 } 517 } 518} 519 520static void 521handle_3dstate_index_buffer(struct intel_batch_decode_ctx *ctx, 522 const uint32_t *p) 523{ 524 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 525 526 struct intel_batch_decode_bo ib = {}; 527 uint32_t ib_size = 0; 528 uint32_t format = 0; 529 530 struct intel_field_iterator iter; 531 intel_field_iterator_init(&iter, inst, p, 0, false); 532 while (intel_field_iterator_next(&iter)) { 533 if (strcmp(iter.name, "Index Format") == 0) { 534 format = iter.raw_value; 535 } else if (strcmp(iter.name, "Buffer Starting Address") == 0) { 536 ib = ctx_get_bo(ctx, true, iter.raw_value); 537 } else if (strcmp(iter.name, "Buffer Size") == 0) { 538 ib_size = iter.raw_value; 539 } 540 } 541 542 if (ib.map == NULL) { 543 fprintf(ctx->fp, " buffer contents unavailable\n"); 544 return; 545 } 546 547 const void *m = ib.map; 548 const void *ib_end = ib.map + MIN2(ib.size, ib_size); 549 for (int i = 0; m < ib_end && i < 10; i++) { 550 switch (format) { 551 case 0: 552 fprintf(ctx->fp, "%3d ", *(uint8_t *)m); 553 m += 1; 554 break; 555 case 1: 556 fprintf(ctx->fp, "%3d ", *(uint16_t *)m); 557 m += 2; 558 break; 559 case 2: 560 fprintf(ctx->fp, "%3d ", *(uint32_t *)m); 561 m += 4; 562 break; 563 } 564 } 565 566 if (m < ib_end) 567 fprintf(ctx->fp, "..."); 568 fprintf(ctx->fp, "\n"); 569} 570 571static void 572decode_single_ksp(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 573{ 574 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 575 576 uint64_t ksp = 0; 577 bool is_simd8 = ctx->devinfo.ver >= 11; /* vertex shaders on Gfx8+ only */ 578 bool is_enabled = true; 579 580 struct intel_field_iterator iter; 581 intel_field_iterator_init(&iter, inst, p, 0, false); 582 while (intel_field_iterator_next(&iter)) { 583 if (strcmp(iter.name, "Kernel Start Pointer") == 0) { 584 ksp = iter.raw_value; 585 } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) { 586 is_simd8 = iter.raw_value; 587 } else if (strcmp(iter.name, "Dispatch Mode") == 0) { 588 is_simd8 = strcmp(iter.value, "SIMD8") == 0; 589 } else if (strcmp(iter.name, "Dispatch Enable") == 0) { 590 is_simd8 = strcmp(iter.value, "SIMD8") == 0; 591 } else if (strcmp(iter.name, "Enable") == 0) { 592 is_enabled = iter.raw_value; 593 } 594 } 595 596 const char *type = 597 strcmp(inst->name, "VS_STATE") == 0 ? "vertex shader" : 598 strcmp(inst->name, "GS_STATE") == 0 ? "geometry shader" : 599 strcmp(inst->name, "SF_STATE") == 0 ? "strips and fans shader" : 600 strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" : 601 strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation evaluation shader" : 602 strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation control shader" : 603 strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") : 604 strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") : 605 NULL; 606 607 if (is_enabled) { 608 ctx_disassemble_program(ctx, ksp, type); 609 fprintf(ctx->fp, "\n"); 610 } 611} 612 613static void 614decode_ps_kern(struct intel_batch_decode_ctx *ctx, 615 struct intel_group *inst, const uint32_t *p) 616{ 617 bool single_ksp = ctx->devinfo.ver == 4; 618 uint64_t ksp[3] = {0, 0, 0}; 619 bool enabled[3] = {false, false, false}; 620 621 struct intel_field_iterator iter; 622 intel_field_iterator_init(&iter, inst, p, 0, false); 623 while (intel_field_iterator_next(&iter)) { 624 if (strncmp(iter.name, "Kernel Start Pointer ", 625 strlen("Kernel Start Pointer ")) == 0) { 626 int idx = iter.name[strlen("Kernel Start Pointer ")] - '0'; 627 ksp[idx] = strtol(iter.value, NULL, 16); 628 } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) { 629 enabled[0] = strcmp(iter.value, "true") == 0; 630 } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) { 631 enabled[1] = strcmp(iter.value, "true") == 0; 632 } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) { 633 enabled[2] = strcmp(iter.value, "true") == 0; 634 } 635 } 636 637 if (single_ksp) 638 ksp[1] = ksp[2] = ksp[0]; 639 640 /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */ 641 if (enabled[0] + enabled[1] + enabled[2] == 1) { 642 if (enabled[1]) { 643 ksp[1] = ksp[0]; 644 ksp[0] = 0; 645 } else if (enabled[2]) { 646 ksp[2] = ksp[0]; 647 ksp[0] = 0; 648 } 649 } else { 650 uint64_t tmp = ksp[1]; 651 ksp[1] = ksp[2]; 652 ksp[2] = tmp; 653 } 654 655 if (enabled[0]) 656 ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader"); 657 if (enabled[1]) 658 ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader"); 659 if (enabled[2]) 660 ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader"); 661 662 if (enabled[0] || enabled[1] || enabled[2]) 663 fprintf(ctx->fp, "\n"); 664} 665 666static void 667decode_ps_kernels(struct intel_batch_decode_ctx *ctx, 668 const uint32_t *p) 669{ 670 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 671 decode_ps_kern(ctx, inst, p); 672} 673 674static void 675decode_3dstate_constant_all(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 676{ 677 struct intel_group *inst = 678 intel_spec_find_instruction(ctx->spec, ctx->engine, p); 679 struct intel_group *body = 680 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_ALL_DATA"); 681 682 uint32_t read_length[4]; 683 struct intel_batch_decode_bo buffer[4]; 684 memset(buffer, 0, sizeof(buffer)); 685 686 struct intel_field_iterator outer; 687 intel_field_iterator_init(&outer, inst, p, 0, false); 688 int idx = 0; 689 while (intel_field_iterator_next(&outer)) { 690 if (outer.struct_desc != body) 691 continue; 692 693 struct intel_field_iterator iter; 694 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32], 695 0, false); 696 while (intel_field_iterator_next(&iter)) { 697 if (!strcmp(iter.name, "Pointer To Constant Buffer")) { 698 buffer[idx] = ctx_get_bo(ctx, true, iter.raw_value); 699 } else if (!strcmp(iter.name, "Constant Buffer Read Length")) { 700 read_length[idx] = iter.raw_value; 701 } 702 } 703 idx++; 704 } 705 706 for (int i = 0; i < 4; i++) { 707 if (read_length[i] == 0 || buffer[i].map == NULL) 708 continue; 709 710 unsigned size = read_length[i] * 32; 711 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size); 712 713 ctx_print_buffer(ctx, buffer[i], size, 0, -1); 714 } 715} 716 717static void 718decode_3dstate_constant(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 719{ 720 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 721 struct intel_group *body = 722 intel_spec_find_struct(ctx->spec, "3DSTATE_CONSTANT_BODY"); 723 724 uint32_t read_length[4] = {0}; 725 uint64_t read_addr[4]; 726 727 struct intel_field_iterator outer; 728 intel_field_iterator_init(&outer, inst, p, 0, false); 729 while (intel_field_iterator_next(&outer)) { 730 if (outer.struct_desc != body) 731 continue; 732 733 struct intel_field_iterator iter; 734 intel_field_iterator_init(&iter, body, &outer.p[outer.start_bit / 32], 735 0, false); 736 737 while (intel_field_iterator_next(&iter)) { 738 int idx; 739 if (sscanf(iter.name, "Read Length[%d]", &idx) == 1) { 740 read_length[idx] = iter.raw_value; 741 } else if (sscanf(iter.name, "Buffer[%d]", &idx) == 1) { 742 read_addr[idx] = iter.raw_value; 743 } 744 } 745 746 for (int i = 0; i < 4; i++) { 747 if (read_length[i] == 0) 748 continue; 749 750 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr[i]); 751 if (!buffer.map) { 752 fprintf(ctx->fp, "constant buffer %d unavailable\n", i); 753 continue; 754 } 755 756 unsigned size = read_length[i] * 32; 757 fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size); 758 759 ctx_print_buffer(ctx, buffer, size, 0, -1); 760 } 761 } 762} 763 764static void 765decode_gfx4_constant_buffer(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 766{ 767 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 768 uint64_t read_length = 0, read_addr = 0, valid = 0; 769 struct intel_field_iterator iter; 770 intel_field_iterator_init(&iter, inst, p, 0, false); 771 772 while (intel_field_iterator_next(&iter)) { 773 if (!strcmp(iter.name, "Buffer Length")) { 774 read_length = iter.raw_value; 775 } else if (!strcmp(iter.name, "Valid")) { 776 valid = iter.raw_value; 777 } else if (!strcmp(iter.name, "Buffer Starting Address")) { 778 read_addr = iter.raw_value; 779 } 780 } 781 782 if (!valid) 783 return; 784 785 struct intel_batch_decode_bo buffer = ctx_get_bo(ctx, true, read_addr); 786 if (!buffer.map) { 787 fprintf(ctx->fp, "constant buffer unavailable\n"); 788 return; 789 } 790 unsigned size = (read_length + 1) * 16 * sizeof(float); 791 fprintf(ctx->fp, "constant buffer size %u\n", size); 792 793 ctx_print_buffer(ctx, buffer, size, 0, -1); 794} 795 796 797static void 798decode_gfx4_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx, 799 const uint32_t *p) 800{ 801 fprintf(ctx->fp, "VS Binding Table:\n"); 802 dump_binding_table(ctx, p[1], -1); 803 804 fprintf(ctx->fp, "GS Binding Table:\n"); 805 dump_binding_table(ctx, p[2], -1); 806 807 if (ctx->devinfo.ver < 6) { 808 fprintf(ctx->fp, "CLIP Binding Table:\n"); 809 dump_binding_table(ctx, p[3], -1); 810 fprintf(ctx->fp, "SF Binding Table:\n"); 811 dump_binding_table(ctx, p[4], -1); 812 fprintf(ctx->fp, "PS Binding Table:\n"); 813 dump_binding_table(ctx, p[5], -1); 814 } else { 815 fprintf(ctx->fp, "PS Binding Table:\n"); 816 dump_binding_table(ctx, p[3], -1); 817 } 818} 819 820static void 821decode_3dstate_binding_table_pointers(struct intel_batch_decode_ctx *ctx, 822 const uint32_t *p) 823{ 824 dump_binding_table(ctx, p[1], -1); 825} 826 827static void 828decode_3dstate_sampler_state_pointers(struct intel_batch_decode_ctx *ctx, 829 const uint32_t *p) 830{ 831 dump_samplers(ctx, p[1], 1); 832} 833 834static void 835decode_3dstate_sampler_state_pointers_gfx6(struct intel_batch_decode_ctx *ctx, 836 const uint32_t *p) 837{ 838 dump_samplers(ctx, p[1], 1); 839 dump_samplers(ctx, p[2], 1); 840 dump_samplers(ctx, p[3], 1); 841} 842 843static bool 844str_ends_with(const char *str, const char *end) 845{ 846 int offset = strlen(str) - strlen(end); 847 if (offset < 0) 848 return false; 849 850 return strcmp(str + offset, end) == 0; 851} 852 853static void 854decode_dynamic_state(struct intel_batch_decode_ctx *ctx, 855 const char *struct_type, uint32_t state_offset, 856 int count) 857{ 858 uint64_t state_addr = ctx->dynamic_base + state_offset; 859 struct intel_batch_decode_bo bo = ctx_get_bo(ctx, true, state_addr); 860 const void *state_map = bo.map; 861 862 if (state_map == NULL) { 863 fprintf(ctx->fp, " dynamic %s state unavailable\n", struct_type); 864 return; 865 } 866 867 struct intel_group *state = intel_spec_find_struct(ctx->spec, struct_type); 868 if (strcmp(struct_type, "BLEND_STATE") == 0) { 869 /* Blend states are different from the others because they have a header 870 * struct called BLEND_STATE which is followed by a variable number of 871 * BLEND_STATE_ENTRY structs. 872 */ 873 fprintf(ctx->fp, "%s\n", struct_type); 874 ctx_print_group(ctx, state, state_addr, state_map); 875 876 state_addr += state->dw_length * 4; 877 state_map += state->dw_length * 4; 878 879 struct_type = "BLEND_STATE_ENTRY"; 880 state = intel_spec_find_struct(ctx->spec, struct_type); 881 } 882 883 count = update_count(ctx, ctx->dynamic_base + state_offset, 884 ctx->dynamic_base, state->dw_length, count); 885 886 for (int i = 0; i < count; i++) { 887 fprintf(ctx->fp, "%s %d\n", struct_type, i); 888 ctx_print_group(ctx, state, state_addr, state_map); 889 890 state_addr += state->dw_length * 4; 891 state_map += state->dw_length * 4; 892 } 893} 894 895static void 896decode_dynamic_state_pointers(struct intel_batch_decode_ctx *ctx, 897 const char *struct_type, const uint32_t *p, 898 int count) 899{ 900 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 901 902 uint32_t state_offset = 0; 903 904 struct intel_field_iterator iter; 905 intel_field_iterator_init(&iter, inst, p, 0, false); 906 while (intel_field_iterator_next(&iter)) { 907 if (str_ends_with(iter.name, "Pointer") || !strncmp(iter.name, "Pointer", 7)) { 908 state_offset = iter.raw_value; 909 break; 910 } 911 } 912 decode_dynamic_state(ctx, struct_type, state_offset, count); 913} 914 915static void 916decode_3dstate_viewport_state_pointers(struct intel_batch_decode_ctx *ctx, 917 const uint32_t *p) 918{ 919 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 920 uint32_t state_offset = 0; 921 bool clip = false, sf = false, cc = false; 922 struct intel_field_iterator iter; 923 intel_field_iterator_init(&iter, inst, p, 0, false); 924 while (intel_field_iterator_next(&iter)) { 925 if (!strcmp(iter.name, "CLIP Viewport State Change")) 926 clip = iter.raw_value; 927 if (!strcmp(iter.name, "SF Viewport State Change")) 928 sf = iter.raw_value; 929 if (!strcmp(iter.name, "CC Viewport State Change")) 930 cc = iter.raw_value; 931 else if (!strcmp(iter.name, "Pointer to CLIP_VIEWPORT") && clip) { 932 state_offset = iter.raw_value; 933 decode_dynamic_state(ctx, "CLIP_VIEWPORT", state_offset, 1); 934 } 935 else if (!strcmp(iter.name, "Pointer to SF_VIEWPORT") && sf) { 936 state_offset = iter.raw_value; 937 decode_dynamic_state(ctx, "SF_VIEWPORT", state_offset, 1); 938 } 939 else if (!strcmp(iter.name, "Pointer to CC_VIEWPORT") && cc) { 940 state_offset = iter.raw_value; 941 decode_dynamic_state(ctx, "CC_VIEWPORT", state_offset, 1); 942 } 943 } 944} 945 946static void 947decode_3dstate_viewport_state_pointers_cc(struct intel_batch_decode_ctx *ctx, 948 const uint32_t *p) 949{ 950 decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4); 951} 952 953static void 954decode_3dstate_viewport_state_pointers_sf_clip(struct intel_batch_decode_ctx *ctx, 955 const uint32_t *p) 956{ 957 decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4); 958} 959 960static void 961decode_3dstate_blend_state_pointers(struct intel_batch_decode_ctx *ctx, 962 const uint32_t *p) 963{ 964 decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1); 965} 966 967static void 968decode_3dstate_cc_state_pointers(struct intel_batch_decode_ctx *ctx, 969 const uint32_t *p) 970{ 971 if (ctx->devinfo.ver != 6) { 972 decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1); 973 return; 974 } 975 976 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 977 978 uint32_t state_offset = 0; 979 bool blend_change = false, ds_change = false, cc_change = false; 980 struct intel_field_iterator iter; 981 intel_field_iterator_init(&iter, inst, p, 0, false); 982 while (intel_field_iterator_next(&iter)) { 983 if (!strcmp(iter.name, "BLEND_STATE Change")) 984 blend_change = iter.raw_value; 985 else if (!strcmp(iter.name, "DEPTH_STENCIL_STATE Change")) 986 ds_change = iter.raw_value; 987 else if (!strcmp(iter.name, "Color Calc State Pointer Valid")) 988 cc_change = iter.raw_value; 989 else if (!strcmp(iter.name, "Pointer to DEPTH_STENCIL_STATE") && ds_change) { 990 state_offset = iter.raw_value; 991 decode_dynamic_state(ctx, "DEPTH_STENCIL_STATE", state_offset, 1); 992 } 993 else if (!strcmp(iter.name, "Pointer to BLEND_STATE") && blend_change) { 994 state_offset = iter.raw_value; 995 decode_dynamic_state(ctx, "BLEND_STATE", state_offset, 1); 996 } 997 else if (!strcmp(iter.name, "Color Calc State Pointer") && cc_change) { 998 state_offset = iter.raw_value; 999 decode_dynamic_state(ctx, "COLOR_CALC_STATE", state_offset, 1); 1000 } 1001 } 1002} 1003 1004static void 1005decode_3dstate_ds_state_pointers(struct intel_batch_decode_ctx *ctx, 1006 const uint32_t *p) 1007{ 1008 decode_dynamic_state_pointers(ctx, "DEPTH_STENCIL_STATE", p, 1); 1009} 1010 1011static void 1012decode_3dstate_scissor_state_pointers(struct intel_batch_decode_ctx *ctx, 1013 const uint32_t *p) 1014{ 1015 decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1); 1016} 1017 1018static void 1019decode_3dstate_slice_table_state_pointers(struct intel_batch_decode_ctx *ctx, 1020 const uint32_t *p) 1021{ 1022 decode_dynamic_state_pointers(ctx, "SLICE_HASH_TABLE", p, 1); 1023} 1024 1025static void 1026handle_gt_mode(struct intel_batch_decode_ctx *ctx, 1027 uint32_t reg_addr, uint32_t val) 1028{ 1029 struct intel_group *reg = intel_spec_find_register(ctx->spec, reg_addr); 1030 1031 assert(intel_group_get_length(reg, &val) == 1); 1032 1033 struct intel_field_iterator iter; 1034 intel_field_iterator_init(&iter, reg, &val, 0, false); 1035 1036 uint32_t bt_alignment; 1037 bool bt_alignment_mask = 0; 1038 1039 while (intel_field_iterator_next(&iter)) { 1040 if (strcmp(iter.name, "Binding Table Alignment") == 0) { 1041 bt_alignment = iter.raw_value; 1042 } else if (strcmp(iter.name, "Binding Table Alignment Mask") == 0) { 1043 bt_alignment_mask = iter.raw_value; 1044 } 1045 } 1046 1047 if (bt_alignment_mask) 1048 ctx->use_256B_binding_tables = bt_alignment; 1049} 1050 1051struct reg_handler { 1052 const char *name; 1053 void (*handler)(struct intel_batch_decode_ctx *ctx, 1054 uint32_t reg_addr, uint32_t val); 1055} reg_handlers[] = { 1056 { "GT_MODE", handle_gt_mode } 1057}; 1058 1059static void 1060decode_load_register_imm(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 1061{ 1062 struct intel_group *inst = intel_ctx_find_instruction(ctx, p); 1063 const unsigned length = intel_group_get_length(inst, p); 1064 assert(length & 1); 1065 const unsigned nr_regs = (length - 1) / 2; 1066 1067 for (unsigned i = 0; i < nr_regs; i++) { 1068 struct intel_group *reg = intel_spec_find_register(ctx->spec, p[i * 2 + 1]); 1069 if (reg != NULL) { 1070 fprintf(ctx->fp, "register %s (0x%x): 0x%x\n", 1071 reg->name, reg->register_offset, p[2]); 1072 ctx_print_group(ctx, reg, reg->register_offset, &p[2]); 1073 1074 for (unsigned i = 0; i < ARRAY_SIZE(reg_handlers); i++) { 1075 if (strcmp(reg->name, reg_handlers[i].name) == 0) 1076 reg_handlers[i].handler(ctx, p[1], p[2]); 1077 } 1078 } 1079 } 1080} 1081 1082static void 1083decode_vs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1084{ 1085 struct intel_group *strct = 1086 intel_spec_find_struct(ctx->spec, "VS_STATE"); 1087 if (strct == NULL) { 1088 fprintf(ctx->fp, "did not find VS_STATE info\n"); 1089 return; 1090 } 1091 1092 struct intel_batch_decode_bo bind_bo = 1093 ctx_get_bo(ctx, true, offset); 1094 1095 if (bind_bo.map == NULL) { 1096 fprintf(ctx->fp, " vs state unavailable\n"); 1097 return; 1098 } 1099 1100 ctx_print_group(ctx, strct, offset, bind_bo.map); 1101 1102 uint64_t ksp = 0; 1103 bool is_enabled = true; 1104 struct intel_field_iterator iter; 1105 intel_field_iterator_init(&iter, strct, bind_bo.map, 0, false); 1106 while (intel_field_iterator_next(&iter)) { 1107 if (strcmp(iter.name, "Kernel Start Pointer") == 0) { 1108 ksp = iter.raw_value; 1109 } else if (strcmp(iter.name, "Enable") == 0) { 1110 is_enabled = iter.raw_value; 1111 } 1112 } 1113 if (is_enabled) { 1114 ctx_disassemble_program(ctx, ksp, "vertex shader"); 1115 fprintf(ctx->fp, "\n"); 1116 } 1117} 1118 1119static void 1120decode_gs_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1121{ 1122 struct intel_group *strct = 1123 intel_spec_find_struct(ctx->spec, "GS_STATE"); 1124 if (strct == NULL) { 1125 fprintf(ctx->fp, "did not find GS_STATE info\n"); 1126 return; 1127 } 1128 1129 struct intel_batch_decode_bo bind_bo = 1130 ctx_get_bo(ctx, true, offset); 1131 1132 if (bind_bo.map == NULL) { 1133 fprintf(ctx->fp, " gs state unavailable\n"); 1134 return; 1135 } 1136 1137 ctx_print_group(ctx, strct, offset, bind_bo.map); 1138} 1139 1140static void 1141decode_clip_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1142{ 1143 struct intel_group *strct = 1144 intel_spec_find_struct(ctx->spec, "CLIP_STATE"); 1145 if (strct == NULL) { 1146 fprintf(ctx->fp, "did not find CLIP_STATE info\n"); 1147 return; 1148 } 1149 1150 struct intel_batch_decode_bo bind_bo = 1151 ctx_get_bo(ctx, true, offset); 1152 1153 if (bind_bo.map == NULL) { 1154 fprintf(ctx->fp, " clip state unavailable\n"); 1155 return; 1156 } 1157 1158 ctx_print_group(ctx, strct, offset, bind_bo.map); 1159 1160 struct intel_group *vp_strct = 1161 intel_spec_find_struct(ctx->spec, "CLIP_VIEWPORT"); 1162 if (vp_strct == NULL) { 1163 fprintf(ctx->fp, "did not find CLIP_VIEWPORT info\n"); 1164 return; 1165 } 1166 uint32_t clip_vp_offset = ((uint32_t *)bind_bo.map)[6] & ~0x3; 1167 struct intel_batch_decode_bo vp_bo = 1168 ctx_get_bo(ctx, true, clip_vp_offset); 1169 if (vp_bo.map == NULL) { 1170 fprintf(ctx->fp, " clip vp state unavailable\n"); 1171 return; 1172 } 1173 ctx_print_group(ctx, vp_strct, clip_vp_offset, vp_bo.map); 1174} 1175 1176static void 1177decode_sf_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1178{ 1179 struct intel_group *strct = 1180 intel_spec_find_struct(ctx->spec, "SF_STATE"); 1181 if (strct == NULL) { 1182 fprintf(ctx->fp, "did not find SF_STATE info\n"); 1183 return; 1184 } 1185 1186 struct intel_batch_decode_bo bind_bo = 1187 ctx_get_bo(ctx, true, offset); 1188 1189 if (bind_bo.map == NULL) { 1190 fprintf(ctx->fp, " sf state unavailable\n"); 1191 return; 1192 } 1193 1194 ctx_print_group(ctx, strct, offset, bind_bo.map); 1195 1196 struct intel_group *vp_strct = 1197 intel_spec_find_struct(ctx->spec, "SF_VIEWPORT"); 1198 if (vp_strct == NULL) { 1199 fprintf(ctx->fp, "did not find SF_VIEWPORT info\n"); 1200 return; 1201 } 1202 1203 uint32_t sf_vp_offset = ((uint32_t *)bind_bo.map)[5] & ~0x3; 1204 struct intel_batch_decode_bo vp_bo = 1205 ctx_get_bo(ctx, true, sf_vp_offset); 1206 if (vp_bo.map == NULL) { 1207 fprintf(ctx->fp, " sf vp state unavailable\n"); 1208 return; 1209 } 1210 ctx_print_group(ctx, vp_strct, sf_vp_offset, vp_bo.map); 1211} 1212 1213static void 1214decode_wm_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1215{ 1216 struct intel_group *strct = 1217 intel_spec_find_struct(ctx->spec, "WM_STATE"); 1218 if (strct == NULL) { 1219 fprintf(ctx->fp, "did not find WM_STATE info\n"); 1220 return; 1221 } 1222 1223 struct intel_batch_decode_bo bind_bo = 1224 ctx_get_bo(ctx, true, offset); 1225 1226 if (bind_bo.map == NULL) { 1227 fprintf(ctx->fp, " wm state unavailable\n"); 1228 return; 1229 } 1230 1231 ctx_print_group(ctx, strct, offset, bind_bo.map); 1232 1233 decode_ps_kern(ctx, strct, bind_bo.map); 1234} 1235 1236static void 1237decode_cc_state(struct intel_batch_decode_ctx *ctx, uint32_t offset) 1238{ 1239 struct intel_group *strct = 1240 intel_spec_find_struct(ctx->spec, "COLOR_CALC_STATE"); 1241 if (strct == NULL) { 1242 fprintf(ctx->fp, "did not find COLOR_CALC_STATE info\n"); 1243 return; 1244 } 1245 1246 struct intel_batch_decode_bo bind_bo = 1247 ctx_get_bo(ctx, true, offset); 1248 1249 if (bind_bo.map == NULL) { 1250 fprintf(ctx->fp, " cc state unavailable\n"); 1251 return; 1252 } 1253 1254 ctx_print_group(ctx, strct, offset, bind_bo.map); 1255 1256 struct intel_group *vp_strct = 1257 intel_spec_find_struct(ctx->spec, "CC_VIEWPORT"); 1258 if (vp_strct == NULL) { 1259 fprintf(ctx->fp, "did not find CC_VIEWPORT info\n"); 1260 return; 1261 } 1262 uint32_t cc_vp_offset = ((uint32_t *)bind_bo.map)[4] & ~0x3; 1263 struct intel_batch_decode_bo vp_bo = 1264 ctx_get_bo(ctx, true, cc_vp_offset); 1265 if (vp_bo.map == NULL) { 1266 fprintf(ctx->fp, " cc vp state unavailable\n"); 1267 return; 1268 } 1269 ctx_print_group(ctx, vp_strct, cc_vp_offset, vp_bo.map); 1270} 1271static void 1272decode_pipelined_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 1273{ 1274 fprintf(ctx->fp, "VS State Table:\n"); 1275 decode_vs_state(ctx, p[1]); 1276 if (p[2] & 1) { 1277 fprintf(ctx->fp, "GS State Table:\n"); 1278 decode_gs_state(ctx, p[2] & ~1); 1279 } 1280 fprintf(ctx->fp, "Clip State Table:\n"); 1281 decode_clip_state(ctx, p[3] & ~1); 1282 fprintf(ctx->fp, "SF State Table:\n"); 1283 decode_sf_state(ctx, p[4]); 1284 fprintf(ctx->fp, "WM State Table:\n"); 1285 decode_wm_state(ctx, p[5]); 1286 fprintf(ctx->fp, "CC State Table:\n"); 1287 decode_cc_state(ctx, p[6]); 1288} 1289 1290static void 1291decode_cps_pointers(struct intel_batch_decode_ctx *ctx, const uint32_t *p) 1292{ 1293 decode_dynamic_state_pointers(ctx, "CPS_STATE", p, 1); 1294} 1295 1296struct custom_decoder { 1297 const char *cmd_name; 1298 void (*decode)(struct intel_batch_decode_ctx *ctx, const uint32_t *p); 1299} custom_decoders[] = { 1300 { "STATE_BASE_ADDRESS", handle_state_base_address }, 1301 { "3DSTATE_BINDING_TABLE_POOL_ALLOC", handle_binding_table_pool_alloc }, 1302 { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load }, 1303 { "COMPUTE_WALKER", handle_compute_walker }, 1304 { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers }, 1305 { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer }, 1306 { "3DSTATE_VS", decode_single_ksp }, 1307 { "3DSTATE_GS", decode_single_ksp }, 1308 { "3DSTATE_DS", decode_single_ksp }, 1309 { "3DSTATE_HS", decode_single_ksp }, 1310 { "3DSTATE_PS", decode_ps_kernels }, 1311 { "3DSTATE_WM", decode_ps_kernels }, 1312 { "3DSTATE_CONSTANT_VS", decode_3dstate_constant }, 1313 { "3DSTATE_CONSTANT_GS", decode_3dstate_constant }, 1314 { "3DSTATE_CONSTANT_PS", decode_3dstate_constant }, 1315 { "3DSTATE_CONSTANT_HS", decode_3dstate_constant }, 1316 { "3DSTATE_CONSTANT_DS", decode_3dstate_constant }, 1317 { "3DSTATE_CONSTANT_ALL", decode_3dstate_constant_all }, 1318 1319 { "3DSTATE_BINDING_TABLE_POINTERS", decode_gfx4_3dstate_binding_table_pointers }, 1320 { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers }, 1321 { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers }, 1322 { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers }, 1323 { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers }, 1324 { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers }, 1325 1326 { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers }, 1327 { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers }, 1328 { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers }, 1329 { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers }, 1330 { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers }, 1331 { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gfx6 }, 1332 1333 { "3DSTATE_VIEWPORT_STATE_POINTERS", decode_3dstate_viewport_state_pointers }, 1334 { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc }, 1335 { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip }, 1336 { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers }, 1337 { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers }, 1338 { "3DSTATE_DEPTH_STENCIL_STATE_POINTERS", decode_3dstate_ds_state_pointers }, 1339 { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers }, 1340 { "3DSTATE_SLICE_TABLE_STATE_POINTERS", decode_3dstate_slice_table_state_pointers }, 1341 { "MI_LOAD_REGISTER_IMM", decode_load_register_imm }, 1342 { "3DSTATE_PIPELINED_POINTERS", decode_pipelined_pointers }, 1343 { "3DSTATE_CPS_POINTERS", decode_cps_pointers }, 1344 { "CONSTANT_BUFFER", decode_gfx4_constant_buffer }, 1345}; 1346 1347void 1348intel_print_batch(struct intel_batch_decode_ctx *ctx, 1349 const uint32_t *batch, uint32_t batch_size, 1350 uint64_t batch_addr, bool from_ring) 1351{ 1352 const uint32_t *p, *end = batch + batch_size / sizeof(uint32_t); 1353 int length; 1354 struct intel_group *inst; 1355 const char *reset_color = ctx->flags & INTEL_BATCH_DECODE_IN_COLOR ? NORMAL : ""; 1356 1357 if (ctx->n_batch_buffer_start >= 100) { 1358 fprintf(ctx->fp, "%s0x%08"PRIx64": Max batch buffer jumps exceeded%s\n", 1359 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "", 1360 (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) ? batch_addr : 0, 1361 reset_color); 1362 return; 1363 } 1364 1365 ctx->n_batch_buffer_start++; 1366 1367 for (p = batch; p < end; p += length) { 1368 inst = intel_ctx_find_instruction(ctx, p); 1369 length = intel_group_get_length(inst, p); 1370 assert(inst == NULL || length > 0); 1371 length = MAX2(1, length); 1372 1373 uint64_t offset; 1374 if (ctx->flags & INTEL_BATCH_DECODE_OFFSETS) 1375 offset = batch_addr + ((char *)p - (char *)batch); 1376 else 1377 offset = 0; 1378 1379 if (inst == NULL) { 1380 fprintf(ctx->fp, "%s0x%08"PRIx64": unknown instruction %08x%s\n", 1381 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "", 1382 offset, p[0], reset_color); 1383 1384 for (int i=1; i < length; i++) { 1385 fprintf(ctx->fp, "%s0x%08"PRIx64": -- %08x%s\n", 1386 (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) ? RED_COLOR : "", 1387 offset + i * 4, p[i], reset_color); 1388 } 1389 1390 continue; 1391 } 1392 1393 const char *color; 1394 const char *inst_name = intel_group_get_name(inst); 1395 if (ctx->flags & INTEL_BATCH_DECODE_IN_COLOR) { 1396 reset_color = NORMAL; 1397 if (ctx->flags & INTEL_BATCH_DECODE_FULL) { 1398 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 || 1399 strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) 1400 color = GREEN_HEADER; 1401 else 1402 color = BLUE_HEADER; 1403 } else { 1404 color = NORMAL; 1405 } 1406 } else { 1407 color = ""; 1408 reset_color = ""; 1409 } 1410 1411 fprintf(ctx->fp, "%s0x%08"PRIx64"%s: 0x%08x: %-80s%s\n", color, offset, 1412 ctx->acthd && offset == ctx->acthd ? " (ACTHD)" : "", p[0], 1413 inst_name, reset_color); 1414 1415 if (ctx->flags & INTEL_BATCH_DECODE_FULL) { 1416 ctx_print_group(ctx, inst, offset, p); 1417 1418 for (int i = 0; i < ARRAY_SIZE(custom_decoders); i++) { 1419 if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) { 1420 custom_decoders[i].decode(ctx, p); 1421 break; 1422 } 1423 } 1424 } 1425 1426 if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) { 1427 uint64_t next_batch_addr = 0; 1428 bool ppgtt = false; 1429 bool second_level = false; 1430 bool predicate = false; 1431 struct intel_field_iterator iter; 1432 intel_field_iterator_init(&iter, inst, p, 0, false); 1433 while (intel_field_iterator_next(&iter)) { 1434 if (strcmp(iter.name, "Batch Buffer Start Address") == 0) { 1435 next_batch_addr = iter.raw_value; 1436 } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) { 1437 second_level = iter.raw_value; 1438 } else if (strcmp(iter.name, "Address Space Indicator") == 0) { 1439 ppgtt = iter.raw_value; 1440 } else if (strcmp(iter.name, "Predication Enable") == 0) { 1441 predicate = iter.raw_value; 1442 } 1443 } 1444 1445 if (!predicate) { 1446 struct intel_batch_decode_bo next_batch = ctx_get_bo(ctx, ppgtt, next_batch_addr); 1447 1448 if (next_batch.map == NULL) { 1449 fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable\n", 1450 next_batch_addr); 1451 } else { 1452 intel_print_batch(ctx, next_batch.map, next_batch.size, 1453 next_batch.addr, false); 1454 } 1455 if (second_level) { 1456 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts 1457 * like a subroutine call. Commands that come afterwards get 1458 * processed once the 2nd level batch buffer returns with 1459 * MI_BATCH_BUFFER_END. 1460 */ 1461 continue; 1462 } else if (!from_ring) { 1463 /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts 1464 * like a goto. Nothing after it will ever get processed. In 1465 * order to prevent the recursion from growing, we just reset the 1466 * loop and continue; 1467 */ 1468 break; 1469 } 1470 } 1471 } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) { 1472 break; 1473 } 1474 } 1475 1476 ctx->n_batch_buffer_start--; 1477} 1478