/* aub_write.c revision b8e80941 */
1/* 2 * Copyright © 2015 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "aub_write.h" 25 26#include <inttypes.h> 27#include <signal.h> 28#include <stdarg.h> 29#include <stdlib.h> 30#include <string.h> 31 32#include "drm-uapi/i915_drm.h" 33#include "intel_aub.h" 34#include "gen_context.h" 35 36#ifndef ALIGN 37#define ALIGN(x, y) (((x) + (y)-1) & ~((y)-1)) 38#endif 39 40#define MI_BATCH_NON_SECURE_I965 (1 << 8) 41 42#define min(a, b) ({ \ 43 __typeof(a) _a = (a); \ 44 __typeof(b) _b = (b); \ 45 _a < _b ? _a : _b; \ 46 }) 47 48#define max(a, b) ({ \ 49 __typeof(a) _a = (a); \ 50 __typeof(b) _b = (b); \ 51 _a > _b ? 
_a : _b; \ 52 }) 53 54static void 55mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr, 56 uint32_t len, uint32_t addr_space, 57 const char *desc); 58 59static void __attribute__ ((format(__printf__, 2, 3))) 60fail_if(int cond, const char *format, ...) 61{ 62 va_list args; 63 64 if (!cond) 65 return; 66 67 va_start(args, format); 68 vfprintf(stderr, format, args); 69 va_end(args); 70 71 raise(SIGTRAP); 72} 73 74static inline uint32_t 75align_u32(uint32_t v, uint32_t a) 76{ 77 return (v + a - 1) & ~(a - 1); 78} 79 80static void 81aub_ppgtt_table_finish(struct aub_ppgtt_table *table, int level) 82{ 83 if (level == 1) 84 return; 85 86 for (unsigned i = 0; i < ARRAY_SIZE(table->subtables); i++) { 87 if (table->subtables[i]) { 88 aub_ppgtt_table_finish(table->subtables[i], level - 1); 89 free(table->subtables[i]); 90 } 91 } 92} 93 94static void 95data_out(struct aub_file *aub, const void *data, size_t size) 96{ 97 if (size == 0) 98 return; 99 100 fail_if(fwrite(data, 1, size, aub->file) == 0, 101 "Writing to output failed\n"); 102} 103 104static void 105dword_out(struct aub_file *aub, uint32_t data) 106{ 107 data_out(aub, &data, sizeof(data)); 108} 109 110static void 111write_execlists_header(struct aub_file *aub, const char *name) 112{ 113 char app_name[8 * 4]; 114 int app_name_len, dwords; 115 116 app_name_len = 117 snprintf(app_name, sizeof(app_name), "PCI-ID=0x%X %s", 118 aub->pci_id, name); 119 app_name_len = ALIGN(app_name_len, sizeof(uint32_t)); 120 121 dwords = 5 + app_name_len / sizeof(uint32_t); 122 dword_out(aub, CMD_MEM_TRACE_VERSION | (dwords - 1)); 123 dword_out(aub, AUB_MEM_TRACE_VERSION_FILE_VERSION); 124 dword_out(aub, aub->devinfo.simulator_id << AUB_MEM_TRACE_VERSION_DEVICE_SHIFT); 125 dword_out(aub, 0); /* version */ 126 dword_out(aub, 0); /* version */ 127 data_out(aub, app_name, app_name_len); 128} 129 130static void 131write_legacy_header(struct aub_file *aub, const char *name) 132{ 133 char app_name[8 * 4]; 134 char 
comment[16]; 135 int comment_len, comment_dwords, dwords; 136 137 comment_len = snprintf(comment, sizeof(comment), "PCI-ID=0x%x", aub->pci_id); 138 comment_dwords = ((comment_len + 3) / 4); 139 140 /* Start with a (required) version packet. */ 141 dwords = 13 + comment_dwords; 142 dword_out(aub, CMD_AUB_HEADER | (dwords - 2)); 143 dword_out(aub, (4 << AUB_HEADER_MAJOR_SHIFT) | 144 (0 << AUB_HEADER_MINOR_SHIFT)); 145 146 /* Next comes a 32-byte application name. */ 147 strncpy(app_name, name, sizeof(app_name)); 148 app_name[sizeof(app_name) - 1] = 0; 149 data_out(aub, app_name, sizeof(app_name)); 150 151 dword_out(aub, 0); /* timestamp */ 152 dword_out(aub, 0); /* timestamp */ 153 dword_out(aub, comment_len); 154 data_out(aub, comment, comment_dwords * 4); 155} 156 157 158static void 159aub_write_header(struct aub_file *aub, const char *app_name) 160{ 161 if (aub_use_execlists(aub)) 162 write_execlists_header(aub, app_name); 163 else 164 write_legacy_header(aub, app_name); 165} 166 167void 168aub_file_init(struct aub_file *aub, FILE *file, FILE *debug, uint16_t pci_id, const char *app_name) 169{ 170 memset(aub, 0, sizeof(*aub)); 171 172 aub->verbose_log_file = debug; 173 aub->file = file; 174 aub->pci_id = pci_id; 175 fail_if(!gen_get_device_info(pci_id, &aub->devinfo), 176 "failed to identify chipset=0x%x\n", pci_id); 177 aub->addr_bits = aub->devinfo.gen >= 8 ? 48 : 32; 178 179 aub_write_header(aub, app_name); 180 181 aub->phys_addrs_allocator = 0; 182 aub->pml4.phys_addr = aub->phys_addrs_allocator++ << 12; 183 184 mem_trace_memory_write_header_out(aub, 0, 185 GEN8_PTE_SIZE, 186 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY, 187 "GGTT PT"); 188 dword_out(aub, 1); 189 dword_out(aub, 0); 190} 191 192void 193aub_file_finish(struct aub_file *aub) 194{ 195 aub_ppgtt_table_finish(&aub->pml4, 4); 196 fclose(aub->file); 197} 198 199uint32_t 200aub_gtt_size(struct aub_file *aub) 201{ 202 return NUM_PT_ENTRIES * (aub->addr_bits > 32 ? 
GEN8_PTE_SIZE : PTE_SIZE); 203} 204 205static void 206mem_trace_memory_write_header_out(struct aub_file *aub, uint64_t addr, 207 uint32_t len, uint32_t addr_space, 208 const char *desc) 209{ 210 uint32_t dwords = ALIGN(len, sizeof(uint32_t)) / sizeof(uint32_t); 211 212 if (aub->verbose_log_file) { 213 fprintf(aub->verbose_log_file, 214 " MEM WRITE (0x%016" PRIx64 "-0x%016" PRIx64 ") %s\n", 215 addr, addr + len, desc); 216 } 217 218 dword_out(aub, CMD_MEM_TRACE_MEMORY_WRITE | (5 + dwords - 1)); 219 dword_out(aub, addr & 0xFFFFFFFF); /* addr lo */ 220 dword_out(aub, addr >> 32); /* addr hi */ 221 dword_out(aub, addr_space); /* gtt */ 222 dword_out(aub, len); 223} 224 225static void 226register_write_out(struct aub_file *aub, uint32_t addr, uint32_t value) 227{ 228 uint32_t dwords = 1; 229 230 if (aub->verbose_log_file) { 231 fprintf(aub->verbose_log_file, 232 " MMIO WRITE (0x%08x = 0x%08x)\n", addr, value); 233 } 234 235 dword_out(aub, CMD_MEM_TRACE_REGISTER_WRITE | (5 + dwords - 1)); 236 dword_out(aub, addr); 237 dword_out(aub, AUB_MEM_TRACE_REGISTER_SIZE_DWORD | 238 AUB_MEM_TRACE_REGISTER_SPACE_MMIO); 239 dword_out(aub, 0xFFFFFFFF); /* mask lo */ 240 dword_out(aub, 0x00000000); /* mask hi */ 241 dword_out(aub, value); 242} 243 244static void 245populate_ppgtt_table(struct aub_file *aub, struct aub_ppgtt_table *table, 246 int start, int end, int level) 247{ 248 uint64_t entries[512] = {0}; 249 int dirty_start = 512, dirty_end = 0; 250 251 if (aub->verbose_log_file) { 252 fprintf(aub->verbose_log_file, 253 " PPGTT (0x%016" PRIx64 "), lvl %d, start: %x, end: %x\n", 254 table->phys_addr, level, start, end); 255 } 256 257 for (int i = start; i <= end; i++) { 258 if (!table->subtables[i]) { 259 dirty_start = min(dirty_start, i); 260 dirty_end = max(dirty_end, i); 261 if (level == 1) { 262 table->subtables[i] = 263 (void *)(aub->phys_addrs_allocator++ << 12); 264 if (aub->verbose_log_file) { 265 fprintf(aub->verbose_log_file, 266 " Adding entry: %x, phys_addr: 0x%016" 
PRIx64 "\n", 267 i, (uint64_t)table->subtables[i]); 268 } 269 } else { 270 table->subtables[i] = 271 calloc(1, sizeof(struct aub_ppgtt_table)); 272 table->subtables[i]->phys_addr = 273 aub->phys_addrs_allocator++ << 12; 274 if (aub->verbose_log_file) { 275 fprintf(aub->verbose_log_file, 276 " Adding entry: %x, phys_addr: 0x%016" PRIx64 "\n", 277 i, table->subtables[i]->phys_addr); 278 } 279 } 280 } 281 entries[i] = 3 /* read/write | present */ | 282 (level == 1 ? (uint64_t)table->subtables[i] : 283 table->subtables[i]->phys_addr); 284 } 285 286 if (dirty_start <= dirty_end) { 287 uint64_t write_addr = table->phys_addr + dirty_start * 288 sizeof(uint64_t); 289 uint64_t write_size = (dirty_end - dirty_start + 1) * 290 sizeof(uint64_t); 291 mem_trace_memory_write_header_out(aub, write_addr, write_size, 292 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL, 293 "PPGTT update"); 294 data_out(aub, entries + dirty_start, write_size); 295 } 296} 297 298void 299aub_map_ppgtt(struct aub_file *aub, uint64_t start, uint64_t size) 300{ 301 uint64_t l4_start = start & 0xff8000000000; 302 uint64_t l4_end = ((start + size - 1) | 0x007fffffffff) & 0xffffffffffff; 303 304#define L4_index(addr) (((addr) >> 39) & 0x1ff) 305#define L3_index(addr) (((addr) >> 30) & 0x1ff) 306#define L2_index(addr) (((addr) >> 21) & 0x1ff) 307#define L1_index(addr) (((addr) >> 12) & 0x1ff) 308 309#define L3_table(addr) (aub->pml4.subtables[L4_index(addr)]) 310#define L2_table(addr) (L3_table(addr)->subtables[L3_index(addr)]) 311#define L1_table(addr) (L2_table(addr)->subtables[L2_index(addr)]) 312 313 if (aub->verbose_log_file) { 314 fprintf(aub->verbose_log_file, 315 " Mapping PPGTT address: 0x%" PRIx64 ", size: %" PRIu64"\n", 316 start, size); 317 } 318 319 populate_ppgtt_table(aub, &aub->pml4, L4_index(l4_start), L4_index(l4_end), 4); 320 321 for (uint64_t l4 = l4_start; l4 < l4_end; l4 += (1ULL << 39)) { 322 uint64_t l3_start = max(l4, start & 0xffffc0000000); 323 uint64_t l3_end = min(l4 + (1ULL << 39) - 
1, 324 ((start + size - 1) | 0x00003fffffff) & 0xffffffffffff); 325 uint64_t l3_start_idx = L3_index(l3_start); 326 uint64_t l3_end_idx = L3_index(l3_end); 327 328 populate_ppgtt_table(aub, L3_table(l4), l3_start_idx, l3_end_idx, 3); 329 330 for (uint64_t l3 = l3_start; l3 < l3_end; l3 += (1ULL << 30)) { 331 uint64_t l2_start = max(l3, start & 0xffffffe00000); 332 uint64_t l2_end = min(l3 + (1ULL << 30) - 1, 333 ((start + size - 1) | 0x0000001fffff) & 0xffffffffffff); 334 uint64_t l2_start_idx = L2_index(l2_start); 335 uint64_t l2_end_idx = L2_index(l2_end); 336 337 populate_ppgtt_table(aub, L2_table(l3), l2_start_idx, l2_end_idx, 2); 338 339 for (uint64_t l2 = l2_start; l2 < l2_end; l2 += (1ULL << 21)) { 340 uint64_t l1_start = max(l2, start & 0xfffffffff000); 341 uint64_t l1_end = min(l2 + (1ULL << 21) - 1, 342 ((start + size - 1) | 0x000000000fff) & 0xffffffffffff); 343 uint64_t l1_start_idx = L1_index(l1_start); 344 uint64_t l1_end_idx = L1_index(l1_end); 345 346 populate_ppgtt_table(aub, L1_table(l2), l1_start_idx, l1_end_idx, 1); 347 } 348 } 349 } 350} 351 352static uint64_t 353ppgtt_lookup(struct aub_file *aub, uint64_t ppgtt_addr) 354{ 355 return (uint64_t)L1_table(ppgtt_addr)->subtables[L1_index(ppgtt_addr)]; 356} 357 358static const struct engine { 359 const char *name; 360 enum drm_i915_gem_engine_class engine_class; 361 uint32_t hw_class; 362 uint32_t elsp_reg; 363 uint32_t elsq_reg; 364 uint32_t status_reg; 365 uint32_t control_reg; 366} engines[] = { 367 [I915_ENGINE_CLASS_RENDER] = { 368 .name = "RENDER", 369 .engine_class = I915_ENGINE_CLASS_RENDER, 370 .hw_class = 1, 371 .elsp_reg = EXECLIST_SUBMITPORT_RCSUNIT, 372 .elsq_reg = EXECLIST_SQ_CONTENTS0_RCSUNIT, 373 .status_reg = EXECLIST_STATUS_RCSUNIT, 374 .control_reg = EXECLIST_CONTROL_RCSUNIT, 375 }, 376 [I915_ENGINE_CLASS_VIDEO] = { 377 .name = "VIDEO", 378 .engine_class = I915_ENGINE_CLASS_VIDEO, 379 .hw_class = 3, 380 .elsp_reg = EXECLIST_SUBMITPORT_VCSUNIT0, 381 .elsq_reg = 
EXECLIST_SQ_CONTENTS0_VCSUNIT0, 382 .status_reg = EXECLIST_STATUS_VCSUNIT0, 383 .control_reg = EXECLIST_CONTROL_VCSUNIT0, 384 }, 385 [I915_ENGINE_CLASS_COPY] = { 386 .name = "BLITTER", 387 .engine_class = I915_ENGINE_CLASS_COPY, 388 .hw_class = 2, 389 .elsp_reg = EXECLIST_SUBMITPORT_BCSUNIT, 390 .elsq_reg = EXECLIST_SQ_CONTENTS0_BCSUNIT, 391 .status_reg = EXECLIST_STATUS_BCSUNIT, 392 .control_reg = EXECLIST_CONTROL_BCSUNIT, 393 }, 394}; 395 396static const struct engine * 397engine_from_engine_class(enum drm_i915_gem_engine_class engine_class) 398{ 399 switch (engine_class) { 400 case I915_ENGINE_CLASS_RENDER: 401 case I915_ENGINE_CLASS_COPY: 402 case I915_ENGINE_CLASS_VIDEO: 403 return &engines[engine_class]; 404 default: 405 unreachable("unknown ring"); 406 } 407} 408 409static void 410get_context_init(const struct gen_device_info *devinfo, 411 const struct gen_context_parameters *params, 412 enum drm_i915_gem_engine_class engine_class, 413 uint32_t *data, 414 uint32_t *size) 415{ 416 static const gen_context_init_t gen8_contexts[] = { 417 [I915_ENGINE_CLASS_RENDER] = gen8_render_context_init, 418 [I915_ENGINE_CLASS_COPY] = gen8_blitter_context_init, 419 [I915_ENGINE_CLASS_VIDEO] = gen8_video_context_init, 420 }; 421 static const gen_context_init_t gen10_contexts[] = { 422 [I915_ENGINE_CLASS_RENDER] = gen10_render_context_init, 423 [I915_ENGINE_CLASS_COPY] = gen10_blitter_context_init, 424 [I915_ENGINE_CLASS_VIDEO] = gen10_video_context_init, 425 }; 426 427 assert(devinfo->gen >= 8); 428 429 if (devinfo->gen <= 10) 430 gen8_contexts[engine_class](params, data, size); 431 else 432 gen10_contexts[engine_class](params, data, size); 433} 434 435static uint32_t 436write_engine_execlist_setup(struct aub_file *aub, 437 enum drm_i915_gem_engine_class engine_class) 438{ 439 const struct engine *cs = engine_from_engine_class(engine_class); 440 uint32_t context_size; 441 442 get_context_init(&aub->devinfo, NULL, engine_class, NULL, &context_size); 443 444 /* GGTT PT */ 445 
uint64_t phys_addr = aub->phys_addrs_allocator << 12; 446 uint32_t total_size = RING_SIZE + PPHWSP_SIZE + context_size; 447 uint32_t ggtt_ptes = DIV_ROUND_UP(total_size, 4096); 448 char name[80]; 449 450 aub->phys_addrs_allocator += ggtt_ptes; 451 452 snprintf(name, sizeof(name), "%s GGTT PT", cs->name); 453 mem_trace_memory_write_header_out(aub, 454 sizeof(uint64_t) * (phys_addr >> 12), 455 ggtt_ptes * GEN8_PTE_SIZE, 456 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY, 457 name); 458 for (uint32_t i = 0; i < ggtt_ptes; i++) { 459 dword_out(aub, 1 + 0x1000 * i + phys_addr); 460 dword_out(aub, 0); 461 } 462 463 /* RING */ 464 aub->engine_setup[engine_class].ring_addr = phys_addr; 465 snprintf(name, sizeof(name), "%s RING", cs->name); 466 mem_trace_memory_write_header_out(aub, phys_addr, RING_SIZE, 467 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT, 468 name); 469 for (uint32_t i = 0; i < RING_SIZE; i += sizeof(uint32_t)) 470 dword_out(aub, 0); 471 phys_addr += RING_SIZE; 472 473 /* PPHWSP */ 474 aub->engine_setup[engine_class].pphwsp_addr = phys_addr; 475 aub->engine_setup[engine_class].descriptor = cs->hw_class | phys_addr | CONTEXT_FLAGS; 476 snprintf(name, sizeof(name), "%s PPHWSP", cs->name); 477 mem_trace_memory_write_header_out(aub, phys_addr, 478 PPHWSP_SIZE + context_size, 479 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT, 480 name); 481 for (uint32_t i = 0; i < PPHWSP_SIZE; i += sizeof(uint32_t)) 482 dword_out(aub, 0); 483 484 /* CONTEXT */ 485 struct gen_context_parameters params = { 486 .ring_addr = aub->engine_setup[engine_class].ring_addr, 487 .ring_size = RING_SIZE, 488 .pml4_addr = aub->pml4.phys_addr, 489 }; 490 uint32_t *context_data = calloc(1, context_size); 491 get_context_init(&aub->devinfo, ¶ms, engine_class, context_data, &context_size); 492 data_out(aub, context_data, context_size); 493 free(context_data); 494 495 return total_size; 496} 497 498static void 499write_execlists_default_setup(struct aub_file *aub) 500{ 501 write_engine_execlist_setup(aub, 
I915_ENGINE_CLASS_RENDER); 502 write_engine_execlist_setup(aub, I915_ENGINE_CLASS_COPY); 503 write_engine_execlist_setup(aub, I915_ENGINE_CLASS_VIDEO); 504 505 register_write_out(aub, HWS_PGA_RCSUNIT, aub->engine_setup[I915_ENGINE_CLASS_RENDER].pphwsp_addr); 506 register_write_out(aub, HWS_PGA_VCSUNIT0, aub->engine_setup[I915_ENGINE_CLASS_VIDEO].pphwsp_addr); 507 register_write_out(aub, HWS_PGA_BCSUNIT, aub->engine_setup[I915_ENGINE_CLASS_COPY].pphwsp_addr); 508 509 register_write_out(aub, GFX_MODE_RCSUNIT, 0x80008000 /* execlist enable */); 510 register_write_out(aub, GFX_MODE_VCSUNIT0, 0x80008000 /* execlist enable */); 511 register_write_out(aub, GFX_MODE_BCSUNIT, 0x80008000 /* execlist enable */); 512} 513 514static void write_legacy_default_setup(struct aub_file *aub) 515{ 516 uint32_t entry = 0x200003; 517 518 /* Set up the GTT. The max we can handle is 64M */ 519 dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK | 520 ((aub->addr_bits > 32 ? 6 : 5) - 2)); 521 dword_out(aub, AUB_TRACE_MEMTYPE_GTT_ENTRY | 522 AUB_TRACE_TYPE_NOTYPE | AUB_TRACE_OP_DATA_WRITE); 523 dword_out(aub, 0); /* subtype */ 524 dword_out(aub, 0); /* offset */ 525 dword_out(aub, aub_gtt_size(aub)); /* size */ 526 if (aub->addr_bits > 32) 527 dword_out(aub, 0); 528 for (uint32_t i = 0; i < NUM_PT_ENTRIES; i++) { 529 dword_out(aub, entry + 0x1000 * i); 530 if (aub->addr_bits > 32) 531 dword_out(aub, 0); 532 } 533} 534 535/** 536 * Sets up a default GGTT/PPGTT address space and execlists context (when 537 * supported). 
538 */ 539void 540aub_write_default_setup(struct aub_file *aub) 541{ 542 if (aub_use_execlists(aub)) 543 write_execlists_default_setup(aub); 544 else 545 write_legacy_default_setup(aub); 546 547 aub->has_default_setup = true; 548} 549 550void 551aub_write_ggtt(struct aub_file *aub, uint64_t virt_addr, uint64_t size, const void *data) 552{ 553 if (aub->verbose_log_file) { 554 fprintf(aub->verbose_log_file, 555 " Writting GGTT address: 0x%" PRIx64 ", size: %" PRIu64"\n", 556 virt_addr, size); 557 } 558 559 /* Default setup assumes a 1 to 1 mapping between physical and virtual GGTT 560 * addresses. This is somewhat incompatible with the aub_write_ggtt() 561 * function. In practice it doesn't matter as the GGTT writes are used to 562 * replace the default setup and we've taken care to setup the PML4 as the 563 * top of the GGTT. 564 */ 565 assert(!aub->has_default_setup); 566 567 /* Makes the code below a bit simpler. In practice all of the write we 568 * receive from error2aub are page aligned. 569 */ 570 assert(virt_addr % 4096 == 0); 571 assert((aub->phys_addrs_allocator + size) < (1UL << 32)); 572 573 /* GGTT PT */ 574 uint32_t ggtt_ptes = DIV_ROUND_UP(size, 4096); 575 uint64_t phys_addr = aub->phys_addrs_allocator << 12; 576 aub->phys_addrs_allocator += ggtt_ptes; 577 578 if (aub->verbose_log_file) { 579 fprintf(aub->verbose_log_file, 580 " Writting GGTT address: 0x%" PRIx64 ", size: %" PRIu64" phys_addr=0x%lx entries=%u\n", 581 virt_addr, size, phys_addr, ggtt_ptes); 582 } 583 584 mem_trace_memory_write_header_out(aub, 585 (virt_addr >> 12) * GEN8_PTE_SIZE, 586 ggtt_ptes * GEN8_PTE_SIZE, 587 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT_ENTRY, 588 "GGTT PT"); 589 for (uint32_t i = 0; i < ggtt_ptes; i++) { 590 dword_out(aub, 1 + phys_addr + i * 4096); 591 dword_out(aub, 0); 592 } 593 594 /* We write the GGTT buffer through the GGTT aub command rather than the 595 * PHYSICAL aub command. 
This is because the Gen9 simulator seems to have 2 596 * different set of memory pools for GGTT and physical (probably someone 597 * didn't really understand the concept?). 598 */ 599 static const char null_block[8 * 4096]; 600 for (uint64_t offset = 0; offset < size; offset += 4096) { 601 uint32_t block_size = min(4096, size - offset); 602 603 mem_trace_memory_write_header_out(aub, virt_addr + offset, block_size, 604 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT, 605 "GGTT buffer"); 606 data_out(aub, (char *) data + offset, block_size); 607 608 /* Pad to a multiple of 4 bytes. */ 609 data_out(aub, null_block, -block_size & 3); 610 } 611} 612 613/** 614 * Break up large objects into multiple writes. Otherwise a 128kb VBO 615 * would overflow the 16 bits of size field in the packet header and 616 * everything goes badly after that. 617 */ 618void 619aub_write_trace_block(struct aub_file *aub, 620 uint32_t type, void *virtual, 621 uint32_t size, uint64_t gtt_offset) 622{ 623 uint32_t block_size; 624 uint32_t subtype = 0; 625 static const char null_block[8 * 4096]; 626 627 for (uint32_t offset = 0; offset < size; offset += block_size) { 628 block_size = min(8 * 4096, size - offset); 629 630 if (aub_use_execlists(aub)) { 631 block_size = min(4096, block_size); 632 mem_trace_memory_write_header_out(aub, 633 ppgtt_lookup(aub, gtt_offset + offset), 634 block_size, 635 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_PHYSICAL, 636 "Trace Block"); 637 } else { 638 dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK | 639 ((aub->addr_bits > 32 ? 6 : 5) - 2)); 640 dword_out(aub, AUB_TRACE_MEMTYPE_GTT | 641 type | AUB_TRACE_OP_DATA_WRITE); 642 dword_out(aub, subtype); 643 dword_out(aub, gtt_offset + offset); 644 dword_out(aub, align_u32(block_size, 4)); 645 if (aub->addr_bits > 32) 646 dword_out(aub, (gtt_offset + offset) >> 32); 647 } 648 649 if (virtual) 650 data_out(aub, ((char *) virtual) + offset, block_size); 651 else 652 data_out(aub, null_block, block_size); 653 654 /* Pad to a multiple of 4 bytes. 
*/ 655 data_out(aub, null_block, -block_size & 3); 656 } 657} 658 659static void 660aub_dump_ring_buffer_execlist(struct aub_file *aub, 661 const struct engine *cs, 662 uint64_t batch_offset) 663{ 664 mem_trace_memory_write_header_out(aub, aub->engine_setup[cs->engine_class].ring_addr, 16, 665 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT, 666 "RING MI_BATCH_BUFFER_START user"); 667 dword_out(aub, AUB_MI_BATCH_BUFFER_START | MI_BATCH_NON_SECURE_I965 | (3 - 2)); 668 dword_out(aub, batch_offset & 0xFFFFFFFF); 669 dword_out(aub, batch_offset >> 32); 670 dword_out(aub, 0 /* MI_NOOP */); 671 672 mem_trace_memory_write_header_out(aub, aub->engine_setup[cs->engine_class].ring_addr + 8192 + 20, 4, 673 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT, 674 "RING BUFFER HEAD"); 675 dword_out(aub, 0); /* RING_BUFFER_HEAD */ 676 mem_trace_memory_write_header_out(aub, aub->engine_setup[cs->engine_class].ring_addr + 8192 + 28, 4, 677 AUB_MEM_TRACE_MEMORY_ADDRESS_SPACE_GGTT, 678 "RING BUFFER TAIL"); 679 dword_out(aub, 16); /* RING_BUFFER_TAIL */ 680} 681 682static void 683aub_dump_execlist(struct aub_file *aub, const struct engine *cs, uint64_t descriptor) 684{ 685 if (aub->devinfo.gen >= 11) { 686 register_write_out(aub, cs->elsq_reg, descriptor & 0xFFFFFFFF); 687 register_write_out(aub, cs->elsq_reg + sizeof(uint32_t), descriptor >> 32); 688 register_write_out(aub, cs->control_reg, 1); 689 } else { 690 register_write_out(aub, cs->elsp_reg, 0); 691 register_write_out(aub, cs->elsp_reg, 0); 692 register_write_out(aub, cs->elsp_reg, descriptor >> 32); 693 register_write_out(aub, cs->elsp_reg, descriptor & 0xFFFFFFFF); 694 } 695 696 dword_out(aub, CMD_MEM_TRACE_REGISTER_POLL | (5 + 1 - 1)); 697 dword_out(aub, cs->status_reg); 698 dword_out(aub, AUB_MEM_TRACE_REGISTER_SIZE_DWORD | 699 AUB_MEM_TRACE_REGISTER_SPACE_MMIO); 700 if (aub->devinfo.gen >= 11) { 701 dword_out(aub, 0x00000001); /* mask lo */ 702 dword_out(aub, 0x00000000); /* mask hi */ 703 dword_out(aub, 0x00000001); 704 } else { 705 
dword_out(aub, 0x00000010); /* mask lo */ 706 dword_out(aub, 0x00000000); /* mask hi */ 707 dword_out(aub, 0x00000000); 708 } 709} 710 711static void 712aub_dump_ring_buffer_legacy(struct aub_file *aub, 713 uint64_t batch_offset, 714 uint64_t offset, 715 enum drm_i915_gem_engine_class engine_class) 716{ 717 uint32_t ringbuffer[4096]; 718 unsigned aub_mi_bbs_len; 719 int ring_count = 0; 720 static const int engine_class_to_ring[] = { 721 [I915_ENGINE_CLASS_RENDER] = AUB_TRACE_TYPE_RING_PRB0, 722 [I915_ENGINE_CLASS_VIDEO] = AUB_TRACE_TYPE_RING_PRB1, 723 [I915_ENGINE_CLASS_COPY] = AUB_TRACE_TYPE_RING_PRB2, 724 }; 725 int ring = engine_class_to_ring[engine_class]; 726 727 /* Make a ring buffer to execute our batchbuffer. */ 728 memset(ringbuffer, 0, sizeof(ringbuffer)); 729 730 aub_mi_bbs_len = aub->addr_bits > 32 ? 3 : 2; 731 ringbuffer[ring_count] = AUB_MI_BATCH_BUFFER_START | (aub_mi_bbs_len - 2); 732 aub_write_reloc(&aub->devinfo, &ringbuffer[ring_count + 1], batch_offset); 733 ring_count += aub_mi_bbs_len; 734 735 /* Write out the ring. This appears to trigger execution of 736 * the ring in the simulator. 737 */ 738 dword_out(aub, CMD_AUB_TRACE_HEADER_BLOCK | 739 ((aub->addr_bits > 32 ? 
6 : 5) - 2)); 740 dword_out(aub, AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); 741 dword_out(aub, 0); /* general/surface subtype */ 742 dword_out(aub, offset); 743 dword_out(aub, ring_count * 4); 744 if (aub->addr_bits > 32) 745 dword_out(aub, offset >> 32); 746 747 data_out(aub, ringbuffer, ring_count * 4); 748} 749 750void 751aub_write_exec(struct aub_file *aub, uint64_t batch_addr, 752 uint64_t offset, enum drm_i915_gem_engine_class engine_class) 753{ 754 const struct engine *cs = engine_from_engine_class(engine_class); 755 756 if (aub_use_execlists(aub)) { 757 aub_dump_ring_buffer_execlist(aub, cs, batch_addr); 758 aub_dump_execlist(aub, cs, aub->engine_setup[engine_class].descriptor); 759 } else { 760 /* Dump ring buffer */ 761 aub_dump_ring_buffer_legacy(aub, batch_addr, offset, engine_class); 762 } 763 fflush(aub->file); 764} 765 766void 767aub_write_context_execlists(struct aub_file *aub, uint64_t context_addr, 768 enum drm_i915_gem_engine_class engine_class) 769{ 770 const struct engine *cs = engine_from_engine_class(engine_class); 771 uint64_t descriptor = ((uint64_t)1 << 62 | context_addr | CONTEXT_FLAGS); 772 aub_dump_execlist(aub, cs, descriptor); 773} 774