/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_batch.c
 *
 * Batchbuffer and command submission module.
 *
 * Every API draw call results in a number of GPU commands, which we
 * collect into a "batch buffer".  Typically, many draw calls are grouped
 * into a single batch to amortize command submission overhead.
 *
 * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl.
 * One critical piece of data is the "validation list", which contains a
 * list of the buffer objects (BOs) which the commands in the batch need.
 * The kernel will make sure these are resident and pinned at the correct
 * virtual memory address before executing our batch.  If a BO is not in
 * the validation list, it effectively does not exist, so take care.
 */

#include "iris_batch.h"
#include "iris_bufmgr.h"
#include "iris_context.h"
#include "iris_fence.h"

#include "drm-uapi/i915_drm.h"

#include "common/intel_aux_map.h"
#include "intel/common/intel_gem.h"
#include "util/hash_table.h"
#include "util/set.h"
#include "util/u_upload_mgr.h"
#include "main/macros.h"

#include <errno.h>
#include <xf86drm.h>

#if HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

static void
iris_batch_reset(struct iris_batch *batch);

static unsigned
num_fences(struct iris_batch *batch)
{
   return util_dynarray_num_elements(&batch->exec_fences,
                                     struct drm_i915_gem_exec_fence);
}

/**
 * Debugging code to dump the fence list, used by INTEL_DEBUG=submit.
 *
 * Fences we wait on are printed with a "..." prefix; fences we signal
 * are printed with a "!" suffix.
 */
static void
dump_fence_list(struct iris_batch *batch)
{
   fprintf(stderr, "Fence list (length %u): ", num_fences(batch));

   util_dynarray_foreach(&batch->exec_fences,
                         struct drm_i915_gem_exec_fence, f) {
      fprintf(stderr, "%s%u%s ",
              (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "",
              f->handle,
              (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : "");
   }

   fprintf(stderr, "\n");
}

/**
 * Debugging code to dump the validation list, used by INTEL_DEBUG=submit.
 */
static void
dump_bo_list(struct iris_batch *batch)
{
   fprintf(stderr, "BO list (length %d):\n", batch->exec_count);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      struct iris_bo *backing = iris_get_backing_bo(bo);
      bool written = BITSET_TEST(batch->bos_written, i);

      fprintf(stderr, "[%2d]: %3d (%3d) %-14s @ 0x%016"PRIx64" (%-6s %8"PRIu64"B) %2d refs %s\n",
              i,
              bo->gem_handle,
              backing->gem_handle,
              bo->name,
              bo->address,
              backing->real.local ? "local" : "system",
              bo->size,
              bo->refcount,
              written ? "(write)" : "");
   }
}

/**
 * Return BO information to the batch decoder (for debugging).
 */
static struct intel_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
{
   struct iris_batch *batch = v_batch;

   assert(ppgtt);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      /* The decoder zeroes out the top 16 bits, so we need to as well */
      uint64_t bo_address = bo->address & (~0ull >> 16);

      if (address >= bo_address && address < bo_address + bo->size) {
         return (struct intel_batch_decode_bo) {
            .addr = bo_address,
            .size = bo->size,
            .map = iris_bo_map(batch->dbg, bo, MAP_READ),
         };
      }
   }

   return (struct intel_batch_decode_bo) { };
}

static unsigned
decode_get_state_size(void *v_batch,
                      uint64_t address,
                      UNUSED uint64_t base_address)
{
   struct iris_batch *batch = v_batch;
   unsigned size = (uintptr_t)
      _mesa_hash_table_u64_search(batch->state_sizes, address);

   return size;
}

/**
 * Decode the current batch.
 */
static void
decode_batch(struct iris_batch *batch)
{
   void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
   intel_print_batch(&batch->decoder, map, batch->primary_batch_size,
                     batch->exec_bos[0]->address, false);
}

void
iris_init_batch(struct iris_context *ice,
                enum iris_batch_name name,
                int priority)
{
   struct iris_batch *batch = &ice->batches[name];
   struct iris_screen *screen = (void *) ice->ctx.screen;

   batch->screen = screen;
   batch->dbg = &ice->dbg;
   batch->reset = &ice->reset;
   batch->state_sizes = ice->state.sizes;
   batch->name = name;
   batch->ice = ice;
   batch->contains_fence_signal = false;

   batch->fine_fences.uploader =
      u_upload_create(&ice->ctx, 4096, PIPE_BIND_CUSTOM,
                      PIPE_USAGE_STAGING, 0);
   iris_fine_fence_init(batch);

   batch->hw_ctx_id = iris_create_hw_context(screen->bufmgr);
   assert(batch->hw_ctx_id);

   iris_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority);

   util_dynarray_init(&batch->exec_fences, ralloc_context(NULL));
   util_dynarray_init(&batch->syncobjs, ralloc_context(NULL));

   batch->exec_count = 0;
   batch->max_gem_handle = 0;
   batch->exec_array_size = 128;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->bos_written =
      rzalloc_array(NULL, BITSET_WORD, BITSET_WORDS(batch->exec_array_size));

   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);

   memset(batch->other_batches, 0, sizeof(batch->other_batches));

   for (int i = 0, j = 0; i < IRIS_BATCH_COUNT; i++) {
      if (i != name)
         batch->other_batches[j++] = &ice->batches[i];
   }

   if (INTEL_DEBUG(DEBUG_ANY)) {
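      /* With any INTEL_DEBUG flag set, initialize the batch decoder so that
       * decode_batch() can disassemble and print the commands we submit.
       */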
      const unsigned decode_flags =
         INTEL_BATCH_DECODE_FULL |
         (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
         INTEL_BATCH_DECODE_OFFSETS |
         INTEL_BATCH_DECODE_FLOATS;

      intel_batch_decode_ctx_init(&batch->decoder, &screen->devinfo,
                                  stderr, decode_flags, NULL,
                                  decode_get_bo, decode_get_state_size, batch);
      batch->decoder.dynamic_base = IRIS_MEMZONE_DYNAMIC_START;
      batch->decoder.instruction_base = IRIS_MEMZONE_SHADER_START;
      batch->decoder.max_vbo_decoded_lines = 32;
   }

   iris_init_batch_measure(ice, batch);

   iris_batch_reset(batch);
}

static int
find_exec_index(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);

   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return index;

   /* May have been shared between multiple active batches */
   for (index = 0; index < batch->exec_count; index++) {
      if (batch->exec_bos[index] == bo)
         return index;
   }

   return -1;
}

static void
ensure_exec_obj_space(struct iris_batch *batch, uint32_t count)
{
   while (batch->exec_count + count > batch->exec_array_size) {
      unsigned old_size = batch->exec_array_size;

      batch->exec_array_size *= 2;
      batch->exec_bos =
         realloc(batch->exec_bos,
                 batch->exec_array_size * sizeof(batch->exec_bos[0]));
      batch->bos_written =
         rerzalloc(NULL, batch->bos_written, BITSET_WORD,
                   BITSET_WORDS(old_size),
                   BITSET_WORDS(batch->exec_array_size));
   }
}

static void
add_bo_to_batch(struct iris_batch *batch, struct iris_bo *bo, bool writable)
{
   assert(batch->exec_array_size > batch->exec_count);

   iris_bo_reference(bo);

   batch->exec_bos[batch->exec_count] = bo;

   if (writable)
      BITSET_SET(batch->bos_written, batch->exec_count);

   bo->index = batch->exec_count;
   batch->exec_count++;
   batch->aperture_space += bo->size;

   batch->max_gem_handle =
      MAX2(batch->max_gem_handle, iris_get_backing_bo(bo)->gem_handle);
}

static void
flush_for_cross_batch_dependencies(struct iris_batch *batch,
                                   struct iris_bo *bo,
                                   bool writable)
{
   if (batch->measure && bo == batch->measure->bo)
      return;

   /* When a batch uses a buffer for the first time, or newly writes a buffer
    * it had already referenced, we may need to flush other batches in order
    * to correctly synchronize them.
    */
   for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
      struct iris_batch *other_batch = batch->other_batches[b];
      int other_index = find_exec_index(other_batch, bo);

      /* If the buffer is referenced by another batch, and either batch
       * intends to write it, then flush the other batch and synchronize.
       *
       * Consider these cases:
       *
       * 1. They read, we read   =>  No synchronization required.
       * 2. They read, we write  =>  Synchronize (they need the old value)
       * 3. They write, we read  =>  Synchronize (we need their new value)
       * 4. They write, we write =>  Synchronize (order writes)
       *
       * The read/read case is very common, as multiple batches usually
       * share a streaming state buffer or shader assembly buffer, and
       * we want to avoid synchronizing in this case.
       */
      if (other_index != -1 &&
          (writable || BITSET_TEST(other_batch->bos_written, other_index)))
         iris_batch_flush(other_batch);
   }
}
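
/*
 * Illustrative example of case 2 above (the scenario is hypothetical, not
 * additional driver logic): if the compute batch has already read a
 * streaming state buffer and the render batch now wants to write it, we
 * flush the compute batch first, so the kernel orders its reads before our
 * write and it still observes the old contents.
 */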
331 * 332 * You must call this on any BO you wish to use in this batch, to ensure 333 * that it's resident when the GPU commands execute. 334 */ 335void 336iris_use_pinned_bo(struct iris_batch *batch, 337 struct iris_bo *bo, 338 bool writable, enum iris_domain access) 339{ 340 assert(iris_get_backing_bo(bo)->real.kflags & EXEC_OBJECT_PINNED); 341 assert(bo != batch->bo); 342 343 /* Never mark the workaround BO with EXEC_OBJECT_WRITE. We don't care 344 * about the order of any writes to that buffer, and marking it writable 345 * would introduce data dependencies between multiple batches which share 346 * the buffer. It is added directly to the batch using add_bo_to_batch() 347 * during batch reset time. 348 */ 349 if (bo == batch->screen->workaround_bo) 350 return; 351 352 if (access < NUM_IRIS_DOMAINS) { 353 assert(batch->sync_region_depth); 354 iris_bo_bump_seqno(bo, batch->next_seqno, access); 355 } 356 357 int existing_index = find_exec_index(batch, bo); 358 359 if (existing_index == -1) { 360 flush_for_cross_batch_dependencies(batch, bo, writable); 361 362 ensure_exec_obj_space(batch, 1); 363 add_bo_to_batch(batch, bo, writable); 364 } else if (writable && !BITSET_TEST(batch->bos_written, existing_index)) { 365 flush_for_cross_batch_dependencies(batch, bo, writable); 366 367 /* The BO is already in the list; mark it writable */ 368 BITSET_SET(batch->bos_written, existing_index); 369 } 370} 371 372static void 373create_batch(struct iris_batch *batch) 374{ 375 struct iris_screen *screen = batch->screen; 376 struct iris_bufmgr *bufmgr = screen->bufmgr; 377 378 /* TODO: We probably could suballocate batches... */ 379 batch->bo = iris_bo_alloc(bufmgr, "command buffer", 380 BATCH_SZ + BATCH_RESERVED, 1, 381 IRIS_MEMZONE_OTHER, BO_ALLOC_NO_SUBALLOC); 382 iris_get_backing_bo(batch->bo)->real.kflags |= EXEC_OBJECT_CAPTURE; 383 batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE); 384 batch->map_next = batch->map; 385 386 ensure_exec_obj_space(batch, 1); 387 add_bo_to_batch(batch, batch->bo, false); 388} 389 390static void 391iris_batch_maybe_noop(struct iris_batch *batch) 392{ 393 /* We only insert the NOOP at the beginning of the batch. */ 394 assert(iris_batch_bytes_used(batch) == 0); 395 396 if (batch->noop_enabled) { 397 /* Emit MI_BATCH_BUFFER_END to prevent any further command to be 398 * executed. 399 */ 400 uint32_t *map = batch->map_next; 401 402 map[0] = (0xA << 23); 403 404 batch->map_next += 4; 405 } 406} 407 408static void 409iris_batch_reset(struct iris_batch *batch) 410{ 411 struct iris_screen *screen = batch->screen; 412 struct iris_bufmgr *bufmgr = screen->bufmgr; 413 414 iris_bo_unreference(batch->bo); 415 batch->primary_batch_size = 0; 416 batch->total_chained_batch_size = 0; 417 batch->contains_draw = false; 418 batch->contains_fence_signal = false; 419 batch->decoder.surface_base = batch->last_surface_base_address; 420 421 create_batch(batch); 422 assert(batch->bo->index == 0); 423 424 memset(batch->bos_written, 0, 425 sizeof(BITSET_WORD) * BITSET_WORDS(batch->exec_array_size)); 426 427 struct iris_syncobj *syncobj = iris_create_syncobj(bufmgr); 428 iris_batch_add_syncobj(batch, syncobj, I915_EXEC_FENCE_SIGNAL); 429 iris_syncobj_reference(bufmgr, &syncobj, NULL); 430 431 assert(!batch->sync_region_depth); 432 iris_batch_sync_boundary(batch); 433 iris_batch_mark_reset_sync(batch); 434 435 /* Always add the workaround BO, it contains a driver identifier at the 436 * beginning quite helpful to debug error states. 
437 */ 438 add_bo_to_batch(batch, screen->workaround_bo, false); 439 440 iris_batch_maybe_noop(batch); 441} 442 443void 444iris_batch_free(struct iris_batch *batch) 445{ 446 struct iris_screen *screen = batch->screen; 447 struct iris_bufmgr *bufmgr = screen->bufmgr; 448 449 for (int i = 0; i < batch->exec_count; i++) { 450 iris_bo_unreference(batch->exec_bos[i]); 451 } 452 free(batch->exec_bos); 453 ralloc_free(batch->bos_written); 454 455 ralloc_free(batch->exec_fences.mem_ctx); 456 457 pipe_resource_reference(&batch->fine_fences.ref.res, NULL); 458 459 util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s) 460 iris_syncobj_reference(bufmgr, s, NULL); 461 ralloc_free(batch->syncobjs.mem_ctx); 462 463 iris_fine_fence_reference(batch->screen, &batch->last_fence, NULL); 464 u_upload_destroy(batch->fine_fences.uploader); 465 466 iris_bo_unreference(batch->bo); 467 batch->bo = NULL; 468 batch->map = NULL; 469 batch->map_next = NULL; 470 471 iris_destroy_hw_context(bufmgr, batch->hw_ctx_id); 472 473 iris_destroy_batch_measure(batch->measure); 474 batch->measure = NULL; 475 476 _mesa_hash_table_destroy(batch->cache.render, NULL); 477 478 if (INTEL_DEBUG(DEBUG_ANY)) 479 intel_batch_decode_ctx_finish(&batch->decoder); 480} 481 482/** 483 * If we've chained to a secondary batch, or are getting near to the end, 484 * then flush. This should only be called between draws. 485 */ 486void 487iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate) 488{ 489 if (batch->bo != batch->exec_bos[0] || 490 iris_batch_bytes_used(batch) + estimate >= BATCH_SZ) { 491 iris_batch_flush(batch); 492 } 493} 494 495static void 496record_batch_sizes(struct iris_batch *batch) 497{ 498 unsigned batch_size = iris_batch_bytes_used(batch); 499 500 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->map, batch_size)); 501 502 if (batch->bo == batch->exec_bos[0]) 503 batch->primary_batch_size = batch_size; 504 505 batch->total_chained_batch_size += batch_size; 506} 507 508void 509iris_chain_to_new_batch(struct iris_batch *batch) 510{ 511 uint32_t *cmd = batch->map_next; 512 uint64_t *addr = batch->map_next + 4; 513 batch->map_next += 12; 514 515 record_batch_sizes(batch); 516 517 /* No longer held by batch->bo, still held by validation list */ 518 iris_bo_unreference(batch->bo); 519 create_batch(batch); 520 521 /* Emit MI_BATCH_BUFFER_START to chain to another batch. */ 522 *cmd = (0x31 << 23) | (1 << 8) | (3 - 2); 523 *addr = batch->bo->address; 524} 525 526static void 527add_aux_map_bos_to_batch(struct iris_batch *batch) 528{ 529 void *aux_map_ctx = iris_bufmgr_get_aux_map_context(batch->screen->bufmgr); 530 if (!aux_map_ctx) 531 return; 532 533 uint32_t count = intel_aux_map_get_num_buffers(aux_map_ctx); 534 ensure_exec_obj_space(batch, count); 535 intel_aux_map_fill_bos(aux_map_ctx, 536 (void**)&batch->exec_bos[batch->exec_count], count); 537 for (uint32_t i = 0; i < count; i++) { 538 struct iris_bo *bo = batch->exec_bos[batch->exec_count]; 539 add_bo_to_batch(batch, bo, false); 540 } 541} 542 543static void 544finish_seqno(struct iris_batch *batch) 545{ 546 struct iris_fine_fence *sq = iris_fine_fence_new(batch, IRIS_FENCE_END); 547 if (!sq) 548 return; 549 550 iris_fine_fence_reference(batch->screen, &batch->last_fence, sq); 551 iris_fine_fence_reference(batch->screen, &sq, NULL); 552} 553 554/** 555 * Terminate a batch with MI_BATCH_BUFFER_END. 
556 */ 557static void 558iris_finish_batch(struct iris_batch *batch) 559{ 560 const struct intel_device_info *devinfo = &batch->screen->devinfo; 561 562 if (devinfo->ver == 12 && batch->name == IRIS_BATCH_RENDER) { 563 /* We re-emit constants at the beginning of every batch as a hardware 564 * bug workaround, so invalidate indirect state pointers in order to 565 * save ourselves the overhead of restoring constants redundantly when 566 * the next render batch is executed. 567 */ 568 iris_emit_pipe_control_flush(batch, "ISP invalidate at batch end", 569 PIPE_CONTROL_INDIRECT_STATE_POINTERS_DISABLE | 570 PIPE_CONTROL_STALL_AT_SCOREBOARD | 571 PIPE_CONTROL_CS_STALL); 572 } 573 574 add_aux_map_bos_to_batch(batch); 575 576 finish_seqno(batch); 577 578 /* Emit MI_BATCH_BUFFER_END to finish our batch. */ 579 uint32_t *map = batch->map_next; 580 581 map[0] = (0xA << 23); 582 583 batch->map_next += 4; 584 585 record_batch_sizes(batch); 586} 587 588/** 589 * Replace our current GEM context with a new one (in case it got banned). 590 */ 591static bool 592replace_hw_ctx(struct iris_batch *batch) 593{ 594 struct iris_screen *screen = batch->screen; 595 struct iris_bufmgr *bufmgr = screen->bufmgr; 596 597 uint32_t new_ctx = iris_clone_hw_context(bufmgr, batch->hw_ctx_id); 598 if (!new_ctx) 599 return false; 600 601 iris_destroy_hw_context(bufmgr, batch->hw_ctx_id); 602 batch->hw_ctx_id = new_ctx; 603 604 /* Notify the context that state must be re-initialized. */ 605 iris_lost_context_state(batch); 606 607 return true; 608} 609 610enum pipe_reset_status 611iris_batch_check_for_reset(struct iris_batch *batch) 612{ 613 struct iris_screen *screen = batch->screen; 614 enum pipe_reset_status status = PIPE_NO_RESET; 615 struct drm_i915_reset_stats stats = { .ctx_id = batch->hw_ctx_id }; 616 617 if (intel_ioctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats)) 618 DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno)); 619 620 if (stats.batch_active != 0) { 621 /* A reset was observed while a batch from this hardware context was 622 * executing. Assume that this context was at fault. 623 */ 624 status = PIPE_GUILTY_CONTEXT_RESET; 625 } else if (stats.batch_pending != 0) { 626 /* A reset was observed while a batch from this context was in progress, 627 * but the batch was not executing. In this case, assume that the 628 * context was not at fault. 629 */ 630 status = PIPE_INNOCENT_CONTEXT_RESET; 631 } 632 633 if (status != PIPE_NO_RESET) { 634 /* Our context is likely banned, or at least in an unknown state. 635 * Throw it away and start with a fresh context. Ideally this may 636 * catch the problem before our next execbuf fails with -EIO. 
637 */ 638 replace_hw_ctx(batch); 639 } 640 641 return status; 642} 643 644static void 645move_syncobj_to_batch(struct iris_batch *batch, 646 struct iris_syncobj **p_syncobj, 647 unsigned flags) 648{ 649 struct iris_bufmgr *bufmgr = batch->screen->bufmgr; 650 651 if (!*p_syncobj) 652 return; 653 654 bool found = false; 655 util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s) { 656 if (*p_syncobj == *s) { 657 found = true; 658 break; 659 } 660 } 661 662 if (!found) 663 iris_batch_add_syncobj(batch, *p_syncobj, flags); 664 665 iris_syncobj_reference(bufmgr, p_syncobj, NULL); 666} 667 668static void 669update_bo_syncobjs(struct iris_batch *batch, struct iris_bo *bo, bool write) 670{ 671 struct iris_screen *screen = batch->screen; 672 struct iris_bufmgr *bufmgr = screen->bufmgr; 673 674 /* Make sure bo->deps is big enough */ 675 if (screen->id >= bo->deps_size) { 676 int new_size = screen->id + 1; 677 bo->deps= realloc(bo->deps, new_size * sizeof(bo->deps[0])); 678 memset(&bo->deps[bo->deps_size], 0, 679 sizeof(bo->deps[0]) * (new_size - bo->deps_size)); 680 681 bo->deps_size = new_size; 682 } 683 684 /* When it comes to execbuf submission of non-shared buffers, we only need 685 * to care about the reads and writes done by the other batches of our own 686 * screen, and we also don't care about the reads and writes done by our 687 * own batch, although we need to track them. Just note that other places of 688 * our code may need to care about all the operations done by every batch 689 * on every screen. 690 */ 691 struct iris_bo_screen_deps *deps = &bo->deps[screen->id]; 692 int batch_idx = batch->name; 693 694#if IRIS_BATCH_COUNT == 2 695 /* Due to the above, we exploit the fact that IRIS_NUM_BATCHES is actually 696 * 2, which means there's only one other batch we need to care about. 697 */ 698 int other_batch_idx = 1 - batch_idx; 699#else 700 /* For IRIS_BATCH_COUNT == 3 we can do: 701 * int other_batch_idxs[IRIS_BATCH_COUNT - 1] = { 702 * (batch_idx ^ 1) & 1, 703 * (batch_idx ^ 2) & 2, 704 * }; 705 * For IRIS_BATCH_COUNT == 4 we can do: 706 * int other_batch_idxs[IRIS_BATCH_COUNT - 1] = { 707 * (batch_idx + 1) & 3, 708 * (batch_idx + 2) & 3, 709 * (batch_idx + 3) & 3, 710 * }; 711 */ 712#error "Implement me." 713#endif 714 715 /* If it is being written to by others, wait on it. */ 716 if (deps->write_syncobjs[other_batch_idx]) 717 move_syncobj_to_batch(batch, &deps->write_syncobjs[other_batch_idx], 718 I915_EXEC_FENCE_WAIT); 719 720 /* If it's being written by our screen, wait on it too. This is relevant 721 * when there are multiple contexts on the same screen. */ 722 if (deps->write_syncobjs[batch_idx]) 723 move_syncobj_to_batch(batch, &deps->write_syncobjs[batch_idx], 724 I915_EXEC_FENCE_WAIT); 725 726 struct iris_syncobj *batch_syncobj = iris_batch_get_signal_syncobj(batch); 727 728 if (write) { 729 /* If we're writing to it, set our batch's syncobj as write_syncobj so 730 * others can wait on us. Also wait every reader we care about before 731 * writing. 732 */ 733 iris_syncobj_reference(bufmgr, &deps->write_syncobjs[batch_idx], 734 batch_syncobj); 735 736 move_syncobj_to_batch(batch, &deps->read_syncobjs[other_batch_idx], 737 I915_EXEC_FENCE_WAIT); 738 move_syncobj_to_batch(batch, &deps->read_syncobjs[batch_idx], 739 I915_EXEC_FENCE_WAIT); 740 741 } else { 742 /* If we're reading, replace the other read from our batch index. 
static void
update_batch_syncobjs(struct iris_batch *batch)
{
   struct iris_bufmgr *bufmgr = batch->screen->bufmgr;
   simple_mtx_t *bo_deps_lock = iris_bufmgr_get_bo_deps_lock(bufmgr);

   simple_mtx_lock(bo_deps_lock);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      bool write = BITSET_TEST(batch->bos_written, i);

      if (bo == batch->screen->workaround_bo)
         continue;

      update_bo_syncobjs(batch, bo, write);
   }
   simple_mtx_unlock(bo_deps_lock);
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
submit_batch(struct iris_batch *batch)
{
   iris_bo_unmap(batch->bo);

   struct drm_i915_gem_exec_object2 *validation_list =
      malloc(batch->exec_count * sizeof(*validation_list));

   unsigned *index_for_handle =
      calloc(batch->max_gem_handle + 1, sizeof(unsigned));

   unsigned validation_count = 0;
   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = iris_get_backing_bo(batch->exec_bos[i]);
      assert(bo->gem_handle != 0);

      bool written = BITSET_TEST(batch->bos_written, i);
      unsigned prev_index = index_for_handle[bo->gem_handle];
      if (prev_index > 0) {
         if (written)
            validation_list[prev_index].flags |= EXEC_OBJECT_WRITE;
      } else {
         index_for_handle[bo->gem_handle] = validation_count;
         validation_list[validation_count] =
            (struct drm_i915_gem_exec_object2) {
               .handle = bo->gem_handle,
               .offset = bo->address,
               .flags = bo->real.kflags | (written ? EXEC_OBJECT_WRITE : 0) |
                        (iris_bo_is_external(bo) ? 0 : EXEC_OBJECT_ASYNC),
            };
         ++validation_count;
      }
   }

   free(index_for_handle);

   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) {
      dump_fence_list(batch);
      dump_bo_list(batch);
   }

   if (INTEL_DEBUG(DEBUG_BATCH)) {
      decode_batch(batch);
   }

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.address which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) validation_list,
      .buffer_count = validation_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = I915_EXEC_RENDER |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };
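
   /* With I915_EXEC_FENCE_ARRAY, the legacy cliprects fields are repurposed
    * to carry the array of fences to wait on and signal.
    */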
   if (num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = 0;
   if (!batch->screen->devinfo.no_hw &&
       intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
      ret = -errno;

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_get_backing_bo(bo)->idle = false;

      iris_bo_unreference(bo);
   }

   free(validation_list);

   return ret;
}

static const char *
batch_name_to_string(enum iris_batch_name name)
{
   const char *names[IRIS_BATCH_COUNT] = {
      [IRIS_BATCH_RENDER]  = "render",
      [IRIS_BATCH_COMPUTE] = "compute",
   };
   return names[name];
}

/**
 * Flush the batch buffer, submitting it to the GPU and resetting it so
 * we're ready to emit the next batch.
 */
void
_iris_batch_flush(struct iris_batch *batch, const char *file, int line)
{
   struct iris_screen *screen = batch->screen;

   /* If the batch is empty, there is nothing to submit unless we need to
    * signal a fence.
    */
   if (iris_batch_bytes_used(batch) == 0 && !batch->contains_fence_signal)
      return;

   iris_measure_batch_end(batch->ice, batch);

   iris_finish_batch(batch);

   update_batch_syncobjs(batch);

   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) {
      const char *basefile = strstr(file, "iris/");
      if (basefile)
         file = basefile + 5;

      fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5db (%0.1f%%) "
              "(cmds), %4d BOs (%0.1fMb aperture)\n",
              file, line, batch_name_to_string(batch->name), batch->hw_ctx_id,
              batch->total_chained_batch_size,
              100.0f * batch->total_chained_batch_size / BATCH_SZ,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024));
   }

   int ret = submit_batch(batch);

   /* When batch submission fails, our end-of-batch syncobj remains
    * unsignalled, and in fact is not even considered submitted.
    *
    * In the hang recovery case (-EIO) or -ENOMEM, we recreate our context and
    * attempt to carry on.  In that case, we need to signal our syncobj,
    * dubiously claiming that this batch completed, because future batches may
    * depend on it.  If we don't, then execbuf would fail with -EINVAL for
    * those batches, because they depend on a syncobj that's considered to be
    * "never submitted".  This would lead to an abort().  So here, we signal
    * the failing batch's syncobj to try and allow further progress to be
    * made, knowing we may have broken our dependency tracking.
926 */ 927 if (ret < 0) 928 iris_syncobj_signal(screen->bufmgr, iris_batch_get_signal_syncobj(batch)); 929 930 batch->exec_count = 0; 931 batch->max_gem_handle = 0; 932 batch->aperture_space = 0; 933 934 util_dynarray_foreach(&batch->syncobjs, struct iris_syncobj *, s) 935 iris_syncobj_reference(screen->bufmgr, s, NULL); 936 util_dynarray_clear(&batch->syncobjs); 937 938 util_dynarray_clear(&batch->exec_fences); 939 940 if (INTEL_DEBUG(DEBUG_SYNC)) { 941 dbg_printf("waiting for idle\n"); 942 iris_bo_wait_rendering(batch->bo); /* if execbuf failed; this is a nop */ 943 } 944 945 /* Start a new batch buffer. */ 946 iris_batch_reset(batch); 947 948 /* EIO means our context is banned. In this case, try and replace it 949 * with a new logical context, and inform iris_context that all state 950 * has been lost and needs to be re-initialized. If this succeeds, 951 * dubiously claim success... 952 * Also handle ENOMEM here. 953 */ 954 if ((ret == -EIO || ret == -ENOMEM) && replace_hw_ctx(batch)) { 955 if (batch->reset->reset) { 956 /* Tell gallium frontends the device is lost and it was our fault. */ 957 batch->reset->reset(batch->reset->data, PIPE_GUILTY_CONTEXT_RESET); 958 } 959 960 ret = 0; 961 } 962 963 if (ret < 0) { 964#ifdef DEBUG 965 const bool color = INTEL_DEBUG(DEBUG_COLOR); 966 fprintf(stderr, "%siris: Failed to submit batchbuffer: %-80s%s\n", 967 color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : ""); 968#endif 969 abort(); 970 } 971} 972 973/** 974 * Does the current batch refer to the given BO? 975 * 976 * (In other words, is the BO in the current batch's validation list?) 977 */ 978bool 979iris_batch_references(struct iris_batch *batch, struct iris_bo *bo) 980{ 981 return find_exec_index(batch, bo) != -1; 982} 983 984/** 985 * Updates the state of the noop feature. Returns true if there was a noop 986 * transition that led to state invalidation. 987 */ 988bool 989iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable) 990{ 991 if (batch->noop_enabled == noop_enable) 992 return 0; 993 994 batch->noop_enabled = noop_enable; 995 996 iris_batch_flush(batch); 997 998 /* If the batch was empty, flush had no effect, so insert our noop. */ 999 if (iris_batch_bytes_used(batch) == 0) 1000 iris_batch_maybe_noop(batch); 1001 1002 /* We only need to update the entire state if we transition from noop -> 1003 * not-noop. 1004 */ 1005 return !batch->noop_enabled; 1006} 1007