1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file iris_batch.c 25 * 26 * Batchbuffer and command submission module. 27 * 28 * Every API draw call results in a number of GPU commands, which we 29 * collect into a "batch buffer". Typically, many draw calls are grouped 30 * into a single batch to amortize command submission overhead. 31 * 32 * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl. 33 * One critical piece of data is the "validation list", which contains a 34 * list of the buffer objects (BOs) which the commands in the GPU need. 35 * The kernel will make sure these are resident and pinned at the correct 36 * virtual memory address before executing our batch. If a BO is not in 37 * the validation list, it effectively does not exist, so take care. 38 */ 39 40#include "iris_batch.h" 41#include "iris_bufmgr.h" 42#include "iris_context.h" 43#include "iris_fence.h" 44 45#include "drm-uapi/i915_drm.h" 46 47#include "util/hash_table.h" 48#include "util/set.h" 49#include "main/macros.h" 50 51#include <errno.h> 52#include <xf86drm.h> 53 54#if HAVE_VALGRIND 55#include <valgrind.h> 56#include <memcheck.h> 57#define VG(x) x 58#else 59#define VG(x) 60#endif 61 62#define FILE_DEBUG_FLAG DEBUG_BUFMGR 63 64/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END 65 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining). Plus, we may 66 * need an extra 4 bytes to pad out to the nearest QWord. So reserve 16. 67 */ 68#define BATCH_RESERVED 16 69 70static void 71iris_batch_reset(struct iris_batch *batch); 72 73static unsigned 74num_fences(struct iris_batch *batch) 75{ 76 return util_dynarray_num_elements(&batch->exec_fences, 77 struct drm_i915_gem_exec_fence); 78} 79 80/** 81 * Debugging code to dump the fence list, used by INTEL_DEBUG=submit. 82 */ 83static void 84dump_fence_list(struct iris_batch *batch) 85{ 86 fprintf(stderr, "Fence list (length %u): ", num_fences(batch)); 87 88 util_dynarray_foreach(&batch->exec_fences, 89 struct drm_i915_gem_exec_fence, f) { 90 fprintf(stderr, "%s%u%s ", 91 (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "", 92 f->handle, 93 (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : ""); 94 } 95 96 fprintf(stderr, "\n"); 97} 98 99/** 100 * Debugging code to dump the validation list, used by INTEL_DEBUG=submit. 101 */ 102static void 103dump_validation_list(struct iris_batch *batch) 104{ 105 fprintf(stderr, "Validation list (length %d):\n", batch->exec_count); 106 107 for (int i = 0; i < batch->exec_count; i++) { 108 uint64_t flags = batch->validation_list[i].flags; 109 assert(batch->validation_list[i].handle == 110 batch->exec_bos[i]->gem_handle); 111 fprintf(stderr, "[%2d]: %2d %-14s @ 0x%016llx (%"PRIu64"B)\t %2d refs %s\n", 112 i, 113 batch->validation_list[i].handle, 114 batch->exec_bos[i]->name, 115 batch->validation_list[i].offset, 116 batch->exec_bos[i]->size, 117 batch->exec_bos[i]->refcount, 118 (flags & EXEC_OBJECT_WRITE) ? " (write)" : ""); 119 } 120} 121 122/** 123 * Return BO information to the batch decoder (for debugging). 124 */ 125static struct gen_batch_decode_bo 126decode_get_bo(void *v_batch, bool ppgtt, uint64_t address) 127{ 128 struct iris_batch *batch = v_batch; 129 130 assert(ppgtt); 131 132 for (int i = 0; i < batch->exec_count; i++) { 133 struct iris_bo *bo = batch->exec_bos[i]; 134 /* The decoder zeroes out the top 16 bits, so we need to as well */ 135 uint64_t bo_address = bo->gtt_offset & (~0ull >> 16); 136 137 if (address >= bo_address && address < bo_address + bo->size) { 138 return (struct gen_batch_decode_bo) { 139 .addr = address, 140 .size = bo->size, 141 .map = iris_bo_map(batch->dbg, bo, MAP_READ) + 142 (address - bo_address), 143 }; 144 } 145 } 146 147 return (struct gen_batch_decode_bo) { }; 148} 149 150/** 151 * Decode the current batch. 152 */ 153static void 154decode_batch(struct iris_batch *batch) 155{ 156 void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ); 157 gen_print_batch(&batch->decoder, map, batch->primary_batch_size, 158 batch->exec_bos[0]->gtt_offset, false); 159} 160 161void 162iris_init_batch(struct iris_batch *batch, 163 struct iris_screen *screen, 164 struct iris_vtable *vtbl, 165 struct pipe_debug_callback *dbg, 166 struct iris_batch *all_batches, 167 enum iris_batch_name name, 168 uint8_t engine, 169 int priority) 170{ 171 batch->screen = screen; 172 batch->vtbl = vtbl; 173 batch->dbg = dbg; 174 batch->name = name; 175 176 /* engine should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */ 177 assert((engine & ~I915_EXEC_RING_MASK) == 0); 178 assert(util_bitcount(engine) == 1); 179 batch->engine = engine; 180 181 batch->hw_ctx_id = iris_create_hw_context(screen->bufmgr); 182 assert(batch->hw_ctx_id); 183 184 iris_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority); 185 186 util_dynarray_init(&batch->exec_fences, ralloc_context(NULL)); 187 util_dynarray_init(&batch->syncpts, ralloc_context(NULL)); 188 189 batch->exec_count = 0; 190 batch->exec_array_size = 100; 191 batch->exec_bos = 192 malloc(batch->exec_array_size * sizeof(batch->exec_bos[0])); 193 batch->validation_list = 194 malloc(batch->exec_array_size * sizeof(batch->validation_list[0])); 195 196 batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer, 197 _mesa_key_pointer_equal); 198 batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer, 199 _mesa_key_pointer_equal); 200 201 memset(batch->other_batches, 0, sizeof(batch->other_batches)); 202 203 for (int i = 0, j = 0; i < IRIS_BATCH_COUNT; i++) { 204 if (&all_batches[i] != batch) 205 batch->other_batches[j++] = &all_batches[i]; 206 } 207 208 if (unlikely(INTEL_DEBUG)) { 209 const unsigned decode_flags = 210 GEN_BATCH_DECODE_FULL | 211 ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) | 212 GEN_BATCH_DECODE_OFFSETS | 213 GEN_BATCH_DECODE_FLOATS; 214 215 /* TODO: track state size so we can print the right # of entries */ 216 gen_batch_decode_ctx_init(&batch->decoder, &screen->devinfo, 217 stderr, decode_flags, NULL, 218 decode_get_bo, NULL, batch); 219 batch->decoder.max_vbo_decoded_lines = 32; 220 } 221 222 iris_batch_reset(batch); 223} 224 225static struct drm_i915_gem_exec_object2 * 226find_validation_entry(struct iris_batch *batch, struct iris_bo *bo) 227{ 228 unsigned index = READ_ONCE(bo->index); 229 230 if (index < batch->exec_count && batch->exec_bos[index] == bo) 231 return &batch->validation_list[index]; 232 233 /* May have been shared between multiple active batches */ 234 for (index = 0; index < batch->exec_count; index++) { 235 if (batch->exec_bos[index] == bo) 236 return &batch->validation_list[index]; 237 } 238 239 return NULL; 240} 241 242/** 243 * Add a buffer to the current batch's validation list. 244 * 245 * You must call this on any BO you wish to use in this batch, to ensure 246 * that it's resident when the GPU commands execute. 247 */ 248void 249iris_use_pinned_bo(struct iris_batch *batch, 250 struct iris_bo *bo, 251 bool writable) 252{ 253 assert(bo->kflags & EXEC_OBJECT_PINNED); 254 255 /* Never mark the workaround BO with EXEC_OBJECT_WRITE. We don't care 256 * about the order of any writes to that buffer, and marking it writable 257 * would introduce data dependencies between multiple batches which share 258 * the buffer. 259 */ 260 if (bo == batch->screen->workaround_bo) 261 writable = false; 262 263 struct drm_i915_gem_exec_object2 *existing_entry = 264 find_validation_entry(batch, bo); 265 266 if (existing_entry) { 267 /* The BO is already in the validation list; mark it writable */ 268 if (writable) 269 existing_entry->flags |= EXEC_OBJECT_WRITE; 270 271 return; 272 } 273 274 if (bo != batch->bo) { 275 /* This is the first time our batch has seen this BO. Before we use it, 276 * we may need to flush and synchronize with other batches. 277 */ 278 for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) { 279 struct drm_i915_gem_exec_object2 *other_entry = 280 find_validation_entry(batch->other_batches[b], bo); 281 282 /* If the buffer is referenced by another batch, and either batch 283 * intends to write it, then flush the other batch and synchronize. 284 * 285 * Consider these cases: 286 * 287 * 1. They read, we read => No synchronization required. 288 * 2. They read, we write => Synchronize (they need the old value) 289 * 3. They write, we read => Synchronize (we need their new value) 290 * 4. They write, we write => Synchronize (order writes) 291 * 292 * The read/read case is very common, as multiple batches usually 293 * share a streaming state buffer or shader assembly buffer, and 294 * we want to avoid synchronizing in this case. 295 */ 296 if (other_entry && 297 ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) { 298 iris_batch_flush(batch->other_batches[b]); 299 iris_batch_add_syncpt(batch, batch->other_batches[b]->last_syncpt, 300 I915_EXEC_FENCE_WAIT); 301 } 302 } 303 } 304 305 /* Now, take a reference and add it to the validation list. */ 306 iris_bo_reference(bo); 307 308 if (batch->exec_count == batch->exec_array_size) { 309 batch->exec_array_size *= 2; 310 batch->exec_bos = 311 realloc(batch->exec_bos, 312 batch->exec_array_size * sizeof(batch->exec_bos[0])); 313 batch->validation_list = 314 realloc(batch->validation_list, 315 batch->exec_array_size * sizeof(batch->validation_list[0])); 316 } 317 318 batch->validation_list[batch->exec_count] = 319 (struct drm_i915_gem_exec_object2) { 320 .handle = bo->gem_handle, 321 .offset = bo->gtt_offset, 322 .flags = bo->kflags | (writable ? EXEC_OBJECT_WRITE : 0), 323 }; 324 325 bo->index = batch->exec_count; 326 batch->exec_bos[batch->exec_count] = bo; 327 batch->aperture_space += bo->size; 328 329 batch->exec_count++; 330} 331 332static void 333create_batch(struct iris_batch *batch) 334{ 335 struct iris_screen *screen = batch->screen; 336 struct iris_bufmgr *bufmgr = screen->bufmgr; 337 338 batch->bo = iris_bo_alloc(bufmgr, "command buffer", 339 BATCH_SZ + BATCH_RESERVED, IRIS_MEMZONE_OTHER); 340 batch->bo->kflags |= EXEC_OBJECT_CAPTURE; 341 batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE); 342 batch->map_next = batch->map; 343 344 iris_use_pinned_bo(batch, batch->bo, false); 345} 346 347static void 348iris_batch_reset(struct iris_batch *batch) 349{ 350 struct iris_screen *screen = batch->screen; 351 352 iris_bo_unreference(batch->bo); 353 batch->primary_batch_size = 0; 354 batch->contains_draw = false; 355 356 create_batch(batch); 357 assert(batch->bo->index == 0); 358 359 struct iris_syncpt *syncpt = iris_create_syncpt(screen); 360 iris_batch_add_syncpt(batch, syncpt, I915_EXEC_FENCE_SIGNAL); 361 iris_syncpt_reference(screen, &syncpt, NULL); 362 363 iris_cache_sets_clear(batch); 364} 365 366void 367iris_batch_free(struct iris_batch *batch) 368{ 369 struct iris_screen *screen = batch->screen; 370 struct iris_bufmgr *bufmgr = screen->bufmgr; 371 372 for (int i = 0; i < batch->exec_count; i++) { 373 iris_bo_unreference(batch->exec_bos[i]); 374 } 375 free(batch->exec_bos); 376 free(batch->validation_list); 377 378 ralloc_free(batch->exec_fences.mem_ctx); 379 380 util_dynarray_foreach(&batch->syncpts, struct iris_syncpt *, s) 381 iris_syncpt_reference(screen, s, NULL); 382 ralloc_free(batch->syncpts.mem_ctx); 383 384 iris_syncpt_reference(screen, &batch->last_syncpt, NULL); 385 386 iris_bo_unreference(batch->bo); 387 batch->bo = NULL; 388 batch->map = NULL; 389 batch->map_next = NULL; 390 391 iris_destroy_hw_context(bufmgr, batch->hw_ctx_id); 392 393 _mesa_hash_table_destroy(batch->cache.render, NULL); 394 _mesa_set_destroy(batch->cache.depth, NULL); 395 396 if (unlikely(INTEL_DEBUG)) 397 gen_batch_decode_ctx_finish(&batch->decoder); 398} 399 400/** 401 * If we've chained to a secondary batch, or are getting near to the end, 402 * then flush. This should only be called between draws. 403 */ 404void 405iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate) 406{ 407 if (batch->bo != batch->exec_bos[0] || 408 iris_batch_bytes_used(batch) + estimate >= BATCH_SZ) { 409 iris_batch_flush(batch); 410 } 411} 412 413void 414iris_chain_to_new_batch(struct iris_batch *batch) 415{ 416 /* We only support chaining a single time. */ 417 assert(batch->bo == batch->exec_bos[0]); 418 419 VG(void *map = batch->map); 420 uint32_t *cmd = batch->map_next; 421 uint64_t *addr = batch->map_next + 4; 422 batch->map_next += 12; 423 424 /* No longer held by batch->bo, still held by validation list */ 425 iris_bo_unreference(batch->bo); 426 batch->primary_batch_size = iris_batch_bytes_used(batch); 427 create_batch(batch); 428 429 /* Emit MI_BATCH_BUFFER_START to chain to another batch. */ 430 *cmd = (0x31 << 23) | (1 << 8) | (3 - 2); 431 *addr = batch->bo->gtt_offset; 432 433 VG(VALGRIND_CHECK_MEM_IS_DEFINED(map, batch->primary_batch_size)); 434} 435 436/** 437 * Terminate a batch with MI_BATCH_BUFFER_END. 438 */ 439static void 440iris_finish_batch(struct iris_batch *batch) 441{ 442 /* Emit MI_BATCH_BUFFER_END to finish our batch. */ 443 uint32_t *map = batch->map_next; 444 445 map[0] = (0xA << 23); 446 447 batch->map_next += 4; 448 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->map, iris_batch_bytes_used(batch))); 449 450 if (batch->bo == batch->exec_bos[0]) 451 batch->primary_batch_size = iris_batch_bytes_used(batch); 452} 453 454/** 455 * Submit the batch to the GPU via execbuffer2. 456 */ 457static int 458submit_batch(struct iris_batch *batch) 459{ 460 iris_bo_unmap(batch->bo); 461 462 /* The requirement for using I915_EXEC_NO_RELOC are: 463 * 464 * The addresses written in the objects must match the corresponding 465 * reloc.gtt_offset which in turn must match the corresponding 466 * execobject.offset. 467 * 468 * Any render targets written to in the batch must be flagged with 469 * EXEC_OBJECT_WRITE. 470 * 471 * To avoid stalling, execobject.offset should match the current 472 * address of that object within the active context. 473 */ 474 struct drm_i915_gem_execbuffer2 execbuf = { 475 .buffers_ptr = (uintptr_t) batch->validation_list, 476 .buffer_count = batch->exec_count, 477 .batch_start_offset = 0, 478 /* This must be QWord aligned. */ 479 .batch_len = ALIGN(batch->primary_batch_size, 8), 480 .flags = batch->engine | 481 I915_EXEC_NO_RELOC | 482 I915_EXEC_BATCH_FIRST | 483 I915_EXEC_HANDLE_LUT, 484 .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */ 485 }; 486 487 if (num_fences(batch)) { 488 execbuf.flags |= I915_EXEC_FENCE_ARRAY; 489 execbuf.num_cliprects = num_fences(batch); 490 execbuf.cliprects_ptr = 491 (uintptr_t)util_dynarray_begin(&batch->exec_fences); 492 } 493 494 int ret = batch->screen->no_hw ? 0 : drm_ioctl(batch->screen->fd, 495 DRM_IOCTL_I915_GEM_EXECBUFFER2, 496 &execbuf); 497 if (ret != 0) { 498 ret = -errno; 499 DBG("execbuf FAILED: errno = %d\n", -ret); 500 fprintf(stderr, "execbuf FAILED: errno = %d\n", -ret); 501 abort(); 502 } else { 503 DBG("execbuf succeeded\n"); 504 } 505 506 for (int i = 0; i < batch->exec_count; i++) { 507 struct iris_bo *bo = batch->exec_bos[i]; 508 509 bo->idle = false; 510 bo->index = -1; 511 512 iris_bo_unreference(bo); 513 } 514 515 return ret; 516} 517 518static const char * 519batch_name_to_string(enum iris_batch_name name) 520{ 521 const char *names[IRIS_BATCH_COUNT] = { 522 [IRIS_BATCH_RENDER] = "render", 523 [IRIS_BATCH_COMPUTE] = "compute", 524 }; 525 return names[name]; 526} 527 528/** 529 * Flush the batch buffer, submitting it to the GPU and resetting it so 530 * we're ready to emit the next batch. 531 * 532 * \param in_fence_fd is ignored if -1. Otherwise, this function takes 533 * ownership of the fd. 534 * 535 * \param out_fence_fd is ignored if NULL. Otherwise, the caller must 536 * take ownership of the returned fd. 537 */ 538void 539_iris_batch_flush(struct iris_batch *batch, const char *file, int line) 540{ 541 struct iris_screen *screen = batch->screen; 542 543 if (iris_batch_bytes_used(batch) == 0) 544 return; 545 546 iris_finish_batch(batch); 547 548 if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) { 549 int bytes_for_commands = iris_batch_bytes_used(batch); 550 int second_bytes = 0; 551 if (batch->bo != batch->exec_bos[0]) { 552 second_bytes = bytes_for_commands; 553 bytes_for_commands += batch->primary_batch_size; 554 } 555 fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5d+%5db (%0.1f%%) " 556 "(cmds), %4d BOs (%0.1fMb aperture)\n", 557 file, line, batch_name_to_string(batch->name), batch->hw_ctx_id, 558 batch->primary_batch_size, second_bytes, 559 100.0f * bytes_for_commands / BATCH_SZ, 560 batch->exec_count, 561 (float) batch->aperture_space / (1024 * 1024)); 562 dump_fence_list(batch); 563 dump_validation_list(batch); 564 } 565 566 if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { 567 decode_batch(batch); 568 } 569 570 int ret = submit_batch(batch); 571 572 if (ret >= 0) { 573 //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB) 574 //iris_check_for_reset(ice); 575 576 if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) { 577 dbg_printf("waiting for idle\n"); 578 iris_bo_wait_rendering(batch->bo); 579 } 580 } else { 581#ifdef DEBUG 582 const bool color = INTEL_DEBUG & DEBUG_COLOR; 583 fprintf(stderr, "%siris: Failed to submit batchbuffer: %-80s%s\n", 584 color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : ""); 585 abort(); 586#endif 587 } 588 589 batch->exec_count = 0; 590 batch->aperture_space = 0; 591 592 struct iris_syncpt *syncpt = 593 ((struct iris_syncpt **) util_dynarray_begin(&batch->syncpts))[0]; 594 iris_syncpt_reference(screen, &batch->last_syncpt, syncpt); 595 596 util_dynarray_foreach(&batch->syncpts, struct iris_syncpt *, s) 597 iris_syncpt_reference(screen, s, NULL); 598 util_dynarray_clear(&batch->syncpts); 599 600 util_dynarray_clear(&batch->exec_fences); 601 602 /* Start a new batch buffer. */ 603 iris_batch_reset(batch); 604} 605 606/** 607 * Does the current batch refer to the given BO? 608 * 609 * (In other words, is the BO in the current batch's validation list?) 610 */ 611bool 612iris_batch_references(struct iris_batch *batch, struct iris_bo *bo) 613{ 614 return find_validation_entry(batch, bo) != NULL; 615} 616