/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_batch.c
 *
 * Batchbuffer and command submission module.
 *
 * Every API draw call results in a number of GPU commands, which we
 * collect into a "batch buffer".  Typically, many draw calls are grouped
 * into a single batch to amortize command submission overhead.
 *
 * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl.
 * One critical piece of data is the "validation list", which contains a
 * list of the buffer objects (BOs) which the commands in the GPU need.
 * The kernel will make sure these are resident and pinned at the correct
 * virtual memory address before executing our batch.  If a BO is not in
 * the validation list, it effectively does not exist, so take care.
 */

#include "iris_batch.h"
#include "iris_bufmgr.h"
#include "iris_context.h"
#include "iris_fence.h"

#include "drm-uapi/i915_drm.h"

#include "util/hash_table.h"
#include "util/set.h"
#include "main/macros.h"

#include <errno.h>
#include <xf86drm.h>

#if HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining).  Plus, we may
 * need an extra 4 bytes to pad out to the nearest QWord.  So reserve 16.
 */
#define BATCH_RESERVED 16
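
/* Worst case spelled out: MI_BATCH_BUFFER_START is 3 DWords (12 bytes),
 * and padding the batch length to the next QWord boundary can add up to
 * 4 more, so max(4, 12) + 4 = 16 bytes covers either terminator.
 */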

static void
iris_batch_reset(struct iris_batch *batch);

static unsigned
num_fences(struct iris_batch *batch)
{
   return util_dynarray_num_elements(&batch->exec_fences,
                                     struct drm_i915_gem_exec_fence);
}

/**
 * Debugging code to dump the fence list, used by INTEL_DEBUG=submit.
 */
static void
dump_fence_list(struct iris_batch *batch)
{
   fprintf(stderr, "Fence list (length %u): ", num_fences(batch));

   util_dynarray_foreach(&batch->exec_fences,
                         struct drm_i915_gem_exec_fence, f) {
      fprintf(stderr, "%s%u%s ",
              (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "",
              f->handle,
              (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : "");
   }

   fprintf(stderr, "\n");
}

/**
 * Debugging code to dump the validation list, used by INTEL_DEBUG=submit.
 */
static void
dump_validation_list(struct iris_batch *batch)
{
   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);

   for (int i = 0; i < batch->exec_count; i++) {
      uint64_t flags = batch->validation_list[i].flags;
      assert(batch->validation_list[i].handle ==
             batch->exec_bos[i]->gem_handle);
      fprintf(stderr, "[%2d]: %2d %-14s @ 0x%016llx (%"PRIu64"B)\t %2d refs %s\n",
              i,
              batch->validation_list[i].handle,
              batch->exec_bos[i]->name,
              batch->validation_list[i].offset,
              batch->exec_bos[i]->size,
              batch->exec_bos[i]->refcount,
              (flags & EXEC_OBJECT_WRITE) ? " (write)" : "");
   }
}

/**
 * Return BO information to the batch decoder (for debugging).
 */
static struct gen_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
{
   struct iris_batch *batch = v_batch;

   assert(ppgtt);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      /* The decoder zeroes out the top 16 bits, so we need to as well */
      uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);

      if (address >= bo_address && address < bo_address + bo->size) {
         return (struct gen_batch_decode_bo) {
            .addr = address,
            .size = bo->size,
            .map = iris_bo_map(batch->dbg, bo, MAP_READ) +
                   (address - bo_address),
         };
      }
   }

   return (struct gen_batch_decode_bo) { };
}
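
/* A note on the masking in decode_get_bo(): pinned BO addresses are 48-bit
 * virtual addresses stored in canonical (sign-extended) form, so bits 63:48
 * carry no information.  The decoder strips them from the addresses it
 * parses out of the batch, and gtt_offset must be masked the same way or
 * lookups in the upper half of the address space would never match.
 */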

/**
 * Decode the current batch.
 */
static void
decode_batch(struct iris_batch *batch)
{
   void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
   gen_print_batch(&batch->decoder, map, batch->primary_batch_size,
                   batch->exec_bos[0]->gtt_offset, false);
}

void
iris_init_batch(struct iris_batch *batch,
                struct iris_screen *screen,
                struct iris_vtable *vtbl,
                struct pipe_debug_callback *dbg,
                struct iris_batch *all_batches,
                enum iris_batch_name name,
                uint8_t engine,
                int priority)
{
   batch->screen = screen;
   batch->vtbl = vtbl;
   batch->dbg = dbg;
   batch->name = name;

   /* engine should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
   assert((engine & ~I915_EXEC_RING_MASK) == 0);
   assert(util_bitcount(engine) == 1);
   batch->engine = engine;

   batch->hw_ctx_id = iris_create_hw_context(screen->bufmgr);
   assert(batch->hw_ctx_id);

   iris_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority);

   util_dynarray_init(&batch->exec_fences, ralloc_context(NULL));
   util_dynarray_init(&batch->syncpts, ralloc_context(NULL));

   batch->exec_count = 0;
   batch->exec_array_size = 100;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->validation_list =
      malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));

   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);
   batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);

   memset(batch->other_batches, 0, sizeof(batch->other_batches));

   for (int i = 0, j = 0; i < IRIS_BATCH_COUNT; i++) {
      if (&all_batches[i] != batch)
         batch->other_batches[j++] = &all_batches[i];
   }

   if (unlikely(INTEL_DEBUG)) {
      const unsigned decode_flags =
         GEN_BATCH_DECODE_FULL |
         ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
         GEN_BATCH_DECODE_OFFSETS |
         GEN_BATCH_DECODE_FLOATS;

      /* TODO: track state size so we can print the right # of entries */
      gen_batch_decode_ctx_init(&batch->decoder, &screen->devinfo,
                                stderr, decode_flags, NULL,
                                decode_get_bo, NULL, batch);
      batch->decoder.max_vbo_decoded_lines = 32;
   }

   iris_batch_reset(batch);
}

static struct drm_i915_gem_exec_object2 *
find_validation_entry(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);

   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return &batch->validation_list[index];

   /* May have been shared between multiple active batches */
   for (index = 0; index < batch->exec_count; index++) {
      if (batch->exec_bos[index] == bo)
         return &batch->validation_list[index];
   }

   return NULL;
}
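
/* Note that find_validation_entry() treats bo->index as a hint, not a fact:
 * a BO may sit in several batches' validation lists at once, each of which
 * overwrites the shared index field (and submit_batch() resets it to -1).
 * Hence the cheap check first, then the linear-scan fallback when the hint
 * turns out to be stale.
 */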

/**
 * Add a buffer to the current batch's validation list.
 *
 * You must call this on any BO you wish to use in this batch, to ensure
 * that it's resident when the GPU commands execute.
 */
void
iris_use_pinned_bo(struct iris_batch *batch,
                   struct iris_bo *bo,
                   bool writable)
{
   assert(bo->kflags & EXEC_OBJECT_PINNED);

   /* Never mark the workaround BO with EXEC_OBJECT_WRITE.  We don't care
    * about the order of any writes to that buffer, and marking it writable
    * would introduce data dependencies between multiple batches which share
    * the buffer.
    */
   if (bo == batch->screen->workaround_bo)
      writable = false;

   struct drm_i915_gem_exec_object2 *existing_entry =
      find_validation_entry(batch, bo);

   if (existing_entry) {
      /* The BO is already in the validation list; mark it writable */
      if (writable)
         existing_entry->flags |= EXEC_OBJECT_WRITE;

      return;
   }

   if (bo != batch->bo) {
      /* This is the first time our batch has seen this BO.  Before we use it,
       * we may need to flush and synchronize with other batches.
       */
      for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
         struct drm_i915_gem_exec_object2 *other_entry =
            find_validation_entry(batch->other_batches[b], bo);

         /* If the buffer is referenced by another batch, and either batch
          * intends to write it, then flush the other batch and synchronize.
          *
          * Consider these cases:
          *
          * 1. They read, we read   =>  No synchronization required.
          * 2. They read, we write  =>  Synchronize (they need the old value)
          * 3. They write, we read  =>  Synchronize (we need their new value)
          * 4. They write, we write =>  Synchronize (order writes)
          *
          * The read/read case is very common, as multiple batches usually
          * share a streaming state buffer or shader assembly buffer, and
          * we want to avoid synchronizing in this case.
          */
         if (other_entry &&
             ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) {
            iris_batch_flush(batch->other_batches[b]);
            iris_batch_add_syncpt(batch, batch->other_batches[b]->last_syncpt,
                                  I915_EXEC_FENCE_WAIT);
         }
      }
   }

   /* Now, take a reference and add it to the validation list. */
   iris_bo_reference(bo);

   if (batch->exec_count == batch->exec_array_size) {
      batch->exec_array_size *= 2;
      batch->exec_bos =
         realloc(batch->exec_bos,
                 batch->exec_array_size * sizeof(batch->exec_bos[0]));
      batch->validation_list =
         realloc(batch->validation_list,
                 batch->exec_array_size * sizeof(batch->validation_list[0]));
   }

   batch->validation_list[batch->exec_count] =
      (struct drm_i915_gem_exec_object2) {
         .handle = bo->gem_handle,
         .offset = bo->gtt_offset,
         .flags = bo->kflags | (writable ? EXEC_OBJECT_WRITE : 0),
      };

   bo->index = batch->exec_count;
   batch->exec_bos[batch->exec_count] = bo;
   batch->aperture_space += bo->size;

   batch->exec_count++;
}
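
/* A usage sketch (caller names hypothetical): state-binding code is expected
 * to call this for every BO a draw touches, e.g.
 *
 *    iris_use_pinned_bo(batch, tex->bo, false);   // sampled texture
 *    iris_use_pinned_bo(batch, rt->bo, true);     // render target
 *
 * Skipping the call means the kernel never sees the BO for this submission,
 * so the GPU faults or reads stale data -- the "effectively does not exist"
 * warning from the header comment.
 */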

static void
create_batch(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   batch->bo = iris_bo_alloc(bufmgr, "command buffer",
                             BATCH_SZ + BATCH_RESERVED, IRIS_MEMZONE_OTHER);
   batch->bo->kflags |= EXEC_OBJECT_CAPTURE;
   batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
   batch->map_next = batch->map;

   iris_use_pinned_bo(batch, batch->bo, false);
}

static void
iris_batch_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;

   iris_bo_unreference(batch->bo);
   batch->primary_batch_size = 0;
   batch->contains_draw = false;

   create_batch(batch);
   assert(batch->bo->index == 0);

   struct iris_syncpt *syncpt = iris_create_syncpt(screen);
   iris_batch_add_syncpt(batch, syncpt, I915_EXEC_FENCE_SIGNAL);
   /* Drop the local reference; the syncpts array holds its own. */
   iris_syncpt_reference(screen, &syncpt, NULL);

   iris_cache_sets_clear(batch);
}
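
/* Syncpoint lifecycle, pieced together from the code in this file: each
 * reset creates one syncpt that the kernel signals when the batch completes
 * (I915_EXEC_FENCE_SIGNAL); _iris_batch_flush() stashes it in
 * batch->last_syncpt; and iris_use_pinned_bo() makes other batches wait on
 * it (I915_EXEC_FENCE_WAIT) when they touch a shared BO that either side
 * writes.
 */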

void
iris_batch_free(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
   }
   free(batch->exec_bos);
   free(batch->validation_list);

   ralloc_free(batch->exec_fences.mem_ctx);

   util_dynarray_foreach(&batch->syncpts, struct iris_syncpt *, s)
      iris_syncpt_reference(screen, s, NULL);
   ralloc_free(batch->syncpts.mem_ctx);

   iris_syncpt_reference(screen, &batch->last_syncpt, NULL);

   iris_bo_unreference(batch->bo);
   batch->bo = NULL;
   batch->map = NULL;
   batch->map_next = NULL;

   iris_destroy_hw_context(bufmgr, batch->hw_ctx_id);

   _mesa_hash_table_destroy(batch->cache.render, NULL);
   _mesa_set_destroy(batch->cache.depth, NULL);

   if (unlikely(INTEL_DEBUG))
      gen_batch_decode_ctx_finish(&batch->decoder);
}

/**
 * If we've chained to a secondary batch, or are getting near to the end,
 * then flush.  This should only be called between draws.
 */
void
iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate)
{
   if (batch->bo != batch->exec_bos[0] ||
       iris_batch_bytes_used(batch) + estimate >= BATCH_SZ) {
      iris_batch_flush(batch);
   }
}

void
iris_chain_to_new_batch(struct iris_batch *batch)
{
   /* We only support chaining a single time. */
   assert(batch->bo == batch->exec_bos[0]);

   VG(void *map = batch->map);
   uint32_t *cmd = batch->map_next;
   uint64_t *addr = batch->map_next + 4;
   batch->map_next += 12;

   /* No longer held by batch->bo, still held by validation list */
   iris_bo_unreference(batch->bo);
   batch->primary_batch_size = iris_batch_bytes_used(batch);
   create_batch(batch);

   /* Emit MI_BATCH_BUFFER_START to chain to another batch.
    * (Opcode 0x31 in bits 28:23; bit 8 is the Gen8+ address space
    * indicator, selecting PPGTT; the DWord length field is encoded as
    * total DWords minus 2, hence 3 - 2 for this 3-DWord form.)
    */
   *cmd = (0x31 << 23) | (1 << 8) | (3 - 2);
   *addr = batch->bo->gtt_offset;

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(map, batch->primary_batch_size));
}

/**
 * Terminate a batch with MI_BATCH_BUFFER_END.
 */
static void
iris_finish_batch(struct iris_batch *batch)
{
   /* Emit MI_BATCH_BUFFER_END (opcode 0xA in bits 28:23) to finish our batch. */
   uint32_t *map = batch->map_next;

   map[0] = (0xA << 23);

   batch->map_next += 4;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->map, iris_batch_bytes_used(batch)));

   if (batch->bo == batch->exec_bos[0])
      batch->primary_batch_size = iris_batch_bytes_used(batch);
}
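
/* Resulting batch layout after one chain (a sketch, not literal output):
 *
 *    primary BO:    [ commands ... | MI_BATCH_BUFFER_START --> secondary ]
 *    secondary BO:  [ commands ... | MI_BATCH_BUFFER_END ]
 *
 * Only the primary batch's size is reported as batch_len below; execution
 * reaches the secondary buffer through the chaining command itself.
 */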

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
submit_batch(struct iris_batch *batch)
{
   iris_bo_unmap(batch->bo);

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.gtt_offset which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) batch->validation_list,
      .buffer_count = batch->exec_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->engine |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };

   if (num_fences(batch)) {
      /* The fence array is passed via the legacy cliprects fields. */
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = batch->screen->no_hw ? 0 : drm_ioctl(batch->screen->fd,
                                                  DRM_IOCTL_I915_GEM_EXECBUFFER2,
                                                  &execbuf);
   if (ret != 0) {
      ret = -errno;
      DBG("execbuf FAILED: errno = %d\n", -ret);
      fprintf(stderr, "execbuf FAILED: errno = %d\n", -ret);
      abort();
   } else {
      DBG("execbuf succeeded\n");
   }

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_bo_unreference(bo);
   }

   return ret;
}

static const char *
batch_name_to_string(enum iris_batch_name name)
{
   const char *names[IRIS_BATCH_COUNT] = {
      [IRIS_BATCH_RENDER]  = "render",
      [IRIS_BATCH_COMPUTE] = "compute",
   };
   return names[name];
}
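
/* _iris_batch_flush() takes file/line so the DEBUG_BATCH | DEBUG_SUBMIT log
 * can attribute each flush to its call site.  Callers go through a wrapper
 * macro in iris_batch.h, presumably along the lines of
 *
 *    #define iris_batch_flush(batch) \
 *       _iris_batch_flush((batch), __FILE__, __LINE__)
 *
 * (a sketch of the expected macro, not a copy of the header).
 */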

/**
 * Flush the batch buffer, submitting it to the GPU and resetting it so
 * we're ready to emit the next batch.
 */
void
_iris_batch_flush(struct iris_batch *batch, const char *file, int line)
{
   struct iris_screen *screen = batch->screen;

   if (iris_batch_bytes_used(batch) == 0)
      return;

   iris_finish_batch(batch);

   if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
      int bytes_for_commands = iris_batch_bytes_used(batch);
      int second_bytes = 0;
      if (batch->bo != batch->exec_bos[0]) {
         second_bytes = bytes_for_commands;
         bytes_for_commands += batch->primary_batch_size;
      }
      fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5d+%5db (%0.1f%%) "
              "(cmds), %4d BOs (%0.1fMB aperture)\n",
              file, line, batch_name_to_string(batch->name), batch->hw_ctx_id,
              batch->primary_batch_size, second_bytes,
              100.0f * bytes_for_commands / BATCH_SZ,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024));
      dump_fence_list(batch);
      dump_validation_list(batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      decode_batch(batch);
   }

   int ret = submit_batch(batch);

   if (ret >= 0) {
      //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
      //iris_check_for_reset(ice);

      if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
         dbg_printf("waiting for idle\n");
         iris_bo_wait_rendering(batch->bo);
      }
   } else {
#ifdef DEBUG
      const bool color = INTEL_DEBUG & DEBUG_COLOR;
      fprintf(stderr, "%siris: Failed to submit batchbuffer: %-80s%s\n",
              color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : "");
      abort();
#endif
   }

   batch->exec_count = 0;
   batch->aperture_space = 0;

   struct iris_syncpt *syncpt =
      ((struct iris_syncpt **) util_dynarray_begin(&batch->syncpts))[0];
   iris_syncpt_reference(screen, &batch->last_syncpt, syncpt);

   util_dynarray_foreach(&batch->syncpts, struct iris_syncpt *, s)
      iris_syncpt_reference(screen, s, NULL);
   util_dynarray_clear(&batch->syncpts);

   util_dynarray_clear(&batch->exec_fences);

   /* Start a new batch buffer. */
   iris_batch_reset(batch);
}

/**
 * Does the current batch refer to the given BO?
 *
 * (In other words, is the BO in the current batch's validation list?)
 */
bool
iris_batch_references(struct iris_batch *batch, struct iris_bo *bo)
{
   return find_validation_entry(batch, bo) != NULL;
}