17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2017 Intel Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice shall be included 127ec681f3Smrg * in all copies or substantial portions of the Software. 137ec681f3Smrg * 147ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 157ec681f3Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 167ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 177ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 187ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 197ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 207ec681f3Smrg * DEALINGS IN THE SOFTWARE. 217ec681f3Smrg */ 227ec681f3Smrg 237ec681f3Smrg/** 247ec681f3Smrg * @file crocus_batch.c 257ec681f3Smrg * 267ec681f3Smrg * Batchbuffer and command submission module. 277ec681f3Smrg * 287ec681f3Smrg * Every API draw call results in a number of GPU commands, which we 297ec681f3Smrg * collect into a "batch buffer". Typically, many draw calls are grouped 307ec681f3Smrg * into a single batch to amortize command submission overhead. 317ec681f3Smrg * 327ec681f3Smrg * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl. 337ec681f3Smrg * One critical piece of data is the "validation list", which contains a 347ec681f3Smrg * list of the buffer objects (BOs) which the commands in the GPU need. 357ec681f3Smrg * The kernel will make sure these are resident and pinned at the correct 367ec681f3Smrg * virtual memory address before executing our batch. If a BO is not in 377ec681f3Smrg * the validation list, it effectively does not exist, so take care. 387ec681f3Smrg */ 397ec681f3Smrg 407ec681f3Smrg#include "crocus_batch.h" 417ec681f3Smrg#include "crocus_bufmgr.h" 427ec681f3Smrg#include "crocus_context.h" 437ec681f3Smrg#include "crocus_fence.h" 447ec681f3Smrg 457ec681f3Smrg#include "drm-uapi/i915_drm.h" 467ec681f3Smrg 477ec681f3Smrg#include "intel/common/intel_gem.h" 487ec681f3Smrg#include "main/macros.h" 497ec681f3Smrg#include "util/hash_table.h" 507ec681f3Smrg#include "util/set.h" 517ec681f3Smrg#include "util/u_upload_mgr.h" 527ec681f3Smrg 537ec681f3Smrg#include <errno.h> 547ec681f3Smrg#include <xf86drm.h> 557ec681f3Smrg 567ec681f3Smrg#if HAVE_VALGRIND 577ec681f3Smrg#include <memcheck.h> 587ec681f3Smrg#include <valgrind.h> 597ec681f3Smrg#define VG(x) x 607ec681f3Smrg#else 617ec681f3Smrg#define VG(x) 627ec681f3Smrg#endif 637ec681f3Smrg 647ec681f3Smrg#define FILE_DEBUG_FLAG DEBUG_BUFMGR 657ec681f3Smrg 667ec681f3Smrg/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END 677ec681f3Smrg * or 12 bytes for MI_BATCH_BUFFER_START (when chaining). Plus, we may 687ec681f3Smrg * need an extra 4 bytes to pad out to the nearest QWord. So reserve 16. 697ec681f3Smrg */ 707ec681f3Smrg#define BATCH_RESERVED(devinfo) ((devinfo)->is_haswell ? 32 : 16) 717ec681f3Smrg 727ec681f3Smrgstatic void crocus_batch_reset(struct crocus_batch *batch); 737ec681f3Smrg 747ec681f3Smrgstatic unsigned 757ec681f3Smrgnum_fences(struct crocus_batch *batch) 767ec681f3Smrg{ 777ec681f3Smrg return util_dynarray_num_elements(&batch->exec_fences, 787ec681f3Smrg struct drm_i915_gem_exec_fence); 797ec681f3Smrg} 807ec681f3Smrg 817ec681f3Smrg/** 827ec681f3Smrg * Debugging code to dump the fence list, used by INTEL_DEBUG=submit. 837ec681f3Smrg */ 847ec681f3Smrgstatic void 857ec681f3Smrgdump_fence_list(struct crocus_batch *batch) 867ec681f3Smrg{ 877ec681f3Smrg fprintf(stderr, "Fence list (length %u): ", num_fences(batch)); 887ec681f3Smrg 897ec681f3Smrg util_dynarray_foreach(&batch->exec_fences, 907ec681f3Smrg struct drm_i915_gem_exec_fence, f) { 917ec681f3Smrg fprintf(stderr, "%s%u%s ", 927ec681f3Smrg (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "", 937ec681f3Smrg f->handle, 947ec681f3Smrg (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : ""); 957ec681f3Smrg } 967ec681f3Smrg 977ec681f3Smrg fprintf(stderr, "\n"); 987ec681f3Smrg} 997ec681f3Smrg 1007ec681f3Smrg/** 1017ec681f3Smrg * Debugging code to dump the validation list, used by INTEL_DEBUG=submit. 1027ec681f3Smrg */ 1037ec681f3Smrgstatic void 1047ec681f3Smrgdump_validation_list(struct crocus_batch *batch) 1057ec681f3Smrg{ 1067ec681f3Smrg fprintf(stderr, "Validation list (length %d):\n", batch->exec_count); 1077ec681f3Smrg 1087ec681f3Smrg for (int i = 0; i < batch->exec_count; i++) { 1097ec681f3Smrg uint64_t flags = batch->validation_list[i].flags; 1107ec681f3Smrg assert(batch->validation_list[i].handle == 1117ec681f3Smrg batch->exec_bos[i]->gem_handle); 1127ec681f3Smrg fprintf(stderr, 1137ec681f3Smrg "[%2d]: %2d %-14s @ 0x%"PRIx64" (%" PRIu64 "B)\t %2d refs %s\n", i, 1147ec681f3Smrg batch->validation_list[i].handle, batch->exec_bos[i]->name, 1157ec681f3Smrg (uint64_t)batch->validation_list[i].offset, batch->exec_bos[i]->size, 1167ec681f3Smrg batch->exec_bos[i]->refcount, 1177ec681f3Smrg (flags & EXEC_OBJECT_WRITE) ? " (write)" : ""); 1187ec681f3Smrg } 1197ec681f3Smrg} 1207ec681f3Smrg 1217ec681f3Smrg/** 1227ec681f3Smrg * Return BO information to the batch decoder (for debugging). 1237ec681f3Smrg */ 1247ec681f3Smrgstatic struct intel_batch_decode_bo 1257ec681f3Smrgdecode_get_bo(void *v_batch, bool ppgtt, uint64_t address) 1267ec681f3Smrg{ 1277ec681f3Smrg struct crocus_batch *batch = v_batch; 1287ec681f3Smrg 1297ec681f3Smrg for (int i = 0; i < batch->exec_count; i++) { 1307ec681f3Smrg struct crocus_bo *bo = batch->exec_bos[i]; 1317ec681f3Smrg /* The decoder zeroes out the top 16 bits, so we need to as well */ 1327ec681f3Smrg uint64_t bo_address = bo->gtt_offset & (~0ull >> 16); 1337ec681f3Smrg 1347ec681f3Smrg if (address >= bo_address && address < bo_address + bo->size) { 1357ec681f3Smrg return (struct intel_batch_decode_bo){ 1367ec681f3Smrg .addr = address, 1377ec681f3Smrg .size = bo->size, 1387ec681f3Smrg .map = crocus_bo_map(batch->dbg, bo, MAP_READ) + 1397ec681f3Smrg (address - bo_address), 1407ec681f3Smrg }; 1417ec681f3Smrg } 1427ec681f3Smrg } 1437ec681f3Smrg 1447ec681f3Smrg return (struct intel_batch_decode_bo) { }; 1457ec681f3Smrg} 1467ec681f3Smrg 1477ec681f3Smrgstatic unsigned 1487ec681f3Smrgdecode_get_state_size(void *v_batch, uint64_t address, 1497ec681f3Smrg uint64_t base_address) 1507ec681f3Smrg{ 1517ec681f3Smrg struct crocus_batch *batch = v_batch; 1527ec681f3Smrg 1537ec681f3Smrg /* The decoder gives us offsets from a base address, which is not great. 1547ec681f3Smrg * Binding tables are relative to surface state base address, and other 1557ec681f3Smrg * state is relative to dynamic state base address. These could alias, 1567ec681f3Smrg * but in practice it's unlikely because surface offsets are always in 1577ec681f3Smrg * the [0, 64K) range, and we assign dynamic state addresses starting at 1587ec681f3Smrg * the top of the 4GB range. We should fix this but it's likely good 1597ec681f3Smrg * enough for now. 1607ec681f3Smrg */ 1617ec681f3Smrg unsigned size = (uintptr_t) 1627ec681f3Smrg _mesa_hash_table_u64_search(batch->state_sizes, address - base_address); 1637ec681f3Smrg 1647ec681f3Smrg return size; 1657ec681f3Smrg} 1667ec681f3Smrg 1677ec681f3Smrg/** 1687ec681f3Smrg * Decode the current batch. 1697ec681f3Smrg */ 1707ec681f3Smrgstatic void 1717ec681f3Smrgdecode_batch(struct crocus_batch *batch) 1727ec681f3Smrg{ 1737ec681f3Smrg void *map = crocus_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ); 1747ec681f3Smrg intel_print_batch(&batch->decoder, map, batch->primary_batch_size, 1757ec681f3Smrg batch->exec_bos[0]->gtt_offset, false); 1767ec681f3Smrg} 1777ec681f3Smrg 1787ec681f3Smrgstatic void 1797ec681f3Smrginit_reloc_list(struct crocus_reloc_list *rlist, int count) 1807ec681f3Smrg{ 1817ec681f3Smrg rlist->reloc_count = 0; 1827ec681f3Smrg rlist->reloc_array_size = count; 1837ec681f3Smrg rlist->relocs = malloc(rlist->reloc_array_size * 1847ec681f3Smrg sizeof(struct drm_i915_gem_relocation_entry)); 1857ec681f3Smrg} 1867ec681f3Smrg 1877ec681f3Smrgvoid 1887ec681f3Smrgcrocus_init_batch(struct crocus_context *ice, 1897ec681f3Smrg enum crocus_batch_name name, 1907ec681f3Smrg int priority) 1917ec681f3Smrg{ 1927ec681f3Smrg struct crocus_batch *batch = &ice->batches[name]; 1937ec681f3Smrg struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 1947ec681f3Smrg struct intel_device_info *devinfo = &screen->devinfo; 1957ec681f3Smrg 1967ec681f3Smrg batch->ice = ice; 1977ec681f3Smrg batch->screen = screen; 1987ec681f3Smrg batch->dbg = &ice->dbg; 1997ec681f3Smrg batch->reset = &ice->reset; 2007ec681f3Smrg batch->name = name; 2017ec681f3Smrg batch->contains_fence_signal = false; 2027ec681f3Smrg 2037ec681f3Smrg if (devinfo->ver >= 7) { 2047ec681f3Smrg batch->fine_fences.uploader = 2057ec681f3Smrg u_upload_create(&ice->ctx, 4096, PIPE_BIND_CUSTOM, 2067ec681f3Smrg PIPE_USAGE_STAGING, 0); 2077ec681f3Smrg } 2087ec681f3Smrg crocus_fine_fence_init(batch); 2097ec681f3Smrg 2107ec681f3Smrg batch->hw_ctx_id = crocus_create_hw_context(screen->bufmgr); 2117ec681f3Smrg assert(batch->hw_ctx_id); 2127ec681f3Smrg 2137ec681f3Smrg crocus_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority); 2147ec681f3Smrg 2157ec681f3Smrg batch->valid_reloc_flags = EXEC_OBJECT_WRITE; 2167ec681f3Smrg if (devinfo->ver == 6) 2177ec681f3Smrg batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT; 2187ec681f3Smrg 2197ec681f3Smrg if (INTEL_DEBUG(DEBUG_BATCH)) { 2207ec681f3Smrg /* The shadow doesn't get relocs written so state decode fails. */ 2217ec681f3Smrg batch->use_shadow_copy = false; 2227ec681f3Smrg } else 2237ec681f3Smrg batch->use_shadow_copy = !devinfo->has_llc; 2247ec681f3Smrg 2257ec681f3Smrg util_dynarray_init(&batch->exec_fences, ralloc_context(NULL)); 2267ec681f3Smrg util_dynarray_init(&batch->syncobjs, ralloc_context(NULL)); 2277ec681f3Smrg 2287ec681f3Smrg init_reloc_list(&batch->command.relocs, 250); 2297ec681f3Smrg init_reloc_list(&batch->state.relocs, 250); 2307ec681f3Smrg 2317ec681f3Smrg batch->exec_count = 0; 2327ec681f3Smrg batch->exec_array_size = 100; 2337ec681f3Smrg batch->exec_bos = 2347ec681f3Smrg malloc(batch->exec_array_size * sizeof(batch->exec_bos[0])); 2357ec681f3Smrg batch->validation_list = 2367ec681f3Smrg malloc(batch->exec_array_size * sizeof(batch->validation_list[0])); 2377ec681f3Smrg 2387ec681f3Smrg batch->cache.render = _mesa_hash_table_create(NULL, NULL, 2397ec681f3Smrg _mesa_key_pointer_equal); 2407ec681f3Smrg batch->cache.depth = _mesa_set_create(NULL, NULL, 2417ec681f3Smrg _mesa_key_pointer_equal); 2427ec681f3Smrg 2437ec681f3Smrg memset(batch->other_batches, 0, sizeof(batch->other_batches)); 2447ec681f3Smrg 2457ec681f3Smrg for (int i = 0, j = 0; i < ice->batch_count; i++) { 2467ec681f3Smrg if (i != name) 2477ec681f3Smrg batch->other_batches[j++] = &ice->batches[i]; 2487ec681f3Smrg } 2497ec681f3Smrg 2507ec681f3Smrg if (INTEL_DEBUG(DEBUG_BATCH)) { 2517ec681f3Smrg 2527ec681f3Smrg batch->state_sizes = _mesa_hash_table_u64_create(NULL); 2537ec681f3Smrg const unsigned decode_flags = 2547ec681f3Smrg INTEL_BATCH_DECODE_FULL | 2557ec681f3Smrg (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) | 2567ec681f3Smrg INTEL_BATCH_DECODE_OFFSETS | INTEL_BATCH_DECODE_FLOATS; 2577ec681f3Smrg 2587ec681f3Smrg intel_batch_decode_ctx_init(&batch->decoder, &screen->devinfo, stderr, 2597ec681f3Smrg decode_flags, NULL, decode_get_bo, 2607ec681f3Smrg decode_get_state_size, batch); 2617ec681f3Smrg batch->decoder.max_vbo_decoded_lines = 32; 2627ec681f3Smrg } 2637ec681f3Smrg 2647ec681f3Smrg crocus_batch_reset(batch); 2657ec681f3Smrg} 2667ec681f3Smrg 2677ec681f3Smrgstatic int 2687ec681f3Smrgfind_exec_index(struct crocus_batch *batch, struct crocus_bo *bo) 2697ec681f3Smrg{ 2707ec681f3Smrg unsigned index = READ_ONCE(bo->index); 2717ec681f3Smrg 2727ec681f3Smrg if (index < batch->exec_count && batch->exec_bos[index] == bo) 2737ec681f3Smrg return index; 2747ec681f3Smrg 2757ec681f3Smrg /* May have been shared between multiple active batches */ 2767ec681f3Smrg for (index = 0; index < batch->exec_count; index++) { 2777ec681f3Smrg if (batch->exec_bos[index] == bo) 2787ec681f3Smrg return index; 2797ec681f3Smrg } 2807ec681f3Smrg return -1; 2817ec681f3Smrg} 2827ec681f3Smrg 2837ec681f3Smrgstatic struct drm_i915_gem_exec_object2 * 2847ec681f3Smrgfind_validation_entry(struct crocus_batch *batch, struct crocus_bo *bo) 2857ec681f3Smrg{ 2867ec681f3Smrg int index = find_exec_index(batch, bo); 2877ec681f3Smrg 2887ec681f3Smrg if (index == -1) 2897ec681f3Smrg return NULL; 2907ec681f3Smrg return &batch->validation_list[index]; 2917ec681f3Smrg} 2927ec681f3Smrg 2937ec681f3Smrgstatic void 2947ec681f3Smrgensure_exec_obj_space(struct crocus_batch *batch, uint32_t count) 2957ec681f3Smrg{ 2967ec681f3Smrg while (batch->exec_count + count > batch->exec_array_size) { 2977ec681f3Smrg batch->exec_array_size *= 2; 2987ec681f3Smrg batch->exec_bos = realloc( 2997ec681f3Smrg batch->exec_bos, batch->exec_array_size * sizeof(batch->exec_bos[0])); 3007ec681f3Smrg batch->validation_list = 3017ec681f3Smrg realloc(batch->validation_list, 3027ec681f3Smrg batch->exec_array_size * sizeof(batch->validation_list[0])); 3037ec681f3Smrg } 3047ec681f3Smrg} 3057ec681f3Smrg 3067ec681f3Smrgstatic struct drm_i915_gem_exec_object2 * 3077ec681f3Smrgcrocus_use_bo(struct crocus_batch *batch, struct crocus_bo *bo, bool writable) 3087ec681f3Smrg{ 3097ec681f3Smrg assert(bo->bufmgr == batch->command.bo->bufmgr); 3107ec681f3Smrg 3117ec681f3Smrg struct drm_i915_gem_exec_object2 *existing_entry = 3127ec681f3Smrg find_validation_entry(batch, bo); 3137ec681f3Smrg 3147ec681f3Smrg if (existing_entry) { 3157ec681f3Smrg /* The BO is already in the validation list; mark it writable */ 3167ec681f3Smrg if (writable) 3177ec681f3Smrg existing_entry->flags |= EXEC_OBJECT_WRITE; 3187ec681f3Smrg return existing_entry; 3197ec681f3Smrg } 3207ec681f3Smrg 3217ec681f3Smrg if (bo != batch->command.bo && bo != batch->state.bo) { 3227ec681f3Smrg /* This is the first time our batch has seen this BO. Before we use it, 3237ec681f3Smrg * we may need to flush and synchronize with other batches. 3247ec681f3Smrg */ 3257ec681f3Smrg for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) { 3267ec681f3Smrg 3277ec681f3Smrg if (!batch->other_batches[b]) 3287ec681f3Smrg continue; 3297ec681f3Smrg struct drm_i915_gem_exec_object2 *other_entry = 3307ec681f3Smrg find_validation_entry(batch->other_batches[b], bo); 3317ec681f3Smrg 3327ec681f3Smrg /* If the buffer is referenced by another batch, and either batch 3337ec681f3Smrg * intends to write it, then flush the other batch and synchronize. 3347ec681f3Smrg * 3357ec681f3Smrg * Consider these cases: 3367ec681f3Smrg * 3377ec681f3Smrg * 1. They read, we read => No synchronization required. 3387ec681f3Smrg * 2. They read, we write => Synchronize (they need the old value) 3397ec681f3Smrg * 3. They write, we read => Synchronize (we need their new value) 3407ec681f3Smrg * 4. They write, we write => Synchronize (order writes) 3417ec681f3Smrg * 3427ec681f3Smrg * The read/read case is very common, as multiple batches usually 3437ec681f3Smrg * share a streaming state buffer or shader assembly buffer, and 3447ec681f3Smrg * we want to avoid synchronizing in this case. 3457ec681f3Smrg */ 3467ec681f3Smrg if (other_entry && 3477ec681f3Smrg ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) { 3487ec681f3Smrg crocus_batch_flush(batch->other_batches[b]); 3497ec681f3Smrg crocus_batch_add_syncobj(batch, 3507ec681f3Smrg batch->other_batches[b]->last_fence->syncobj, 3517ec681f3Smrg I915_EXEC_FENCE_WAIT); 3527ec681f3Smrg } 3537ec681f3Smrg } 3547ec681f3Smrg } 3557ec681f3Smrg 3567ec681f3Smrg /* Bump the ref count since the batch is now using this bo. */ 3577ec681f3Smrg crocus_bo_reference(bo); 3587ec681f3Smrg 3597ec681f3Smrg ensure_exec_obj_space(batch, 1); 3607ec681f3Smrg 3617ec681f3Smrg batch->validation_list[batch->exec_count] = 3627ec681f3Smrg (struct drm_i915_gem_exec_object2) { 3637ec681f3Smrg .handle = bo->gem_handle, 3647ec681f3Smrg .offset = bo->gtt_offset, 3657ec681f3Smrg .flags = bo->kflags | (writable ? EXEC_OBJECT_WRITE : 0), 3667ec681f3Smrg }; 3677ec681f3Smrg 3687ec681f3Smrg bo->index = batch->exec_count; 3697ec681f3Smrg batch->exec_bos[batch->exec_count] = bo; 3707ec681f3Smrg batch->aperture_space += bo->size; 3717ec681f3Smrg 3727ec681f3Smrg batch->exec_count++; 3737ec681f3Smrg 3747ec681f3Smrg return &batch->validation_list[batch->exec_count - 1]; 3757ec681f3Smrg} 3767ec681f3Smrg 3777ec681f3Smrgstatic uint64_t 3787ec681f3Smrgemit_reloc(struct crocus_batch *batch, 3797ec681f3Smrg struct crocus_reloc_list *rlist, uint32_t offset, 3807ec681f3Smrg struct crocus_bo *target, int32_t target_offset, 3817ec681f3Smrg unsigned int reloc_flags) 3827ec681f3Smrg{ 3837ec681f3Smrg assert(target != NULL); 3847ec681f3Smrg 3857ec681f3Smrg if (target == batch->ice->workaround_bo) 3867ec681f3Smrg reloc_flags &= ~RELOC_WRITE; 3877ec681f3Smrg 3887ec681f3Smrg bool writable = reloc_flags & RELOC_WRITE; 3897ec681f3Smrg 3907ec681f3Smrg struct drm_i915_gem_exec_object2 *entry = 3917ec681f3Smrg crocus_use_bo(batch, target, writable); 3927ec681f3Smrg 3937ec681f3Smrg if (rlist->reloc_count == rlist->reloc_array_size) { 3947ec681f3Smrg rlist->reloc_array_size *= 2; 3957ec681f3Smrg rlist->relocs = realloc(rlist->relocs, 3967ec681f3Smrg rlist->reloc_array_size * 3977ec681f3Smrg sizeof(struct drm_i915_gem_relocation_entry)); 3987ec681f3Smrg } 3997ec681f3Smrg 4007ec681f3Smrg if (reloc_flags & RELOC_32BIT) { 4017ec681f3Smrg /* Restrict this buffer to the low 32 bits of the address space. 4027ec681f3Smrg * 4037ec681f3Smrg * Altering the validation list flags restricts it for this batch, 4047ec681f3Smrg * but we also alter the BO's kflags to restrict it permanently 4057ec681f3Smrg * (until the BO is destroyed and put back in the cache). Buffers 4067ec681f3Smrg * may stay bound across batches, and we want keep it constrained. 4077ec681f3Smrg */ 4087ec681f3Smrg target->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 4097ec681f3Smrg entry->flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 4107ec681f3Smrg 4117ec681f3Smrg /* RELOC_32BIT is not an EXEC_OBJECT_* flag, so get rid of it. */ 4127ec681f3Smrg reloc_flags &= ~RELOC_32BIT; 4137ec681f3Smrg } 4147ec681f3Smrg 4157ec681f3Smrg if (reloc_flags) 4167ec681f3Smrg entry->flags |= reloc_flags & batch->valid_reloc_flags; 4177ec681f3Smrg 4187ec681f3Smrg rlist->relocs[rlist->reloc_count++] = 4197ec681f3Smrg (struct drm_i915_gem_relocation_entry) { 4207ec681f3Smrg .offset = offset, 4217ec681f3Smrg .delta = target_offset, 4227ec681f3Smrg .target_handle = find_exec_index(batch, target), 4237ec681f3Smrg .presumed_offset = entry->offset, 4247ec681f3Smrg }; 4257ec681f3Smrg 4267ec681f3Smrg /* Using the old buffer offset, write in what the right data would be, in 4277ec681f3Smrg * case the buffer doesn't move and we can short-circuit the relocation 4287ec681f3Smrg * processing in the kernel 4297ec681f3Smrg */ 4307ec681f3Smrg return entry->offset + target_offset; 4317ec681f3Smrg} 4327ec681f3Smrg 4337ec681f3Smrguint64_t 4347ec681f3Smrgcrocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset, 4357ec681f3Smrg struct crocus_bo *target, uint32_t target_offset, 4367ec681f3Smrg unsigned int reloc_flags) 4377ec681f3Smrg{ 4387ec681f3Smrg assert(batch_offset <= batch->command.bo->size - sizeof(uint32_t)); 4397ec681f3Smrg 4407ec681f3Smrg return emit_reloc(batch, &batch->command.relocs, batch_offset, 4417ec681f3Smrg target, target_offset, reloc_flags); 4427ec681f3Smrg} 4437ec681f3Smrg 4447ec681f3Smrguint64_t 4457ec681f3Smrgcrocus_state_reloc(struct crocus_batch *batch, uint32_t state_offset, 4467ec681f3Smrg struct crocus_bo *target, uint32_t target_offset, 4477ec681f3Smrg unsigned int reloc_flags) 4487ec681f3Smrg{ 4497ec681f3Smrg assert(state_offset <= batch->state.bo->size - sizeof(uint32_t)); 4507ec681f3Smrg 4517ec681f3Smrg return emit_reloc(batch, &batch->state.relocs, state_offset, 4527ec681f3Smrg target, target_offset, reloc_flags); 4537ec681f3Smrg} 4547ec681f3Smrg 4557ec681f3Smrgstatic void 4567ec681f3Smrgrecreate_growing_buffer(struct crocus_batch *batch, 4577ec681f3Smrg struct crocus_growing_bo *grow, 4587ec681f3Smrg const char *name, unsigned size) 4597ec681f3Smrg{ 4607ec681f3Smrg struct crocus_screen *screen = batch->screen; 4617ec681f3Smrg struct crocus_bufmgr *bufmgr = screen->bufmgr; 4627ec681f3Smrg grow->bo = crocus_bo_alloc(bufmgr, name, size); 4637ec681f3Smrg grow->bo->kflags |= EXEC_OBJECT_CAPTURE; 4647ec681f3Smrg grow->partial_bo = NULL; 4657ec681f3Smrg grow->partial_bo_map = NULL; 4667ec681f3Smrg grow->partial_bytes = 0; 4677ec681f3Smrg if (batch->use_shadow_copy) 4687ec681f3Smrg grow->map = realloc(grow->map, grow->bo->size); 4697ec681f3Smrg else 4707ec681f3Smrg grow->map = crocus_bo_map(NULL, grow->bo, MAP_READ | MAP_WRITE); 4717ec681f3Smrg grow->map_next = grow->map; 4727ec681f3Smrg} 4737ec681f3Smrg 4747ec681f3Smrgstatic void 4757ec681f3Smrgcreate_batch(struct crocus_batch *batch) 4767ec681f3Smrg{ 4777ec681f3Smrg struct crocus_screen *screen = batch->screen; 4787ec681f3Smrg 4797ec681f3Smrg recreate_growing_buffer(batch, &batch->command, 4807ec681f3Smrg "command buffer", 4817ec681f3Smrg BATCH_SZ + BATCH_RESERVED(&screen->devinfo)); 4827ec681f3Smrg 4837ec681f3Smrg crocus_use_bo(batch, batch->command.bo, false); 4847ec681f3Smrg 4857ec681f3Smrg /* Always add workaround_bo which contains a driver identifier to be 4867ec681f3Smrg * recorded in error states. 4877ec681f3Smrg */ 4887ec681f3Smrg crocus_use_bo(batch, batch->ice->workaround_bo, false); 4897ec681f3Smrg 4907ec681f3Smrg recreate_growing_buffer(batch, &batch->state, 4917ec681f3Smrg "state buffer", 4927ec681f3Smrg STATE_SZ); 4937ec681f3Smrg 4947ec681f3Smrg batch->state.used = 1; 4957ec681f3Smrg crocus_use_bo(batch, batch->state.bo, false); 4967ec681f3Smrg} 4977ec681f3Smrg 4987ec681f3Smrgstatic void 4997ec681f3Smrgcrocus_batch_maybe_noop(struct crocus_batch *batch) 5007ec681f3Smrg{ 5017ec681f3Smrg /* We only insert the NOOP at the beginning of the batch. */ 5027ec681f3Smrg assert(crocus_batch_bytes_used(batch) == 0); 5037ec681f3Smrg 5047ec681f3Smrg if (batch->noop_enabled) { 5057ec681f3Smrg /* Emit MI_BATCH_BUFFER_END to prevent any further command to be 5067ec681f3Smrg * executed. 5077ec681f3Smrg */ 5087ec681f3Smrg uint32_t *map = batch->command.map_next; 5097ec681f3Smrg 5107ec681f3Smrg map[0] = (0xA << 23); 5117ec681f3Smrg 5127ec681f3Smrg batch->command.map_next += 4; 5137ec681f3Smrg } 5147ec681f3Smrg} 5157ec681f3Smrg 5167ec681f3Smrgstatic void 5177ec681f3Smrgcrocus_batch_reset(struct crocus_batch *batch) 5187ec681f3Smrg{ 5197ec681f3Smrg struct crocus_screen *screen = batch->screen; 5207ec681f3Smrg 5217ec681f3Smrg crocus_bo_unreference(batch->command.bo); 5227ec681f3Smrg crocus_bo_unreference(batch->state.bo); 5237ec681f3Smrg batch->primary_batch_size = 0; 5247ec681f3Smrg batch->contains_draw = false; 5257ec681f3Smrg batch->contains_fence_signal = false; 5267ec681f3Smrg batch->state_base_address_emitted = false; 5277ec681f3Smrg batch->screen->vtbl.batch_reset_dirty(batch); 5287ec681f3Smrg 5297ec681f3Smrg create_batch(batch); 5307ec681f3Smrg assert(batch->command.bo->index == 0); 5317ec681f3Smrg 5327ec681f3Smrg if (batch->state_sizes) 5337ec681f3Smrg _mesa_hash_table_u64_clear(batch->state_sizes); 5347ec681f3Smrg struct crocus_syncobj *syncobj = crocus_create_syncobj(screen); 5357ec681f3Smrg crocus_batch_add_syncobj(batch, syncobj, I915_EXEC_FENCE_SIGNAL); 5367ec681f3Smrg crocus_syncobj_reference(screen, &syncobj, NULL); 5377ec681f3Smrg 5387ec681f3Smrg crocus_cache_sets_clear(batch); 5397ec681f3Smrg} 5407ec681f3Smrg 5417ec681f3Smrgvoid 5427ec681f3Smrgcrocus_batch_free(struct crocus_batch *batch) 5437ec681f3Smrg{ 5447ec681f3Smrg struct crocus_screen *screen = batch->screen; 5457ec681f3Smrg struct crocus_bufmgr *bufmgr = screen->bufmgr; 5467ec681f3Smrg 5477ec681f3Smrg if (batch->use_shadow_copy) { 5487ec681f3Smrg free(batch->command.map); 5497ec681f3Smrg free(batch->state.map); 5507ec681f3Smrg } 5517ec681f3Smrg 5527ec681f3Smrg for (int i = 0; i < batch->exec_count; i++) { 5537ec681f3Smrg crocus_bo_unreference(batch->exec_bos[i]); 5547ec681f3Smrg } 5557ec681f3Smrg 5567ec681f3Smrg pipe_resource_reference(&batch->fine_fences.ref.res, NULL); 5577ec681f3Smrg 5587ec681f3Smrg free(batch->command.relocs.relocs); 5597ec681f3Smrg free(batch->state.relocs.relocs); 5607ec681f3Smrg free(batch->exec_bos); 5617ec681f3Smrg free(batch->validation_list); 5627ec681f3Smrg 5637ec681f3Smrg ralloc_free(batch->exec_fences.mem_ctx); 5647ec681f3Smrg 5657ec681f3Smrg util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s) 5667ec681f3Smrg crocus_syncobj_reference(screen, s, NULL); 5677ec681f3Smrg ralloc_free(batch->syncobjs.mem_ctx); 5687ec681f3Smrg 5697ec681f3Smrg crocus_fine_fence_reference(batch->screen, &batch->last_fence, NULL); 5707ec681f3Smrg if (batch_has_fine_fence(batch)) 5717ec681f3Smrg u_upload_destroy(batch->fine_fences.uploader); 5727ec681f3Smrg 5737ec681f3Smrg crocus_bo_unreference(batch->command.bo); 5747ec681f3Smrg crocus_bo_unreference(batch->state.bo); 5757ec681f3Smrg batch->command.bo = NULL; 5767ec681f3Smrg batch->command.map = NULL; 5777ec681f3Smrg batch->command.map_next = NULL; 5787ec681f3Smrg 5797ec681f3Smrg crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id); 5807ec681f3Smrg 5817ec681f3Smrg _mesa_hash_table_destroy(batch->cache.render, NULL); 5827ec681f3Smrg _mesa_set_destroy(batch->cache.depth, NULL); 5837ec681f3Smrg 5847ec681f3Smrg if (batch->state_sizes) { 5857ec681f3Smrg _mesa_hash_table_u64_destroy(batch->state_sizes); 5867ec681f3Smrg intel_batch_decode_ctx_finish(&batch->decoder); 5877ec681f3Smrg } 5887ec681f3Smrg} 5897ec681f3Smrg 5907ec681f3Smrg/** 5917ec681f3Smrg * If we've chained to a secondary batch, or are getting near to the end, 5927ec681f3Smrg * then flush. This should only be called between draws. 5937ec681f3Smrg */ 5947ec681f3Smrgvoid 5957ec681f3Smrgcrocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate) 5967ec681f3Smrg{ 5977ec681f3Smrg if (batch->command.bo != batch->exec_bos[0] || 5987ec681f3Smrg crocus_batch_bytes_used(batch) + estimate >= BATCH_SZ) { 5997ec681f3Smrg crocus_batch_flush(batch); 6007ec681f3Smrg } 6017ec681f3Smrg} 6027ec681f3Smrg 6037ec681f3Smrg/** 6047ec681f3Smrg * Finish copying the old batch/state buffer's contents to the new one 6057ec681f3Smrg * after we tried to "grow" the buffer in an earlier operation. 6067ec681f3Smrg */ 6077ec681f3Smrgstatic void 6087ec681f3Smrgfinish_growing_bos(struct crocus_growing_bo *grow) 6097ec681f3Smrg{ 6107ec681f3Smrg struct crocus_bo *old_bo = grow->partial_bo; 6117ec681f3Smrg if (!old_bo) 6127ec681f3Smrg return; 6137ec681f3Smrg 6147ec681f3Smrg memcpy(grow->map, grow->partial_bo_map, grow->partial_bytes); 6157ec681f3Smrg 6167ec681f3Smrg grow->partial_bo = NULL; 6177ec681f3Smrg grow->partial_bo_map = NULL; 6187ec681f3Smrg grow->partial_bytes = 0; 6197ec681f3Smrg 6207ec681f3Smrg crocus_bo_unreference(old_bo); 6217ec681f3Smrg} 6227ec681f3Smrg 6237ec681f3Smrgvoid 6247ec681f3Smrgcrocus_grow_buffer(struct crocus_batch *batch, bool grow_state, 6257ec681f3Smrg unsigned used, 6267ec681f3Smrg unsigned new_size) 6277ec681f3Smrg{ 6287ec681f3Smrg struct crocus_screen *screen = batch->screen; 6297ec681f3Smrg struct crocus_bufmgr *bufmgr = screen->bufmgr; 6307ec681f3Smrg struct crocus_growing_bo *grow = grow_state ? &batch->state : &batch->command; 6317ec681f3Smrg struct crocus_bo *bo = grow->bo; 6327ec681f3Smrg 6337ec681f3Smrg if (grow->partial_bo) { 6347ec681f3Smrg /* We've already grown once, and now we need to do it again. 6357ec681f3Smrg * Finish our last grow operation so we can start a new one. 6367ec681f3Smrg * This should basically never happen. 6377ec681f3Smrg */ 6387ec681f3Smrg finish_growing_bos(grow); 6397ec681f3Smrg } 6407ec681f3Smrg 6417ec681f3Smrg struct crocus_bo *new_bo = crocus_bo_alloc(bufmgr, bo->name, new_size); 6427ec681f3Smrg 6437ec681f3Smrg /* Copy existing data to the new larger buffer */ 6447ec681f3Smrg grow->partial_bo_map = grow->map; 6457ec681f3Smrg 6467ec681f3Smrg if (batch->use_shadow_copy) { 6477ec681f3Smrg /* We can't safely use realloc, as it may move the existing buffer, 6487ec681f3Smrg * breaking existing pointers the caller may still be using. Just 6497ec681f3Smrg * malloc a new copy and memcpy it like the normal BO path. 6507ec681f3Smrg * 6517ec681f3Smrg * Use bo->size rather than new_size because the bufmgr may have 6527ec681f3Smrg * rounded up the size, and we want the shadow size to match. 6537ec681f3Smrg */ 6547ec681f3Smrg grow->map = malloc(new_bo->size); 6557ec681f3Smrg } else { 6567ec681f3Smrg grow->map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE); 6577ec681f3Smrg } 6587ec681f3Smrg /* Try to put the new BO at the same GTT offset as the old BO (which 6597ec681f3Smrg * we're throwing away, so it doesn't need to be there). 6607ec681f3Smrg * 6617ec681f3Smrg * This guarantees that our relocations continue to work: values we've 6627ec681f3Smrg * already written into the buffer, values we're going to write into the 6637ec681f3Smrg * buffer, and the validation/relocation lists all will match. 6647ec681f3Smrg * 6657ec681f3Smrg * Also preserve kflags for EXEC_OBJECT_CAPTURE. 6667ec681f3Smrg */ 6677ec681f3Smrg new_bo->gtt_offset = bo->gtt_offset; 6687ec681f3Smrg new_bo->index = bo->index; 6697ec681f3Smrg new_bo->kflags = bo->kflags; 6707ec681f3Smrg 6717ec681f3Smrg /* Batch/state buffers are per-context, and if we've run out of space, 6727ec681f3Smrg * we must have actually used them before, so...they will be in the list. 6737ec681f3Smrg */ 6747ec681f3Smrg assert(bo->index < batch->exec_count); 6757ec681f3Smrg assert(batch->exec_bos[bo->index] == bo); 6767ec681f3Smrg 6777ec681f3Smrg /* Update the validation list to use the new BO. */ 6787ec681f3Smrg batch->validation_list[bo->index].handle = new_bo->gem_handle; 6797ec681f3Smrg /* Exchange the two BOs...without breaking pointers to the old BO. 6807ec681f3Smrg * 6817ec681f3Smrg * Consider this scenario: 6827ec681f3Smrg * 6837ec681f3Smrg * 1. Somebody calls brw_state_batch() to get a region of memory, and 6847ec681f3Smrg * and then creates a brw_address pointing to brw->batch.state.bo. 6857ec681f3Smrg * 2. They then call brw_state_batch() a second time, which happens to 6867ec681f3Smrg * grow and replace the state buffer. They then try to emit a 6877ec681f3Smrg * relocation to their first section of memory. 6887ec681f3Smrg * 6897ec681f3Smrg * If we replace the brw->batch.state.bo pointer at step 2, we would 6907ec681f3Smrg * break the address created in step 1. They'd have a pointer to the 6917ec681f3Smrg * old destroyed BO. Emitting a relocation would add this dead BO to 6927ec681f3Smrg * the validation list...causing /both/ statebuffers to be in the list, 6937ec681f3Smrg * and all kinds of disasters. 6947ec681f3Smrg * 6957ec681f3Smrg * This is not a contrived case - BLORP vertex data upload hits this. 6967ec681f3Smrg * 6977ec681f3Smrg * There are worse scenarios too. Fences for GL sync objects reference 6987ec681f3Smrg * brw->batch.batch.bo. If we replaced the batch pointer when growing, 6997ec681f3Smrg * we'd need to chase down every fence and update it to point to the 7007ec681f3Smrg * new BO. Otherwise, it would refer to a "batch" that never actually 7017ec681f3Smrg * gets submitted, and would fail to trigger. 7027ec681f3Smrg * 7037ec681f3Smrg * To work around both of these issues, we transmutate the buffers in 7047ec681f3Smrg * place, making the existing struct brw_bo represent the new buffer, 7057ec681f3Smrg * and "new_bo" represent the old BO. This is highly unusual, but it 7067ec681f3Smrg * seems like a necessary evil. 7077ec681f3Smrg * 7087ec681f3Smrg * We also defer the memcpy of the existing batch's contents. Callers 7097ec681f3Smrg * may make multiple brw_state_batch calls, and retain pointers to the 7107ec681f3Smrg * old BO's map. We'll perform the memcpy in finish_growing_bo() when 7117ec681f3Smrg * we finally submit the batch, at which point we've finished uploading 7127ec681f3Smrg * state, and nobody should have any old references anymore. 7137ec681f3Smrg * 7147ec681f3Smrg * To do that, we keep a reference to the old BO in grow->partial_bo, 7157ec681f3Smrg * and store the number of bytes to copy in grow->partial_bytes. We 7167ec681f3Smrg * can monkey with the refcounts directly without atomics because these 7177ec681f3Smrg * are per-context BOs and they can only be touched by this thread. 7187ec681f3Smrg */ 7197ec681f3Smrg assert(new_bo->refcount == 1); 7207ec681f3Smrg new_bo->refcount = bo->refcount; 7217ec681f3Smrg bo->refcount = 1; 7227ec681f3Smrg 7237ec681f3Smrg struct crocus_bo tmp; 7247ec681f3Smrg memcpy(&tmp, bo, sizeof(struct crocus_bo)); 7257ec681f3Smrg memcpy(bo, new_bo, sizeof(struct crocus_bo)); 7267ec681f3Smrg memcpy(new_bo, &tmp, sizeof(struct crocus_bo)); 7277ec681f3Smrg 7287ec681f3Smrg grow->partial_bo = new_bo; /* the one reference of the OLD bo */ 7297ec681f3Smrg grow->partial_bytes = used; 7307ec681f3Smrg} 7317ec681f3Smrg 7327ec681f3Smrgstatic void 7337ec681f3Smrgfinish_seqno(struct crocus_batch *batch) 7347ec681f3Smrg{ 7357ec681f3Smrg struct crocus_fine_fence *sq = crocus_fine_fence_new(batch, CROCUS_FENCE_END); 7367ec681f3Smrg if (!sq) 7377ec681f3Smrg return; 7387ec681f3Smrg 7397ec681f3Smrg crocus_fine_fence_reference(batch->screen, &batch->last_fence, sq); 7407ec681f3Smrg crocus_fine_fence_reference(batch->screen, &sq, NULL); 7417ec681f3Smrg} 7427ec681f3Smrg 7437ec681f3Smrg/** 7447ec681f3Smrg * Terminate a batch with MI_BATCH_BUFFER_END. 7457ec681f3Smrg */ 7467ec681f3Smrgstatic void 7477ec681f3Smrgcrocus_finish_batch(struct crocus_batch *batch) 7487ec681f3Smrg{ 7497ec681f3Smrg 7507ec681f3Smrg batch->no_wrap = true; 7517ec681f3Smrg if (batch->screen->vtbl.finish_batch) 7527ec681f3Smrg batch->screen->vtbl.finish_batch(batch); 7537ec681f3Smrg 7547ec681f3Smrg finish_seqno(batch); 7557ec681f3Smrg 7567ec681f3Smrg /* Emit MI_BATCH_BUFFER_END to finish our batch. */ 7577ec681f3Smrg uint32_t *map = batch->command.map_next; 7587ec681f3Smrg 7597ec681f3Smrg map[0] = (0xA << 23); 7607ec681f3Smrg 7617ec681f3Smrg batch->command.map_next += 4; 7627ec681f3Smrg VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->command.map, crocus_batch_bytes_used(batch))); 7637ec681f3Smrg 7647ec681f3Smrg if (batch->command.bo == batch->exec_bos[0]) 7657ec681f3Smrg batch->primary_batch_size = crocus_batch_bytes_used(batch); 7667ec681f3Smrg batch->no_wrap = false; 7677ec681f3Smrg} 7687ec681f3Smrg 7697ec681f3Smrg/** 7707ec681f3Smrg * Replace our current GEM context with a new one (in case it got banned). 7717ec681f3Smrg */ 7727ec681f3Smrgstatic bool 7737ec681f3Smrgreplace_hw_ctx(struct crocus_batch *batch) 7747ec681f3Smrg{ 7757ec681f3Smrg struct crocus_screen *screen = batch->screen; 7767ec681f3Smrg struct crocus_bufmgr *bufmgr = screen->bufmgr; 7777ec681f3Smrg 7787ec681f3Smrg uint32_t new_ctx = crocus_clone_hw_context(bufmgr, batch->hw_ctx_id); 7797ec681f3Smrg if (!new_ctx) 7807ec681f3Smrg return false; 7817ec681f3Smrg 7827ec681f3Smrg crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id); 7837ec681f3Smrg batch->hw_ctx_id = new_ctx; 7847ec681f3Smrg 7857ec681f3Smrg /* Notify the context that state must be re-initialized. */ 7867ec681f3Smrg crocus_lost_context_state(batch); 7877ec681f3Smrg 7887ec681f3Smrg return true; 7897ec681f3Smrg} 7907ec681f3Smrg 7917ec681f3Smrgenum pipe_reset_status 7927ec681f3Smrgcrocus_batch_check_for_reset(struct crocus_batch *batch) 7937ec681f3Smrg{ 7947ec681f3Smrg struct crocus_screen *screen = batch->screen; 7957ec681f3Smrg enum pipe_reset_status status = PIPE_NO_RESET; 7967ec681f3Smrg struct drm_i915_reset_stats stats = { .ctx_id = batch->hw_ctx_id }; 7977ec681f3Smrg 7987ec681f3Smrg if (drmIoctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats)) 7997ec681f3Smrg DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno)); 8007ec681f3Smrg 8017ec681f3Smrg if (stats.batch_active != 0) { 8027ec681f3Smrg /* A reset was observed while a batch from this hardware context was 8037ec681f3Smrg * executing. Assume that this context was at fault. 8047ec681f3Smrg */ 8057ec681f3Smrg status = PIPE_GUILTY_CONTEXT_RESET; 8067ec681f3Smrg } else if (stats.batch_pending != 0) { 8077ec681f3Smrg /* A reset was observed while a batch from this context was in progress, 8087ec681f3Smrg * but the batch was not executing. In this case, assume that the 8097ec681f3Smrg * context was not at fault. 8107ec681f3Smrg */ 8117ec681f3Smrg status = PIPE_INNOCENT_CONTEXT_RESET; 8127ec681f3Smrg } 8137ec681f3Smrg 8147ec681f3Smrg if (status != PIPE_NO_RESET) { 8157ec681f3Smrg /* Our context is likely banned, or at least in an unknown state. 8167ec681f3Smrg * Throw it away and start with a fresh context. Ideally this may 8177ec681f3Smrg * catch the problem before our next execbuf fails with -EIO. 8187ec681f3Smrg */ 8197ec681f3Smrg replace_hw_ctx(batch); 8207ec681f3Smrg } 8217ec681f3Smrg 8227ec681f3Smrg return status; 8237ec681f3Smrg} 8247ec681f3Smrg 8257ec681f3Smrg/** 8267ec681f3Smrg * Submit the batch to the GPU via execbuffer2. 8277ec681f3Smrg */ 8287ec681f3Smrgstatic int 8297ec681f3Smrgsubmit_batch(struct crocus_batch *batch) 8307ec681f3Smrg{ 8317ec681f3Smrg 8327ec681f3Smrg if (batch->use_shadow_copy) { 8337ec681f3Smrg void *bo_map = crocus_bo_map(batch->dbg, batch->command.bo, MAP_WRITE); 8347ec681f3Smrg memcpy(bo_map, batch->command.map, crocus_batch_bytes_used(batch)); 8357ec681f3Smrg 8367ec681f3Smrg bo_map = crocus_bo_map(batch->dbg, batch->state.bo, MAP_WRITE); 8377ec681f3Smrg memcpy(bo_map, batch->state.map, batch->state.used); 8387ec681f3Smrg } 8397ec681f3Smrg 8407ec681f3Smrg crocus_bo_unmap(batch->command.bo); 8417ec681f3Smrg crocus_bo_unmap(batch->state.bo); 8427ec681f3Smrg 8437ec681f3Smrg /* The requirement for using I915_EXEC_NO_RELOC are: 8447ec681f3Smrg * 8457ec681f3Smrg * The addresses written in the objects must match the corresponding 8467ec681f3Smrg * reloc.gtt_offset which in turn must match the corresponding 8477ec681f3Smrg * execobject.offset. 8487ec681f3Smrg * 8497ec681f3Smrg * Any render targets written to in the batch must be flagged with 8507ec681f3Smrg * EXEC_OBJECT_WRITE. 8517ec681f3Smrg * 8527ec681f3Smrg * To avoid stalling, execobject.offset should match the current 8537ec681f3Smrg * address of that object within the active context. 8547ec681f3Smrg */ 8557ec681f3Smrg /* Set statebuffer relocations */ 8567ec681f3Smrg const unsigned state_index = batch->state.bo->index; 8577ec681f3Smrg if (state_index < batch->exec_count && 8587ec681f3Smrg batch->exec_bos[state_index] == batch->state.bo) { 8597ec681f3Smrg struct drm_i915_gem_exec_object2 *entry = 8607ec681f3Smrg &batch->validation_list[state_index]; 8617ec681f3Smrg assert(entry->handle == batch->state.bo->gem_handle); 8627ec681f3Smrg entry->relocation_count = batch->state.relocs.reloc_count; 8637ec681f3Smrg entry->relocs_ptr = (uintptr_t)batch->state.relocs.relocs; 8647ec681f3Smrg } 8657ec681f3Smrg 8667ec681f3Smrg /* Set batchbuffer relocations */ 8677ec681f3Smrg struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0]; 8687ec681f3Smrg assert(entry->handle == batch->command.bo->gem_handle); 8697ec681f3Smrg entry->relocation_count = batch->command.relocs.reloc_count; 8707ec681f3Smrg entry->relocs_ptr = (uintptr_t)batch->command.relocs.relocs; 8717ec681f3Smrg 8727ec681f3Smrg struct drm_i915_gem_execbuffer2 execbuf = { 8737ec681f3Smrg .buffers_ptr = (uintptr_t)batch->validation_list, 8747ec681f3Smrg .buffer_count = batch->exec_count, 8757ec681f3Smrg .batch_start_offset = 0, 8767ec681f3Smrg /* This must be QWord aligned. */ 8777ec681f3Smrg .batch_len = ALIGN(batch->primary_batch_size, 8), 8787ec681f3Smrg .flags = I915_EXEC_RENDER | 8797ec681f3Smrg I915_EXEC_NO_RELOC | 8807ec681f3Smrg I915_EXEC_BATCH_FIRST | 8817ec681f3Smrg I915_EXEC_HANDLE_LUT, 8827ec681f3Smrg .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */ 8837ec681f3Smrg }; 8847ec681f3Smrg 8857ec681f3Smrg if (num_fences(batch)) { 8867ec681f3Smrg execbuf.flags |= I915_EXEC_FENCE_ARRAY; 8877ec681f3Smrg execbuf.num_cliprects = num_fences(batch); 8887ec681f3Smrg execbuf.cliprects_ptr = 8897ec681f3Smrg (uintptr_t)util_dynarray_begin(&batch->exec_fences); 8907ec681f3Smrg } 8917ec681f3Smrg 8927ec681f3Smrg int ret = 0; 8937ec681f3Smrg if (!batch->screen->devinfo.no_hw && 8947ec681f3Smrg intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf)) 8957ec681f3Smrg ret = -errno; 8967ec681f3Smrg 8977ec681f3Smrg for (int i = 0; i < batch->exec_count; i++) { 8987ec681f3Smrg struct crocus_bo *bo = batch->exec_bos[i]; 8997ec681f3Smrg 9007ec681f3Smrg bo->idle = false; 9017ec681f3Smrg bo->index = -1; 9027ec681f3Smrg 9037ec681f3Smrg /* Update brw_bo::gtt_offset */ 9047ec681f3Smrg if (batch->validation_list[i].offset != bo->gtt_offset) { 9057ec681f3Smrg DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%" PRIx64 "\n", 9067ec681f3Smrg bo->gem_handle, bo->gtt_offset, 9077ec681f3Smrg (uint64_t)batch->validation_list[i].offset); 9087ec681f3Smrg assert(!(bo->kflags & EXEC_OBJECT_PINNED)); 9097ec681f3Smrg bo->gtt_offset = batch->validation_list[i].offset; 9107ec681f3Smrg } 9117ec681f3Smrg } 9127ec681f3Smrg 9137ec681f3Smrg return ret; 9147ec681f3Smrg} 9157ec681f3Smrg 9167ec681f3Smrgstatic const char * 9177ec681f3Smrgbatch_name_to_string(enum crocus_batch_name name) 9187ec681f3Smrg{ 9197ec681f3Smrg const char *names[CROCUS_BATCH_COUNT] = { 9207ec681f3Smrg [CROCUS_BATCH_RENDER] = "render", 9217ec681f3Smrg [CROCUS_BATCH_COMPUTE] = "compute", 9227ec681f3Smrg }; 9237ec681f3Smrg return names[name]; 9247ec681f3Smrg} 9257ec681f3Smrg 9267ec681f3Smrg/** 9277ec681f3Smrg * Flush the batch buffer, submitting it to the GPU and resetting it so 9287ec681f3Smrg * we're ready to emit the next batch. 9297ec681f3Smrg * 9307ec681f3Smrg * \param in_fence_fd is ignored if -1. Otherwise, this function takes 9317ec681f3Smrg * ownership of the fd. 9327ec681f3Smrg * 9337ec681f3Smrg * \param out_fence_fd is ignored if NULL. Otherwise, the caller must 9347ec681f3Smrg * take ownership of the returned fd. 9357ec681f3Smrg */ 9367ec681f3Smrgvoid 9377ec681f3Smrg_crocus_batch_flush(struct crocus_batch *batch, const char *file, int line) 9387ec681f3Smrg{ 9397ec681f3Smrg struct crocus_screen *screen = batch->screen; 9407ec681f3Smrg 9417ec681f3Smrg /* If a fence signals we need to flush it. */ 9427ec681f3Smrg if (crocus_batch_bytes_used(batch) == 0 && !batch->contains_fence_signal) 9437ec681f3Smrg return; 9447ec681f3Smrg 9457ec681f3Smrg assert(!batch->no_wrap); 9467ec681f3Smrg crocus_finish_batch(batch); 9477ec681f3Smrg 9487ec681f3Smrg finish_growing_bos(&batch->command); 9497ec681f3Smrg finish_growing_bos(&batch->state); 9507ec681f3Smrg int ret = submit_batch(batch); 9517ec681f3Smrg 9527ec681f3Smrg if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) { 9537ec681f3Smrg int bytes_for_commands = crocus_batch_bytes_used(batch); 9547ec681f3Smrg int second_bytes = 0; 9557ec681f3Smrg if (batch->command.bo != batch->exec_bos[0]) { 9567ec681f3Smrg second_bytes = bytes_for_commands; 9577ec681f3Smrg bytes_for_commands += batch->primary_batch_size; 9587ec681f3Smrg } 9597ec681f3Smrg fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5d+%5db (%0.1f%%) " 9607ec681f3Smrg "(cmds), %4d BOs (%0.1fMb aperture)," 9617ec681f3Smrg " %4d command relocs, %4d state relocs\n", 9627ec681f3Smrg file, line, batch_name_to_string(batch->name), batch->hw_ctx_id, 9637ec681f3Smrg batch->primary_batch_size, second_bytes, 9647ec681f3Smrg 100.0f * bytes_for_commands / BATCH_SZ, 9657ec681f3Smrg batch->exec_count, 9667ec681f3Smrg (float) batch->aperture_space / (1024 * 1024), 9677ec681f3Smrg batch->command.relocs.reloc_count, 9687ec681f3Smrg batch->state.relocs.reloc_count); 9697ec681f3Smrg 9707ec681f3Smrg if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) { 9717ec681f3Smrg dump_fence_list(batch); 9727ec681f3Smrg dump_validation_list(batch); 9737ec681f3Smrg } 9747ec681f3Smrg 9757ec681f3Smrg if (INTEL_DEBUG(DEBUG_BATCH)) { 9767ec681f3Smrg decode_batch(batch); 9777ec681f3Smrg } 9787ec681f3Smrg } 9797ec681f3Smrg 9807ec681f3Smrg for (int i = 0; i < batch->exec_count; i++) { 9817ec681f3Smrg struct crocus_bo *bo = batch->exec_bos[i]; 9827ec681f3Smrg crocus_bo_unreference(bo); 9837ec681f3Smrg } 9847ec681f3Smrg 9857ec681f3Smrg batch->command.relocs.reloc_count = 0; 9867ec681f3Smrg batch->state.relocs.reloc_count = 0; 9877ec681f3Smrg batch->exec_count = 0; 9887ec681f3Smrg batch->aperture_space = 0; 9897ec681f3Smrg 9907ec681f3Smrg util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s) 9917ec681f3Smrg crocus_syncobj_reference(screen, s, NULL); 9927ec681f3Smrg util_dynarray_clear(&batch->syncobjs); 9937ec681f3Smrg 9947ec681f3Smrg util_dynarray_clear(&batch->exec_fences); 9957ec681f3Smrg 9967ec681f3Smrg if (INTEL_DEBUG(DEBUG_SYNC)) { 9977ec681f3Smrg dbg_printf("waiting for idle\n"); 9987ec681f3Smrg crocus_bo_wait_rendering(batch->command.bo); /* if execbuf failed; this is a nop */ 9997ec681f3Smrg } 10007ec681f3Smrg 10017ec681f3Smrg /* Start a new batch buffer. */ 10027ec681f3Smrg crocus_batch_reset(batch); 10037ec681f3Smrg 10047ec681f3Smrg /* EIO means our context is banned. In this case, try and replace it 10057ec681f3Smrg * with a new logical context, and inform crocus_context that all state 10067ec681f3Smrg * has been lost and needs to be re-initialized. If this succeeds, 10077ec681f3Smrg * dubiously claim success... 10087ec681f3Smrg */ 10097ec681f3Smrg if (ret == -EIO && replace_hw_ctx(batch)) { 10107ec681f3Smrg if (batch->reset->reset) { 10117ec681f3Smrg /* Tell the state tracker the device is lost and it was our fault. */ 10127ec681f3Smrg batch->reset->reset(batch->reset->data, PIPE_GUILTY_CONTEXT_RESET); 10137ec681f3Smrg } 10147ec681f3Smrg 10157ec681f3Smrg ret = 0; 10167ec681f3Smrg } 10177ec681f3Smrg 10187ec681f3Smrg if (ret < 0) { 10197ec681f3Smrg#ifdef DEBUG 10207ec681f3Smrg const bool color = INTEL_DEBUG(DEBUG_COLOR); 10217ec681f3Smrg fprintf(stderr, "%scrocus: Failed to submit batchbuffer: %-80s%s\n", 10227ec681f3Smrg color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : ""); 10237ec681f3Smrg#endif 10247ec681f3Smrg abort(); 10257ec681f3Smrg } 10267ec681f3Smrg} 10277ec681f3Smrg 10287ec681f3Smrg/** 10297ec681f3Smrg * Does the current batch refer to the given BO? 10307ec681f3Smrg * 10317ec681f3Smrg * (In other words, is the BO in the current batch's validation list?) 10327ec681f3Smrg */ 10337ec681f3Smrgbool 10347ec681f3Smrgcrocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo) 10357ec681f3Smrg{ 10367ec681f3Smrg return find_validation_entry(batch, bo) != NULL; 10377ec681f3Smrg} 10387ec681f3Smrg 10397ec681f3Smrg/** 10407ec681f3Smrg * Updates the state of the noop feature. Returns true if there was a noop 10417ec681f3Smrg * transition that led to state invalidation. 10427ec681f3Smrg */ 10437ec681f3Smrgbool 10447ec681f3Smrgcrocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable) 10457ec681f3Smrg{ 10467ec681f3Smrg if (batch->noop_enabled == noop_enable) 10477ec681f3Smrg return 0; 10487ec681f3Smrg 10497ec681f3Smrg batch->noop_enabled = noop_enable; 10507ec681f3Smrg 10517ec681f3Smrg crocus_batch_flush(batch); 10527ec681f3Smrg 10537ec681f3Smrg /* If the batch was empty, flush had no effect, so insert our noop. */ 10547ec681f3Smrg if (crocus_batch_bytes_used(batch) == 0) 10557ec681f3Smrg crocus_batch_maybe_noop(batch); 10567ec681f3Smrg 10577ec681f3Smrg /* We only need to update the entire state if we transition from noop -> 10587ec681f3Smrg * not-noop. 10597ec681f3Smrg */ 10607ec681f3Smrg return !batch->noop_enabled; 10617ec681f3Smrg} 1062