17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2017 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice shall be included
127ec681f3Smrg * in all copies or substantial portions of the Software.
137ec681f3Smrg *
147ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
157ec681f3Smrg * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
167ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
177ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
187ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
197ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
207ec681f3Smrg * DEALINGS IN THE SOFTWARE.
217ec681f3Smrg */
227ec681f3Smrg
237ec681f3Smrg/**
247ec681f3Smrg * @file crocus_batch.c
257ec681f3Smrg *
267ec681f3Smrg * Batchbuffer and command submission module.
277ec681f3Smrg *
287ec681f3Smrg * Every API draw call results in a number of GPU commands, which we
297ec681f3Smrg * collect into a "batch buffer".  Typically, many draw calls are grouped
307ec681f3Smrg * into a single batch to amortize command submission overhead.
317ec681f3Smrg *
327ec681f3Smrg * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl.
337ec681f3Smrg * One critical piece of data is the "validation list", which contains a
347ec681f3Smrg * list of the buffer objects (BOs) which the commands in the GPU need.
357ec681f3Smrg * The kernel will make sure these are resident and pinned at the correct
367ec681f3Smrg * virtual memory address before executing our batch.  If a BO is not in
377ec681f3Smrg * the validation list, it effectively does not exist, so take care.
387ec681f3Smrg */
397ec681f3Smrg
407ec681f3Smrg#include "crocus_batch.h"
417ec681f3Smrg#include "crocus_bufmgr.h"
427ec681f3Smrg#include "crocus_context.h"
437ec681f3Smrg#include "crocus_fence.h"
447ec681f3Smrg
457ec681f3Smrg#include "drm-uapi/i915_drm.h"
467ec681f3Smrg
477ec681f3Smrg#include "intel/common/intel_gem.h"
487ec681f3Smrg#include "main/macros.h"
497ec681f3Smrg#include "util/hash_table.h"
507ec681f3Smrg#include "util/set.h"
517ec681f3Smrg#include "util/u_upload_mgr.h"
527ec681f3Smrg
537ec681f3Smrg#include <errno.h>
547ec681f3Smrg#include <xf86drm.h>
557ec681f3Smrg
567ec681f3Smrg#if HAVE_VALGRIND
577ec681f3Smrg#include <memcheck.h>
587ec681f3Smrg#include <valgrind.h>
597ec681f3Smrg#define VG(x) x
607ec681f3Smrg#else
617ec681f3Smrg#define VG(x)
627ec681f3Smrg#endif
637ec681f3Smrg
647ec681f3Smrg#define FILE_DEBUG_FLAG DEBUG_BUFMGR
657ec681f3Smrg
667ec681f3Smrg/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
677ec681f3Smrg * or 12 bytes for MI_BATCH_BUFFER_START (when chaining).  Plus, we may
687ec681f3Smrg * need an extra 4 bytes to pad out to the nearest QWord.  So reserve 16.
697ec681f3Smrg */
707ec681f3Smrg#define BATCH_RESERVED(devinfo) ((devinfo)->is_haswell ? 32 : 16)
717ec681f3Smrg
727ec681f3Smrgstatic void crocus_batch_reset(struct crocus_batch *batch);
737ec681f3Smrg
/**
 * Return the number of fences (wait or signal) currently queued for the
 * next execbuf of this batch.
 */
static unsigned
num_fences(struct crocus_batch *batch)
{
   return util_dynarray_num_elements(&batch->exec_fences,
                                     struct drm_i915_gem_exec_fence);
}
807ec681f3Smrg
817ec681f3Smrg/**
827ec681f3Smrg * Debugging code to dump the fence list, used by INTEL_DEBUG=submit.
837ec681f3Smrg */
847ec681f3Smrgstatic void
857ec681f3Smrgdump_fence_list(struct crocus_batch *batch)
867ec681f3Smrg{
877ec681f3Smrg   fprintf(stderr, "Fence list (length %u):      ", num_fences(batch));
887ec681f3Smrg
897ec681f3Smrg   util_dynarray_foreach(&batch->exec_fences,
907ec681f3Smrg                         struct drm_i915_gem_exec_fence, f) {
917ec681f3Smrg      fprintf(stderr, "%s%u%s ",
927ec681f3Smrg              (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "",
937ec681f3Smrg              f->handle,
947ec681f3Smrg              (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : "");
957ec681f3Smrg   }
967ec681f3Smrg
977ec681f3Smrg   fprintf(stderr, "\n");
987ec681f3Smrg}
997ec681f3Smrg
1007ec681f3Smrg/**
1017ec681f3Smrg * Debugging code to dump the validation list, used by INTEL_DEBUG=submit.
1027ec681f3Smrg */
1037ec681f3Smrgstatic void
1047ec681f3Smrgdump_validation_list(struct crocus_batch *batch)
1057ec681f3Smrg{
1067ec681f3Smrg   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);
1077ec681f3Smrg
1087ec681f3Smrg   for (int i = 0; i < batch->exec_count; i++) {
1097ec681f3Smrg      uint64_t flags = batch->validation_list[i].flags;
1107ec681f3Smrg      assert(batch->validation_list[i].handle ==
1117ec681f3Smrg             batch->exec_bos[i]->gem_handle);
1127ec681f3Smrg      fprintf(stderr,
1137ec681f3Smrg              "[%2d]: %2d %-14s @ 0x%"PRIx64" (%" PRIu64 "B)\t %2d refs %s\n", i,
1147ec681f3Smrg              batch->validation_list[i].handle, batch->exec_bos[i]->name,
1157ec681f3Smrg              (uint64_t)batch->validation_list[i].offset, batch->exec_bos[i]->size,
1167ec681f3Smrg              batch->exec_bos[i]->refcount,
1177ec681f3Smrg              (flags & EXEC_OBJECT_WRITE) ? " (write)" : "");
1187ec681f3Smrg   }
1197ec681f3Smrg}
1207ec681f3Smrg
1217ec681f3Smrg/**
1227ec681f3Smrg * Return BO information to the batch decoder (for debugging).
1237ec681f3Smrg */
1247ec681f3Smrgstatic struct intel_batch_decode_bo
1257ec681f3Smrgdecode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
1267ec681f3Smrg{
1277ec681f3Smrg   struct crocus_batch *batch = v_batch;
1287ec681f3Smrg
1297ec681f3Smrg   for (int i = 0; i < batch->exec_count; i++) {
1307ec681f3Smrg      struct crocus_bo *bo = batch->exec_bos[i];
1317ec681f3Smrg      /* The decoder zeroes out the top 16 bits, so we need to as well */
1327ec681f3Smrg      uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);
1337ec681f3Smrg
1347ec681f3Smrg      if (address >= bo_address && address < bo_address + bo->size) {
1357ec681f3Smrg         return (struct intel_batch_decode_bo){
1367ec681f3Smrg            .addr = address,
1377ec681f3Smrg            .size = bo->size,
1387ec681f3Smrg            .map = crocus_bo_map(batch->dbg, bo, MAP_READ) +
1397ec681f3Smrg                   (address - bo_address),
1407ec681f3Smrg         };
1417ec681f3Smrg      }
1427ec681f3Smrg   }
1437ec681f3Smrg
1447ec681f3Smrg   return (struct intel_batch_decode_bo) { };
1457ec681f3Smrg}
1467ec681f3Smrg
1477ec681f3Smrgstatic unsigned
1487ec681f3Smrgdecode_get_state_size(void *v_batch, uint64_t address,
1497ec681f3Smrg                      uint64_t base_address)
1507ec681f3Smrg{
1517ec681f3Smrg   struct crocus_batch *batch = v_batch;
1527ec681f3Smrg
1537ec681f3Smrg   /* The decoder gives us offsets from a base address, which is not great.
1547ec681f3Smrg    * Binding tables are relative to surface state base address, and other
1557ec681f3Smrg    * state is relative to dynamic state base address.  These could alias,
1567ec681f3Smrg    * but in practice it's unlikely because surface offsets are always in
1577ec681f3Smrg    * the [0, 64K) range, and we assign dynamic state addresses starting at
1587ec681f3Smrg    * the top of the 4GB range.  We should fix this but it's likely good
1597ec681f3Smrg    * enough for now.
1607ec681f3Smrg    */
1617ec681f3Smrg   unsigned size = (uintptr_t)
1627ec681f3Smrg      _mesa_hash_table_u64_search(batch->state_sizes, address - base_address);
1637ec681f3Smrg
1647ec681f3Smrg   return size;
1657ec681f3Smrg}
1667ec681f3Smrg
1677ec681f3Smrg/**
1687ec681f3Smrg * Decode the current batch.
1697ec681f3Smrg */
1707ec681f3Smrgstatic void
1717ec681f3Smrgdecode_batch(struct crocus_batch *batch)
1727ec681f3Smrg{
1737ec681f3Smrg   void *map = crocus_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
1747ec681f3Smrg   intel_print_batch(&batch->decoder, map, batch->primary_batch_size,
1757ec681f3Smrg                     batch->exec_bos[0]->gtt_offset, false);
1767ec681f3Smrg}
1777ec681f3Smrg
1787ec681f3Smrgstatic void
1797ec681f3Smrginit_reloc_list(struct crocus_reloc_list *rlist, int count)
1807ec681f3Smrg{
1817ec681f3Smrg   rlist->reloc_count = 0;
1827ec681f3Smrg   rlist->reloc_array_size = count;
1837ec681f3Smrg   rlist->relocs = malloc(rlist->reloc_array_size *
1847ec681f3Smrg                          sizeof(struct drm_i915_gem_relocation_entry));
1857ec681f3Smrg}
1867ec681f3Smrg
/**
 * Create and initialize the batch in slot \p name of the context.
 *
 * Sets up the fine-fence uploader (Gen7+ only), a kernel HW context with
 * the requested scheduling \p priority, the relocation lists, the
 * exec/validation arrays, the render/depth cache sets, links to the
 * context's other batches, and (under INTEL_DEBUG=bat) the batch
 * decoder.  Ends by resetting the batch so it is ready for commands.
 */
void
crocus_init_batch(struct crocus_context *ice,
                  enum crocus_batch_name name,
                  int priority)
{
   struct crocus_batch *batch = &ice->batches[name];
   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
   struct intel_device_info *devinfo = &screen->devinfo;

   batch->ice = ice;
   batch->screen = screen;
   batch->dbg = &ice->dbg;
   batch->reset = &ice->reset;
   batch->name = name;
   batch->contains_fence_signal = false;

   /* Fine fences need a staging upload buffer; only set up on Gen7+. */
   if (devinfo->ver >= 7) {
      batch->fine_fences.uploader =
         u_upload_create(&ice->ctx, 4096, PIPE_BIND_CUSTOM,
                         PIPE_USAGE_STAGING, 0);
   }
   crocus_fine_fence_init(batch);

   batch->hw_ctx_id = crocus_create_hw_context(screen->bufmgr);
   assert(batch->hw_ctx_id);

   crocus_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority);

   /* Which EXEC_OBJECT_* flags emit_reloc may propagate to entries;
    * Gen6 additionally needs GTT mappings for reloc targets. */
   batch->valid_reloc_flags = EXEC_OBJECT_WRITE;
   if (devinfo->ver == 6)
      batch->valid_reloc_flags |= EXEC_OBJECT_NEEDS_GTT;

   if (INTEL_DEBUG(DEBUG_BATCH)) {
      /* The shadow doesn't get relocs written so state decode fails. */
      batch->use_shadow_copy = false;
   } else
      batch->use_shadow_copy = !devinfo->has_llc;

   util_dynarray_init(&batch->exec_fences, ralloc_context(NULL));
   util_dynarray_init(&batch->syncobjs, ralloc_context(NULL));

   init_reloc_list(&batch->command.relocs, 250);
   init_reloc_list(&batch->state.relocs, 250);

   batch->exec_count = 0;
   batch->exec_array_size = 100;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->validation_list =
      malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));

   /* Per-batch caches of render/depth targets seen this batch. */
   batch->cache.render = _mesa_hash_table_create(NULL, NULL,
                                                 _mesa_key_pointer_equal);
   batch->cache.depth = _mesa_set_create(NULL, NULL,
                                         _mesa_key_pointer_equal);

   memset(batch->other_batches, 0, sizeof(batch->other_batches));

   /* Record pointers to every batch in the context except ourselves,
    * for cross-batch flush/sync decisions in crocus_use_bo(). */
   for (int i = 0, j = 0; i < ice->batch_count; i++) {
      if (i != name)
         batch->other_batches[j++] = &ice->batches[i];
   }

   if (INTEL_DEBUG(DEBUG_BATCH)) {

      /* state_sizes also serves as the "decoder initialized" flag
       * checked in crocus_batch_free() and crocus_batch_reset(). */
      batch->state_sizes = _mesa_hash_table_u64_create(NULL);
      const unsigned decode_flags =
         INTEL_BATCH_DECODE_FULL |
         (INTEL_DEBUG(DEBUG_COLOR) ? INTEL_BATCH_DECODE_IN_COLOR : 0) |
         INTEL_BATCH_DECODE_OFFSETS | INTEL_BATCH_DECODE_FLOATS;

      intel_batch_decode_ctx_init(&batch->decoder, &screen->devinfo, stderr,
                                  decode_flags, NULL, decode_get_bo,
                                  decode_get_state_size, batch);
      batch->decoder.max_vbo_decoded_lines = 32;
   }

   crocus_batch_reset(batch);
}
2667ec681f3Smrg
2677ec681f3Smrgstatic int
2687ec681f3Smrgfind_exec_index(struct crocus_batch *batch, struct crocus_bo *bo)
2697ec681f3Smrg{
2707ec681f3Smrg   unsigned index = READ_ONCE(bo->index);
2717ec681f3Smrg
2727ec681f3Smrg   if (index < batch->exec_count && batch->exec_bos[index] == bo)
2737ec681f3Smrg      return index;
2747ec681f3Smrg
2757ec681f3Smrg   /* May have been shared between multiple active batches */
2767ec681f3Smrg   for (index = 0; index < batch->exec_count; index++) {
2777ec681f3Smrg      if (batch->exec_bos[index] == bo)
2787ec681f3Smrg	 return index;
2797ec681f3Smrg   }
2807ec681f3Smrg   return -1;
2817ec681f3Smrg}
2827ec681f3Smrg
2837ec681f3Smrgstatic struct drm_i915_gem_exec_object2 *
2847ec681f3Smrgfind_validation_entry(struct crocus_batch *batch, struct crocus_bo *bo)
2857ec681f3Smrg{
2867ec681f3Smrg   int index = find_exec_index(batch, bo);
2877ec681f3Smrg
2887ec681f3Smrg   if (index == -1)
2897ec681f3Smrg      return NULL;
2907ec681f3Smrg   return &batch->validation_list[index];
2917ec681f3Smrg}
2927ec681f3Smrg
2937ec681f3Smrgstatic void
2947ec681f3Smrgensure_exec_obj_space(struct crocus_batch *batch, uint32_t count)
2957ec681f3Smrg{
2967ec681f3Smrg   while (batch->exec_count + count > batch->exec_array_size) {
2977ec681f3Smrg      batch->exec_array_size *= 2;
2987ec681f3Smrg      batch->exec_bos = realloc(
2997ec681f3Smrg         batch->exec_bos, batch->exec_array_size * sizeof(batch->exec_bos[0]));
3007ec681f3Smrg      batch->validation_list =
3017ec681f3Smrg         realloc(batch->validation_list,
3027ec681f3Smrg                 batch->exec_array_size * sizeof(batch->validation_list[0]));
3037ec681f3Smrg   }
3047ec681f3Smrg}
3057ec681f3Smrg
/**
 * Add \p bo to this batch's validation list (if not already present) and
 * return its exec entry, marking it EXEC_OBJECT_WRITE when \p writable.
 *
 * On first use of a BO, this cross-checks the context's other batches
 * and flushes/synchronizes with any batch holding a write-conflicting
 * reference to the same BO.  Takes a reference on \p bo that lives until
 * the batch is reset or freed.
 */
static struct drm_i915_gem_exec_object2 *
crocus_use_bo(struct crocus_batch *batch, struct crocus_bo *bo, bool writable)
{
   assert(bo->bufmgr == batch->command.bo->bufmgr);

   struct drm_i915_gem_exec_object2 *existing_entry =
      find_validation_entry(batch, bo);

   if (existing_entry) {
      /* The BO is already in the validation list; mark it writable */
      if (writable)
         existing_entry->flags |= EXEC_OBJECT_WRITE;
      return existing_entry;
   }

   if (bo != batch->command.bo && bo != batch->state.bo) {
      /* This is the first time our batch has seen this BO.  Before we use it,
       * we may need to flush and synchronize with other batches.
       */
      for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {

         if (!batch->other_batches[b])
            continue;
         struct drm_i915_gem_exec_object2 *other_entry =
            find_validation_entry(batch->other_batches[b], bo);

         /* If the buffer is referenced by another batch, and either batch
          * intends to write it, then flush the other batch and synchronize.
          *
          * Consider these cases:
          *
          * 1. They read, we read   =>  No synchronization required.
          * 2. They read, we write  =>  Synchronize (they need the old value)
          * 3. They write, we read  =>  Synchronize (we need their new value)
          * 4. They write, we write =>  Synchronize (order writes)
          *
          * The read/read case is very common, as multiple batches usually
          * share a streaming state buffer or shader assembly buffer, and
          * we want to avoid synchronizing in this case.
          */
         if (other_entry &&
             ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) {
            crocus_batch_flush(batch->other_batches[b]);
            /* Wait on the other batch's just-flushed work before ours runs.
             * NOTE(review): assumes last_fence is non-NULL right after a
             * flush — TODO confirm against crocus_batch_flush. */
            crocus_batch_add_syncobj(batch,
                                     batch->other_batches[b]->last_fence->syncobj,
                                     I915_EXEC_FENCE_WAIT);
         }
      }
   }

   /* Bump the ref count since the batch is now using this bo. */
   crocus_bo_reference(bo);

   ensure_exec_obj_space(batch, 1);

   batch->validation_list[batch->exec_count] =
      (struct drm_i915_gem_exec_object2) {
         .handle = bo->gem_handle,
         .offset = bo->gtt_offset,
         .flags = bo->kflags | (writable ? EXEC_OBJECT_WRITE : 0),
      };

   /* Cache the exec index on the BO for fast lookup next time. */
   bo->index = batch->exec_count;
   batch->exec_bos[batch->exec_count] = bo;
   batch->aperture_space += bo->size;

   batch->exec_count++;

   return &batch->validation_list[batch->exec_count - 1];
}
3767ec681f3Smrg
/**
 * Record a relocation in \p rlist and ensure \p target is on the
 * validation list.
 *
 * \param offset         byte offset within the command/state buffer where
 *                       the address will be written
 * \param target         BO the address points into
 * \param target_offset  byte offset within \p target
 * \param reloc_flags    RELOC_* flags (RELOC_WRITE, RELOC_32BIT, ...)
 * \return the presumed address (target's last-known offset plus
 *         target_offset) to write into the buffer now, so the kernel can
 *         skip relocation processing if the BO does not move.
 */
static uint64_t
emit_reloc(struct crocus_batch *batch,
           struct crocus_reloc_list *rlist, uint32_t offset,
           struct crocus_bo *target, int32_t target_offset,
           unsigned int reloc_flags)
{
   assert(target != NULL);

   /* The workaround BO is never written by the GPU on our behalf. */
   if (target == batch->ice->workaround_bo)
      reloc_flags &= ~RELOC_WRITE;

   bool writable = reloc_flags & RELOC_WRITE;

   struct drm_i915_gem_exec_object2 *entry =
      crocus_use_bo(batch, target, writable);

   /* Grow the relocation array by doubling when full. */
   if (rlist->reloc_count == rlist->reloc_array_size) {
      rlist->reloc_array_size *= 2;
      rlist->relocs = realloc(rlist->relocs,
                              rlist->reloc_array_size *
                              sizeof(struct drm_i915_gem_relocation_entry));
   }

   if (reloc_flags & RELOC_32BIT) {
      /* Restrict this buffer to the low 32 bits of the address space.
       *
       * Altering the validation list flags restricts it for this batch,
       * but we also alter the BO's kflags to restrict it permanently
       * (until the BO is destroyed and put back in the cache).  Buffers
       * may stay bound across batches, and we want keep it constrained.
       */
      target->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
      entry->flags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS;

      /* RELOC_32BIT is not an EXEC_OBJECT_* flag, so get rid of it. */
      reloc_flags &= ~RELOC_32BIT;
   }

   /* Only propagate flags this device generation understands. */
   if (reloc_flags)
      entry->flags |= reloc_flags & batch->valid_reloc_flags;

   rlist->relocs[rlist->reloc_count++] =
      (struct drm_i915_gem_relocation_entry) {
         .offset = offset,
         .delta = target_offset,
         .target_handle = find_exec_index(batch, target),
         .presumed_offset = entry->offset,
      };

   /* Using the old buffer offset, write in what the right data would be, in
    * case the buffer doesn't move and we can short-circuit the relocation
    * processing in the kernel
    */
   return entry->offset + target_offset;
}
4327ec681f3Smrg
4337ec681f3Smrguint64_t
4347ec681f3Smrgcrocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
4357ec681f3Smrg                     struct crocus_bo *target, uint32_t target_offset,
4367ec681f3Smrg                     unsigned int reloc_flags)
4377ec681f3Smrg{
4387ec681f3Smrg   assert(batch_offset <= batch->command.bo->size - sizeof(uint32_t));
4397ec681f3Smrg
4407ec681f3Smrg   return emit_reloc(batch, &batch->command.relocs, batch_offset,
4417ec681f3Smrg                     target, target_offset, reloc_flags);
4427ec681f3Smrg}
4437ec681f3Smrg
4447ec681f3Smrguint64_t
4457ec681f3Smrgcrocus_state_reloc(struct crocus_batch *batch, uint32_t state_offset,
4467ec681f3Smrg                   struct crocus_bo *target, uint32_t target_offset,
4477ec681f3Smrg                   unsigned int reloc_flags)
4487ec681f3Smrg{
4497ec681f3Smrg   assert(state_offset <= batch->state.bo->size - sizeof(uint32_t));
4507ec681f3Smrg
4517ec681f3Smrg   return emit_reloc(batch, &batch->state.relocs, state_offset,
4527ec681f3Smrg                     target, target_offset, reloc_flags);
4537ec681f3Smrg}
4547ec681f3Smrg
4557ec681f3Smrgstatic void
4567ec681f3Smrgrecreate_growing_buffer(struct crocus_batch *batch,
4577ec681f3Smrg                        struct crocus_growing_bo *grow,
4587ec681f3Smrg                        const char *name, unsigned size)
4597ec681f3Smrg{
4607ec681f3Smrg   struct crocus_screen *screen = batch->screen;
4617ec681f3Smrg   struct crocus_bufmgr *bufmgr = screen->bufmgr;
4627ec681f3Smrg   grow->bo = crocus_bo_alloc(bufmgr, name, size);
4637ec681f3Smrg   grow->bo->kflags |= EXEC_OBJECT_CAPTURE;
4647ec681f3Smrg   grow->partial_bo = NULL;
4657ec681f3Smrg   grow->partial_bo_map = NULL;
4667ec681f3Smrg   grow->partial_bytes = 0;
4677ec681f3Smrg   if (batch->use_shadow_copy)
4687ec681f3Smrg      grow->map = realloc(grow->map, grow->bo->size);
4697ec681f3Smrg   else
4707ec681f3Smrg      grow->map = crocus_bo_map(NULL, grow->bo, MAP_READ | MAP_WRITE);
4717ec681f3Smrg   grow->map_next = grow->map;
4727ec681f3Smrg}
4737ec681f3Smrg
/**
 * Allocate fresh command and state buffers for the batch and seed the
 * validation list.
 *
 * The command BO is added first so it lands at validation index 0
 * (crocus_batch_reset() asserts this).
 */
static void
create_batch(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;

   recreate_growing_buffer(batch, &batch->command,
                           "command buffer",
                           BATCH_SZ + BATCH_RESERVED(&screen->devinfo));

   crocus_use_bo(batch, batch->command.bo, false);

   /* Always add workaround_bo which contains a driver identifier to be
    * recorded in error states.
    */
   crocus_use_bo(batch, batch->ice->workaround_bo, false);

   recreate_growing_buffer(batch, &batch->state,
                           "state buffer",
                           STATE_SZ);

   /* Skip state offset 0 — presumably so an offset of 0 can mean
    * "no state"; TODO confirm against the state-allocation code. */
   batch->state.used = 1;
   crocus_use_bo(batch, batch->state.bo, false);
}
4977ec681f3Smrg
/**
 * If no-op mode is enabled, terminate the (still empty) batch with
 * MI_BATCH_BUFFER_END immediately so nothing appended later executes.
 *
 * Must be called at the very start of a batch.
 */
static void
crocus_batch_maybe_noop(struct crocus_batch *batch)
{
   /* We only insert the NOOP at the beginning of the batch. */
   assert(crocus_batch_bytes_used(batch) == 0);

   if (batch->noop_enabled) {
      /* Emit MI_BATCH_BUFFER_END to prevent any further command to be
       * executed.
       */
      uint32_t *map = batch->command.map_next;

      map[0] = (0xA << 23); /* MI_BATCH_BUFFER_END opcode */

      /* Advance by 4 bytes, i.e. one DWord — assumes map_next has byte
       * granularity (a void or char pointer); TODO confirm. */
      batch->command.map_next += 4;
   }
}
5157ec681f3Smrg
/**
 * Reset the batch to an empty, ready-to-record state.
 *
 * Drops the old command/state BOs, allocates fresh ones, clears dirty
 * and cache tracking, and attaches a new syncobj that will be signaled
 * when this batch completes.
 */
static void
crocus_batch_reset(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;

   crocus_bo_unreference(batch->command.bo);
   crocus_bo_unreference(batch->state.bo);
   batch->primary_batch_size = 0;
   batch->contains_draw = false;
   batch->contains_fence_signal = false;
   batch->state_base_address_emitted = false;
   batch->screen->vtbl.batch_reset_dirty(batch);

   create_batch(batch);
   /* The command buffer must be the first validation-list entry. */
   assert(batch->command.bo->index == 0);

   /* state_sizes is only non-NULL when INTEL_DEBUG=bat decoding is on. */
   if (batch->state_sizes)
      _mesa_hash_table_u64_clear(batch->state_sizes);
   /* New completion syncobj; the dynarray keeps its own reference. */
   struct crocus_syncobj *syncobj = crocus_create_syncobj(screen);
   crocus_batch_add_syncobj(batch, syncobj, I915_EXEC_FENCE_SIGNAL);
   crocus_syncobj_reference(screen, &syncobj, NULL);

   crocus_cache_sets_clear(batch);
}
5407ec681f3Smrg
/**
 * Tear down a batch: release all BO references, relocation lists,
 * fences, syncobjs, caches, the HW context, and decoder state.
 */
void
crocus_batch_free(struct crocus_batch *batch)
{
   struct crocus_screen *screen = batch->screen;
   struct crocus_bufmgr *bufmgr = screen->bufmgr;

   /* Shadow copies are malloc'd CPU memory, not BO mappings. */
   if (batch->use_shadow_copy) {
      free(batch->command.map);
      free(batch->state.map);
   }

   /* Drop the references taken by crocus_use_bo(). */
   for (int i = 0; i < batch->exec_count; i++) {
      crocus_bo_unreference(batch->exec_bos[i]);
   }

   pipe_resource_reference(&batch->fine_fences.ref.res, NULL);

   free(batch->command.relocs.relocs);
   free(batch->state.relocs.relocs);
   free(batch->exec_bos);
   free(batch->validation_list);

   ralloc_free(batch->exec_fences.mem_ctx);

   /* Release our references on any remaining syncobjs. */
   util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s)
      crocus_syncobj_reference(screen, s, NULL);
   ralloc_free(batch->syncobjs.mem_ctx);

   crocus_fine_fence_reference(batch->screen, &batch->last_fence, NULL);
   if (batch_has_fine_fence(batch))
      u_upload_destroy(batch->fine_fences.uploader);

   crocus_bo_unreference(batch->command.bo);
   crocus_bo_unreference(batch->state.bo);
   batch->command.bo = NULL;
   batch->command.map = NULL;
   batch->command.map_next = NULL;

   crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id);

   _mesa_hash_table_destroy(batch->cache.render, NULL);
   _mesa_set_destroy(batch->cache.depth, NULL);

   /* state_sizes doubles as the "decoder was initialized" flag. */
   if (batch->state_sizes) {
      _mesa_hash_table_u64_destroy(batch->state_sizes);
      intel_batch_decode_ctx_finish(&batch->decoder);
   }
}
5897ec681f3Smrg
5907ec681f3Smrg/**
5917ec681f3Smrg * If we've chained to a secondary batch, or are getting near to the end,
5927ec681f3Smrg * then flush.  This should only be called between draws.
5937ec681f3Smrg */
5947ec681f3Smrgvoid
5957ec681f3Smrgcrocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate)
5967ec681f3Smrg{
5977ec681f3Smrg   if (batch->command.bo != batch->exec_bos[0] ||
5987ec681f3Smrg       crocus_batch_bytes_used(batch) + estimate >= BATCH_SZ) {
5997ec681f3Smrg      crocus_batch_flush(batch);
6007ec681f3Smrg   }
6017ec681f3Smrg}
6027ec681f3Smrg
6037ec681f3Smrg/**
6047ec681f3Smrg * Finish copying the old batch/state buffer's contents to the new one
6057ec681f3Smrg * after we tried to "grow" the buffer in an earlier operation.
6067ec681f3Smrg */
6077ec681f3Smrgstatic void
6087ec681f3Smrgfinish_growing_bos(struct crocus_growing_bo *grow)
6097ec681f3Smrg{
6107ec681f3Smrg   struct crocus_bo *old_bo = grow->partial_bo;
6117ec681f3Smrg   if (!old_bo)
6127ec681f3Smrg      return;
6137ec681f3Smrg
6147ec681f3Smrg   memcpy(grow->map, grow->partial_bo_map, grow->partial_bytes);
6157ec681f3Smrg
6167ec681f3Smrg   grow->partial_bo = NULL;
6177ec681f3Smrg   grow->partial_bo_map = NULL;
6187ec681f3Smrg   grow->partial_bytes = 0;
6197ec681f3Smrg
6207ec681f3Smrg   crocus_bo_unreference(old_bo);
6217ec681f3Smrg}
6227ec681f3Smrg
6237ec681f3Smrgvoid
6247ec681f3Smrgcrocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
6257ec681f3Smrg                   unsigned used,
6267ec681f3Smrg                   unsigned new_size)
6277ec681f3Smrg{
6287ec681f3Smrg   struct crocus_screen *screen = batch->screen;
6297ec681f3Smrg   struct crocus_bufmgr *bufmgr = screen->bufmgr;
6307ec681f3Smrg   struct crocus_growing_bo *grow = grow_state ? &batch->state : &batch->command;
6317ec681f3Smrg   struct crocus_bo *bo = grow->bo;
6327ec681f3Smrg
6337ec681f3Smrg   if (grow->partial_bo) {
6347ec681f3Smrg      /* We've already grown once, and now we need to do it again.
6357ec681f3Smrg       * Finish our last grow operation so we can start a new one.
6367ec681f3Smrg       * This should basically never happen.
6377ec681f3Smrg       */
6387ec681f3Smrg      finish_growing_bos(grow);
6397ec681f3Smrg   }
6407ec681f3Smrg
6417ec681f3Smrg   struct crocus_bo *new_bo = crocus_bo_alloc(bufmgr, bo->name, new_size);
6427ec681f3Smrg
6437ec681f3Smrg   /* Copy existing data to the new larger buffer */
6447ec681f3Smrg   grow->partial_bo_map = grow->map;
6457ec681f3Smrg
6467ec681f3Smrg   if (batch->use_shadow_copy) {
6477ec681f3Smrg      /* We can't safely use realloc, as it may move the existing buffer,
6487ec681f3Smrg       * breaking existing pointers the caller may still be using.  Just
6497ec681f3Smrg       * malloc a new copy and memcpy it like the normal BO path.
6507ec681f3Smrg       *
6517ec681f3Smrg       * Use bo->size rather than new_size because the bufmgr may have
6527ec681f3Smrg       * rounded up the size, and we want the shadow size to match.
6537ec681f3Smrg       */
6547ec681f3Smrg      grow->map = malloc(new_bo->size);
6557ec681f3Smrg   } else {
6567ec681f3Smrg      grow->map = crocus_bo_map(NULL, new_bo, MAP_READ | MAP_WRITE);
6577ec681f3Smrg   }
6587ec681f3Smrg   /* Try to put the new BO at the same GTT offset as the old BO (which
6597ec681f3Smrg    * we're throwing away, so it doesn't need to be there).
6607ec681f3Smrg    *
6617ec681f3Smrg    * This guarantees that our relocations continue to work: values we've
6627ec681f3Smrg    * already written into the buffer, values we're going to write into the
6637ec681f3Smrg    * buffer, and the validation/relocation lists all will match.
6647ec681f3Smrg    *
6657ec681f3Smrg    * Also preserve kflags for EXEC_OBJECT_CAPTURE.
6667ec681f3Smrg    */
6677ec681f3Smrg   new_bo->gtt_offset = bo->gtt_offset;
6687ec681f3Smrg   new_bo->index = bo->index;
6697ec681f3Smrg   new_bo->kflags = bo->kflags;
6707ec681f3Smrg
6717ec681f3Smrg   /* Batch/state buffers are per-context, and if we've run out of space,
6727ec681f3Smrg    * we must have actually used them before, so...they will be in the list.
6737ec681f3Smrg    */
6747ec681f3Smrg   assert(bo->index < batch->exec_count);
6757ec681f3Smrg   assert(batch->exec_bos[bo->index] == bo);
6767ec681f3Smrg
6777ec681f3Smrg   /* Update the validation list to use the new BO. */
6787ec681f3Smrg   batch->validation_list[bo->index].handle = new_bo->gem_handle;
6797ec681f3Smrg   /* Exchange the two BOs...without breaking pointers to the old BO.
6807ec681f3Smrg    *
6817ec681f3Smrg    * Consider this scenario:
6827ec681f3Smrg    *
6837ec681f3Smrg    * 1. Somebody calls brw_state_batch() to get a region of memory, and
6847ec681f3Smrg    *    and then creates a brw_address pointing to brw->batch.state.bo.
6857ec681f3Smrg    * 2. They then call brw_state_batch() a second time, which happens to
6867ec681f3Smrg    *    grow and replace the state buffer.  They then try to emit a
6877ec681f3Smrg    *    relocation to their first section of memory.
6887ec681f3Smrg    *
6897ec681f3Smrg    * If we replace the brw->batch.state.bo pointer at step 2, we would
6907ec681f3Smrg    * break the address created in step 1.  They'd have a pointer to the
6917ec681f3Smrg    * old destroyed BO.  Emitting a relocation would add this dead BO to
6927ec681f3Smrg    * the validation list...causing /both/ statebuffers to be in the list,
6937ec681f3Smrg    * and all kinds of disasters.
6947ec681f3Smrg    *
6957ec681f3Smrg    * This is not a contrived case - BLORP vertex data upload hits this.
6967ec681f3Smrg    *
6977ec681f3Smrg    * There are worse scenarios too.  Fences for GL sync objects reference
6987ec681f3Smrg    * brw->batch.batch.bo.  If we replaced the batch pointer when growing,
6997ec681f3Smrg    * we'd need to chase down every fence and update it to point to the
7007ec681f3Smrg    * new BO.  Otherwise, it would refer to a "batch" that never actually
7017ec681f3Smrg    * gets submitted, and would fail to trigger.
7027ec681f3Smrg    *
7037ec681f3Smrg    * To work around both of these issues, we transmutate the buffers in
7047ec681f3Smrg    * place, making the existing struct brw_bo represent the new buffer,
7057ec681f3Smrg    * and "new_bo" represent the old BO.  This is highly unusual, but it
7067ec681f3Smrg    * seems like a necessary evil.
7077ec681f3Smrg    *
7087ec681f3Smrg    * We also defer the memcpy of the existing batch's contents.  Callers
7097ec681f3Smrg    * may make multiple brw_state_batch calls, and retain pointers to the
7107ec681f3Smrg    * old BO's map.  We'll perform the memcpy in finish_growing_bo() when
7117ec681f3Smrg    * we finally submit the batch, at which point we've finished uploading
7127ec681f3Smrg    * state, and nobody should have any old references anymore.
7137ec681f3Smrg    *
7147ec681f3Smrg    * To do that, we keep a reference to the old BO in grow->partial_bo,
7157ec681f3Smrg    * and store the number of bytes to copy in grow->partial_bytes.  We
7167ec681f3Smrg    * can monkey with the refcounts directly without atomics because these
7177ec681f3Smrg    * are per-context BOs and they can only be touched by this thread.
7187ec681f3Smrg    */
7197ec681f3Smrg   assert(new_bo->refcount == 1);
7207ec681f3Smrg   new_bo->refcount = bo->refcount;
7217ec681f3Smrg   bo->refcount = 1;
7227ec681f3Smrg
7237ec681f3Smrg   struct crocus_bo tmp;
7247ec681f3Smrg   memcpy(&tmp, bo, sizeof(struct crocus_bo));
7257ec681f3Smrg   memcpy(bo, new_bo, sizeof(struct crocus_bo));
7267ec681f3Smrg   memcpy(new_bo, &tmp, sizeof(struct crocus_bo));
7277ec681f3Smrg
7287ec681f3Smrg   grow->partial_bo = new_bo; /* the one reference of the OLD bo */
7297ec681f3Smrg   grow->partial_bytes = used;
7307ec681f3Smrg}
7317ec681f3Smrg
7327ec681f3Smrgstatic void
7337ec681f3Smrgfinish_seqno(struct crocus_batch *batch)
7347ec681f3Smrg{
7357ec681f3Smrg   struct crocus_fine_fence *sq = crocus_fine_fence_new(batch, CROCUS_FENCE_END);
7367ec681f3Smrg   if (!sq)
7377ec681f3Smrg      return;
7387ec681f3Smrg
7397ec681f3Smrg   crocus_fine_fence_reference(batch->screen, &batch->last_fence, sq);
7407ec681f3Smrg   crocus_fine_fence_reference(batch->screen, &sq, NULL);
7417ec681f3Smrg}
7427ec681f3Smrg
/**
 * Terminate a batch with MI_BATCH_BUFFER_END.
 *
 * Runs any backend finish-batch hook, emits the end-of-batch seqno fence,
 * and writes the MI_BATCH_BUFFER_END command.  no_wrap is set for the
 * duration so that emitting these final commands cannot trigger a
 * recursive batch flush/wrap.
 */
static void
crocus_finish_batch(struct crocus_batch *batch)
{

   batch->no_wrap = true;
   /* Let the generation-specific backend emit any final commands first. */
   if (batch->screen->vtbl.finish_batch)
      batch->screen->vtbl.finish_batch(batch);

   /* Emit the end-of-batch seqno write before terminating the batch. */
   finish_seqno(batch);

   /* Emit MI_BATCH_BUFFER_END to finish our batch.  0xA << 23 is the
    * MI_BATCH_BUFFER_END opcode in bits 31:23 of the command dword.
    */
   uint32_t *map = batch->command.map_next;

   map[0] = (0xA << 23);

   /* Advance by 4 — one dword.  NOTE(review): this assumes map_next is
    * byte-addressed (e.g. void */char *); confirm against its declaration.
    */
   batch->command.map_next += 4;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->command.map, crocus_batch_bytes_used(batch)));

   /* Record the primary batch size only if we haven't chained to a
    * secondary batch BO (exec_bos[0] is the primary batch).
    */
   if (batch->command.bo == batch->exec_bos[0])
      batch->primary_batch_size = crocus_batch_bytes_used(batch);
   batch->no_wrap = false;
}
7687ec681f3Smrg
7697ec681f3Smrg/**
7707ec681f3Smrg * Replace our current GEM context with a new one (in case it got banned).
7717ec681f3Smrg */
7727ec681f3Smrgstatic bool
7737ec681f3Smrgreplace_hw_ctx(struct crocus_batch *batch)
7747ec681f3Smrg{
7757ec681f3Smrg   struct crocus_screen *screen = batch->screen;
7767ec681f3Smrg   struct crocus_bufmgr *bufmgr = screen->bufmgr;
7777ec681f3Smrg
7787ec681f3Smrg   uint32_t new_ctx = crocus_clone_hw_context(bufmgr, batch->hw_ctx_id);
7797ec681f3Smrg   if (!new_ctx)
7807ec681f3Smrg      return false;
7817ec681f3Smrg
7827ec681f3Smrg   crocus_destroy_hw_context(bufmgr, batch->hw_ctx_id);
7837ec681f3Smrg   batch->hw_ctx_id = new_ctx;
7847ec681f3Smrg
7857ec681f3Smrg   /* Notify the context that state must be re-initialized. */
7867ec681f3Smrg   crocus_lost_context_state(batch);
7877ec681f3Smrg
7887ec681f3Smrg   return true;
7897ec681f3Smrg}
7907ec681f3Smrg
7917ec681f3Smrgenum pipe_reset_status
7927ec681f3Smrgcrocus_batch_check_for_reset(struct crocus_batch *batch)
7937ec681f3Smrg{
7947ec681f3Smrg   struct crocus_screen *screen = batch->screen;
7957ec681f3Smrg   enum pipe_reset_status status = PIPE_NO_RESET;
7967ec681f3Smrg   struct drm_i915_reset_stats stats = { .ctx_id = batch->hw_ctx_id };
7977ec681f3Smrg
7987ec681f3Smrg   if (drmIoctl(screen->fd, DRM_IOCTL_I915_GET_RESET_STATS, &stats))
7997ec681f3Smrg      DBG("DRM_IOCTL_I915_GET_RESET_STATS failed: %s\n", strerror(errno));
8007ec681f3Smrg
8017ec681f3Smrg   if (stats.batch_active != 0) {
8027ec681f3Smrg      /* A reset was observed while a batch from this hardware context was
8037ec681f3Smrg       * executing.  Assume that this context was at fault.
8047ec681f3Smrg       */
8057ec681f3Smrg      status = PIPE_GUILTY_CONTEXT_RESET;
8067ec681f3Smrg   } else if (stats.batch_pending != 0) {
8077ec681f3Smrg      /* A reset was observed while a batch from this context was in progress,
8087ec681f3Smrg       * but the batch was not executing.  In this case, assume that the
8097ec681f3Smrg       * context was not at fault.
8107ec681f3Smrg       */
8117ec681f3Smrg      status = PIPE_INNOCENT_CONTEXT_RESET;
8127ec681f3Smrg   }
8137ec681f3Smrg
8147ec681f3Smrg   if (status != PIPE_NO_RESET) {
8157ec681f3Smrg      /* Our context is likely banned, or at least in an unknown state.
8167ec681f3Smrg       * Throw it away and start with a fresh context.  Ideally this may
8177ec681f3Smrg       * catch the problem before our next execbuf fails with -EIO.
8187ec681f3Smrg       */
8197ec681f3Smrg      replace_hw_ctx(batch);
8207ec681f3Smrg   }
8217ec681f3Smrg
8227ec681f3Smrg   return status;
8237ec681f3Smrg}
8247ec681f3Smrg
/**
 * Submit the batch to the GPU via execbuffer2.
 *
 * Assumes crocus_finish_batch() has already terminated the batch and any
 * growing BOs have been finalized.  Returns 0 on success or -errno on
 * failure.  On return, per-BO indices are invalidated and GTT offsets are
 * refreshed from the kernel's validation results.
 */
static int
submit_batch(struct crocus_batch *batch)
{

   if (batch->use_shadow_copy) {
      /* Commands/state were built in malloc'd shadow memory; copy them
       * into the real BOs now, just before submission.
       */
      void *bo_map = crocus_bo_map(batch->dbg, batch->command.bo, MAP_WRITE);
      memcpy(bo_map, batch->command.map, crocus_batch_bytes_used(batch));

      bo_map = crocus_bo_map(batch->dbg, batch->state.bo, MAP_WRITE);
      memcpy(bo_map, batch->state.map, batch->state.used);
   }

   crocus_bo_unmap(batch->command.bo);
   crocus_bo_unmap(batch->state.bo);

   /* The requirement for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.gtt_offset which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
   /* Set statebuffer relocations.  The state BO may legitimately be absent
    * from the validation list (nothing referenced it), hence the guard.
    */
   const unsigned state_index = batch->state.bo->index;
   if (state_index < batch->exec_count &&
       batch->exec_bos[state_index] == batch->state.bo) {
      struct drm_i915_gem_exec_object2 *entry =
         &batch->validation_list[state_index];
      assert(entry->handle == batch->state.bo->gem_handle);
      entry->relocation_count = batch->state.relocs.reloc_count;
      entry->relocs_ptr = (uintptr_t)batch->state.relocs.relocs;
   }

   /* Set batchbuffer relocations.  With I915_EXEC_BATCH_FIRST, the batch
    * BO is always validation_list[0].
    */
   struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0];
   assert(entry->handle == batch->command.bo->gem_handle);
   entry->relocation_count = batch->command.relocs.reloc_count;
   entry->relocs_ptr = (uintptr_t)batch->command.relocs.relocs;

   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t)batch->validation_list,
      .buffer_count = batch->exec_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = I915_EXEC_RENDER |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };

   if (num_fences(batch)) {
      /* I915_EXEC_FENCE_ARRAY repurposes the cliprects fields to carry
       * the fence array — that is the uAPI, not a typo.
       */
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   /* no_hw means "don't actually submit" (debug mode): report success. */
   int ret = 0;
   if (!batch->screen->devinfo.no_hw &&
       intel_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf))
      ret = -errno;

   /* Even on failure, reset per-BO bookkeeping: mark busy, drop the
    * validation-list index, and pick up any kernel-assigned offsets.
    */
   for (int i = 0; i < batch->exec_count; i++) {
      struct crocus_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      /* Update brw_bo::gtt_offset */
      if (batch->validation_list[i].offset != bo->gtt_offset) {
         DBG("BO %d migrated: 0x%" PRIx64 " -> 0x%" PRIx64 "\n",
             bo->gem_handle, bo->gtt_offset,
             (uint64_t)batch->validation_list[i].offset);
         assert(!(bo->kflags & EXEC_OBJECT_PINNED));
         bo->gtt_offset = batch->validation_list[i].offset;
      }
   }

   return ret;
}
9157ec681f3Smrg
9167ec681f3Smrgstatic const char *
9177ec681f3Smrgbatch_name_to_string(enum crocus_batch_name name)
9187ec681f3Smrg{
9197ec681f3Smrg   const char *names[CROCUS_BATCH_COUNT] = {
9207ec681f3Smrg      [CROCUS_BATCH_RENDER] = "render",
9217ec681f3Smrg      [CROCUS_BATCH_COMPUTE] = "compute",
9227ec681f3Smrg   };
9237ec681f3Smrg   return names[name];
9247ec681f3Smrg}
9257ec681f3Smrg
/**
 * Flush the batch buffer, submitting it to the GPU and resetting it so
 * we're ready to emit the next batch.
 *
 * \param file/line identify the caller for debug batch-flush logging.
 *
 * NOTE(review): the \param in_fence_fd / out_fence_fd text below appears
 * to be stale — this function takes neither parameter; confirm against
 * the header declaration.
 *
 * \param in_fence_fd is ignored if -1.  Otherwise, this function takes
 * ownership of the fd.
 *
 * \param out_fence_fd is ignored if NULL.  Otherwise, the caller must
 * take ownership of the returned fd.
 */
void
_crocus_batch_flush(struct crocus_batch *batch, const char *file, int line)
{
   struct crocus_screen *screen = batch->screen;

   /* If a fence signals we need to flush it. */
   if (crocus_batch_bytes_used(batch) == 0 && !batch->contains_fence_signal)
      return;

   /* Flushing while emitting final commands would recurse; forbidden. */
   assert(!batch->no_wrap);
   crocus_finish_batch(batch);

   /* Perform the deferred memcpy from any grown (replaced) BOs before
    * the kernel sees them.
    */
   finish_growing_bos(&batch->command);
   finish_growing_bos(&batch->state);
   int ret = submit_batch(batch);

   if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT | DEBUG_PIPE_CONTROL)) {
      int bytes_for_commands = crocus_batch_bytes_used(batch);
      int second_bytes = 0;
      /* If we chained to a secondary batch BO, report both sizes. */
      if (batch->command.bo != batch->exec_bos[0]) {
         second_bytes = bytes_for_commands;
         bytes_for_commands += batch->primary_batch_size;
      }
      fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5d+%5db (%0.1f%%) "
              "(cmds), %4d BOs (%0.1fMb aperture),"
              " %4d command relocs, %4d state relocs\n",
              file, line, batch_name_to_string(batch->name), batch->hw_ctx_id,
              batch->primary_batch_size, second_bytes,
              100.0f * bytes_for_commands / BATCH_SZ,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024),
              batch->command.relocs.reloc_count,
              batch->state.relocs.reloc_count);

      if (INTEL_DEBUG(DEBUG_BATCH | DEBUG_SUBMIT)) {
         dump_fence_list(batch);
         dump_validation_list(batch);
      }

      if (INTEL_DEBUG(DEBUG_BATCH)) {
         decode_batch(batch);
      }
   }

   /* Drop the references the validation list held on each BO. */
   for (int i = 0; i < batch->exec_count; i++) {
      struct crocus_bo *bo = batch->exec_bos[i];
      crocus_bo_unreference(bo);
   }

   /* Reset per-batch bookkeeping for the next batch. */
   batch->command.relocs.reloc_count = 0;
   batch->state.relocs.reloc_count = 0;
   batch->exec_count = 0;
   batch->aperture_space = 0;

   util_dynarray_foreach(&batch->syncobjs, struct crocus_syncobj *, s)
      crocus_syncobj_reference(screen, s, NULL);
   util_dynarray_clear(&batch->syncobjs);

   util_dynarray_clear(&batch->exec_fences);

   if (INTEL_DEBUG(DEBUG_SYNC)) {
      dbg_printf("waiting for idle\n");
      crocus_bo_wait_rendering(batch->command.bo); /* if execbuf failed; this is a nop */
   }

   /* Start a new batch buffer. */
   crocus_batch_reset(batch);

   /* EIO means our context is banned.  In this case, try and replace it
    * with a new logical context, and inform crocus_context that all state
    * has been lost and needs to be re-initialized.  If this succeeds,
    * dubiously claim success...
    */
   if (ret == -EIO && replace_hw_ctx(batch)) {
      if (batch->reset->reset) {
         /* Tell the state tracker the device is lost and it was our fault. */
         batch->reset->reset(batch->reset->data, PIPE_GUILTY_CONTEXT_RESET);
      }

      ret = 0;
   }

   /* Any other submission failure is unrecoverable: abort. */
   if (ret < 0) {
#ifdef DEBUG
      const bool color = INTEL_DEBUG(DEBUG_COLOR);
      fprintf(stderr, "%scrocus: Failed to submit batchbuffer: %-80s%s\n",
              color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : "");
#endif
      abort();
   }
}
10277ec681f3Smrg
/**
 * Does the current batch refer to the given BO?
 *
 * Equivalent to asking whether the BO appears in the current batch's
 * validation list.
 */
bool
crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo)
{
   if (find_validation_entry(batch, bo) == NULL)
      return false;

   return true;
}
10387ec681f3Smrg
10397ec681f3Smrg/**
10407ec681f3Smrg * Updates the state of the noop feature.  Returns true if there was a noop
10417ec681f3Smrg * transition that led to state invalidation.
10427ec681f3Smrg */
10437ec681f3Smrgbool
10447ec681f3Smrgcrocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable)
10457ec681f3Smrg{
10467ec681f3Smrg   if (batch->noop_enabled == noop_enable)
10477ec681f3Smrg      return 0;
10487ec681f3Smrg
10497ec681f3Smrg   batch->noop_enabled = noop_enable;
10507ec681f3Smrg
10517ec681f3Smrg   crocus_batch_flush(batch);
10527ec681f3Smrg
10537ec681f3Smrg   /* If the batch was empty, flush had no effect, so insert our noop. */
10547ec681f3Smrg   if (crocus_batch_bytes_used(batch) == 0)
10557ec681f3Smrg      crocus_batch_maybe_noop(batch);
10567ec681f3Smrg
10577ec681f3Smrg   /* We only need to update the entire state if we transition from noop ->
10587ec681f3Smrg    * not-noop.
10597ec681f3Smrg    */
10607ec681f3Smrg   return !batch->noop_enabled;
10617ec681f3Smrg}
1062