19f464c52Smaya/*
29f464c52Smaya * Copyright © 2017 Intel Corporation
39f464c52Smaya *
49f464c52Smaya * Permission is hereby granted, free of charge, to any person obtaining a
59f464c52Smaya * copy of this software and associated documentation files (the "Software"),
69f464c52Smaya * to deal in the Software without restriction, including without limitation
79f464c52Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense,
89f464c52Smaya * and/or sell copies of the Software, and to permit persons to whom the
99f464c52Smaya * Software is furnished to do so, subject to the following conditions:
109f464c52Smaya *
119f464c52Smaya * The above copyright notice and this permission notice (including the next
129f464c52Smaya * paragraph) shall be included in all copies or substantial portions of the
139f464c52Smaya * Software.
149f464c52Smaya *
159f464c52Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
169f464c52Smaya * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
179f464c52Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
189f464c52Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
199f464c52Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
209f464c52Smaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
219f464c52Smaya * IN THE SOFTWARE.
229f464c52Smaya */
239f464c52Smaya
249f464c52Smaya#ifndef IRIS_BATCH_DOT_H
259f464c52Smaya#define IRIS_BATCH_DOT_H
269f464c52Smaya
279f464c52Smaya#include <stdint.h>
289f464c52Smaya#include <stdbool.h>
299f464c52Smaya#include <string.h>
309f464c52Smaya
319f464c52Smaya#include "util/u_dynarray.h"
329f464c52Smaya
339f464c52Smaya#include "drm-uapi/i915_drm.h"
347ec681f3Smrg#include "common/intel_decoder.h"
359f464c52Smaya
369f464c52Smaya#include "iris_fence.h"
377ec681f3Smrg#include "iris_fine_fence.h"
387ec681f3Smrg
397ec681f3Smrgstruct iris_context;
409f464c52Smaya
/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Space kept free at the end of every batch for the commands that
 * terminate it:
 *
 *   12 bytes  MI_BATCH_BUFFER_START, used when chaining (the non-chaining
 *             terminator, MI_BATCH_BUFFER_END, needs only 4 bytes)
 *   24 bytes  seqno write (using PIPE_CONTROL)
 *   24 bytes  ISP invalidation pipe control
 *   --------
 *   60 bytes
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)
539f464c52Smaya
/**
 * Names the kind of a batch; stored in iris_batch::name and passed to
 * iris_init_batch().
 */
enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
};
589f464c52Smaya
599f464c52Smayastruct iris_batch {
607ec681f3Smrg   struct iris_context *ice;
619f464c52Smaya   struct iris_screen *screen;
629f464c52Smaya   struct pipe_debug_callback *dbg;
637ec681f3Smrg   struct pipe_device_reset_callback *reset;
649f464c52Smaya
659f464c52Smaya   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
669f464c52Smaya   enum iris_batch_name name;
679f464c52Smaya
689f464c52Smaya   /** Current batchbuffer being queued up. */
699f464c52Smaya   struct iris_bo *bo;
709f464c52Smaya   void *map;
719f464c52Smaya   void *map_next;
727ec681f3Smrg
737ec681f3Smrg   /** Size of the primary batch being submitted to execbuf (in bytes). */
749f464c52Smaya   unsigned primary_batch_size;
759f464c52Smaya
767ec681f3Smrg   /** Total size of all chained batches (in bytes). */
777ec681f3Smrg   unsigned total_chained_batch_size;
787ec681f3Smrg
799f464c52Smaya   /** Last Surface State Base Address set in this hardware context. */
809f464c52Smaya   uint64_t last_surface_base_address;
819f464c52Smaya
829f464c52Smaya   uint32_t hw_ctx_id;
839f464c52Smaya
847ec681f3Smrg   /** A list of all BOs referenced by this batch */
859f464c52Smaya   struct iris_bo **exec_bos;
869f464c52Smaya   int exec_count;
879f464c52Smaya   int exec_array_size;
887ec681f3Smrg   /** Bitset of whether this batch writes to BO `i'. */
897ec681f3Smrg   BITSET_WORD *bos_written;
907ec681f3Smrg   uint32_t max_gem_handle;
917ec681f3Smrg
927ec681f3Smrg   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
937ec681f3Smrg    * instruction is a MI_BATCH_BUFFER_END).
947ec681f3Smrg    */
957ec681f3Smrg   bool noop_enabled;
969f464c52Smaya
979f464c52Smaya   /**
987ec681f3Smrg    * A list of iris_syncobjs associated with this batch.
999f464c52Smaya    *
1009f464c52Smaya    * The first list entry will always be a signalling sync-point, indicating
1019f464c52Smaya    * that this batch has completed.  The others are likely to be sync-points
1029f464c52Smaya    * to wait on before executing the batch.
1039f464c52Smaya    */
1047ec681f3Smrg   struct util_dynarray syncobjs;
1059f464c52Smaya
1069f464c52Smaya   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
1079f464c52Smaya   struct util_dynarray exec_fences;
1089f464c52Smaya
1099f464c52Smaya   /** The amount of aperture space (in bytes) used by all exec_bos */
1109f464c52Smaya   int aperture_space;
1119f464c52Smaya
1127ec681f3Smrg   struct {
1137ec681f3Smrg      /** Uploader to use for sequence numbers */
1147ec681f3Smrg      struct u_upload_mgr *uploader;
1157ec681f3Smrg
1167ec681f3Smrg      /** GPU buffer and CPU map where our seqno's will be written. */
1177ec681f3Smrg      struct iris_state_ref ref;
1187ec681f3Smrg      uint32_t *map;
1197ec681f3Smrg
1207ec681f3Smrg      /** The sequence number to write the next time we add a fence. */
1217ec681f3Smrg      uint32_t next;
1227ec681f3Smrg   } fine_fences;
1237ec681f3Smrg
1247ec681f3Smrg   /** A seqno (and syncobj) for the last batch that was submitted. */
1257ec681f3Smrg   struct iris_fine_fence *last_fence;
1269f464c52Smaya
1279f464c52Smaya   /** List of other batches which we might need to flush to use a BO */
1289f464c52Smaya   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];
1299f464c52Smaya
1309f464c52Smaya   struct {
1319f464c52Smaya      /**
1329f464c52Smaya       * Set of struct brw_bo * that have been rendered to within this
1339f464c52Smaya       * batchbuffer and would need flushing before being used from another
1349f464c52Smaya       * cache domain that isn't coherent with it (i.e. the sampler).
1359f464c52Smaya       */
1369f464c52Smaya      struct hash_table *render;
1379f464c52Smaya   } cache;
1389f464c52Smaya
1397ec681f3Smrg   struct intel_batch_decode_ctx decoder;
1407ec681f3Smrg   struct hash_table_u64 *state_sizes;
1417ec681f3Smrg
1427ec681f3Smrg   /**
1437ec681f3Smrg    * Matrix representation of the cache coherency status of the GPU at the
1447ec681f3Smrg    * current end point of the batch.  For every i and j,
1457ec681f3Smrg    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
1467ec681f3Smrg    * cache domain j visible to cache domain i (which obviously implies that
1477ec681f3Smrg    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
1487ec681f3Smrg    * can be used to efficiently determine whether synchronization is
1497ec681f3Smrg    * necessary before accessing data from cache domain i if it was previously
1507ec681f3Smrg    * accessed from another cache domain j.
1517ec681f3Smrg    */
1527ec681f3Smrg   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
1537ec681f3Smrg
1547ec681f3Smrg   /**
1557ec681f3Smrg    * Sequence number used to track the completion of any subsequent memory
1567ec681f3Smrg    * operations in the batch until the next sync boundary.
1577ec681f3Smrg    */
1587ec681f3Smrg   uint64_t next_seqno;
1599f464c52Smaya
1609f464c52Smaya   /** Have we emitted any draw calls to this batch? */
1619f464c52Smaya   bool contains_draw;
1627ec681f3Smrg
1637ec681f3Smrg   /** Have we emitted any draw calls with next_seqno? */
1647ec681f3Smrg   bool contains_draw_with_next_seqno;
1657ec681f3Smrg
1667ec681f3Smrg   /** Batch contains fence signal operation. */
1677ec681f3Smrg   bool contains_fence_signal;
1687ec681f3Smrg
1697ec681f3Smrg   /**
1707ec681f3Smrg    * Number of times iris_batch_sync_region_start() has been called without a
1717ec681f3Smrg    * matching iris_batch_sync_region_end() on this batch.
1727ec681f3Smrg    */
1737ec681f3Smrg   uint32_t sync_region_depth;
1747ec681f3Smrg
1757ec681f3Smrg   uint32_t last_aux_map_state;
1767ec681f3Smrg   struct iris_measure_batch *measure;
1779f464c52Smaya};
1789f464c52Smaya
void iris_init_batch(struct iris_context *ice,
                     enum iris_batch_name name,
                     int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_batch_free(struct iris_batch *batch);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

/* Callers normally use the iris_batch_flush() wrapper, which passes the
 * call site's __FILE__ and __LINE__ along to _iris_batch_flush().
 */
void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);
1999f464c52Smaya
2009f464c52Smayastatic inline unsigned
2019f464c52Smayairis_batch_bytes_used(struct iris_batch *batch)
2029f464c52Smaya{
2039f464c52Smaya   return batch->map_next - batch->map;
2049f464c52Smaya}
2059f464c52Smaya
2069f464c52Smaya/**
2079f464c52Smaya * Ensure the current command buffer has \param size bytes of space
2089f464c52Smaya * remaining.  If not, this creates a secondary batch buffer and emits
2099f464c52Smaya * a jump from the primary batch to the start of the secondary.
2109f464c52Smaya *
2119f464c52Smaya * Most callers want iris_get_command_space() instead.
2129f464c52Smaya */
2139f464c52Smayastatic inline void
2149f464c52Smayairis_require_command_space(struct iris_batch *batch, unsigned size)
2159f464c52Smaya{
2169f464c52Smaya   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;
2179f464c52Smaya
2189f464c52Smaya   if (required_bytes >= BATCH_SZ) {
2199f464c52Smaya      iris_chain_to_new_batch(batch);
2209f464c52Smaya   }
2219f464c52Smaya}
2229f464c52Smaya
2239f464c52Smaya/**
2249f464c52Smaya * Allocate space in the current command buffer, and return a pointer
2259f464c52Smaya * to the mapped area so the caller can write commands there.
2269f464c52Smaya *
2279f464c52Smaya * This should be called whenever emitting commands.
2289f464c52Smaya */
2299f464c52Smayastatic inline void *
2309f464c52Smayairis_get_command_space(struct iris_batch *batch, unsigned bytes)
2319f464c52Smaya{
2329f464c52Smaya   iris_require_command_space(batch, bytes);
2339f464c52Smaya   void *map = batch->map_next;
2349f464c52Smaya   batch->map_next += bytes;
2359f464c52Smaya   return map;
2369f464c52Smaya}
2379f464c52Smaya
/**
 * Helper to emit GPU commands - allocates space, copies them there.
 *
 * \param batch  the batch to append to
 * \param data   the commands to copy into the batch
 * \param size   length of \p data in bytes
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *dst = iris_get_command_space(batch, size);

   memcpy(dst, data, size);
}
2479f464c52Smaya
2489f464c52Smaya/**
2497ec681f3Smrg * Get a pointer to the batch's signalling syncobj.  Does not refcount.
2507ec681f3Smrg */
2517ec681f3Smrgstatic inline struct iris_syncobj *
2527ec681f3Smrgiris_batch_get_signal_syncobj(struct iris_batch *batch)
2537ec681f3Smrg{
2547ec681f3Smrg   /* The signalling syncobj is the first one in the list. */
2557ec681f3Smrg   struct iris_syncobj *syncobj =
2567ec681f3Smrg      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
2577ec681f3Smrg   return syncobj;
2587ec681f3Smrg}
2597ec681f3Smrg
2607ec681f3Smrg
2617ec681f3Smrg/**
2627ec681f3Smrg * Take a reference to the batch's signalling syncobj.
2639f464c52Smaya *
2649f464c52Smaya * Callers can use this to wait for the the current batch under construction
2659f464c52Smaya * to complete (after flushing it).
2669f464c52Smaya */
2679f464c52Smayastatic inline void
2687ec681f3Smrgiris_batch_reference_signal_syncobj(struct iris_batch *batch,
2697ec681f3Smrg                                   struct iris_syncobj **out_syncobj)
2707ec681f3Smrg{
2717ec681f3Smrg   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
2727ec681f3Smrg   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
2737ec681f3Smrg}
2747ec681f3Smrg
/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 *
 * \param ht                table to record into; when NULL nothing is
 *                          recorded
 * \param offset_from_base  key under which the size is stored
 * \param size              size of the state, stored directly in the
 *                          table's value pointer
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (!ht)
      return;

   /* The size is packed into the pointer-sized value slot; no allocation. */
   void *packed_size = (void *)(uintptr_t) size;

   _mesa_hash_table_u64_insert(ht, offset_from_base, packed_size);
}
2887ec681f3Smrg
2897ec681f3Smrg/**
2907ec681f3Smrg * Mark the start of a region in the batch with stable synchronization
2917ec681f3Smrg * sequence number.  Any buffer object accessed by the batch buffer only needs
2927ec681f3Smrg * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
2937ec681f3Smrg * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
2947ec681f3Smrg */
2957ec681f3Smrgstatic inline void
2967ec681f3Smrgiris_batch_sync_region_start(struct iris_batch *batch)
2977ec681f3Smrg{
2987ec681f3Smrg   batch->sync_region_depth++;
2997ec681f3Smrg}
3007ec681f3Smrg
3017ec681f3Smrg/**
3027ec681f3Smrg * Mark the end of a region in the batch with stable synchronization sequence
3037ec681f3Smrg * number.  Should be called once after each call to
3047ec681f3Smrg * iris_batch_sync_region_start().
3057ec681f3Smrg */
3067ec681f3Smrgstatic inline void
3077ec681f3Smrgiris_batch_sync_region_end(struct iris_batch *batch)
3087ec681f3Smrg{
3097ec681f3Smrg   assert(batch->sync_region_depth);
3107ec681f3Smrg   batch->sync_region_depth--;
3117ec681f3Smrg}
3127ec681f3Smrg
3137ec681f3Smrg/**
3147ec681f3Smrg * Start a new synchronization section at the current point of the batch,
3157ec681f3Smrg * unless disallowed by a previous iris_batch_sync_region_start().
3167ec681f3Smrg */
3177ec681f3Smrgstatic inline void
3187ec681f3Smrgiris_batch_sync_boundary(struct iris_batch *batch)
3197ec681f3Smrg{
3207ec681f3Smrg   if (!batch->sync_region_depth) {
3217ec681f3Smrg      batch->contains_draw_with_next_seqno = false;
3227ec681f3Smrg      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
3237ec681f3Smrg      assert(batch->next_seqno > 0);
3247ec681f3Smrg   }
3257ec681f3Smrg}
3267ec681f3Smrg
3277ec681f3Smrg/**
3287ec681f3Smrg * Update the cache coherency status of the batch to reflect a flush of the
3297ec681f3Smrg * specified caching domain.
3307ec681f3Smrg */
3317ec681f3Smrgstatic inline void
3327ec681f3Smrgiris_batch_mark_flush_sync(struct iris_batch *batch,
3337ec681f3Smrg                           enum iris_domain access)
3347ec681f3Smrg{
3357ec681f3Smrg   batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
3367ec681f3Smrg}
3377ec681f3Smrg
3387ec681f3Smrg/**
3397ec681f3Smrg * Update the cache coherency status of the batch to reflect an invalidation
3407ec681f3Smrg * of the specified caching domain.  All prior flushes of other caches will be
3417ec681f3Smrg * considered visible to the specified caching domain.
3427ec681f3Smrg */
3437ec681f3Smrgstatic inline void
3447ec681f3Smrgiris_batch_mark_invalidate_sync(struct iris_batch *batch,
3457ec681f3Smrg                                enum iris_domain access)
3467ec681f3Smrg{
3477ec681f3Smrg   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
3487ec681f3Smrg      batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
3497ec681f3Smrg}
3507ec681f3Smrg
3517ec681f3Smrg/**
3527ec681f3Smrg * Update the cache coherency status of the batch to reflect a reset.  All
3537ec681f3Smrg * previously accessed data can be considered visible to every caching domain
3547ec681f3Smrg * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
3557ec681f3Smrg */
3567ec681f3Smrgstatic inline void
3577ec681f3Smrgiris_batch_mark_reset_sync(struct iris_batch *batch)
3589f464c52Smaya{
3597ec681f3Smrg   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
3607ec681f3Smrg      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
3617ec681f3Smrg         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
3629f464c52Smaya}
3639f464c52Smaya
3649f464c52Smaya#endif
365