17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2017 Intel Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg */
237ec681f3Smrg
247ec681f3Smrg#ifndef CROCUS_BATCH_DOT_H
257ec681f3Smrg#define CROCUS_BATCH_DOT_H
267ec681f3Smrg
277ec681f3Smrg#include <stdbool.h>
287ec681f3Smrg#include <stdint.h>
297ec681f3Smrg#include <string.h>
307ec681f3Smrg
317ec681f3Smrg#include "util/u_dynarray.h"
327ec681f3Smrg
337ec681f3Smrg#include "common/intel_decoder.h"
347ec681f3Smrg#include "drm-uapi/i915_drm.h"
357ec681f3Smrg
367ec681f3Smrg#include "crocus_fence.h"
377ec681f3Smrg#include "crocus_fine_fence.h"
387ec681f3Smrg
397ec681f3Smrg#include "crocus_bufmgr.h"
/* The kernel assumes batchbuffers are smaller than 256kB.
 *
 * crocus_require_command_space() uses this as the cap when it asks for a
 * larger command buffer.
 */
#define MAX_BATCH_SIZE (256 * 1024)

/* 3DSTATE_BINDING_TABLE_POINTERS has a U16 offset from Surface State Base
 * Address, which means that we can't put binding tables beyond 64kB.  This
 * effectively limits the maximum statebuffer size to 64kB.
 */
#define MAX_STATE_SIZE (64 * 1024)

/* Our target sizes - flush approximately at this point.
 *
 * BATCH_SZ is the command-buffer threshold tested by
 * crocus_require_command_space(); STATE_SZ is the state-buffer threshold
 * tested by crocus_require_statebuffer_space().
 */
#define BATCH_SZ (20 * 1024)
#define STATE_SZ (16 * 1024)
527ec681f3Smrg
/**
 * Names the kind of batch; stored in crocus_batch::name.
 */
enum crocus_batch_name {
   CROCUS_BATCH_RENDER = 0,
   CROCUS_BATCH_COMPUTE = 1,
};

/* Number of enumerators in crocus_batch_name. */
#define CROCUS_BATCH_COUNT 2
597ec681f3Smrg
/**
 * A buffer object reference bundled with an offset and relocation flags.
 */
struct crocus_address {
   struct crocus_bo *bo;
   /* NOTE(review): presumably a byte offset relative to bo -- confirm. */
   int32_t offset;
   /* NOTE(review): presumably a mask of RELOC_* bits -- confirm with users. */
   uint32_t reloc_flags;
};
657ec681f3Smrg
/**
 * An array of i915 relocation entries together with two counters.
 */
struct crocus_reloc_list {
   struct drm_i915_gem_relocation_entry *relocs;
   /* NOTE(review): presumably the number of entries in use -- confirm. */
   int reloc_count;
   /* NOTE(review): presumably the allocated capacity of relocs -- confirm. */
   int reloc_array_size;
};
717ec681f3Smrg
/**
 * A buffer object, its CPU mapping, and the bookkeeping needed to fill it.
 *
 * crocus_batch embeds two of these: one for commands and one for state.
 */
struct crocus_growing_bo {
   /* Backing buffer object; bo->size is the limit the inline helpers in this
    * header compare against.
    */
   struct crocus_bo *bo;
   /* Start of the mapping the helpers in this header write through. */
   void *map;
   /* Write cursor: for the command buffer, map_next - map is the number of
    * bytes used (see crocus_batch_bytes_used()).
    */
   void *map_next;
   /* NOTE(review): the partial_* fields are not touched in this header;
    * presumably they track the previous BO while a grow is in flight --
    * confirm against crocus_grow_buffer().
    */
   struct crocus_bo *partial_bo;
   void *partial_bo_map;
   unsigned partial_bytes;
   struct crocus_reloc_list relocs;
   /* For the state buffer, compared against STATE_SZ by
    * crocus_require_statebuffer_space().
    */
   unsigned used;
};
827ec681f3Smrg
/**
 * State for one batch: the command and state buffers being filled, the
 * validation list, sync objects, fine-fence bookkeeping and assorted flags.
 */
struct crocus_batch {
   struct crocus_context *ice;
   struct crocus_screen *screen;
   struct pipe_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. CROCUS_BATCH_RENDER/COMPUTE) */
   enum crocus_batch_name name;

   /** buffers: command, state */
   struct crocus_growing_bo command, state;

   /** Size of the primary batch if we've moved on to a secondary. */
   unsigned primary_batch_size;

   bool state_base_address_emitted;
   uint8_t pipe_controls_since_last_cs_stall;

   uint32_t hw_ctx_id;

   uint32_t valid_reloc_flags;

   bool use_shadow_copy;
   /**
    * When set, crocus_require_command_space() does not flush on reaching
    * BATCH_SZ; it only grows the command buffer when the BO is full.
    */
   bool no_wrap;

   /** The validation list */
   struct drm_i915_gem_exec_object2 *validation_list;
   struct crocus_bo **exec_bos;
   int exec_count;
   int exec_array_size;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (aka first
    * instruction is a MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /**
    * A list of crocus_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqno's will be written. */
      struct crocus_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct crocus_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct crocus_batch *other_batches[CROCUS_BATCH_COUNT - 1];

   struct {
      /**
       * Buffer objects that have been rendered to within this batchbuffer
       * and would need flushing before being used from another cache
       * domain that isn't coherent with it (i.e. the sampler).
       *
       * NOTE(review): this comment used to say "struct brw_bo *", a type
       * this header never uses; the keys are presumably struct crocus_bo *
       * -- confirm.
       */
      struct hash_table *render;

      /**
       * Set of buffer objects that have been used as a depth buffer within
       * this batchbuffer and would need flushing before being used from
       * another cache domain that isn't coherent with it (i.e. the sampler).
       *
       * NOTE(review): formerly documented as "struct brw_bo *"; presumably
       * struct crocus_bo * -- confirm.
       */
      struct set *depth;
   } cache;

   struct intel_batch_decode_ctx decoder;
   /** Passed to crocus_record_state_size() style helpers; may be NULL there. */
   struct hash_table_u64 *state_sizes;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;
};
1777ec681f3Smrg
1787ec681f3Smrgstatic inline bool
1797ec681f3Smrgbatch_has_fine_fence(struct crocus_batch *batch)
1807ec681f3Smrg{
1817ec681f3Smrg   return !!batch->fine_fences.uploader;
1827ec681f3Smrg}
1837ec681f3Smrg
1847ec681f3Smrg#define BATCH_HAS_FINE_FENCES(batch) (!!(batch)->fine_fences.uploader)
/*
 * Batch entry points.  These are declarations only; the definitions are not
 * part of this header.
 */
void crocus_init_batch(struct crocus_context *ctx,
                       enum crocus_batch_name name,
                       int priority);
void crocus_batch_free(struct crocus_batch *batch);
void crocus_batch_maybe_flush(struct crocus_batch *batch, unsigned estimate);

/*
 * crocus_batch_flush(batch) forwards the caller's __FILE__ and __LINE__ to
 * _crocus_batch_flush(); call the macro rather than the function.
 */
void _crocus_batch_flush(struct crocus_batch *batch, const char *file, int line);
#define crocus_batch_flush(batch) _crocus_batch_flush((batch), __FILE__, __LINE__)

bool crocus_batch_references(struct crocus_batch *batch, struct crocus_bo *bo);

bool crocus_batch_prepare_noop(struct crocus_batch *batch, bool noop_enable);
1977ec681f3Smrg
/* Relocation flags, defined as aliases of EXEC_OBJECT_* bits (presumably
 * the ones from drm-uapi/i915_drm.h, which this header includes).
 */
#define RELOC_WRITE EXEC_OBJECT_WRITE
#define RELOC_NEEDS_GGTT EXEC_OBJECT_NEEDS_GTT
/* Inverted meaning, but using the same bit...emit_reloc will flip it. */
#define RELOC_32BIT EXEC_OBJECT_SUPPORTS_48B_ADDRESS
2027ec681f3Smrg
/*
 * Buffer-object and relocation entry points (declarations only; the
 * definitions are not part of this header).
 */
void crocus_use_pinned_bo(struct crocus_batch *batch, struct crocus_bo *bo,
                          bool writable);
/* NOTE(review): the two reloc helpers share one signature; judging by their
 * names one targets the command buffer and the other the state buffer --
 * confirm against the definitions.
 */
uint64_t crocus_command_reloc(struct crocus_batch *batch, uint32_t batch_offset,
                              struct crocus_bo *target, uint32_t target_offset,
                              unsigned int reloc_flags);
uint64_t crocus_state_reloc(struct crocus_batch *batch, uint32_t batch_offset,
                            struct crocus_bo *target, uint32_t target_offset,
                            unsigned int reloc_flags);

enum pipe_reset_status crocus_batch_check_for_reset(struct crocus_batch *batch);

/* crocus_require_command_space() calls this with grow_state == false, the
 * current number of used bytes, and the requested new size.
 */
void crocus_grow_buffer(struct crocus_batch *batch, bool grow_state,
                        unsigned used, unsigned new_size);
2167ec681f3Smrg
2177ec681f3Smrgstatic inline unsigned
2187ec681f3Smrgcrocus_batch_bytes_used(struct crocus_batch *batch)
2197ec681f3Smrg{
2207ec681f3Smrg   return batch->command.map_next - batch->command.map;
2217ec681f3Smrg}
2227ec681f3Smrg
2237ec681f3Smrg/**
2247ec681f3Smrg * Ensure the current command buffer has \param size bytes of space
2257ec681f3Smrg * remaining.  If not, this creates a secondary batch buffer and emits
2267ec681f3Smrg * a jump from the primary batch to the start of the secondary.
2277ec681f3Smrg *
2287ec681f3Smrg * Most callers want crocus_get_command_space() instead.
2297ec681f3Smrg */
2307ec681f3Smrgstatic inline void
2317ec681f3Smrgcrocus_require_command_space(struct crocus_batch *batch, unsigned size)
2327ec681f3Smrg{
2337ec681f3Smrg   const unsigned required_bytes = crocus_batch_bytes_used(batch) + size;
2347ec681f3Smrg   unsigned used = crocus_batch_bytes_used(batch);
2357ec681f3Smrg   if (required_bytes >= BATCH_SZ && !batch->no_wrap) {
2367ec681f3Smrg      crocus_batch_flush(batch);
2377ec681f3Smrg   } else if (used + size >= batch->command.bo->size) {
2387ec681f3Smrg      const unsigned new_size =
2397ec681f3Smrg         MIN2(batch->command.bo->size + batch->command.bo->size / 2,
2407ec681f3Smrg              MAX_BATCH_SIZE);
2417ec681f3Smrg
2427ec681f3Smrg      crocus_grow_buffer(batch, false, used, new_size);
2437ec681f3Smrg      batch->command.map_next = (void *)batch->command.map + used;
2447ec681f3Smrg      assert(crocus_batch_bytes_used(batch) + size < batch->command.bo->size);
2457ec681f3Smrg   }
2467ec681f3Smrg}
2477ec681f3Smrg
2487ec681f3Smrg/**
2497ec681f3Smrg * Allocate space in the current command buffer, and return a pointer
2507ec681f3Smrg * to the mapped area so the caller can write commands there.
2517ec681f3Smrg *
2527ec681f3Smrg * This should be called whenever emitting commands.
2537ec681f3Smrg */
2547ec681f3Smrgstatic inline void *
2557ec681f3Smrgcrocus_get_command_space(struct crocus_batch *batch, unsigned bytes)
2567ec681f3Smrg{
2577ec681f3Smrg   crocus_require_command_space(batch, bytes);
2587ec681f3Smrg   void *map = batch->command.map_next;
2597ec681f3Smrg   batch->command.map_next += bytes;
2607ec681f3Smrg   return map;
2617ec681f3Smrg}
2627ec681f3Smrg
/**
 * Copy \param size bytes of pre-built commands from \param data into space
 * freshly reserved in the batch's command buffer.
 */
static inline void
crocus_batch_emit(struct crocus_batch *batch, const void *data, unsigned size)
{
   memcpy(crocus_get_command_space(batch, size), data, size);
}
2727ec681f3Smrg
2737ec681f3Smrg/**
2747ec681f3Smrg * Get a pointer to the batch's signalling syncobj.  Does not refcount.
2757ec681f3Smrg */
2767ec681f3Smrgstatic inline struct crocus_syncobj *
2777ec681f3Smrgcrocus_batch_get_signal_syncobj(struct crocus_batch *batch)
2787ec681f3Smrg{
2797ec681f3Smrg   /* The signalling syncobj is the first one in the list. */
2807ec681f3Smrg   struct crocus_syncobj *syncobj =
2817ec681f3Smrg      ((struct crocus_syncobj **)util_dynarray_begin(&batch->syncobjs))[0];
2827ec681f3Smrg   return syncobj;
2837ec681f3Smrg}
2847ec681f3Smrg
2857ec681f3Smrg/**
2867ec681f3Smrg * Take a reference to the batch's signalling syncobj.
2877ec681f3Smrg *
2887ec681f3Smrg * Callers can use this to wait for the the current batch under construction
2897ec681f3Smrg * to complete (after flushing it).
2907ec681f3Smrg */
2917ec681f3Smrgstatic inline void
2927ec681f3Smrgcrocus_batch_reference_signal_syncobj(struct crocus_batch *batch,
2937ec681f3Smrg                                      struct crocus_syncobj **out_syncobj)
2947ec681f3Smrg{
2957ec681f3Smrg   struct crocus_syncobj *syncobj = crocus_batch_get_signal_syncobj(batch);
2967ec681f3Smrg   crocus_syncobj_reference(batch->screen, out_syncobj, syncobj);
2977ec681f3Smrg}
2987ec681f3Smrg
/**
 * Remember how large a piece of state is, keyed by its offset from the
 * base, for use in INTEL_DEBUG=bat printing.  Does nothing when \param ht
 * is NULL.
 */
static inline void
crocus_record_state_size(struct hash_table_u64 *ht, uint32_t offset_from_base,
                         uint32_t size)
{
   if (!ht)
      return;

   /* The size itself is stored in the table's pointer-sized value slot. */
   void *encoded_size = (void *)(uintptr_t)size;
   _mesa_hash_table_u64_insert(ht, offset_from_base, encoded_size);
}
3117ec681f3Smrg
3127ec681f3Smrgstatic inline bool
3137ec681f3Smrgcrocus_ptr_in_state_buffer(struct crocus_batch *batch, void *p)
3147ec681f3Smrg{
3157ec681f3Smrg   return (char *)p >= (char *)batch->state.map &&
3167ec681f3Smrg          (char *)p < (char *)batch->state.map + batch->state.bo->size;
3177ec681f3Smrg}
3187ec681f3Smrg
3197ec681f3Smrgstatic inline void
3207ec681f3Smrgcrocus_require_statebuffer_space(struct crocus_batch *batch, int size)
3217ec681f3Smrg{
3227ec681f3Smrg   if (batch->state.used + size >= STATE_SZ)
3237ec681f3Smrg      crocus_batch_flush(batch);
3247ec681f3Smrg}
3257ec681f3Smrg#endif
326