/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#ifndef IRIS_BATCH_DOT_H
#define IRIS_BATCH_DOT_H

#include <stdint.h>
#include <stdbool.h>
#include <string.h>

#include "util/u_dynarray.h"

#include "drm-uapi/i915_drm.h"
#include "common/intel_decoder.h"

#include "iris_fence.h"
#include "iris_fine_fence.h"

struct iris_context;

/* The kernel assumes batchbuffers are smaller than 256kB. */
#define MAX_BATCH_SIZE (256 * 1024)

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END or 12
 * bytes for MI_BATCH_BUFFER_START (when chaining).  Plus another 24 bytes for
 * the seqno write (using PIPE_CONTROL), and another 24 bytes for the ISP
 * invalidation pipe control, giving a worst case of 12 + 24 + 24 = 60 bytes.
 */
#define BATCH_RESERVED 60

/* Our target batch size - flush approximately at this point. */
#define BATCH_SZ (64 * 1024 - BATCH_RESERVED)

enum iris_batch_name {
   IRIS_BATCH_RENDER,
   IRIS_BATCH_COMPUTE,
};

#define IRIS_BATCH_COUNT 2

struct iris_batch {
   struct iris_context *ice;
   struct iris_screen *screen;
   struct pipe_debug_callback *dbg;
   struct pipe_device_reset_callback *reset;

   /** What batch is this? (e.g. IRIS_BATCH_RENDER/COMPUTE) */
   enum iris_batch_name name;

   /** Current batchbuffer being queued up. */
   struct iris_bo *bo;
   void *map;
   void *map_next;

   /** Size of the primary batch being submitted to execbuf (in bytes). */
   unsigned primary_batch_size;

   /** Total size of all chained batches (in bytes). */
   unsigned total_chained_batch_size;

   /** Last Surface State Base Address set in this hardware context. */
   uint64_t last_surface_base_address;

   uint32_t hw_ctx_id;

   /** A list of all BOs referenced by this batch */
   struct iris_bo **exec_bos;
   int exec_count;
   int exec_array_size;
   /** Bitset of whether this batch writes to BO `i'. */
   BITSET_WORD *bos_written;
   uint32_t max_gem_handle;

   /** Whether INTEL_BLACKHOLE_RENDER is enabled in the batch (i.e. the
    * first instruction is an MI_BATCH_BUFFER_END).
    */
   bool noop_enabled;

   /**
    * A list of iris_syncobjs associated with this batch.
    *
    * The first list entry will always be a signalling sync-point, indicating
    * that this batch has completed.  The others are likely to be sync-points
    * to wait on before executing the batch.
    */
   struct util_dynarray syncobjs;

   /** A list of drm_i915_exec_fences to have execbuf signal or wait on */
   struct util_dynarray exec_fences;

   /** The amount of aperture space (in bytes) used by all exec_bos */
   int aperture_space;

   struct {
      /** Uploader to use for sequence numbers */
      struct u_upload_mgr *uploader;

      /** GPU buffer and CPU map where our seqnos will be written. */
      struct iris_state_ref ref;
      uint32_t *map;

      /** The sequence number to write the next time we add a fence. */
      uint32_t next;
   } fine_fences;

   /** A seqno (and syncobj) for the last batch that was submitted. */
   struct iris_fine_fence *last_fence;

   /** List of other batches which we might need to flush to use a BO */
   struct iris_batch *other_batches[IRIS_BATCH_COUNT - 1];

   struct {
      /**
       * Set of struct iris_bo * that have been rendered to within this
       * batchbuffer and would need flushing before being used from another
       * cache domain that isn't coherent with it (i.e. the sampler).
       */
      struct hash_table *render;
   } cache;

   struct intel_batch_decode_ctx decoder;
   struct hash_table_u64 *state_sizes;

   /**
    * Matrix representation of the cache coherency status of the GPU at the
    * current end point of the batch.  For every i and j,
    * coherent_seqnos[i][j] denotes the seqno of the most recent flush of
    * cache domain j visible to cache domain i (which obviously implies that
    * coherent_seqnos[i][i] is the most recent flush of cache domain i).  This
    * can be used to efficiently determine whether synchronization is
    * necessary before accessing data from cache domain i if it was previously
    * accessed from another cache domain j.
    */
   uint64_t coherent_seqnos[NUM_IRIS_DOMAINS][NUM_IRIS_DOMAINS];
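
   /*
    * For example (a sketch; "bo_last_write_seqno" is an illustrative value
    * tracked elsewhere, not a field declared in this header): reading data
    * from domain i that was last written from domain j only requires extra
    * synchronization if that write is newer than the last flush of j that is
    * already visible to i:
    *
    *    if (bo_last_write_seqno > batch->coherent_seqnos[i][j])
    *       ...emit the appropriate PIPE_CONTROL before the read...
    */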

   /**
    * Sequence number used to track the completion of any subsequent memory
    * operations in the batch until the next sync boundary.
    */
   uint64_t next_seqno;

   /** Have we emitted any draw calls to this batch? */
   bool contains_draw;

   /** Have we emitted any draw calls with next_seqno? */
   bool contains_draw_with_next_seqno;

   /** Batch contains fence signal operation. */
   bool contains_fence_signal;

   /**
    * Number of times iris_batch_sync_region_start() has been called without a
    * matching iris_batch_sync_region_end() on this batch.
    */
   uint32_t sync_region_depth;

   uint32_t last_aux_map_state;
   struct iris_measure_batch *measure;
};

void iris_init_batch(struct iris_context *ice,
                     enum iris_batch_name name,
                     int priority);
void iris_chain_to_new_batch(struct iris_batch *batch);
void iris_batch_free(struct iris_batch *batch);
void iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate);

void _iris_batch_flush(struct iris_batch *batch, const char *file, int line);
#define iris_batch_flush(batch) _iris_batch_flush((batch), __FILE__, __LINE__)

bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo);

bool iris_batch_prepare_noop(struct iris_batch *batch, bool noop_enable);

#define RELOC_WRITE EXEC_OBJECT_WRITE

void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo,
                        bool writable, enum iris_domain access);

enum pipe_reset_status iris_batch_check_for_reset(struct iris_batch *batch);

static inline unsigned
iris_batch_bytes_used(struct iris_batch *batch)
{
   return batch->map_next - batch->map;
}

/**
 * Ensure the current command buffer has \p size bytes of space
 * remaining.  If not, this creates a secondary batch buffer and emits
 * a jump from the primary batch to the start of the secondary.
 *
 * Most callers want iris_get_command_space() instead.
 */
static inline void
iris_require_command_space(struct iris_batch *batch, unsigned size)
{
   const unsigned required_bytes = iris_batch_bytes_used(batch) + size;

   if (required_bytes >= BATCH_SZ) {
      iris_chain_to_new_batch(batch);
   }
}

/**
 * Allocate space in the current command buffer, and return a pointer
 * to the mapped area so the caller can write commands there.
 *
 * This should be called whenever emitting commands.
 */
static inline void *
iris_get_command_space(struct iris_batch *batch, unsigned bytes)
{
   iris_require_command_space(batch, bytes);
   void *map = batch->map_next;
   batch->map_next += bytes;
   return map;
}

/**
 * Helper to emit GPU commands - allocates space, copies them there.
 */
static inline void
iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size)
{
   void *map = iris_get_command_space(batch, size);
   memcpy(map, data, size);
}
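
/*
 * Example usage (a sketch; the DWords shown are placeholders rather than a
 * real packed command):
 *
 *    uint32_t cmd[2] = { 0, 0 };
 *    iris_batch_emit(batch, cmd, sizeof(cmd));
 *
 * In practice the driver's state code typically emits packets through the
 * genxml pack helpers rather than hand-built arrays.
 */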

/**
 * Get a pointer to the batch's signalling syncobj.  Does not refcount.
 */
static inline struct iris_syncobj *
iris_batch_get_signal_syncobj(struct iris_batch *batch)
{
   /* The signalling syncobj is the first one in the list. */
   struct iris_syncobj *syncobj =
      ((struct iris_syncobj **) util_dynarray_begin(&batch->syncobjs))[0];
   return syncobj;
}


/**
 * Take a reference to the batch's signalling syncobj.
 *
 * Callers can use this to wait for the current batch under construction
 * to complete (after flushing it).
 */
static inline void
iris_batch_reference_signal_syncobj(struct iris_batch *batch,
                                    struct iris_syncobj **out_syncobj)
{
   struct iris_syncobj *syncobj = iris_batch_get_signal_syncobj(batch);
   iris_syncobj_reference(batch->screen->bufmgr, out_syncobj, syncobj);
}
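
/*
 * For example, to get something to wait on for the work queued so far
 * (a sketch; error handling omitted, and the eventual wait and cleanup are
 * only indicated in comments):
 *
 *    struct iris_syncobj *syncobj = NULL;
 *    iris_batch_reference_signal_syncobj(batch, &syncobj);
 *    iris_batch_flush(batch);
 *    // ...later, wait on syncobj via the fence code, then drop the
 *    // reference with iris_syncobj_reference(bufmgr, &syncobj, NULL).
 */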

/**
 * Record the size of a piece of state for use in INTEL_DEBUG=bat printing.
 */
static inline void
iris_record_state_size(struct hash_table_u64 *ht,
                       uint32_t offset_from_base,
                       uint32_t size)
{
   if (ht) {
      _mesa_hash_table_u64_insert(ht, offset_from_base,
                                  (void *)(uintptr_t) size);
   }
}

/**
 * Mark the start of a region in the batch with stable synchronization
 * sequence number.  Any buffer object accessed by the batch buffer only needs
 * to be marked once (e.g. via iris_bo_bump_seqno()) within a region delimited
 * by iris_batch_sync_region_start() and iris_batch_sync_region_end().
 */
static inline void
iris_batch_sync_region_start(struct iris_batch *batch)
{
   batch->sync_region_depth++;
}

/**
 * Mark the end of a region in the batch with stable synchronization sequence
 * number.  Should be called once after each call to
 * iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_region_end(struct iris_batch *batch)
{
   assert(batch->sync_region_depth);
   batch->sync_region_depth--;
}
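
/*
 * Typical usage brackets a draw or other state emission (a sketch; "bo",
 * "writable", and "access" stand for whatever the caller is binding):
 *
 *    iris_batch_sync_region_start(batch);
 *    iris_use_pinned_bo(batch, bo, writable, access);
 *    ...emit the commands that reference bo...
 *    iris_batch_sync_region_end(batch);
 */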

/**
 * Start a new synchronization section at the current point of the batch,
 * unless disallowed by a previous iris_batch_sync_region_start().
 */
static inline void
iris_batch_sync_boundary(struct iris_batch *batch)
{
   if (!batch->sync_region_depth) {
      batch->contains_draw_with_next_seqno = false;
      batch->next_seqno = p_atomic_inc_return(&batch->screen->last_seqno);
      assert(batch->next_seqno > 0);
   }
}

/**
 * Update the cache coherency status of the batch to reflect a flush of the
 * specified caching domain.
 */
static inline void
iris_batch_mark_flush_sync(struct iris_batch *batch,
                           enum iris_domain access)
{
   batch->coherent_seqnos[access][access] = batch->next_seqno - 1;
}

/**
 * Update the cache coherency status of the batch to reflect an invalidation
 * of the specified caching domain.  All prior flushes of other caches will be
 * considered visible to the specified caching domain.
 */
static inline void
iris_batch_mark_invalidate_sync(struct iris_batch *batch,
                                enum iris_domain access)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
      batch->coherent_seqnos[access][i] = batch->coherent_seqnos[i][i];
}
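
/*
 * For instance, a PIPE_CONTROL that flushes domain "j" and invalidates
 * domain "i" would typically be accompanied by (a sketch; the actual
 * bookkeeping lives in the PIPE_CONTROL emission code):
 *
 *    iris_batch_mark_flush_sync(batch, j);
 *    iris_batch_mark_invalidate_sync(batch, i);
 */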

/**
 * Update the cache coherency status of the batch to reflect a reset.  All
 * previously accessed data can be considered visible to every caching domain
 * thanks to the kernel's heavyweight flushing at batch buffer boundaries.
 */
static inline void
iris_batch_mark_reset_sync(struct iris_batch *batch)
{
   for (unsigned i = 0; i < NUM_IRIS_DOMAINS; i++)
      for (unsigned j = 0; j < NUM_IRIS_DOMAINS; j++)
         batch->coherent_seqnos[i][j] = batch->next_seqno - 1;
}

#endif