/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file iris_batch.c
 *
 * Batchbuffer and command submission module.
 *
 * Every API draw call results in a number of GPU commands, which we
 * collect into a "batch buffer".  Typically, many draw calls are grouped
 * into a single batch to amortize command submission overhead.
 *
 * We submit batches to the kernel using the I915_GEM_EXECBUFFER2 ioctl.
 * One critical piece of data is the "validation list", which contains a
 * list of the buffer objects (BOs) that the GPU commands need.
 * The kernel will make sure these are resident and pinned at the correct
 * virtual memory address before executing our batch.  If a BO is not in
 * the validation list, it effectively does not exist, so take care.
 */
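
/* An illustrative sketch of how this module is driven (a hypothetical
 * call site; the surrounding variable names are assumptions, not code
 * quoted from the driver):
 *
 *    iris_init_batch(&batch, screen, vtbl, dbg, batches,
 *                    IRIS_BATCH_RENDER, I915_EXEC_RENDER, 0);
 *    iris_use_pinned_bo(&batch, bo, false);     // make bo resident
 *    ...write commands at batch.map_next...
 *    iris_batch_maybe_flush(&batch, estimate);  // flush if nearly full
 *    iris_batch_flush(&batch);                  // submit via execbuffer2
 */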

#include "iris_batch.h"
#include "iris_bufmgr.h"
#include "iris_context.h"
#include "iris_fence.h"

#include "drm-uapi/i915_drm.h"

#include "util/hash_table.h"
#include "util/set.h"
#include "main/macros.h"

#include <errno.h>
#include <xf86drm.h>

#if HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

/* Terminating the batch takes either 4 bytes for MI_BATCH_BUFFER_END
 * or 12 bytes for MI_BATCH_BUFFER_START (when chaining).  Plus, we may
 * need an extra 4 bytes to pad out to the nearest QWord.  So reserve 16.
 */
#define BATCH_RESERVED 16

static void
iris_batch_reset(struct iris_batch *batch);

static unsigned
num_fences(struct iris_batch *batch)
{
   return util_dynarray_num_elements(&batch->exec_fences,
                                     struct drm_i915_gem_exec_fence);
}

/**
 * Debugging code to dump the fence list, used by INTEL_DEBUG=submit.
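 *
 * Fences prefixed with "..." are waited on before the batch executes;
 * fences suffixed with "!" are signalled when it completes.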
 */
static void
dump_fence_list(struct iris_batch *batch)
{
   fprintf(stderr, "Fence list (length %u):      ", num_fences(batch));

   util_dynarray_foreach(&batch->exec_fences,
                         struct drm_i915_gem_exec_fence, f) {
      fprintf(stderr, "%s%u%s ",
              (f->flags & I915_EXEC_FENCE_WAIT) ? "..." : "",
              f->handle,
              (f->flags & I915_EXEC_FENCE_SIGNAL) ? "!" : "");
   }

   fprintf(stderr, "\n");
}

/**
 * Debugging code to dump the validation list, used by INTEL_DEBUG=submit.
 */
static void
dump_validation_list(struct iris_batch *batch)
{
   fprintf(stderr, "Validation list (length %d):\n", batch->exec_count);

   for (int i = 0; i < batch->exec_count; i++) {
      uint64_t flags = batch->validation_list[i].flags;
      assert(batch->validation_list[i].handle ==
             batch->exec_bos[i]->gem_handle);
      fprintf(stderr, "[%2d]: %2d %-14s @ 0x%016llx (%"PRIu64"B)\t %2d refs %s\n",
              i,
              batch->validation_list[i].handle,
              batch->exec_bos[i]->name,
              batch->validation_list[i].offset,
              batch->exec_bos[i]->size,
              batch->exec_bos[i]->refcount,
              (flags & EXEC_OBJECT_WRITE) ? " (write)" : "");
   }
}

/**
 * Return BO information to the batch decoder (for debugging).
 */
static struct gen_batch_decode_bo
decode_get_bo(void *v_batch, bool ppgtt, uint64_t address)
{
   struct iris_batch *batch = v_batch;

   assert(ppgtt);

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];
      /* The decoder zeroes out the top 16 bits, so we need to as well */
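      /* (gtt_offset may be in canonical form, with bits 63:48 being a sign
       * extension of bit 47; masking recovers the raw 48-bit address.)
       */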
      uint64_t bo_address = bo->gtt_offset & (~0ull >> 16);

      if (address >= bo_address && address < bo_address + bo->size) {
         return (struct gen_batch_decode_bo) {
            .addr = address,
            .size = bo->size,
            .map = iris_bo_map(batch->dbg, bo, MAP_READ) +
                   (address - bo_address),
         };
      }
   }

   return (struct gen_batch_decode_bo) { };
}

/**
 * Decode the current batch.
 */
static void
decode_batch(struct iris_batch *batch)
{
   void *map = iris_bo_map(batch->dbg, batch->exec_bos[0], MAP_READ);
   gen_print_batch(&batch->decoder, map, batch->primary_batch_size,
                   batch->exec_bos[0]->gtt_offset, false);
}

void
iris_init_batch(struct iris_batch *batch,
                struct iris_screen *screen,
                struct iris_vtable *vtbl,
                struct pipe_debug_callback *dbg,
                struct iris_batch *all_batches,
                enum iris_batch_name name,
                uint8_t engine,
                int priority)
{
   batch->screen = screen;
   batch->vtbl = vtbl;
   batch->dbg = dbg;
   batch->name = name;

   /* engine should be one of I915_EXEC_RENDER, I915_EXEC_BLT, etc. */
   assert((engine & ~I915_EXEC_RING_MASK) == 0);
   assert(util_bitcount(engine) == 1);
   batch->engine = engine;

   batch->hw_ctx_id = iris_create_hw_context(screen->bufmgr);
   assert(batch->hw_ctx_id);

   iris_hw_context_set_priority(screen->bufmgr, batch->hw_ctx_id, priority);

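   /* Each dynarray gets its own ralloc context; these are released via the
    * dynarrays' mem_ctx pointers in iris_batch_free().
    */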
   util_dynarray_init(&batch->exec_fences, ralloc_context(NULL));
   util_dynarray_init(&batch->syncpts, ralloc_context(NULL));

   batch->exec_count = 0;
   batch->exec_array_size = 100;
   batch->exec_bos =
      malloc(batch->exec_array_size * sizeof(batch->exec_bos[0]));
   batch->validation_list =
      malloc(batch->exec_array_size * sizeof(batch->validation_list[0]));

   batch->cache.render = _mesa_hash_table_create(NULL, _mesa_hash_pointer,
                                                 _mesa_key_pointer_equal);
   batch->cache.depth = _mesa_set_create(NULL, _mesa_hash_pointer,
                                         _mesa_key_pointer_equal);

   memset(batch->other_batches, 0, sizeof(batch->other_batches));

   for (int i = 0, j = 0; i < IRIS_BATCH_COUNT; i++) {
      if (&all_batches[i] != batch)
         batch->other_batches[j++] = &all_batches[i];
   }

   if (unlikely(INTEL_DEBUG)) {
      const unsigned decode_flags =
         GEN_BATCH_DECODE_FULL |
         ((INTEL_DEBUG & DEBUG_COLOR) ? GEN_BATCH_DECODE_IN_COLOR : 0) |
         GEN_BATCH_DECODE_OFFSETS |
         GEN_BATCH_DECODE_FLOATS;

      /* TODO: track state size so we can print the right # of entries */
      gen_batch_decode_ctx_init(&batch->decoder, &screen->devinfo,
                                stderr, decode_flags, NULL,
                                decode_get_bo, NULL, batch);
      batch->decoder.max_vbo_decoded_lines = 32;
   }

   iris_batch_reset(batch);
}

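/**
 * Find the validation-list entry for a BO, if it has one in this batch.
 *
 * The fast path uses the index cached in bo->index (read with READ_ONCE,
 * since a BO may be referenced by several batches and the field can change
 * underneath us).  If the cached index is stale, fall back to a linear scan.
 */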
static struct drm_i915_gem_exec_object2 *
find_validation_entry(struct iris_batch *batch, struct iris_bo *bo)
{
   unsigned index = READ_ONCE(bo->index);

   if (index < batch->exec_count && batch->exec_bos[index] == bo)
      return &batch->validation_list[index];

   /* May have been shared between multiple active batches */
   for (index = 0; index < batch->exec_count; index++) {
      if (batch->exec_bos[index] == bo)
         return &batch->validation_list[index];
   }

   return NULL;
}

/**
 * Add a buffer to the current batch's validation list.
 *
 * You must call this on any BO you wish to use in this batch, to ensure
 * that it's resident when the GPU commands execute.
 */
void
iris_use_pinned_bo(struct iris_batch *batch,
                   struct iris_bo *bo,
                   bool writable)
{
   assert(bo->kflags & EXEC_OBJECT_PINNED);

   /* Never mark the workaround BO with EXEC_OBJECT_WRITE.  We don't care
    * about the order of any writes to that buffer, and marking it writable
    * would introduce data dependencies between multiple batches which share
    * the buffer.
    */
   if (bo == batch->screen->workaround_bo)
      writable = false;

   struct drm_i915_gem_exec_object2 *existing_entry =
      find_validation_entry(batch, bo);

   if (existing_entry) {
      /* The BO is already in the validation list; mark it writable */
      if (writable)
         existing_entry->flags |= EXEC_OBJECT_WRITE;

      return;
   }

   if (bo != batch->bo) {
      /* This is the first time our batch has seen this BO.  Before we use it,
       * we may need to flush and synchronize with other batches.
       */
      for (int b = 0; b < ARRAY_SIZE(batch->other_batches); b++) {
         struct drm_i915_gem_exec_object2 *other_entry =
            find_validation_entry(batch->other_batches[b], bo);

         /* If the buffer is referenced by another batch, and either batch
          * intends to write it, then flush the other batch and synchronize.
          *
          * Consider these cases:
          *
          * 1. They read, we read   =>  No synchronization required.
          * 2. They read, we write  =>  Synchronize (they need the old value)
          * 3. They write, we read  =>  Synchronize (we need their new value)
          * 4. They write, we write =>  Synchronize (order writes)
          *
          * The read/read case is very common, as multiple batches usually
          * share a streaming state buffer or shader assembly buffer, and
          * we want to avoid synchronizing in this case.
          */
         if (other_entry &&
             ((other_entry->flags & EXEC_OBJECT_WRITE) || writable)) {
            iris_batch_flush(batch->other_batches[b]);
            iris_batch_add_syncpt(batch, batch->other_batches[b]->last_syncpt,
                                  I915_EXEC_FENCE_WAIT);
         }
      }
   }

   /* Now, take a reference and add it to the validation list. */
   iris_bo_reference(bo);

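   /* Grow both arrays geometrically so appending stays amortized O(1). */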
   if (batch->exec_count == batch->exec_array_size) {
      batch->exec_array_size *= 2;
      batch->exec_bos =
         realloc(batch->exec_bos,
                 batch->exec_array_size * sizeof(batch->exec_bos[0]));
      batch->validation_list =
         realloc(batch->validation_list,
                 batch->exec_array_size * sizeof(batch->validation_list[0]));
   }

   batch->validation_list[batch->exec_count] =
      (struct drm_i915_gem_exec_object2) {
         .handle = bo->gem_handle,
         .offset = bo->gtt_offset,
         .flags = bo->kflags | (writable ? EXEC_OBJECT_WRITE : 0),
      };

   bo->index = batch->exec_count;
   batch->exec_bos[batch->exec_count] = bo;
   batch->aperture_space += bo->size;

   batch->exec_count++;
}

static void
create_batch(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   batch->bo = iris_bo_alloc(bufmgr, "command buffer",
                             BATCH_SZ + BATCH_RESERVED, IRIS_MEMZONE_OTHER);
   batch->bo->kflags |= EXEC_OBJECT_CAPTURE;
   batch->map = iris_bo_map(NULL, batch->bo, MAP_READ | MAP_WRITE);
   batch->map_next = batch->map;

   iris_use_pinned_bo(batch, batch->bo, false);
}

static void
iris_batch_reset(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;

   iris_bo_unreference(batch->bo);
   batch->primary_batch_size = 0;
   batch->contains_draw = false;

   create_batch(batch);
   assert(batch->bo->index == 0);

   struct iris_syncpt *syncpt = iris_create_syncpt(screen);
   iris_batch_add_syncpt(batch, syncpt, I915_EXEC_FENCE_SIGNAL);
   iris_syncpt_reference(screen, &syncpt, NULL);

   iris_cache_sets_clear(batch);
}

void
iris_batch_free(struct iris_batch *batch)
{
   struct iris_screen *screen = batch->screen;
   struct iris_bufmgr *bufmgr = screen->bufmgr;

   for (int i = 0; i < batch->exec_count; i++) {
      iris_bo_unreference(batch->exec_bos[i]);
   }
   free(batch->exec_bos);
   free(batch->validation_list);

   ralloc_free(batch->exec_fences.mem_ctx);

   util_dynarray_foreach(&batch->syncpts, struct iris_syncpt *, s)
      iris_syncpt_reference(screen, s, NULL);
   ralloc_free(batch->syncpts.mem_ctx);

   iris_syncpt_reference(screen, &batch->last_syncpt, NULL);

   iris_bo_unreference(batch->bo);
   batch->bo = NULL;
   batch->map = NULL;
   batch->map_next = NULL;

   iris_destroy_hw_context(bufmgr, batch->hw_ctx_id);

   _mesa_hash_table_destroy(batch->cache.render, NULL);
   _mesa_set_destroy(batch->cache.depth, NULL);

   if (unlikely(INTEL_DEBUG))
      gen_batch_decode_ctx_finish(&batch->decoder);
}

/**
 * If we've chained to a secondary batch, or are getting near to the end,
 * then flush.  This should only be called between draws.
 */
void
iris_batch_maybe_flush(struct iris_batch *batch, unsigned estimate)
{
   if (batch->bo != batch->exec_bos[0] ||
       iris_batch_bytes_used(batch) + estimate >= BATCH_SZ) {
      iris_batch_flush(batch);
   }
}

void
iris_chain_to_new_batch(struct iris_batch *batch)
{
   /* We only support chaining a single time. */
   assert(batch->bo == batch->exec_bos[0]);

   VG(void *map = batch->map);
   uint32_t *cmd = batch->map_next;
   uint64_t *addr = batch->map_next + 4;
   batch->map_next += 12;

   /* No longer held by batch->bo, still held by validation list */
   iris_bo_unreference(batch->bo);
   batch->primary_batch_size = iris_batch_bytes_used(batch);
   create_batch(batch);

   /* Emit MI_BATCH_BUFFER_START to chain to another batch. */
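   /* Encoding: opcode 0x31 in bits 28:23, the PPGTT address-space selector
    * in bit 8, and a DWord length of 3 - 2 (the command is 3 DWords total).
    */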
   *cmd = (0x31 << 23) | (1 << 8) | (3 - 2);
   *addr = batch->bo->gtt_offset;

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(map, batch->primary_batch_size));
}

/**
 * Terminate a batch with MI_BATCH_BUFFER_END.
 */
static void
iris_finish_batch(struct iris_batch *batch)
{
   /* Emit MI_BATCH_BUFFER_END to finish our batch. */
   uint32_t *map = batch->map_next;

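   /* MI_BATCH_BUFFER_END is opcode 0x0A in bits 28:23; the rest is zero. */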
   map[0] = (0xA << 23);

   batch->map_next += 4;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->map, iris_batch_bytes_used(batch)));

   if (batch->bo == batch->exec_bos[0])
      batch->primary_batch_size = iris_batch_bytes_used(batch);
}

/**
 * Submit the batch to the GPU via execbuffer2.
 */
static int
submit_batch(struct iris_batch *batch)
{
   iris_bo_unmap(batch->bo);

   /* The requirements for using I915_EXEC_NO_RELOC are:
    *
    *   The addresses written in the objects must match the corresponding
    *   reloc.gtt_offset which in turn must match the corresponding
    *   execobject.offset.
    *
    *   Any render targets written to in the batch must be flagged with
    *   EXEC_OBJECT_WRITE.
    *
    *   To avoid stalling, execobject.offset should match the current
    *   address of that object within the active context.
    */
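   /* I915_EXEC_BATCH_FIRST places the batch BO at index 0 of the validation
    * list rather than in the traditional last slot; we rely on this, since
    * exec_bos[0] is always the (primary) batch.  I915_EXEC_HANDLE_LUT tells
    * the kernel that relocation handles (if any) are indexes into that list.
    */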
   struct drm_i915_gem_execbuffer2 execbuf = {
      .buffers_ptr = (uintptr_t) batch->validation_list,
      .buffer_count = batch->exec_count,
      .batch_start_offset = 0,
      /* This must be QWord aligned. */
      .batch_len = ALIGN(batch->primary_batch_size, 8),
      .flags = batch->engine |
               I915_EXEC_NO_RELOC |
               I915_EXEC_BATCH_FIRST |
               I915_EXEC_HANDLE_LUT,
      .rsvd1 = batch->hw_ctx_id, /* rsvd1 is actually the context ID */
   };

   if (num_fences(batch)) {
      execbuf.flags |= I915_EXEC_FENCE_ARRAY;
      execbuf.num_cliprects = num_fences(batch);
      execbuf.cliprects_ptr =
         (uintptr_t)util_dynarray_begin(&batch->exec_fences);
   }

   int ret = batch->screen->no_hw ? 0 :
      drm_ioctl(batch->screen->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
   if (ret != 0) {
      ret = -errno;
      DBG("execbuf FAILED: errno = %d\n", -ret);
      fprintf(stderr, "execbuf FAILED: errno = %d\n", -ret);
      abort();
   } else {
      DBG("execbuf succeeded\n");
   }

   for (int i = 0; i < batch->exec_count; i++) {
      struct iris_bo *bo = batch->exec_bos[i];

      bo->idle = false;
      bo->index = -1;

      iris_bo_unreference(bo);
   }

   return ret;
}

static const char *
batch_name_to_string(enum iris_batch_name name)
{
   const char *names[IRIS_BATCH_COUNT] = {
      [IRIS_BATCH_RENDER]  = "render",
      [IRIS_BATCH_COMPUTE] = "compute",
   };
   return names[name];
}

/**
 * Flush the batch buffer, submitting it to the GPU and resetting it so
 * we're ready to emit the next batch.
 */
void
_iris_batch_flush(struct iris_batch *batch, const char *file, int line)
{
   struct iris_screen *screen = batch->screen;

   if (iris_batch_bytes_used(batch) == 0)
      return;

   iris_finish_batch(batch);

   if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) {
      int bytes_for_commands = iris_batch_bytes_used(batch);
      int second_bytes = 0;
      if (batch->bo != batch->exec_bos[0]) {
         second_bytes = bytes_for_commands;
         bytes_for_commands += batch->primary_batch_size;
      }
      fprintf(stderr, "%19s:%-3d: %s batch [%u] flush with %5d+%5db (%0.1f%%) "
              "(cmds), %4d BOs (%0.1fMB aperture)\n",
              file, line, batch_name_to_string(batch->name), batch->hw_ctx_id,
              batch->primary_batch_size, second_bytes,
              100.0f * bytes_for_commands / BATCH_SZ,
              batch->exec_count,
              (float) batch->aperture_space / (1024 * 1024));
      dump_fence_list(batch);
      dump_validation_list(batch);
   }

   if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
      decode_batch(batch);
   }

   int ret = submit_batch(batch);

   if (ret >= 0) {
      //if (iris->ctx.Const.ResetStrategy == GL_LOSE_CONTEXT_ON_RESET_ARB)
         //iris_check_for_reset(ice);

      if (unlikely(INTEL_DEBUG & DEBUG_SYNC)) {
         dbg_printf("waiting for idle\n");
         iris_bo_wait_rendering(batch->bo);
      }
   } else {
#ifdef DEBUG
      const bool color = INTEL_DEBUG & DEBUG_COLOR;
      fprintf(stderr, "%siris: Failed to submit batchbuffer: %-80s%s\n",
              color ? "\e[1;41m" : "", strerror(-ret), color ? "\e[0m" : "");
      abort();
#endif
   }

   batch->exec_count = 0;
   batch->aperture_space = 0;

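   /* The first syncpt in the list is the I915_EXEC_FENCE_SIGNAL one added
    * in iris_batch_reset(), i.e. the fence that fires when this batch
    * completes; remember it as last_syncpt for cross-batch synchronization.
    */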
   struct iris_syncpt *syncpt =
      ((struct iris_syncpt **) util_dynarray_begin(&batch->syncpts))[0];
   iris_syncpt_reference(screen, &batch->last_syncpt, syncpt);

   util_dynarray_foreach(&batch->syncpts, struct iris_syncpt *, s)
      iris_syncpt_reference(screen, s, NULL);
   util_dynarray_clear(&batch->syncpts);

   util_dynarray_clear(&batch->exec_fences);

   /* Start a new batch buffer. */
   iris_batch_reset(batch);
}

/**
 * Does the current batch refer to the given BO?
 *
 * (In other words, is the BO in the current batch's validation list?)
 */
bool
iris_batch_references(struct iris_batch *batch, struct iris_bo *bo)
{
   return find_validation_entry(batch, bo) != NULL;
}