1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2015 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include <assert.h>
25b8e80941Smrg#include <stdbool.h>
26b8e80941Smrg#include <string.h>
27b8e80941Smrg#include <unistd.h>
28b8e80941Smrg#include <fcntl.h>
29b8e80941Smrg
30b8e80941Smrg#include "anv_private.h"
31b8e80941Smrg
32b8e80941Smrg#include "genxml/gen8_pack.h"
33b8e80941Smrg
34b8e80941Smrg#include "util/debug.h"
35b8e80941Smrg
36b8e80941Smrg/** \file anv_batch_chain.c
37b8e80941Smrg *
38b8e80941Smrg * This file contains functions related to anv_cmd_buffer as a data
39b8e80941Smrg * structure.  This involves everything required to create and destroy
40b8e80941Smrg * the actual batch buffers as well as link them together and handle
41b8e80941Smrg * relocations and surface state.  It specifically does *not* contain any
42b8e80941Smrg * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
43b8e80941Smrg */
44b8e80941Smrg
45b8e80941Smrg/*-----------------------------------------------------------------------*
46b8e80941Smrg * Functions related to anv_reloc_list
47b8e80941Smrg *-----------------------------------------------------------------------*/
48b8e80941Smrg
49b8e80941Smrgstatic VkResult
50b8e80941Smrganv_reloc_list_init_clone(struct anv_reloc_list *list,
51b8e80941Smrg                          const VkAllocationCallbacks *alloc,
52b8e80941Smrg                          const struct anv_reloc_list *other_list)
53b8e80941Smrg{
54b8e80941Smrg   if (other_list) {
55b8e80941Smrg      list->num_relocs = other_list->num_relocs;
56b8e80941Smrg      list->array_length = other_list->array_length;
57b8e80941Smrg   } else {
58b8e80941Smrg      list->num_relocs = 0;
59b8e80941Smrg      list->array_length = 256;
60b8e80941Smrg   }
61b8e80941Smrg
62b8e80941Smrg   list->relocs =
63b8e80941Smrg      vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
64b8e80941Smrg                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
65b8e80941Smrg
66b8e80941Smrg   if (list->relocs == NULL)
67b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
68b8e80941Smrg
69b8e80941Smrg   list->reloc_bos =
70b8e80941Smrg      vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
71b8e80941Smrg                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
72b8e80941Smrg
73b8e80941Smrg   if (list->reloc_bos == NULL) {
74b8e80941Smrg      vk_free(alloc, list->relocs);
75b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
76b8e80941Smrg   }
77b8e80941Smrg
78b8e80941Smrg   list->deps = _mesa_pointer_set_create(NULL);
79b8e80941Smrg
80b8e80941Smrg   if (!list->deps) {
81b8e80941Smrg      vk_free(alloc, list->relocs);
82b8e80941Smrg      vk_free(alloc, list->reloc_bos);
83b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
84b8e80941Smrg   }
85b8e80941Smrg
86b8e80941Smrg   if (other_list) {
87b8e80941Smrg      memcpy(list->relocs, other_list->relocs,
88b8e80941Smrg             list->array_length * sizeof(*list->relocs));
89b8e80941Smrg      memcpy(list->reloc_bos, other_list->reloc_bos,
90b8e80941Smrg             list->array_length * sizeof(*list->reloc_bos));
91b8e80941Smrg      set_foreach(other_list->deps, entry) {
92b8e80941Smrg         _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
93b8e80941Smrg      }
94b8e80941Smrg   }
95b8e80941Smrg
96b8e80941Smrg   return VK_SUCCESS;
97b8e80941Smrg}
98b8e80941Smrg
99b8e80941SmrgVkResult
100b8e80941Smrganv_reloc_list_init(struct anv_reloc_list *list,
101b8e80941Smrg                    const VkAllocationCallbacks *alloc)
102b8e80941Smrg{
103b8e80941Smrg   return anv_reloc_list_init_clone(list, alloc, NULL);
104b8e80941Smrg}
105b8e80941Smrg
106b8e80941Smrgvoid
107b8e80941Smrganv_reloc_list_finish(struct anv_reloc_list *list,
108b8e80941Smrg                      const VkAllocationCallbacks *alloc)
109b8e80941Smrg{
110b8e80941Smrg   vk_free(alloc, list->relocs);
111b8e80941Smrg   vk_free(alloc, list->reloc_bos);
112b8e80941Smrg   _mesa_set_destroy(list->deps, NULL);
113b8e80941Smrg}
114b8e80941Smrg
115b8e80941Smrgstatic VkResult
116b8e80941Smrganv_reloc_list_grow(struct anv_reloc_list *list,
117b8e80941Smrg                    const VkAllocationCallbacks *alloc,
118b8e80941Smrg                    size_t num_additional_relocs)
119b8e80941Smrg{
120b8e80941Smrg   if (list->num_relocs + num_additional_relocs <= list->array_length)
121b8e80941Smrg      return VK_SUCCESS;
122b8e80941Smrg
123b8e80941Smrg   size_t new_length = list->array_length * 2;
124b8e80941Smrg   while (new_length < list->num_relocs + num_additional_relocs)
125b8e80941Smrg      new_length *= 2;
126b8e80941Smrg
127b8e80941Smrg   struct drm_i915_gem_relocation_entry *new_relocs =
128b8e80941Smrg      vk_alloc(alloc, new_length * sizeof(*list->relocs), 8,
129b8e80941Smrg                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
130b8e80941Smrg   if (new_relocs == NULL)
131b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
132b8e80941Smrg
133b8e80941Smrg   struct anv_bo **new_reloc_bos =
134b8e80941Smrg      vk_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
135b8e80941Smrg                VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
136b8e80941Smrg   if (new_reloc_bos == NULL) {
137b8e80941Smrg      vk_free(alloc, new_relocs);
138b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
139b8e80941Smrg   }
140b8e80941Smrg
141b8e80941Smrg   memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
142b8e80941Smrg   memcpy(new_reloc_bos, list->reloc_bos,
143b8e80941Smrg          list->num_relocs * sizeof(*list->reloc_bos));
144b8e80941Smrg
145b8e80941Smrg   vk_free(alloc, list->relocs);
146b8e80941Smrg   vk_free(alloc, list->reloc_bos);
147b8e80941Smrg
148b8e80941Smrg   list->array_length = new_length;
149b8e80941Smrg   list->relocs = new_relocs;
150b8e80941Smrg   list->reloc_bos = new_reloc_bos;
151b8e80941Smrg
152b8e80941Smrg   return VK_SUCCESS;
153b8e80941Smrg}
154b8e80941Smrg
155b8e80941SmrgVkResult
156b8e80941Smrganv_reloc_list_add(struct anv_reloc_list *list,
157b8e80941Smrg                   const VkAllocationCallbacks *alloc,
158b8e80941Smrg                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
159b8e80941Smrg{
160b8e80941Smrg   struct drm_i915_gem_relocation_entry *entry;
161b8e80941Smrg   int index;
162b8e80941Smrg
163b8e80941Smrg   if (target_bo->flags & EXEC_OBJECT_PINNED) {
164b8e80941Smrg      _mesa_set_add(list->deps, target_bo);
165b8e80941Smrg      return VK_SUCCESS;
166b8e80941Smrg   }
167b8e80941Smrg
168b8e80941Smrg   VkResult result = anv_reloc_list_grow(list, alloc, 1);
169b8e80941Smrg   if (result != VK_SUCCESS)
170b8e80941Smrg      return result;
171b8e80941Smrg
172b8e80941Smrg   /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
173b8e80941Smrg   index = list->num_relocs++;
174b8e80941Smrg   list->reloc_bos[index] = target_bo;
175b8e80941Smrg   entry = &list->relocs[index];
176b8e80941Smrg   entry->target_handle = target_bo->gem_handle;
177b8e80941Smrg   entry->delta = delta;
178b8e80941Smrg   entry->offset = offset;
179b8e80941Smrg   entry->presumed_offset = target_bo->offset;
180b8e80941Smrg   entry->read_domains = 0;
181b8e80941Smrg   entry->write_domain = 0;
182b8e80941Smrg   VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));
183b8e80941Smrg
184b8e80941Smrg   return VK_SUCCESS;
185b8e80941Smrg}
186b8e80941Smrg
187b8e80941Smrgstatic VkResult
188b8e80941Smrganv_reloc_list_append(struct anv_reloc_list *list,
189b8e80941Smrg                      const VkAllocationCallbacks *alloc,
190b8e80941Smrg                      struct anv_reloc_list *other, uint32_t offset)
191b8e80941Smrg{
192b8e80941Smrg   VkResult result = anv_reloc_list_grow(list, alloc, other->num_relocs);
193b8e80941Smrg   if (result != VK_SUCCESS)
194b8e80941Smrg      return result;
195b8e80941Smrg
196b8e80941Smrg   memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
197b8e80941Smrg          other->num_relocs * sizeof(other->relocs[0]));
198b8e80941Smrg   memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
199b8e80941Smrg          other->num_relocs * sizeof(other->reloc_bos[0]));
200b8e80941Smrg
201b8e80941Smrg   for (uint32_t i = 0; i < other->num_relocs; i++)
202b8e80941Smrg      list->relocs[i + list->num_relocs].offset += offset;
203b8e80941Smrg
204b8e80941Smrg   list->num_relocs += other->num_relocs;
205b8e80941Smrg
206b8e80941Smrg   set_foreach(other->deps, entry) {
207b8e80941Smrg      _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
208b8e80941Smrg   }
209b8e80941Smrg
210b8e80941Smrg   return VK_SUCCESS;
211b8e80941Smrg}
212b8e80941Smrg
213b8e80941Smrg/*-----------------------------------------------------------------------*
214b8e80941Smrg * Functions related to anv_batch
215b8e80941Smrg *-----------------------------------------------------------------------*/
216b8e80941Smrg
217b8e80941Smrgvoid *
218b8e80941Smrganv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
219b8e80941Smrg{
220b8e80941Smrg   if (batch->next + num_dwords * 4 > batch->end) {
221b8e80941Smrg      VkResult result = batch->extend_cb(batch, batch->user_data);
222b8e80941Smrg      if (result != VK_SUCCESS) {
223b8e80941Smrg         anv_batch_set_error(batch, result);
224b8e80941Smrg         return NULL;
225b8e80941Smrg      }
226b8e80941Smrg   }
227b8e80941Smrg
228b8e80941Smrg   void *p = batch->next;
229b8e80941Smrg
230b8e80941Smrg   batch->next += num_dwords * 4;
231b8e80941Smrg   assert(batch->next <= batch->end);
232b8e80941Smrg
233b8e80941Smrg   return p;
234b8e80941Smrg}
235b8e80941Smrg
236b8e80941Smrguint64_t
237b8e80941Smrganv_batch_emit_reloc(struct anv_batch *batch,
238b8e80941Smrg                     void *location, struct anv_bo *bo, uint32_t delta)
239b8e80941Smrg{
240b8e80941Smrg   VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc,
241b8e80941Smrg                                        location - batch->start, bo, delta);
242b8e80941Smrg   if (result != VK_SUCCESS) {
243b8e80941Smrg      anv_batch_set_error(batch, result);
244b8e80941Smrg      return 0;
245b8e80941Smrg   }
246b8e80941Smrg
247b8e80941Smrg   return bo->offset + delta;
248b8e80941Smrg}
249b8e80941Smrg
/* Append the contents of `other` (e.g. a secondary command buffer's batch)
 * to `batch`, rebasing other's relocations onto the destination.
 */
void
anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
{
   uint32_t size, offset;

   size = other->next - other->start;
   assert(size % 4 == 0);

   /* NOTE(review): a single extend_cb call is assumed to make enough room;
    * the assert below catches the case where it did not.
    */
   if (batch->next + size > batch->end) {
      VkResult result = batch->extend_cb(batch, batch->user_data);
      if (result != VK_SUCCESS) {
         anv_batch_set_error(batch, result);
         return;
      }
   }

   assert(batch->next + size <= batch->end);

   VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size));
   memcpy(batch->next, other->start, size);

   /* Compute the destination offset only after the possible extension above:
    * extend_cb may have switched to a new BO, changing batch->start/next.
    */
   offset = batch->next - batch->start;
   VkResult result = anv_reloc_list_append(batch->relocs, batch->alloc,
                                           other->relocs, offset);
   if (result != VK_SUCCESS) {
      anv_batch_set_error(batch, result);
      return;
   }

   batch->next += size;
}
281b8e80941Smrg
282b8e80941Smrg/*-----------------------------------------------------------------------*
283b8e80941Smrg * Functions related to anv_batch_bo
284b8e80941Smrg *-----------------------------------------------------------------------*/
285b8e80941Smrg
286b8e80941Smrgstatic VkResult
287b8e80941Smrganv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer,
288b8e80941Smrg                    struct anv_batch_bo **bbo_out)
289b8e80941Smrg{
290b8e80941Smrg   VkResult result;
291b8e80941Smrg
292b8e80941Smrg   struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
293b8e80941Smrg                                        8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
294b8e80941Smrg   if (bbo == NULL)
295b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
296b8e80941Smrg
297b8e80941Smrg   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
298b8e80941Smrg                              ANV_CMD_BUFFER_BATCH_SIZE);
299b8e80941Smrg   if (result != VK_SUCCESS)
300b8e80941Smrg      goto fail_alloc;
301b8e80941Smrg
302b8e80941Smrg   result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc);
303b8e80941Smrg   if (result != VK_SUCCESS)
304b8e80941Smrg      goto fail_bo_alloc;
305b8e80941Smrg
306b8e80941Smrg   *bbo_out = bbo;
307b8e80941Smrg
308b8e80941Smrg   return VK_SUCCESS;
309b8e80941Smrg
310b8e80941Smrg fail_bo_alloc:
311b8e80941Smrg   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
312b8e80941Smrg fail_alloc:
313b8e80941Smrg   vk_free(&cmd_buffer->pool->alloc, bbo);
314b8e80941Smrg
315b8e80941Smrg   return result;
316b8e80941Smrg}
317b8e80941Smrg
318b8e80941Smrgstatic VkResult
319b8e80941Smrganv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer,
320b8e80941Smrg                   const struct anv_batch_bo *other_bbo,
321b8e80941Smrg                   struct anv_batch_bo **bbo_out)
322b8e80941Smrg{
323b8e80941Smrg   VkResult result;
324b8e80941Smrg
325b8e80941Smrg   struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo),
326b8e80941Smrg                                        8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
327b8e80941Smrg   if (bbo == NULL)
328b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
329b8e80941Smrg
330b8e80941Smrg   result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo,
331b8e80941Smrg                              other_bbo->bo.size);
332b8e80941Smrg   if (result != VK_SUCCESS)
333b8e80941Smrg      goto fail_alloc;
334b8e80941Smrg
335b8e80941Smrg   result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc,
336b8e80941Smrg                                      &other_bbo->relocs);
337b8e80941Smrg   if (result != VK_SUCCESS)
338b8e80941Smrg      goto fail_bo_alloc;
339b8e80941Smrg
340b8e80941Smrg   bbo->length = other_bbo->length;
341b8e80941Smrg   memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length);
342b8e80941Smrg
343b8e80941Smrg   *bbo_out = bbo;
344b8e80941Smrg
345b8e80941Smrg   return VK_SUCCESS;
346b8e80941Smrg
347b8e80941Smrg fail_bo_alloc:
348b8e80941Smrg   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
349b8e80941Smrg fail_alloc:
350b8e80941Smrg   vk_free(&cmd_buffer->pool->alloc, bbo);
351b8e80941Smrg
352b8e80941Smrg   return result;
353b8e80941Smrg}
354b8e80941Smrg
355b8e80941Smrgstatic void
356b8e80941Smrganv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
357b8e80941Smrg                   size_t batch_padding)
358b8e80941Smrg{
359b8e80941Smrg   batch->next = batch->start = bbo->bo.map;
360b8e80941Smrg   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
361b8e80941Smrg   batch->relocs = &bbo->relocs;
362b8e80941Smrg   bbo->relocs.num_relocs = 0;
363b8e80941Smrg   _mesa_set_clear(bbo->relocs.deps, NULL);
364b8e80941Smrg}
365b8e80941Smrg
366b8e80941Smrgstatic void
367b8e80941Smrganv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch,
368b8e80941Smrg                      size_t batch_padding)
369b8e80941Smrg{
370b8e80941Smrg   batch->start = bbo->bo.map;
371b8e80941Smrg   batch->next = bbo->bo.map + bbo->length;
372b8e80941Smrg   batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
373b8e80941Smrg   batch->relocs = &bbo->relocs;
374b8e80941Smrg}
375b8e80941Smrg
/* Stop recording into bbo: record how many bytes were actually written so
 * later copies and submission know the batch length.
 */
static void
anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
{
   /* The batch must still be pointing into this BO. */
   assert(batch->start == bbo->bo.map);
   bbo->length = batch->next - batch->start;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
}
383b8e80941Smrg
384b8e80941Smrgstatic VkResult
385b8e80941Smrganv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo,
386b8e80941Smrg                  struct anv_batch *batch, size_t aditional,
387b8e80941Smrg                  size_t batch_padding)
388b8e80941Smrg{
389b8e80941Smrg   assert(batch->start == bbo->bo.map);
390b8e80941Smrg   bbo->length = batch->next - batch->start;
391b8e80941Smrg
392b8e80941Smrg   size_t new_size = bbo->bo.size;
393b8e80941Smrg   while (new_size <= bbo->length + aditional + batch_padding)
394b8e80941Smrg      new_size *= 2;
395b8e80941Smrg
396b8e80941Smrg   if (new_size == bbo->bo.size)
397b8e80941Smrg      return VK_SUCCESS;
398b8e80941Smrg
399b8e80941Smrg   struct anv_bo new_bo;
400b8e80941Smrg   VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool,
401b8e80941Smrg                                       &new_bo, new_size);
402b8e80941Smrg   if (result != VK_SUCCESS)
403b8e80941Smrg      return result;
404b8e80941Smrg
405b8e80941Smrg   memcpy(new_bo.map, bbo->bo.map, bbo->length);
406b8e80941Smrg
407b8e80941Smrg   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
408b8e80941Smrg
409b8e80941Smrg   bbo->bo = new_bo;
410b8e80941Smrg   anv_batch_bo_continue(bbo, batch, batch_padding);
411b8e80941Smrg
412b8e80941Smrg   return VK_SUCCESS;
413b8e80941Smrg}
414b8e80941Smrg
/* Patch the MI_BATCH_BUFFER_START at the tail of prev_bbo so it jumps to
 * next_bbo_offset bytes into next_bbo.
 */
static void
anv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer,
                  struct anv_batch_bo *prev_bbo,
                  struct anv_batch_bo *next_bbo,
                  uint32_t next_bbo_offset)
{
   /* The chaining command is always the last MI_BATCH_BUFFER_START-sized
    * chunk of the previous batch.
    */
   MAYBE_UNUSED const uint32_t bb_start_offset =
      prev_bbo->length - GEN8_MI_BATCH_BUFFER_START_length * 4;
   MAYBE_UNUSED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset;

   /* Make sure we're looking at a MI_BATCH_BUFFER_START */
   assert(((*bb_start >> 29) & 0x07) == 0);  /* command type 0 == MI */
   assert(((*bb_start >> 23) & 0x3f) == 49); /* MI opcode 49 == BATCH_BUFFER_START */

   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
      /* With softpin, BO addresses are fixed up-front, so we can write the
       * target address straight into the command; +4 skips the command's
       * header dword to reach the address field.
       */
      assert(prev_bbo->bo.flags & EXEC_OBJECT_PINNED);
      assert(next_bbo->bo.flags & EXEC_OBJECT_PINNED);

      write_reloc(cmd_buffer->device,
                  prev_bbo->bo.map + bb_start_offset + 4,
                  next_bbo->bo.offset + next_bbo_offset, true);
   } else {
      /* Otherwise, retarget the relocation emitted for the chaining
       * command's address field; it must be the most recent relocation in
       * prev_bbo's list (verified by the offset assert).
       */
      uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1;
      assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4);

      prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo;
      prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset;

      /* Use a bogus presumed offset to force a relocation */
      prev_bbo->relocs.relocs[reloc_idx].presumed_offset = -1;
   }
}
447b8e80941Smrg
/* Free a batch_bo: its reloc list, its backing BO (returned to the pool),
 * and finally the struct itself.
 */
static void
anv_batch_bo_destroy(struct anv_batch_bo *bbo,
                     struct anv_cmd_buffer *cmd_buffer)
{
   anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc);
   anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo);
   vk_free(&cmd_buffer->pool->alloc, bbo);
}
456b8e80941Smrg
457b8e80941Smrgstatic VkResult
458b8e80941Smrganv_batch_bo_list_clone(const struct list_head *list,
459b8e80941Smrg                        struct anv_cmd_buffer *cmd_buffer,
460b8e80941Smrg                        struct list_head *new_list)
461b8e80941Smrg{
462b8e80941Smrg   VkResult result = VK_SUCCESS;
463b8e80941Smrg
464b8e80941Smrg   list_inithead(new_list);
465b8e80941Smrg
466b8e80941Smrg   struct anv_batch_bo *prev_bbo = NULL;
467b8e80941Smrg   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
468b8e80941Smrg      struct anv_batch_bo *new_bbo = NULL;
469b8e80941Smrg      result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo);
470b8e80941Smrg      if (result != VK_SUCCESS)
471b8e80941Smrg         break;
472b8e80941Smrg      list_addtail(&new_bbo->link, new_list);
473b8e80941Smrg
474b8e80941Smrg      if (prev_bbo)
475b8e80941Smrg         anv_batch_bo_link(cmd_buffer, prev_bbo, new_bbo, 0);
476b8e80941Smrg
477b8e80941Smrg      prev_bbo = new_bbo;
478b8e80941Smrg   }
479b8e80941Smrg
480b8e80941Smrg   if (result != VK_SUCCESS) {
481b8e80941Smrg      list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link)
482b8e80941Smrg         anv_batch_bo_destroy(bbo, cmd_buffer);
483b8e80941Smrg   }
484b8e80941Smrg
485b8e80941Smrg   return result;
486b8e80941Smrg}
487b8e80941Smrg
488b8e80941Smrg/*-----------------------------------------------------------------------*
489b8e80941Smrg * Functions related to anv_batch_bo
490b8e80941Smrg *-----------------------------------------------------------------------*/
491b8e80941Smrg
/* Return the batch_bo currently being recorded into, i.e. the tail entry of
 * the command buffer's batch_bos list.
 */
static struct anv_batch_bo *
anv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer)
{
   return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link);
}
497b8e80941Smrg
498b8e80941Smrgstruct anv_address
499b8e80941Smrganv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer)
500b8e80941Smrg{
501b8e80941Smrg   struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
502b8e80941Smrg   return (struct anv_address) {
503b8e80941Smrg      .bo = anv_binding_table_pool(cmd_buffer->device)->block_pool.bo,
504b8e80941Smrg      .offset = bt_block->offset,
505b8e80941Smrg   };
506b8e80941Smrg}
507b8e80941Smrg
/* Emit an MI_BATCH_BUFFER_START into cmd_buffer's batch that jumps to
 * `offset` bytes into `bo`.
 */
static void
emit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer,
                        struct anv_bo *bo, uint32_t offset)
{
   /* In gen8+ the address field grew to two dwords to accommodate 48 bit
    * offsets. The high 16 bits are in the last dword, so we can use the gen8
    * version in either case, as long as we set the instruction length in the
    * header accordingly.  This means that we always emit three dwords here
    * and all the padding and adjustment we do in this file works for all
    * gens.
    */

#define GEN7_MI_BATCH_BUFFER_START_length      2
#define GEN7_MI_BATCH_BUFFER_START_length_bias      2

   /* DWordLength is encoded with a bias, hence the subtraction. */
   const uint32_t gen7_length =
      GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias;
   const uint32_t gen8_length =
      GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias;

   anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) {
      /* Pre-gen8 parts must claim the shorter gen7 length even though we
       * pack with the gen8 layout (see comment above).
       */
      bbs.DWordLength               = cmd_buffer->device->info.gen < 8 ?
                                      gen7_length : gen8_length;
      bbs.SecondLevelBatchBuffer    = Firstlevelbatch;
      bbs.AddressSpaceIndicator     = ASI_PPGTT;
      bbs.BatchBufferStartAddress   = (struct anv_address) { bo, offset };
   }
}
536b8e80941Smrg
/* Chain the command buffer's current batch BO into bbo: emit an
 * MI_BATCH_BUFFER_START at the end of the current BO jumping to the start of
 * bbo, then finish (length-stamp) the current BO.
 */
static void
cmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer,
                             struct anv_batch_bo *bbo)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_batch_bo *current_bbo =
      anv_cmd_buffer_current_batch_bo(cmd_buffer);

   /* We set the end of the batch a little short so we would be sure we
    * have room for the chaining command.  Since we're about to emit the
    * chaining command, let's set it back where it should go.
    */
   batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
   assert(batch->end == current_bbo->bo.map + current_bbo->bo.size);

   emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0);

   anv_batch_bo_finish(current_bbo, batch);
}
556b8e80941Smrg
557b8e80941Smrgstatic VkResult
558b8e80941Smrganv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
559b8e80941Smrg{
560b8e80941Smrg   struct anv_cmd_buffer *cmd_buffer = _data;
561b8e80941Smrg   struct anv_batch_bo *new_bbo;
562b8e80941Smrg
563b8e80941Smrg   VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo);
564b8e80941Smrg   if (result != VK_SUCCESS)
565b8e80941Smrg      return result;
566b8e80941Smrg
567b8e80941Smrg   struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos);
568b8e80941Smrg   if (seen_bbo == NULL) {
569b8e80941Smrg      anv_batch_bo_destroy(new_bbo, cmd_buffer);
570b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
571b8e80941Smrg   }
572b8e80941Smrg   *seen_bbo = new_bbo;
573b8e80941Smrg
574b8e80941Smrg   cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo);
575b8e80941Smrg
576b8e80941Smrg   list_addtail(&new_bbo->link, &cmd_buffer->batch_bos);
577b8e80941Smrg
578b8e80941Smrg   anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
579b8e80941Smrg
580b8e80941Smrg   return VK_SUCCESS;
581b8e80941Smrg}
582b8e80941Smrg
583b8e80941Smrgstatic VkResult
584b8e80941Smrganv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data)
585b8e80941Smrg{
586b8e80941Smrg   struct anv_cmd_buffer *cmd_buffer = _data;
587b8e80941Smrg   struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
588b8e80941Smrg
589b8e80941Smrg   anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096,
590b8e80941Smrg                     GEN8_MI_BATCH_BUFFER_START_length * 4);
591b8e80941Smrg
592b8e80941Smrg   return VK_SUCCESS;
593b8e80941Smrg}
594b8e80941Smrg
595b8e80941Smrg/** Allocate a binding table
596b8e80941Smrg *
597b8e80941Smrg * This function allocates a binding table.  This is a bit more complicated
598b8e80941Smrg * than one would think due to a combination of Vulkan driver design and some
599b8e80941Smrg * unfortunate hardware restrictions.
600b8e80941Smrg *
601b8e80941Smrg * The 3DSTATE_BINDING_TABLE_POINTERS_* packets only have a 16-bit field for
602b8e80941Smrg * the binding table pointer which means that all binding tables need to live
603b8e80941Smrg * in the bottom 64k of surface state base address.  The way the GL driver has
604b8e80941Smrg * classically dealt with this restriction is to emit all surface states
605b8e80941Smrg * on-the-fly into the batch and have a batch buffer smaller than 64k.  This
606b8e80941Smrg * isn't really an option in Vulkan for a couple of reasons:
607b8e80941Smrg *
608b8e80941Smrg *  1) In Vulkan, we have growing (or chaining) batches so surface states have
609b8e80941Smrg *     to live in their own buffer and we have to be able to re-emit
610b8e80941Smrg *     STATE_BASE_ADDRESS as needed which requires a full pipeline stall.  In
611b8e80941Smrg *     order to avoid emitting STATE_BASE_ADDRESS any more often than needed
612b8e80941Smrg *     (it's not that hard to hit 64k of just binding tables), we allocate
613b8e80941Smrg *     surface state objects up-front when VkImageView is created.  In order
614b8e80941Smrg *     for this to work, surface state objects need to be allocated from a
615b8e80941Smrg *     global buffer.
616b8e80941Smrg *
617b8e80941Smrg *  2) We tried to design the surface state system in such a way that it's
618b8e80941Smrg *     already ready for bindless texturing.  The way bindless texturing works
619b8e80941Smrg *     on our hardware is that you have a big pool of surface state objects
620b8e80941Smrg *     (with its own state base address) and the bindless handles are simply
621b8e80941Smrg *     offsets into that pool.  With the architecture we chose, we already
622b8e80941Smrg *     have that pool and it's exactly the same pool that we use for regular
623b8e80941Smrg *     surface states so we should already be ready for bindless.
624b8e80941Smrg *
625b8e80941Smrg *  3) For render targets, we need to be able to fill out the surface states
626b8e80941Smrg *     later in vkBeginRenderPass so that we can assign clear colors
627b8e80941Smrg *     correctly.  One way to do this would be to just create the surface
628b8e80941Smrg *     state data and then repeatedly copy it into the surface state BO every
 *     time we have to re-emit STATE_BASE_ADDRESS.  While this works, it's
 *     rather annoying; it is much nicer to be able to allocate them
 *     up-front and re-use them for the entire render pass.
632b8e80941Smrg *
 * While none of these are technically blockers for emitting state on the fly
 * like we do in GL, the ability to have a single surface state pool
 * simplifies things greatly.  Unfortunately, it comes at a cost...
636b8e80941Smrg *
637b8e80941Smrg * Because of the 64k limitation of 3DSTATE_BINDING_TABLE_POINTERS_*, we can't
638b8e80941Smrg * place the binding tables just anywhere in surface state base address.
639b8e80941Smrg * Because 64k isn't a whole lot of space, we can't simply restrict the
640b8e80941Smrg * surface state buffer to 64k, we have to be more clever.  The solution we've
641b8e80941Smrg * chosen is to have a block pool with a maximum size of 2G that starts at
642b8e80941Smrg * zero and grows in both directions.  All surface states are allocated from
643b8e80941Smrg * the top of the pool (positive offsets) and we allocate blocks (< 64k) of
644b8e80941Smrg * binding tables from the bottom of the pool (negative offsets).  Every time
645b8e80941Smrg * we allocate a new binding table block, we set surface state base address to
646b8e80941Smrg * point to the bottom of the binding table block.  This way all of the
647b8e80941Smrg * binding tables in the block are in the bottom 64k of surface state base
648b8e80941Smrg * address.  When we fill out the binding table, we add the distance between
649b8e80941Smrg * the bottom of our binding table block and zero of the block pool to the
 * surface state offsets so that they are correct relative to our new surface
651b8e80941Smrg * state base address at the bottom of the binding table block.
652b8e80941Smrg *
653b8e80941Smrg * \see adjust_relocations_from_state_pool()
654b8e80941Smrg * \see adjust_relocations_to_state_pool()
655b8e80941Smrg *
656b8e80941Smrg * \param[in]  entries        The number of surface state entries the binding
657b8e80941Smrg *                            table should be able to hold.
658b8e80941Smrg *
659b8e80941Smrg * \param[out] state_offset   The offset from surface state base address
660b8e80941Smrg *                            where the surface states live.  This must be
661b8e80941Smrg *                            added to the surface state offset when it is
662b8e80941Smrg *                            written into the binding table entry.
663b8e80941Smrg *
664b8e80941Smrg * \return                    An anv_state representing the binding table
665b8e80941Smrg */
666b8e80941Smrgstruct anv_state
667b8e80941Smrganv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
668b8e80941Smrg                                   uint32_t entries, uint32_t *state_offset)
669b8e80941Smrg{
670b8e80941Smrg   struct anv_device *device = cmd_buffer->device;
671b8e80941Smrg   struct anv_state_pool *state_pool = &device->surface_state_pool;
   /* Current binding-table block; new tables are carved out of it linearly
    * via cmd_buffer->bt_next.
    */
672b8e80941Smrg   struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);
673b8e80941Smrg   struct anv_state state;
674b8e80941Smrg
   /* 4 bytes per binding-table entry, rounded up to a 32-byte multiple. */
675b8e80941Smrg   state.alloc_size = align_u32(entries * 4, 32);
676b8e80941Smrg
   /* Out of room in the current block: return a zeroed anv_state so the
    * caller can allocate a fresh block (see
    * anv_cmd_buffer_new_binding_table_block) and retry.
    */
677b8e80941Smrg   if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size)
678b8e80941Smrg      return (struct anv_state) { 0 };
679b8e80941Smrg
680b8e80941Smrg   state.offset = cmd_buffer->bt_next;
681b8e80941Smrg   state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool,
682b8e80941Smrg                                  bt_block->offset + state.offset);
683b8e80941Smrg
684b8e80941Smrg   cmd_buffer->bt_next += state.alloc_size;
685b8e80941Smrg
686b8e80941Smrg   if (device->instance->physicalDevice.use_softpin) {
      /* With softpin the binding-table pool and surface-state pool are
       * separate pools at fixed addresses, so the distance between them
       * (minus our block's offset) converts surface-state offsets into
       * offsets from surface state base address.
       */
687b8e80941Smrg      assert(bt_block->offset >= 0);
688b8e80941Smrg      *state_offset = device->surface_state_pool.block_pool.start_address -
689b8e80941Smrg         device->binding_table_pool.block_pool.start_address - bt_block->offset;
690b8e80941Smrg   } else {
      /* Without softpin, binding-table blocks live at negative offsets in
       * the bi-directional surface-state block pool (see the file comment
       * above), so the adjustment is just the negated block offset.
       */
691b8e80941Smrg      assert(bt_block->offset < 0);
692b8e80941Smrg      *state_offset = -bt_block->offset;
693b8e80941Smrg   }
694b8e80941Smrg
695b8e80941Smrg   return state;
696b8e80941Smrg}
697b8e80941Smrg
698b8e80941Smrgstruct anv_state
699b8e80941Smrganv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
700b8e80941Smrg{
701b8e80941Smrg   struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
702b8e80941Smrg   return anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
703b8e80941Smrg                                 isl_dev->ss.size, isl_dev->ss.align);
704b8e80941Smrg}
705b8e80941Smrg
706b8e80941Smrgstruct anv_state
707b8e80941Smrganv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
708b8e80941Smrg                                   uint32_t size, uint32_t alignment)
709b8e80941Smrg{
710b8e80941Smrg   return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
711b8e80941Smrg                                 size, alignment);
712b8e80941Smrg}
713b8e80941Smrg
714b8e80941SmrgVkResult
715b8e80941Smrganv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
716b8e80941Smrg{
717b8e80941Smrg   struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_block_states);
718b8e80941Smrg   if (bt_block == NULL) {
719b8e80941Smrg      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
720b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
721b8e80941Smrg   }
722b8e80941Smrg
723b8e80941Smrg   *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device);
724b8e80941Smrg   cmd_buffer->bt_next = 0;
725b8e80941Smrg
726b8e80941Smrg   return VK_SUCCESS;
727b8e80941Smrg}
728b8e80941Smrg
729b8e80941SmrgVkResult
730b8e80941Smrganv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
731b8e80941Smrg{
732b8e80941Smrg   struct anv_batch_bo *batch_bo;
733b8e80941Smrg   VkResult result;
734b8e80941Smrg
735b8e80941Smrg   list_inithead(&cmd_buffer->batch_bos);
736b8e80941Smrg
737b8e80941Smrg   result = anv_batch_bo_create(cmd_buffer, &batch_bo);
738b8e80941Smrg   if (result != VK_SUCCESS)
739b8e80941Smrg      return result;
740b8e80941Smrg
741b8e80941Smrg   list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);
742b8e80941Smrg
743b8e80941Smrg   cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
744b8e80941Smrg   cmd_buffer->batch.user_data = cmd_buffer;
745b8e80941Smrg
746b8e80941Smrg   if (cmd_buffer->device->can_chain_batches) {
747b8e80941Smrg      cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
748b8e80941Smrg   } else {
749b8e80941Smrg      cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch;
750b8e80941Smrg   }
751b8e80941Smrg
752b8e80941Smrg   anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
753b8e80941Smrg                      GEN8_MI_BATCH_BUFFER_START_length * 4);
754b8e80941Smrg
755b8e80941Smrg   int success = u_vector_init(&cmd_buffer->seen_bbos,
756b8e80941Smrg                                 sizeof(struct anv_bo *),
757b8e80941Smrg                                 8 * sizeof(struct anv_bo *));
758b8e80941Smrg   if (!success)
759b8e80941Smrg      goto fail_batch_bo;
760b8e80941Smrg
761b8e80941Smrg   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = batch_bo;
762b8e80941Smrg
763b8e80941Smrg   /* u_vector requires power-of-two size elements */
764b8e80941Smrg   unsigned pow2_state_size = util_next_power_of_two(sizeof(struct anv_state));
765b8e80941Smrg   success = u_vector_init(&cmd_buffer->bt_block_states,
766b8e80941Smrg                           pow2_state_size, 8 * pow2_state_size);
767b8e80941Smrg   if (!success)
768b8e80941Smrg      goto fail_seen_bbos;
769b8e80941Smrg
770b8e80941Smrg   result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
771b8e80941Smrg                                &cmd_buffer->pool->alloc);
772b8e80941Smrg   if (result != VK_SUCCESS)
773b8e80941Smrg      goto fail_bt_blocks;
774b8e80941Smrg   cmd_buffer->last_ss_pool_center = 0;
775b8e80941Smrg
776b8e80941Smrg   result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
777b8e80941Smrg   if (result != VK_SUCCESS)
778b8e80941Smrg      goto fail_bt_blocks;
779b8e80941Smrg
780b8e80941Smrg   return VK_SUCCESS;
781b8e80941Smrg
782b8e80941Smrg fail_bt_blocks:
783b8e80941Smrg   u_vector_finish(&cmd_buffer->bt_block_states);
784b8e80941Smrg fail_seen_bbos:
785b8e80941Smrg   u_vector_finish(&cmd_buffer->seen_bbos);
786b8e80941Smrg fail_batch_bo:
787b8e80941Smrg   anv_batch_bo_destroy(batch_bo, cmd_buffer);
788b8e80941Smrg
789b8e80941Smrg   return result;
790b8e80941Smrg}
791b8e80941Smrg
792b8e80941Smrgvoid
793b8e80941Smrganv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
794b8e80941Smrg{
795b8e80941Smrg   struct anv_state *bt_block;
796b8e80941Smrg   u_vector_foreach(bt_block, &cmd_buffer->bt_block_states)
797b8e80941Smrg      anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
798b8e80941Smrg   u_vector_finish(&cmd_buffer->bt_block_states);
799b8e80941Smrg
800b8e80941Smrg   anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);
801b8e80941Smrg
802b8e80941Smrg   u_vector_finish(&cmd_buffer->seen_bbos);
803b8e80941Smrg
804b8e80941Smrg   /* Destroy all of the batch buffers */
805b8e80941Smrg   list_for_each_entry_safe(struct anv_batch_bo, bbo,
806b8e80941Smrg                            &cmd_buffer->batch_bos, link) {
807b8e80941Smrg      anv_batch_bo_destroy(bbo, cmd_buffer);
808b8e80941Smrg   }
809b8e80941Smrg}
810b8e80941Smrg
811b8e80941Smrgvoid
812b8e80941Smrganv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
813b8e80941Smrg{
   /* Re-arm the chain for re-recording: keep exactly one batch BO and one
    * binding-table block, free everything else, and reset all cursors.
    */
814b8e80941Smrg   /* Delete all but the first batch bo */
815b8e80941Smrg   assert(!list_empty(&cmd_buffer->batch_bos));
816b8e80941Smrg   while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
817b8e80941Smrg      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
818b8e80941Smrg      list_del(&bbo->link);
819b8e80941Smrg      anv_batch_bo_destroy(bbo, cmd_buffer);
820b8e80941Smrg   }
821b8e80941Smrg   assert(!list_empty(&cmd_buffer->batch_bos));
822b8e80941Smrg
   /* Restart recording at the head of the surviving BO, again reserving
    * room at the end for a chaining MI_BATCH_BUFFER_START.
    */
823b8e80941Smrg   anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
824b8e80941Smrg                      &cmd_buffer->batch,
825b8e80941Smrg                      GEN8_MI_BATCH_BUFFER_START_length * 4);
826b8e80941Smrg
   /* Free all binding-table blocks except one and rewind the allocator. */
827b8e80941Smrg   while (u_vector_length(&cmd_buffer->bt_block_states) > 1) {
828b8e80941Smrg      struct anv_state *bt_block = u_vector_remove(&cmd_buffer->bt_block_states);
829b8e80941Smrg      anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
830b8e80941Smrg   }
831b8e80941Smrg   assert(u_vector_length(&cmd_buffer->bt_block_states) == 1);
832b8e80941Smrg   cmd_buffer->bt_next = 0;
833b8e80941Smrg
   /* Empty the surface relocation list and its BO-dependency set without
    * releasing their storage, so re-recording can reuse it.
    */
834b8e80941Smrg   cmd_buffer->surface_relocs.num_relocs = 0;
835b8e80941Smrg   _mesa_set_clear(cmd_buffer->surface_relocs.deps, NULL);
836b8e80941Smrg   cmd_buffer->last_ss_pool_center = 0;
837b8e80941Smrg
838b8e80941Smrg   /* Reset the list of seen buffers */
839b8e80941Smrg   cmd_buffer->seen_bbos.head = 0;
840b8e80941Smrg   cmd_buffer->seen_bbos.tail = 0;
841b8e80941Smrg
   /* The one remaining batch BO must still be tracked as "seen". */
842b8e80941Smrg   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) =
843b8e80941Smrg      anv_cmd_buffer_current_batch_bo(cmd_buffer);
844b8e80941Smrg}
845b8e80941Smrg
846b8e80941Smrgvoid
847b8e80941Smrganv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
848b8e80941Smrg{
   /* Finish recording: for primaries, emit MI_BATCH_BUFFER_END; for
    * secondaries, decide how vkCmdExecuteCommands will splice this batch
    * into the caller (exec_mode).
    */
849b8e80941Smrg   struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
850b8e80941Smrg
851b8e80941Smrg   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
852b8e80941Smrg      /* When we start a batch buffer, we subtract a certain amount of
853b8e80941Smrg       * padding from the end to ensure that we always have room to emit a
854b8e80941Smrg       * BATCH_BUFFER_START to chain to the next BO.  We need to remove
855b8e80941Smrg       * that padding before we end the batch; otherwise, we may end up
856b8e80941Smrg       * with our BATCH_BUFFER_END in another BO.
857b8e80941Smrg       */
858b8e80941Smrg      cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
859b8e80941Smrg      assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
860b8e80941Smrg
861b8e80941Smrg      anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END, bbe);
862b8e80941Smrg
863b8e80941Smrg      /* Round batch up to an even number of dwords. */
      /* A dword is 4 bytes, so (byte_length & 4) != 0 exactly when the
       * batch currently holds an odd number of dwords.
       */
864b8e80941Smrg      if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
865b8e80941Smrg         anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP, noop);
866b8e80941Smrg
867b8e80941Smrg      cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
868b8e80941Smrg   } else {
869b8e80941Smrg      assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
870b8e80941Smrg      /* If this is a secondary command buffer, we need to determine the
871b8e80941Smrg       * mode in which it will be executed with vkExecuteCommands.  We
872b8e80941Smrg       * determine this statically here so that this stays in sync with the
873b8e80941Smrg       * actual ExecuteCommands implementation.
874b8e80941Smrg       */
875b8e80941Smrg      const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
876b8e80941Smrg      if (!cmd_buffer->device->can_chain_batches) {
877b8e80941Smrg         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
878b8e80941Smrg      } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
879b8e80941Smrg                 (length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) {
880b8e80941Smrg         /* If the secondary has exactly one batch buffer in its list *and*
881b8e80941Smrg          * that batch buffer is less than half of the maximum size, we're
882b8e80941Smrg          * probably better of simply copying it into our batch.
883b8e80941Smrg          */
884b8e80941Smrg         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
885b8e80941Smrg      } else if (!(cmd_buffer->usage_flags &
886b8e80941Smrg                   VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
887b8e80941Smrg         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;
888b8e80941Smrg
889b8e80941Smrg         /* In order to chain, we need this command buffer to contain an
890b8e80941Smrg          * MI_BATCH_BUFFER_START which will jump back to the calling batch.
891b8e80941Smrg          * It doesn't matter where it points now so long as has a valid
892b8e80941Smrg          * relocation.  We'll adjust it later as part of the chaining
893b8e80941Smrg          * process.
894b8e80941Smrg          *
895b8e80941Smrg          * We set the end of the batch a little short so we would be sure we
896b8e80941Smrg          * have room for the chaining command.  Since we're about to emit the
897b8e80941Smrg          * chaining command, let's set it back where it should go.
898b8e80941Smrg          */
899b8e80941Smrg         cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4;
900b8e80941Smrg         assert(cmd_buffer->batch.start == batch_bo->bo.map);
901b8e80941Smrg         assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size);
902b8e80941Smrg
903b8e80941Smrg         emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0);
904b8e80941Smrg         assert(cmd_buffer->batch.start == batch_bo->bo.map);
905b8e80941Smrg      } else {
         /* Simultaneous-use secondaries may be chained into several
          * primaries at once, so each caller gets its own copy to patch.
          */
906b8e80941Smrg         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
907b8e80941Smrg      }
908b8e80941Smrg   }
909b8e80941Smrg
910b8e80941Smrg   anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
911b8e80941Smrg}
912b8e80941Smrg
913b8e80941Smrgstatic VkResult
914b8e80941Smrganv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
915b8e80941Smrg                             struct list_head *list)
916b8e80941Smrg{
917b8e80941Smrg   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
918b8e80941Smrg      struct anv_batch_bo **bbo_ptr = u_vector_add(&cmd_buffer->seen_bbos);
919b8e80941Smrg      if (bbo_ptr == NULL)
920b8e80941Smrg         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
921b8e80941Smrg
922b8e80941Smrg      *bbo_ptr = bbo;
923b8e80941Smrg   }
924b8e80941Smrg
925b8e80941Smrg   return VK_SUCCESS;
926b8e80941Smrg}
927b8e80941Smrg
928b8e80941Smrgvoid
929b8e80941Smrganv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
930b8e80941Smrg                             struct anv_cmd_buffer *secondary)
931b8e80941Smrg{
   /* Splice a secondary command buffer into the primary, using whichever
    * exec_mode was chosen for it in anv_cmd_buffer_end_batch_buffer().
    */
932b8e80941Smrg   switch (secondary->exec_mode) {
933b8e80941Smrg   case ANV_CMD_BUFFER_EXEC_MODE_EMIT:
      /* Small single-BO secondary: just memcpy its commands inline. */
934b8e80941Smrg      anv_batch_emit_batch(&primary->batch, &secondary->batch);
935b8e80941Smrg      break;
936b8e80941Smrg   case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: {
      /* No batch chaining available: grow the primary's current BO until
       * the secondary fits, then copy it in.
       */
937b8e80941Smrg      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary);
938b8e80941Smrg      unsigned length = secondary->batch.end - secondary->batch.start;
939b8e80941Smrg      anv_batch_bo_grow(primary, bbo, &primary->batch, length,
940b8e80941Smrg                        GEN8_MI_BATCH_BUFFER_START_length * 4);
941b8e80941Smrg      anv_batch_emit_batch(&primary->batch, &secondary->batch);
942b8e80941Smrg      break;
943b8e80941Smrg   }
944b8e80941Smrg   case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: {
945b8e80941Smrg      struct anv_batch_bo *first_bbo =
946b8e80941Smrg         list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link);
947b8e80941Smrg      struct anv_batch_bo *last_bbo =
948b8e80941Smrg         list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link);
949b8e80941Smrg
      /* Jump from the primary into the secondary's first BO... */
950b8e80941Smrg      emit_batch_buffer_start(primary, &first_bbo->bo, 0);
951b8e80941Smrg
952b8e80941Smrg      struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary);
953b8e80941Smrg      assert(primary->batch.start == this_bbo->bo.map);
954b8e80941Smrg      uint32_t offset = primary->batch.next - primary->batch.start;
955b8e80941Smrg
956b8e80941Smrg      /* Make the tail of the secondary point back to right after the
957b8e80941Smrg       * MI_BATCH_BUFFER_START in the primary batch.
958b8e80941Smrg       */
959b8e80941Smrg      anv_batch_bo_link(primary, last_bbo, this_bbo, offset);
960b8e80941Smrg
      /* NOTE(review): VkResult ignored here; an OOM while recording seen
       * BBOs goes unreported — confirm whether this should set a batch
       * error like the COPY_AND_CHAIN path's FIXME suggests.
       */
961b8e80941Smrg      anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos);
962b8e80941Smrg      break;
963b8e80941Smrg   }
964b8e80941Smrg   case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: {
      /* Simultaneous-use secondary: clone its BO list so this primary can
       * patch its own copy, then continue recording at the clone's tail.
       */
965b8e80941Smrg      struct list_head copy_list;
966b8e80941Smrg      VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos,
967b8e80941Smrg                                                secondary,
968b8e80941Smrg                                                &copy_list);
969b8e80941Smrg      if (result != VK_SUCCESS)
970b8e80941Smrg         return; /* FIXME */
971b8e80941Smrg
      /* NOTE(review): VkResult ignored (same concern as the CHAIN case). */
972b8e80941Smrg      anv_cmd_buffer_add_seen_bbos(primary, &copy_list);
973b8e80941Smrg
974b8e80941Smrg      struct anv_batch_bo *first_bbo =
975b8e80941Smrg         list_first_entry(&copy_list, struct anv_batch_bo, link);
976b8e80941Smrg      struct anv_batch_bo *last_bbo =
977b8e80941Smrg         list_last_entry(&copy_list, struct anv_batch_bo, link);
978b8e80941Smrg
979b8e80941Smrg      cmd_buffer_chain_to_batch_bo(primary, first_bbo);
980b8e80941Smrg
981b8e80941Smrg      list_splicetail(&copy_list, &primary->batch_bos);
982b8e80941Smrg
983b8e80941Smrg      anv_batch_bo_continue(last_bbo, &primary->batch,
984b8e80941Smrg                            GEN8_MI_BATCH_BUFFER_START_length * 4);
985b8e80941Smrg      break;
986b8e80941Smrg   }
987b8e80941Smrg   default:
988b8e80941Smrg      assert(!"Invalid execution mode");
989b8e80941Smrg   }
990b8e80941Smrg
   /* NOTE(review): VkResult of the append is ignored here as well. */
991b8e80941Smrg   anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc,
992b8e80941Smrg                         &secondary->surface_relocs, 0);
993b8e80941Smrg}
994b8e80941Smrg
/* Accumulates everything needed for one execbuffer2 ioctl: the kernel
 * exec-object array, the matching anv_bo pointers, and any syncobj fences.
 */
995b8e80941Smrgstruct anv_execbuf {
996b8e80941Smrg   struct drm_i915_gem_execbuffer2           execbuf;
997b8e80941Smrg
   /* Parallel arrays: objects[i] is the kernel view of bos[i]. */
998b8e80941Smrg   struct drm_i915_gem_exec_object2 *        objects;
999b8e80941Smrg   uint32_t                                  bo_count;
1000b8e80941Smrg   struct anv_bo **                          bos;
1001b8e80941Smrg
1002b8e80941Smrg   /* Allocated length of the 'objects' and 'bos' arrays */
1003b8e80941Smrg   uint32_t                                  array_length;
1004b8e80941Smrg
   /* True once any BO brought a non-empty relocation list along. */
1005b8e80941Smrg   bool                                      has_relocs;
1006b8e80941Smrg
   /* Syncobj fences handed to the kernel alongside the execbuf. */
1007b8e80941Smrg   uint32_t                                  fence_count;
1008b8e80941Smrg   uint32_t                                  fence_array_length;
1009b8e80941Smrg   struct drm_i915_gem_exec_fence *          fences;
1010b8e80941Smrg   struct anv_syncobj **                     syncobjs;
1011b8e80941Smrg};
1012b8e80941Smrg
1013b8e80941Smrgstatic void
1014b8e80941Smrganv_execbuf_init(struct anv_execbuf *exec)
1015b8e80941Smrg{
1016b8e80941Smrg   memset(exec, 0, sizeof(*exec));
1017b8e80941Smrg}
1018b8e80941Smrg
1019b8e80941Smrgstatic void
1020b8e80941Smrganv_execbuf_finish(struct anv_execbuf *exec,
1021b8e80941Smrg                   const VkAllocationCallbacks *alloc)
1022b8e80941Smrg{
1023b8e80941Smrg   vk_free(alloc, exec->objects);
1024b8e80941Smrg   vk_free(alloc, exec->bos);
1025b8e80941Smrg   vk_free(alloc, exec->fences);
1026b8e80941Smrg   vk_free(alloc, exec->syncobjs);
1027b8e80941Smrg}
1028b8e80941Smrg
1029b8e80941Smrgstatic int
1030b8e80941Smrg_compare_bo_handles(const void *_bo1, const void *_bo2)
1031b8e80941Smrg{
1032b8e80941Smrg   struct anv_bo * const *bo1 = _bo1;
1033b8e80941Smrg   struct anv_bo * const *bo2 = _bo2;
1034b8e80941Smrg
1035b8e80941Smrg   return (*bo1)->gem_handle - (*bo2)->gem_handle;
1036b8e80941Smrg}
1037b8e80941Smrg
/* Forward declaration: anv_execbuf_add_bo() and anv_execbuf_add_bo_set()
 * are mutually recursive (a BO's relocation list carries a dependency set,
 * and each dependency is itself added as a BO).
 */
1038b8e80941Smrgstatic VkResult
1039b8e80941Smrganv_execbuf_add_bo_set(struct anv_execbuf *exec,
1040b8e80941Smrg                       struct set *deps,
1041b8e80941Smrg                       uint32_t extra_flags,
1042b8e80941Smrg                       const VkAllocationCallbacks *alloc);
1043b8e80941Smrg
/* Add a BO (and, transitively, everything its relocation list references)
 * to the execbuf.  Idempotent per BO: a BO already present is found via its
 * cached 'index'.  'extra_flags' is OR'd into the kernel exec-object flags.
 */
1044b8e80941Smrgstatic VkResult
1045b8e80941Smrganv_execbuf_add_bo(struct anv_execbuf *exec,
1046b8e80941Smrg                   struct anv_bo *bo,
1047b8e80941Smrg                   struct anv_reloc_list *relocs,
1048b8e80941Smrg                   uint32_t extra_flags,
1049b8e80941Smrg                   const VkAllocationCallbacks *alloc)
1050b8e80941Smrg{
1051b8e80941Smrg   struct drm_i915_gem_exec_object2 *obj = NULL;
1052b8e80941Smrg
   /* bo->index may be stale from a previous execbuf; trust it only if it
    * points back at this very BO in our arrays.
    */
1053b8e80941Smrg   if (bo->index < exec->bo_count && exec->bos[bo->index] == bo)
1054b8e80941Smrg      obj = &exec->objects[bo->index];
1055b8e80941Smrg
1056b8e80941Smrg   if (obj == NULL) {
1057b8e80941Smrg      /* We've never seen this one before.  Add it to the list and assign
1058b8e80941Smrg       * an id that we can use later.
1059b8e80941Smrg       */
1060b8e80941Smrg      if (exec->bo_count >= exec->array_length) {
         /* Grow both parallel arrays together: double, starting at 64. */
1061b8e80941Smrg         uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;
1062b8e80941Smrg
1063b8e80941Smrg         struct drm_i915_gem_exec_object2 *new_objects =
1064b8e80941Smrg            vk_alloc(alloc, new_len * sizeof(*new_objects),
1065b8e80941Smrg                     8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1066b8e80941Smrg         if (new_objects == NULL)
1067b8e80941Smrg            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1068b8e80941Smrg
1069b8e80941Smrg         struct anv_bo **new_bos =
1070b8e80941Smrg            vk_alloc(alloc, new_len * sizeof(*new_bos),
1071b8e80941Smrg                      8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1072b8e80941Smrg         if (new_bos == NULL) {
1073b8e80941Smrg            vk_free(alloc, new_objects);
1074b8e80941Smrg            return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1075b8e80941Smrg         }
1076b8e80941Smrg
1077b8e80941Smrg         if (exec->objects) {
1078b8e80941Smrg            memcpy(new_objects, exec->objects,
1079b8e80941Smrg                   exec->bo_count * sizeof(*new_objects));
1080b8e80941Smrg            memcpy(new_bos, exec->bos,
1081b8e80941Smrg                   exec->bo_count * sizeof(*new_bos));
1082b8e80941Smrg         }
1083b8e80941Smrg
1084b8e80941Smrg         vk_free(alloc, exec->objects);
1085b8e80941Smrg         vk_free(alloc, exec->bos);
1086b8e80941Smrg
1087b8e80941Smrg         exec->objects = new_objects;
1088b8e80941Smrg         exec->bos = new_bos;
1089b8e80941Smrg         exec->array_length = new_len;
1090b8e80941Smrg      }
1091b8e80941Smrg
1092b8e80941Smrg      assert(exec->bo_count < exec->array_length);
1093b8e80941Smrg
1094b8e80941Smrg      bo->index = exec->bo_count++;
1095b8e80941Smrg      obj = &exec->objects[bo->index];
1096b8e80941Smrg      exec->bos[bo->index] = bo;
1097b8e80941Smrg
      /* Fill in the kernel's view of this BO; relocations, if any, are
       * attached below.
       */
1098b8e80941Smrg      obj->handle = bo->gem_handle;
1099b8e80941Smrg      obj->relocation_count = 0;
1100b8e80941Smrg      obj->relocs_ptr = 0;
1101b8e80941Smrg      obj->alignment = 0;
1102b8e80941Smrg      obj->offset = bo->offset;
1103b8e80941Smrg      obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags;
1104b8e80941Smrg      obj->rsvd1 = 0;
1105b8e80941Smrg      obj->rsvd2 = 0;
1106b8e80941Smrg   }
1107b8e80941Smrg
1108b8e80941Smrg   if (relocs != NULL) {
1109b8e80941Smrg      assert(obj->relocation_count == 0);
1110b8e80941Smrg
1111b8e80941Smrg      if (relocs->num_relocs > 0) {
1112b8e80941Smrg         /* This is the first time we've ever seen a list of relocations for
1113b8e80941Smrg          * this BO.  Go ahead and set the relocations and then walk the list
1114b8e80941Smrg          * of relocations and add them all.
1115b8e80941Smrg          */
1116b8e80941Smrg         exec->has_relocs = true;
1117b8e80941Smrg         obj->relocation_count = relocs->num_relocs;
1118b8e80941Smrg         obj->relocs_ptr = (uintptr_t) relocs->relocs;
1119b8e80941Smrg
1120b8e80941Smrg         for (size_t i = 0; i < relocs->num_relocs; i++) {
1121b8e80941Smrg            VkResult result;
1122b8e80941Smrg
1123b8e80941Smrg            /* A quick sanity check on relocations */
1124b8e80941Smrg            assert(relocs->relocs[i].offset < bo->size);
            /* Recurse (with NULL relocs) so every relocation target is
             * also part of the execbuf object list.
             */
1125b8e80941Smrg            result = anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL,
1126b8e80941Smrg                                        extra_flags, alloc);
1127b8e80941Smrg
1128b8e80941Smrg            if (result != VK_SUCCESS)
1129b8e80941Smrg               return result;
1130b8e80941Smrg         }
1131b8e80941Smrg      }
1132b8e80941Smrg
      /* Also pull in the reloc list's BO dependency set. */
1133b8e80941Smrg      return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc);
1134b8e80941Smrg   }
1135b8e80941Smrg
1136b8e80941Smrg   return VK_SUCCESS;
1137b8e80941Smrg}
1138b8e80941Smrg
1139b8e80941Smrg/* Add BO dependencies to execbuf */
1140b8e80941Smrgstatic VkResult
1141b8e80941Smrganv_execbuf_add_bo_set(struct anv_execbuf *exec,
1142b8e80941Smrg                       struct set *deps,
1143b8e80941Smrg                       uint32_t extra_flags,
1144b8e80941Smrg                       const VkAllocationCallbacks *alloc)
1145b8e80941Smrg{
1146b8e80941Smrg   if (!deps || deps->entries <= 0)
1147b8e80941Smrg      return VK_SUCCESS;
1148b8e80941Smrg
1149b8e80941Smrg   const uint32_t entries = deps->entries;
1150b8e80941Smrg   struct anv_bo **bos =
1151b8e80941Smrg      vk_alloc(alloc, entries * sizeof(*bos),
1152b8e80941Smrg               8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1153b8e80941Smrg   if (bos == NULL)
1154b8e80941Smrg      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1155b8e80941Smrg
1156b8e80941Smrg   struct anv_bo **bo = bos;
1157b8e80941Smrg   set_foreach(deps, entry) {
1158b8e80941Smrg      *bo++ = (void *)entry->key;
1159b8e80941Smrg   }
1160b8e80941Smrg
1161b8e80941Smrg   qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles);
1162b8e80941Smrg
1163b8e80941Smrg   VkResult result = VK_SUCCESS;
1164b8e80941Smrg   for (bo = bos; bo < bos + entries; bo++) {
1165b8e80941Smrg      result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc);
1166b8e80941Smrg      if (result != VK_SUCCESS)
1167b8e80941Smrg         break;
1168b8e80941Smrg   }
1169b8e80941Smrg
1170b8e80941Smrg   vk_free(alloc, bos);
1171b8e80941Smrg
1172b8e80941Smrg   return result;
1173b8e80941Smrg}
1174b8e80941Smrg
1175b8e80941Smrgstatic VkResult
1176b8e80941Smrganv_execbuf_add_syncobj(struct anv_execbuf *exec,
1177b8e80941Smrg                        uint32_t handle, uint32_t flags,
1178b8e80941Smrg                        const VkAllocationCallbacks *alloc)
1179b8e80941Smrg{
1180b8e80941Smrg   assert(flags != 0);
1181b8e80941Smrg
1182b8e80941Smrg   if (exec->fence_count >= exec->fence_array_length) {
1183b8e80941Smrg      uint32_t new_len = MAX2(exec->fence_array_length * 2, 64);
1184b8e80941Smrg
1185b8e80941Smrg      exec->fences = vk_realloc(alloc, exec->fences,
1186b8e80941Smrg                                new_len * sizeof(*exec->fences),
1187b8e80941Smrg                                8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
1188b8e80941Smrg      if (exec->fences == NULL)
1189b8e80941Smrg         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1190b8e80941Smrg
1191b8e80941Smrg      exec->fence_array_length = new_len;
1192b8e80941Smrg   }
1193b8e80941Smrg
1194b8e80941Smrg   exec->fences[exec->fence_count] = (struct drm_i915_gem_exec_fence) {
1195b8e80941Smrg      .handle = handle,
1196b8e80941Smrg      .flags = flags,
1197b8e80941Smrg   };
1198b8e80941Smrg
1199b8e80941Smrg   exec->fence_count++;
1200b8e80941Smrg
1201b8e80941Smrg   return VK_SUCCESS;
1202b8e80941Smrg}
1203b8e80941Smrg
1204b8e80941Smrgstatic void
1205b8e80941Smrganv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
1206b8e80941Smrg                              struct anv_reloc_list *list)
1207b8e80941Smrg{
1208b8e80941Smrg   for (size_t i = 0; i < list->num_relocs; i++)
1209b8e80941Smrg      list->relocs[i].target_handle = list->reloc_bos[i]->index;
1210b8e80941Smrg}
1211b8e80941Smrg
1212b8e80941Smrgstatic void
1213b8e80941Smrgadjust_relocations_from_state_pool(struct anv_state_pool *pool,
1214b8e80941Smrg                                   struct anv_reloc_list *relocs,
1215b8e80941Smrg                                   uint32_t last_pool_center_bo_offset)
1216b8e80941Smrg{
1217b8e80941Smrg   assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
1218b8e80941Smrg   uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;
1219b8e80941Smrg
1220b8e80941Smrg   for (size_t i = 0; i < relocs->num_relocs; i++) {
1221b8e80941Smrg      /* All of the relocations from this block pool to other BO's should
1222b8e80941Smrg       * have been emitted relative to the surface block pool center.  We
1223b8e80941Smrg       * need to add the center offset to make them relative to the
1224b8e80941Smrg       * beginning of the actual GEM bo.
1225b8e80941Smrg       */
1226b8e80941Smrg      relocs->relocs[i].offset += delta;
1227b8e80941Smrg   }
1228b8e80941Smrg}
1229b8e80941Smrg
/* Patch relocations in `from_bo` (a batch buffer) that target the state
 * pool's BO: both the relocation delta and the already-written presumed
 * address must track the pool's center_bo_offset.
 */
1230b8e80941Smrgstatic void
1231b8e80941Smrgadjust_relocations_to_state_pool(struct anv_state_pool *pool,
1232b8e80941Smrg                                 struct anv_bo *from_bo,
1233b8e80941Smrg                                 struct anv_reloc_list *relocs,
1234b8e80941Smrg                                 uint32_t last_pool_center_bo_offset)
1235b8e80941Smrg{
   /* The pool only grows, so the center offset can only have increased. */
1236b8e80941Smrg   assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
1237b8e80941Smrg   uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;
1238b8e80941Smrg
1239b8e80941Smrg   /* When we initially emit relocations into a block pool, we don't
1240b8e80941Smrg    * actually know what the final center_bo_offset will be so we just emit
1241b8e80941Smrg    * it as if center_bo_offset == 0.  Now that we know what the center
1242b8e80941Smrg    * offset is, we need to walk the list of relocations and adjust any
1243b8e80941Smrg    * relocations that point to the pool bo with the correct offset.
1244b8e80941Smrg    */
1245b8e80941Smrg   for (size_t i = 0; i < relocs->num_relocs; i++) {
1246b8e80941Smrg      if (relocs->reloc_bos[i] == pool->block_pool.bo) {
1247b8e80941Smrg         /* Adjust the delta value in the relocation to correctly
1248b8e80941Smrg          * correspond to the new delta.  Initially, this value may have
1249b8e80941Smrg          * been negative (if treated as unsigned), but we trust in
1250b8e80941Smrg          * uint32_t roll-over to fix that for us at this point.
1251b8e80941Smrg          */
1252b8e80941Smrg         relocs->relocs[i].delta += delta;
1253b8e80941Smrg
1254b8e80941Smrg         /* Since the delta has changed, we need to update the actual
1255b8e80941Smrg          * relocated value with the new presumed value.  This function
1256b8e80941Smrg          * should only be called on batch buffers, so we know it isn't in
1257b8e80941Smrg          * use by the GPU at the moment.
1258b8e80941Smrg          */
1259b8e80941Smrg         assert(relocs->relocs[i].offset < from_bo->size);
1260b8e80941Smrg         write_reloc(pool->block_pool.device,
1261b8e80941Smrg                     from_bo->map + relocs->relocs[i].offset,
1262b8e80941Smrg                     relocs->relocs[i].presumed_offset +
1263b8e80941Smrg                     relocs->relocs[i].delta, false);
1264b8e80941Smrg      }
1265b8e80941Smrg   }
1266b8e80941Smrg}
1267b8e80941Smrg
1268b8e80941Smrgstatic void
1269b8e80941Smrganv_reloc_list_apply(struct anv_device *device,
1270b8e80941Smrg                     struct anv_reloc_list *list,
1271b8e80941Smrg                     struct anv_bo *bo,
1272b8e80941Smrg                     bool always_relocate)
1273b8e80941Smrg{
1274b8e80941Smrg   for (size_t i = 0; i < list->num_relocs; i++) {
1275b8e80941Smrg      struct anv_bo *target_bo = list->reloc_bos[i];
1276b8e80941Smrg      if (list->relocs[i].presumed_offset == target_bo->offset &&
1277b8e80941Smrg          !always_relocate)
1278b8e80941Smrg         continue;
1279b8e80941Smrg
1280b8e80941Smrg      void *p = bo->map + list->relocs[i].offset;
1281b8e80941Smrg      write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true);
1282b8e80941Smrg      list->relocs[i].presumed_offset = target_bo->offset;
1283b8e80941Smrg   }
1284b8e80941Smrg}
1285b8e80941Smrg
1286b8e80941Smrg/**
1287b8e80941Smrg * This function applies the relocation for a command buffer and writes the
1288b8e80941Smrg * actual addresses into the buffers as per what we were told by the kernel on
1289b8e80941Smrg * the previous execbuf2 call.  This should be safe to do because, for each
1290b8e80941Smrg * relocated address, we have two cases:
1291b8e80941Smrg *
1292b8e80941Smrg *  1) The target BO is inactive (as seen by the kernel).  In this case, it is
1293b8e80941Smrg *     not in use by the GPU so updating the address is 100% ok.  It won't be
1294b8e80941Smrg *     in-use by the GPU (from our context) again until the next execbuf2
1295b8e80941Smrg *     happens.  If the kernel decides to move it in the next execbuf2, it
1296b8e80941Smrg *     will have to do the relocations itself, but that's ok because it should
1297b8e80941Smrg *     have all of the information needed to do so.
1298b8e80941Smrg *
1299b8e80941Smrg *  2) The target BO is active (as seen by the kernel).  In this case, it
1300b8e80941Smrg *     hasn't moved since the last execbuffer2 call because GTT shuffling
1301b8e80941Smrg *     *only* happens when the BO is idle. (From our perspective, it only
1302b8e80941Smrg *     happens inside the execbuffer2 ioctl, but the shuffling may be
1303b8e80941Smrg *     triggered by another ioctl, with full-ppgtt this is limited to only
1304b8e80941Smrg *     execbuffer2 ioctls on the same context, or memory pressure.)  Since the
1305b8e80941Smrg *     target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT
1306b8e80941Smrg *     address and the relocated value we are writing into the BO will be the
1307b8e80941Smrg *     same as the value that is already there.
1308b8e80941Smrg *
1309b8e80941Smrg *     There is also a possibility that the target BO is active but the exact
1310b8e80941Smrg *     RENDER_SURFACE_STATE object we are writing the relocation into isn't in
1311b8e80941Smrg *     use.  In this case, the address currently in the RENDER_SURFACE_STATE
1312b8e80941Smrg *     may be stale but it's still safe to write the relocation because that
1313b8e80941Smrg *     particular RENDER_SURFACE_STATE object isn't in-use by the GPU and
1314b8e80941Smrg *     won't be until the next execbuf2 call.
1315b8e80941Smrg *
1316b8e80941Smrg * By doing relocations on the CPU, we can tell the kernel that it doesn't
1317b8e80941Smrg * need to bother.  We want to do this because the surface state buffer is
1318b8e80941Smrg * used by every command buffer so, if the kernel does the relocations, it
1319b8e80941Smrg * will always be busy and the kernel will always stall.  This is also
1320b8e80941Smrg * probably the fastest mechanism for doing relocations since the kernel would
1321b8e80941Smrg * have to make a full copy of all the relocations lists.
1322b8e80941Smrg */
1323b8e80941Smrgstatic bool
1324b8e80941Smrgrelocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
1325b8e80941Smrg                    struct anv_execbuf *exec)
1326b8e80941Smrg{
1327b8e80941Smrg   if (!exec->has_relocs)
1328b8e80941Smrg      return true;
1329b8e80941Smrg
1330b8e80941Smrg   static int userspace_relocs = -1;
1331b8e80941Smrg   if (userspace_relocs < 0)
1332b8e80941Smrg      userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true);
1333b8e80941Smrg   if (!userspace_relocs)
1334b8e80941Smrg      return false;
1335b8e80941Smrg
1336b8e80941Smrg   /* First, we have to check to see whether or not we can even do the
1337b8e80941Smrg    * relocation.  New buffers which have never been submitted to the kernel
1338b8e80941Smrg    * don't have a valid offset so we need to let the kernel do relocations so
1339b8e80941Smrg    * that we can get offsets for them.  On future execbuf2 calls, those
1340b8e80941Smrg    * buffers will have offsets and we will be able to skip relocating.
1341b8e80941Smrg    * Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1.
1342b8e80941Smrg    */
1343b8e80941Smrg   for (uint32_t i = 0; i < exec->bo_count; i++) {
1344b8e80941Smrg      if (exec->bos[i]->offset == (uint64_t)-1)
1345b8e80941Smrg         return false;
1346b8e80941Smrg   }
1347b8e80941Smrg
1348b8e80941Smrg   /* Since surface states are shared between command buffers and we don't
1349b8e80941Smrg    * know what order they will be submitted to the kernel, we don't know
1350b8e80941Smrg    * what address is actually written in the surface state object at any
1351b8e80941Smrg    * given time.  The only option is to always relocate them.
1352b8e80941Smrg    */
1353b8e80941Smrg   anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
1354b8e80941Smrg                        cmd_buffer->device->surface_state_pool.block_pool.bo,
1355b8e80941Smrg                        true /* always relocate surface states */);
1356b8e80941Smrg
1357b8e80941Smrg   /* Since we own all of the batch buffers, we know what values are stored
1358b8e80941Smrg    * in the relocated addresses and only have to update them if the offsets
1359b8e80941Smrg    * have changed.
1360b8e80941Smrg    */
1361b8e80941Smrg   struct anv_batch_bo **bbo;
1362b8e80941Smrg   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1363b8e80941Smrg      anv_reloc_list_apply(cmd_buffer->device,
1364b8e80941Smrg                           &(*bbo)->relocs, &(*bbo)->bo, false);
1365b8e80941Smrg   }
1366b8e80941Smrg
1367b8e80941Smrg   for (uint32_t i = 0; i < exec->bo_count; i++)
1368b8e80941Smrg      exec->objects[i].offset = exec->bos[i]->offset;
1369b8e80941Smrg
1370b8e80941Smrg   return true;
1371b8e80941Smrg}
1372b8e80941Smrg
1373b8e80941Smrgstatic VkResult
1374b8e80941Smrgsetup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
1375b8e80941Smrg                             struct anv_cmd_buffer *cmd_buffer)
1376b8e80941Smrg{
1377b8e80941Smrg   struct anv_batch *batch = &cmd_buffer->batch;
1378b8e80941Smrg   struct anv_state_pool *ss_pool =
1379b8e80941Smrg      &cmd_buffer->device->surface_state_pool;
1380b8e80941Smrg
1381b8e80941Smrg   adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
1382b8e80941Smrg                                      cmd_buffer->last_ss_pool_center);
1383b8e80941Smrg   VkResult result;
1384b8e80941Smrg   struct anv_bo *bo;
1385b8e80941Smrg   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
1386b8e80941Smrg      anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) {
1387b8e80941Smrg         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1388b8e80941Smrg                                     &cmd_buffer->device->alloc);
1389b8e80941Smrg         if (result != VK_SUCCESS)
1390b8e80941Smrg            return result;
1391b8e80941Smrg      }
1392b8e80941Smrg      /* Add surface dependencies (BOs) to the execbuf */
1393b8e80941Smrg      anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0,
1394b8e80941Smrg                             &cmd_buffer->device->alloc);
1395b8e80941Smrg
1396b8e80941Smrg      /* Add the BOs for all memory objects */
1397b8e80941Smrg      list_for_each_entry(struct anv_device_memory, mem,
1398b8e80941Smrg                          &cmd_buffer->device->memory_objects, link) {
1399b8e80941Smrg         result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
1400b8e80941Smrg                                     &cmd_buffer->device->alloc);
1401b8e80941Smrg         if (result != VK_SUCCESS)
1402b8e80941Smrg            return result;
1403b8e80941Smrg      }
1404b8e80941Smrg
1405b8e80941Smrg      struct anv_block_pool *pool;
1406b8e80941Smrg      pool = &cmd_buffer->device->dynamic_state_pool.block_pool;
1407b8e80941Smrg      anv_block_pool_foreach_bo(bo, pool) {
1408b8e80941Smrg         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1409b8e80941Smrg                                     &cmd_buffer->device->alloc);
1410b8e80941Smrg         if (result != VK_SUCCESS)
1411b8e80941Smrg            return result;
1412b8e80941Smrg      }
1413b8e80941Smrg
1414b8e80941Smrg      pool = &cmd_buffer->device->instruction_state_pool.block_pool;
1415b8e80941Smrg      anv_block_pool_foreach_bo(bo, pool) {
1416b8e80941Smrg         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1417b8e80941Smrg                                     &cmd_buffer->device->alloc);
1418b8e80941Smrg         if (result != VK_SUCCESS)
1419b8e80941Smrg            return result;
1420b8e80941Smrg      }
1421b8e80941Smrg
1422b8e80941Smrg      pool = &cmd_buffer->device->binding_table_pool.block_pool;
1423b8e80941Smrg      anv_block_pool_foreach_bo(bo, pool) {
1424b8e80941Smrg         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
1425b8e80941Smrg                                     &cmd_buffer->device->alloc);
1426b8e80941Smrg         if (result != VK_SUCCESS)
1427b8e80941Smrg            return result;
1428b8e80941Smrg      }
1429b8e80941Smrg   } else {
1430b8e80941Smrg      /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs
1431b8e80941Smrg       * will get added automatically by processing relocations on the batch
1432b8e80941Smrg       * buffer.  We have to add the surface state BO manually because it has
1433b8e80941Smrg       * relocations of its own that we need to be sure are processsed.
1434b8e80941Smrg       */
1435b8e80941Smrg      result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo,
1436b8e80941Smrg                                  &cmd_buffer->surface_relocs, 0,
1437b8e80941Smrg                                  &cmd_buffer->device->alloc);
1438b8e80941Smrg      if (result != VK_SUCCESS)
1439b8e80941Smrg         return result;
1440b8e80941Smrg   }
1441b8e80941Smrg
1442b8e80941Smrg   /* First, we walk over all of the bos we've seen and add them and their
1443b8e80941Smrg    * relocations to the validate list.
1444b8e80941Smrg    */
1445b8e80941Smrg   struct anv_batch_bo **bbo;
1446b8e80941Smrg   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1447b8e80941Smrg      adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs,
1448b8e80941Smrg                                       cmd_buffer->last_ss_pool_center);
1449b8e80941Smrg
1450b8e80941Smrg      result = anv_execbuf_add_bo(execbuf, &(*bbo)->bo, &(*bbo)->relocs, 0,
1451b8e80941Smrg                                  &cmd_buffer->device->alloc);
1452b8e80941Smrg      if (result != VK_SUCCESS)
1453b8e80941Smrg         return result;
1454b8e80941Smrg   }
1455b8e80941Smrg
1456b8e80941Smrg   /* Now that we've adjusted all of the surface state relocations, we need to
1457b8e80941Smrg    * record the surface state pool center so future executions of the command
1458b8e80941Smrg    * buffer can adjust correctly.
1459b8e80941Smrg    */
1460b8e80941Smrg   cmd_buffer->last_ss_pool_center = ss_pool->block_pool.center_bo_offset;
1461b8e80941Smrg
1462b8e80941Smrg   struct anv_batch_bo *first_batch_bo =
1463b8e80941Smrg      list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link);
1464b8e80941Smrg
1465b8e80941Smrg   /* The kernel requires that the last entry in the validation list be the
1466b8e80941Smrg    * batch buffer to execute.  We can simply swap the element
1467b8e80941Smrg    * corresponding to the first batch_bo in the chain with the last
1468b8e80941Smrg    * element in the list.
1469b8e80941Smrg    */
1470b8e80941Smrg   if (first_batch_bo->bo.index != execbuf->bo_count - 1) {
1471b8e80941Smrg      uint32_t idx = first_batch_bo->bo.index;
1472b8e80941Smrg      uint32_t last_idx = execbuf->bo_count - 1;
1473b8e80941Smrg
1474b8e80941Smrg      struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx];
1475b8e80941Smrg      assert(execbuf->bos[idx] == &first_batch_bo->bo);
1476b8e80941Smrg
1477b8e80941Smrg      execbuf->objects[idx] = execbuf->objects[last_idx];
1478b8e80941Smrg      execbuf->bos[idx] = execbuf->bos[last_idx];
1479b8e80941Smrg      execbuf->bos[idx]->index = idx;
1480b8e80941Smrg
1481b8e80941Smrg      execbuf->objects[last_idx] = tmp_obj;
1482b8e80941Smrg      execbuf->bos[last_idx] = &first_batch_bo->bo;
1483b8e80941Smrg      first_batch_bo->bo.index = last_idx;
1484b8e80941Smrg   }
1485b8e80941Smrg
1486b8e80941Smrg   /* If we are pinning our BOs, we shouldn't have to relocate anything */
1487b8e80941Smrg   if (cmd_buffer->device->instance->physicalDevice.use_softpin)
1488b8e80941Smrg      assert(!execbuf->has_relocs);
1489b8e80941Smrg
1490b8e80941Smrg   /* Now we go through and fixup all of the relocation lists to point to
1491b8e80941Smrg    * the correct indices in the object array.  We have to do this after we
1492b8e80941Smrg    * reorder the list above as some of the indices may have changed.
1493b8e80941Smrg    */
1494b8e80941Smrg   if (execbuf->has_relocs) {
1495b8e80941Smrg      u_vector_foreach(bbo, &cmd_buffer->seen_bbos)
1496b8e80941Smrg         anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs);
1497b8e80941Smrg
1498b8e80941Smrg      anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
1499b8e80941Smrg   }
1500b8e80941Smrg
1501b8e80941Smrg   if (!cmd_buffer->device->info.has_llc) {
1502b8e80941Smrg      __builtin_ia32_mfence();
1503b8e80941Smrg      u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
1504b8e80941Smrg         for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE)
1505b8e80941Smrg            __builtin_ia32_clflush((*bbo)->bo.map + i);
1506b8e80941Smrg      }
1507b8e80941Smrg   }
1508b8e80941Smrg
1509b8e80941Smrg   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
1510b8e80941Smrg      .buffers_ptr = (uintptr_t) execbuf->objects,
1511b8e80941Smrg      .buffer_count = execbuf->bo_count,
1512b8e80941Smrg      .batch_start_offset = 0,
1513b8e80941Smrg      .batch_len = batch->next - batch->start,
1514b8e80941Smrg      .cliprects_ptr = 0,
1515b8e80941Smrg      .num_cliprects = 0,
1516b8e80941Smrg      .DR1 = 0,
1517b8e80941Smrg      .DR4 = 0,
1518b8e80941Smrg      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
1519b8e80941Smrg      .rsvd1 = cmd_buffer->device->context_id,
1520b8e80941Smrg      .rsvd2 = 0,
1521b8e80941Smrg   };
1522b8e80941Smrg
1523b8e80941Smrg   if (relocate_cmd_buffer(cmd_buffer, execbuf)) {
1524b8e80941Smrg      /* If we were able to successfully relocate everything, tell the kernel
1525b8e80941Smrg       * that it can skip doing relocations. The requirement for using
1526b8e80941Smrg       * NO_RELOC is:
1527b8e80941Smrg       *
1528b8e80941Smrg       *  1) The addresses written in the objects must match the corresponding
1529b8e80941Smrg       *     reloc.presumed_offset which in turn must match the corresponding
1530b8e80941Smrg       *     execobject.offset.
1531b8e80941Smrg       *
1532b8e80941Smrg       *  2) To avoid stalling, execobject.offset should match the current
1533b8e80941Smrg       *     address of that object within the active context.
1534b8e80941Smrg       *
1535b8e80941Smrg       * In order to satisfy all of the invariants that make userspace
1536b8e80941Smrg       * relocations to be safe (see relocate_cmd_buffer()), we need to
1537b8e80941Smrg       * further ensure that the addresses we use match those used by the
1538b8e80941Smrg       * kernel for the most recent execbuf2.
1539b8e80941Smrg       *
1540b8e80941Smrg       * The kernel may still choose to do relocations anyway if something has
1541b8e80941Smrg       * moved in the GTT. In this case, the relocation list still needs to be
1542b8e80941Smrg       * valid.  All relocations on the batch buffers are already valid and
1543b8e80941Smrg       * kept up-to-date.  For surface state relocations, by applying the
1544b8e80941Smrg       * relocations in relocate_cmd_buffer, we ensured that the address in
1545b8e80941Smrg       * the RENDER_SURFACE_STATE matches presumed_offset, so it should be
1546b8e80941Smrg       * safe for the kernel to relocate them as needed.
1547b8e80941Smrg       */
1548b8e80941Smrg      execbuf->execbuf.flags |= I915_EXEC_NO_RELOC;
1549b8e80941Smrg   } else {
1550b8e80941Smrg      /* In the case where we fall back to doing kernel relocations, we need
1551b8e80941Smrg       * to ensure that the relocation list is valid.  All relocations on the
1552b8e80941Smrg       * batch buffers are already valid and kept up-to-date.  Since surface
1553b8e80941Smrg       * states are shared between command buffers and we don't know what
1554b8e80941Smrg       * order they will be submitted to the kernel, we don't know what
1555b8e80941Smrg       * address is actually written in the surface state object at any given
1556b8e80941Smrg       * time.  The only option is to set a bogus presumed offset and let the
1557b8e80941Smrg       * kernel relocate them.
1558b8e80941Smrg       */
1559b8e80941Smrg      for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++)
1560b8e80941Smrg         cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1;
1561b8e80941Smrg   }
1562b8e80941Smrg
1563b8e80941Smrg   return VK_SUCCESS;
1564b8e80941Smrg}
1565b8e80941Smrg
1566b8e80941Smrgstatic VkResult
1567b8e80941Smrgsetup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device)
1568b8e80941Smrg{
1569b8e80941Smrg   VkResult result = anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo,
1570b8e80941Smrg                                        NULL, 0, &device->alloc);
1571b8e80941Smrg   if (result != VK_SUCCESS)
1572b8e80941Smrg      return result;
1573b8e80941Smrg
1574b8e80941Smrg   execbuf->execbuf = (struct drm_i915_gem_execbuffer2) {
1575b8e80941Smrg      .buffers_ptr = (uintptr_t) execbuf->objects,
1576b8e80941Smrg      .buffer_count = execbuf->bo_count,
1577b8e80941Smrg      .batch_start_offset = 0,
1578b8e80941Smrg      .batch_len = 8, /* GEN7_MI_BATCH_BUFFER_END and NOOP */
1579b8e80941Smrg      .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER,
1580b8e80941Smrg      .rsvd1 = device->context_id,
1581b8e80941Smrg      .rsvd2 = 0,
1582b8e80941Smrg   };
1583b8e80941Smrg
1584b8e80941Smrg   return VK_SUCCESS;
1585b8e80941Smrg}
1586b8e80941Smrg
1587b8e80941SmrgVkResult
1588b8e80941Smrganv_cmd_buffer_execbuf(struct anv_device *device,
1589b8e80941Smrg                       struct anv_cmd_buffer *cmd_buffer,
1590b8e80941Smrg                       const VkSemaphore *in_semaphores,
1591b8e80941Smrg                       uint32_t num_in_semaphores,
1592b8e80941Smrg                       const VkSemaphore *out_semaphores,
1593b8e80941Smrg                       uint32_t num_out_semaphores,
1594b8e80941Smrg                       VkFence _fence)
1595b8e80941Smrg{
1596b8e80941Smrg   ANV_FROM_HANDLE(anv_fence, fence, _fence);
1597b8e80941Smrg
1598b8e80941Smrg   struct anv_execbuf execbuf;
1599b8e80941Smrg   anv_execbuf_init(&execbuf);
1600b8e80941Smrg
1601b8e80941Smrg   int in_fence = -1;
1602b8e80941Smrg   VkResult result = VK_SUCCESS;
1603b8e80941Smrg   for (uint32_t i = 0; i < num_in_semaphores; i++) {
1604b8e80941Smrg      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
1605b8e80941Smrg      struct anv_semaphore_impl *impl =
1606b8e80941Smrg         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
1607b8e80941Smrg         &semaphore->temporary : &semaphore->permanent;
1608b8e80941Smrg
1609b8e80941Smrg      switch (impl->type) {
1610b8e80941Smrg      case ANV_SEMAPHORE_TYPE_BO:
1611b8e80941Smrg         result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
1612b8e80941Smrg                                     0, &device->alloc);
1613b8e80941Smrg         if (result != VK_SUCCESS)
1614b8e80941Smrg            return result;
1615b8e80941Smrg         break;
1616b8e80941Smrg
1617b8e80941Smrg      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
1618b8e80941Smrg         if (in_fence == -1) {
1619b8e80941Smrg            in_fence = impl->fd;
1620b8e80941Smrg         } else {
1621b8e80941Smrg            int merge = anv_gem_sync_file_merge(device, in_fence, impl->fd);
1622b8e80941Smrg            if (merge == -1)
1623b8e80941Smrg               return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE);
1624b8e80941Smrg
1625b8e80941Smrg            close(impl->fd);
1626b8e80941Smrg            close(in_fence);
1627b8e80941Smrg            in_fence = merge;
1628b8e80941Smrg         }
1629b8e80941Smrg
1630b8e80941Smrg         impl->fd = -1;
1631b8e80941Smrg         break;
1632b8e80941Smrg
1633b8e80941Smrg      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
1634b8e80941Smrg         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
1635b8e80941Smrg                                          I915_EXEC_FENCE_WAIT,
1636b8e80941Smrg                                          &device->alloc);
1637b8e80941Smrg         if (result != VK_SUCCESS)
1638b8e80941Smrg            return result;
1639b8e80941Smrg         break;
1640b8e80941Smrg
1641b8e80941Smrg      default:
1642b8e80941Smrg         break;
1643b8e80941Smrg      }
1644b8e80941Smrg   }
1645b8e80941Smrg
1646b8e80941Smrg   bool need_out_fence = false;
1647b8e80941Smrg   for (uint32_t i = 0; i < num_out_semaphores; i++) {
1648b8e80941Smrg      ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
1649b8e80941Smrg
1650b8e80941Smrg      /* Under most circumstances, out fences won't be temporary.  However,
1651b8e80941Smrg       * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
1652b8e80941Smrg       *
1653b8e80941Smrg       *    "If the import is temporary, the implementation must restore the
1654b8e80941Smrg       *    semaphore to its prior permanent state after submitting the next
1655b8e80941Smrg       *    semaphore wait operation."
1656b8e80941Smrg       *
1657b8e80941Smrg       * The spec says nothing whatsoever about signal operations on
1658b8e80941Smrg       * temporarily imported semaphores so it appears they are allowed.
1659b8e80941Smrg       * There are also CTS tests that require this to work.
1660b8e80941Smrg       */
1661b8e80941Smrg      struct anv_semaphore_impl *impl =
1662b8e80941Smrg         semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ?
1663b8e80941Smrg         &semaphore->temporary : &semaphore->permanent;
1664b8e80941Smrg
1665b8e80941Smrg      switch (impl->type) {
1666b8e80941Smrg      case ANV_SEMAPHORE_TYPE_BO:
1667b8e80941Smrg         result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL,
1668b8e80941Smrg                                     EXEC_OBJECT_WRITE, &device->alloc);
1669b8e80941Smrg         if (result != VK_SUCCESS)
1670b8e80941Smrg            return result;
1671b8e80941Smrg         break;
1672b8e80941Smrg
1673b8e80941Smrg      case ANV_SEMAPHORE_TYPE_SYNC_FILE:
1674b8e80941Smrg         need_out_fence = true;
1675b8e80941Smrg         break;
1676b8e80941Smrg
1677b8e80941Smrg      case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ:
1678b8e80941Smrg         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
1679b8e80941Smrg                                          I915_EXEC_FENCE_SIGNAL,
1680b8e80941Smrg                                          &device->alloc);
1681b8e80941Smrg         if (result != VK_SUCCESS)
1682b8e80941Smrg            return result;
1683b8e80941Smrg         break;
1684b8e80941Smrg
1685b8e80941Smrg      default:
1686b8e80941Smrg         break;
1687b8e80941Smrg      }
1688b8e80941Smrg   }
1689b8e80941Smrg
1690b8e80941Smrg   if (fence) {
1691b8e80941Smrg      /* Under most circumstances, out fences won't be temporary.  However,
1692b8e80941Smrg       * the spec does allow it for opaque_fd.  From the Vulkan 1.0.53 spec:
1693b8e80941Smrg       *
1694b8e80941Smrg       *    "If the import is temporary, the implementation must restore the
1695b8e80941Smrg       *    semaphore to its prior permanent state after submitting the next
1696b8e80941Smrg       *    semaphore wait operation."
1697b8e80941Smrg       *
1698b8e80941Smrg       * The spec says nothing whatsoever about signal operations on
1699b8e80941Smrg       * temporarily imported semaphores so it appears they are allowed.
1700b8e80941Smrg       * There are also CTS tests that require this to work.
1701b8e80941Smrg       */
1702b8e80941Smrg      struct anv_fence_impl *impl =
1703b8e80941Smrg         fence->temporary.type != ANV_FENCE_TYPE_NONE ?
1704b8e80941Smrg         &fence->temporary : &fence->permanent;
1705b8e80941Smrg
1706b8e80941Smrg      switch (impl->type) {
1707b8e80941Smrg      case ANV_FENCE_TYPE_BO:
1708b8e80941Smrg         result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL,
1709b8e80941Smrg                                     EXEC_OBJECT_WRITE, &device->alloc);
1710b8e80941Smrg         if (result != VK_SUCCESS)
1711b8e80941Smrg            return result;
1712b8e80941Smrg         break;
1713b8e80941Smrg
1714b8e80941Smrg      case ANV_FENCE_TYPE_SYNCOBJ:
1715b8e80941Smrg         result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj,
1716b8e80941Smrg                                          I915_EXEC_FENCE_SIGNAL,
1717b8e80941Smrg                                          &device->alloc);
1718b8e80941Smrg         if (result != VK_SUCCESS)
1719b8e80941Smrg            return result;
1720b8e80941Smrg         break;
1721b8e80941Smrg
1722b8e80941Smrg      default:
1723b8e80941Smrg         unreachable("Invalid fence type");
1724b8e80941Smrg      }
1725b8e80941Smrg   }
1726b8e80941Smrg
1727b8e80941Smrg   if (cmd_buffer) {
1728b8e80941Smrg      if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) {
1729b8e80941Smrg         struct anv_batch_bo **bo = u_vector_head(&cmd_buffer->seen_bbos);
1730b8e80941Smrg
1731b8e80941Smrg         device->cmd_buffer_being_decoded = cmd_buffer;
1732b8e80941Smrg         gen_print_batch(&device->decoder_ctx, (*bo)->bo.map,
1733b8e80941Smrg                         (*bo)->bo.size, (*bo)->bo.offset, false);
1734b8e80941Smrg         device->cmd_buffer_being_decoded = NULL;
1735b8e80941Smrg      }
1736b8e80941Smrg
1737b8e80941Smrg      result = setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer);
1738b8e80941Smrg   } else {
1739b8e80941Smrg      result = setup_empty_execbuf(&execbuf, device);
1740b8e80941Smrg   }
1741b8e80941Smrg
1742b8e80941Smrg   if (result != VK_SUCCESS)
1743b8e80941Smrg      return result;
1744b8e80941Smrg
1745b8e80941Smrg   if (execbuf.fence_count > 0) {
1746b8e80941Smrg      assert(device->instance->physicalDevice.has_syncobj);
1747b8e80941Smrg      execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY;
1748b8e80941Smrg      execbuf.execbuf.num_cliprects = execbuf.fence_count;
1749b8e80941Smrg      execbuf.execbuf.cliprects_ptr = (uintptr_t) execbuf.fences;
1750b8e80941Smrg   }
1751b8e80941Smrg
1752b8e80941Smrg   if (in_fence != -1) {
1753b8e80941Smrg      execbuf.execbuf.flags |= I915_EXEC_FENCE_IN;
1754b8e80941Smrg      execbuf.execbuf.rsvd2 |= (uint32_t)in_fence;
1755b8e80941Smrg   }
1756b8e80941Smrg
1757b8e80941Smrg   if (need_out_fence)
1758b8e80941Smrg      execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT;
1759b8e80941Smrg
1760b8e80941Smrg   result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos);
1761b8e80941Smrg
1762b8e80941Smrg   /* Execbuf does not consume the in_fence.  It's our job to close it. */
1763b8e80941Smrg   if (in_fence != -1)
1764b8e80941Smrg      close(in_fence);
1765b8e80941Smrg
1766b8e80941Smrg   for (uint32_t i = 0; i < num_in_semaphores; i++) {
1767b8e80941Smrg      ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]);
1768b8e80941Smrg      /* From the Vulkan 1.0.53 spec:
1769b8e80941Smrg       *
1770b8e80941Smrg       *    "If the import is temporary, the implementation must restore the
1771b8e80941Smrg       *    semaphore to its prior permanent state after submitting the next
1772b8e80941Smrg       *    semaphore wait operation."
1773b8e80941Smrg       *
1774b8e80941Smrg       * This has to happen after the execbuf in case we close any syncobjs in
1775b8e80941Smrg       * the process.
1776b8e80941Smrg       */
1777b8e80941Smrg      anv_semaphore_reset_temporary(device, semaphore);
1778b8e80941Smrg   }
1779b8e80941Smrg
1780b8e80941Smrg   if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) {
1781b8e80941Smrg      /* BO fences can't be shared, so they can't be temporary. */
1782b8e80941Smrg      assert(fence->temporary.type == ANV_FENCE_TYPE_NONE);
1783b8e80941Smrg
1784b8e80941Smrg      /* Once the execbuf has returned, we need to set the fence state to
1785b8e80941Smrg       * SUBMITTED.  We can't do this before calling execbuf because
1786b8e80941Smrg       * anv_GetFenceStatus does take the global device lock before checking
1787b8e80941Smrg       * fence->state.
1788b8e80941Smrg       *
1789b8e80941Smrg       * We set the fence state to SUBMITTED regardless of whether or not the
1790b8e80941Smrg       * execbuf succeeds because we need to ensure that vkWaitForFences() and
1791b8e80941Smrg       * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or
1792b8e80941Smrg       * VK_SUCCESS) in a finite amount of time even if execbuf fails.
1793b8e80941Smrg       */
1794b8e80941Smrg      fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED;
1795b8e80941Smrg   }
1796b8e80941Smrg
1797b8e80941Smrg   if (result == VK_SUCCESS && need_out_fence) {
1798b8e80941Smrg      int out_fence = execbuf.execbuf.rsvd2 >> 32;
1799b8e80941Smrg      for (uint32_t i = 0; i < num_out_semaphores; i++) {
1800b8e80941Smrg         ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]);
1801b8e80941Smrg         /* Out fences can't have temporary state because that would imply
1802b8e80941Smrg          * that we imported a sync file and are trying to signal it.
1803b8e80941Smrg          */
1804b8e80941Smrg         assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE);
1805b8e80941Smrg         struct anv_semaphore_impl *impl = &semaphore->permanent;
1806b8e80941Smrg
1807b8e80941Smrg         if (impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE) {
1808b8e80941Smrg            assert(impl->fd == -1);
1809b8e80941Smrg            impl->fd = dup(out_fence);
1810b8e80941Smrg         }
1811b8e80941Smrg      }
1812b8e80941Smrg      close(out_fence);
1813b8e80941Smrg   }
1814b8e80941Smrg
1815b8e80941Smrg   anv_execbuf_finish(&execbuf, &device->alloc);
1816b8e80941Smrg
1817b8e80941Smrg   return result;
1818b8e80941Smrg}
1819