1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include <assert.h> 25b8e80941Smrg#include <stdbool.h> 26b8e80941Smrg#include <string.h> 27b8e80941Smrg#include <unistd.h> 28b8e80941Smrg#include <fcntl.h> 29b8e80941Smrg 30b8e80941Smrg#include "anv_private.h" 31b8e80941Smrg 32b8e80941Smrg#include "genxml/gen8_pack.h" 33b8e80941Smrg 34b8e80941Smrg#include "util/debug.h" 35b8e80941Smrg 36b8e80941Smrg/** \file anv_batch_chain.c 37b8e80941Smrg * 38b8e80941Smrg * This file contains functions related to anv_cmd_buffer as a data 39b8e80941Smrg * structure. 
This involves everything required to create and destroy
 * the actual batch buffers as well as link them together and handle
 * relocations and surface state.  It specifically does *not* contain any
 * handling of actual vkCmd calls beyond vkCmdExecuteCommands.
 */

/*-----------------------------------------------------------------------*
 * Functions related to anv_reloc_list
 *-----------------------------------------------------------------------*/

/* Initialize a relocation list, optionally cloning another list.
 *
 * When other_list is non-NULL, the new list copies its capacity,
 * relocation entries, BO pointers, and dependency set.  When other_list is
 * NULL, the list starts empty with a default capacity of 256 entries.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY on allocation failure; in that case
 * everything allocated so far has already been freed again.
 */
static VkResult
anv_reloc_list_init_clone(struct anv_reloc_list *list,
                          const VkAllocationCallbacks *alloc,
                          const struct anv_reloc_list *other_list)
{
   if (other_list) {
      list->num_relocs = other_list->num_relocs;
      list->array_length = other_list->array_length;
   } else {
      list->num_relocs = 0;
      list->array_length = 256;
   }

   list->relocs =
      vk_alloc(alloc, list->array_length * sizeof(*list->relocs), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (list->relocs == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   list->reloc_bos =
      vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (list->reloc_bos == NULL) {
      vk_free(alloc, list->relocs);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* deps is a pointer-keyed set of pinned BOs this list depends on; see
    * anv_reloc_list_add for where entries come from.
    */
   list->deps = _mesa_pointer_set_create(NULL);

   if (!list->deps) {
      vk_free(alloc, list->relocs);
      vk_free(alloc, list->reloc_bos);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   if (other_list) {
      /* NOTE(review): these copies cover array_length entries even though
       * only num_relocs of them are initialized.  This stays in bounds
       * (both arrays were allocated with array_length entries) but copies
       * uninitialized tail data — consider copying num_relocs instead.
       */
      memcpy(list->relocs, other_list->relocs,
             list->array_length * sizeof(*list->relocs));
      memcpy(list->reloc_bos, other_list->reloc_bos,
             list->array_length * sizeof(*list->reloc_bos));
      set_foreach(other_list->deps, entry) {
         _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
      }
   }

   return VK_SUCCESS;
}

/* Initialize an empty relocation list with the default capacity. */
VkResult
anv_reloc_list_init(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc)
{
   return anv_reloc_list_init_clone(list, alloc, NULL);
}

/* Free all memory owned by the relocation list.  The referenced BOs are
 * not owned by the list and are left alone.
 */
void
anv_reloc_list_finish(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc)
{
   vk_free(alloc, list->relocs);
   vk_free(alloc, list->reloc_bos);
   _mesa_set_destroy(list->deps, NULL);
}

/* Ensure the list has room for num_additional_relocs more entries.
 *
 * Doubles the capacity of the parallel relocs/reloc_bos arrays until the
 * request fits.  On failure the list is left untouched and
 * VK_ERROR_OUT_OF_HOST_MEMORY is returned.
 */
static VkResult
anv_reloc_list_grow(struct anv_reloc_list *list,
                    const VkAllocationCallbacks *alloc,
                    size_t num_additional_relocs)
{
   if (list->num_relocs + num_additional_relocs <= list->array_length)
      return VK_SUCCESS;

   size_t new_length = list->array_length * 2;
   while (new_length < list->num_relocs + num_additional_relocs)
      new_length *= 2;

   struct drm_i915_gem_relocation_entry *new_relocs =
      vk_alloc(alloc, new_length * sizeof(*list->relocs), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_relocs == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   struct anv_bo **new_reloc_bos =
      vk_alloc(alloc, new_length * sizeof(*list->reloc_bos), 8,
               VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (new_reloc_bos == NULL) {
      vk_free(alloc, new_relocs);
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   /* Only the first num_relocs entries are valid; copy just those. */
   memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
   memcpy(new_reloc_bos, list->reloc_bos,
          list->num_relocs * sizeof(*list->reloc_bos));

   vk_free(alloc, list->relocs);
   vk_free(alloc, list->reloc_bos);

   list->array_length = new_length;
   list->relocs = new_relocs;
   list->reloc_bos = new_reloc_bos;

   return VK_SUCCESS;
}

/* Record a relocation at byte `offset` pointing at target_bo + delta.
 *
 * Pinned (EXEC_OBJECT_PINNED) BOs have fixed GPU addresses, so no kernel
 * relocation entry is needed; the BO is only recorded in the deps set so
 * it still gets included at execbuf time.  For non-pinned BOs a
 * drm_i915_gem_relocation_entry is appended, using the BO's current
 * offset as the presumed offset.
 */
VkResult
anv_reloc_list_add(struct anv_reloc_list *list,
                   const VkAllocationCallbacks *alloc,
                   uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
{
   struct drm_i915_gem_relocation_entry *entry;
   int index;

   if (target_bo->flags & EXEC_OBJECT_PINNED) {
      _mesa_set_add(list->deps, target_bo);
      return VK_SUCCESS;
   }

   VkResult result = anv_reloc_list_grow(list, alloc, 1);
   if (result != VK_SUCCESS)
      return result;

   /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
   index = list->num_relocs++;
   list->reloc_bos[index] = target_bo;
   entry = &list->relocs[index];
   entry->target_handle = target_bo->gem_handle;
   entry->delta = delta;
   entry->offset = offset;
   entry->presumed_offset = target_bo->offset;
   entry->read_domains = 0;
   entry->write_domain = 0;
   VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry)));

   return VK_SUCCESS;
}

/* Append all of `other`'s relocations (and dependencies) to `list`.
 *
 * `offset` is the byte position at which other's batch contents were
 * copied into the batch `list` describes; it is added to each copied
 * relocation's offset so the entries point at the copied locations.
 */
static VkResult
anv_reloc_list_append(struct anv_reloc_list *list,
                      const VkAllocationCallbacks *alloc,
                      struct anv_reloc_list *other, uint32_t offset)
{
   VkResult result = anv_reloc_list_grow(list, alloc, other->num_relocs);
   if (result != VK_SUCCESS)
      return result;

   memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
          other->num_relocs * sizeof(other->relocs[0]));
   memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
          other->num_relocs * sizeof(other->reloc_bos[0]));

   /* Adjust the freshly copied entries; this must happen before
    * num_relocs is bumped below since the loop indexes relative to the
    * old count.
    */
   for (uint32_t i = 0; i < other->num_relocs; i++)
      list->relocs[i + list->num_relocs].offset += offset;

   list->num_relocs += other->num_relocs;

   set_foreach(other->deps, entry) {
      _mesa_set_add_pre_hashed(list->deps, entry->hash, entry->key);
   }

   return VK_SUCCESS;
}

/*-----------------------------------------------------------------------*
 * Functions related to anv_batch
 *-----------------------------------------------------------------------*/

217b8e80941Smrgvoid * 218b8e80941Smrganv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) 219b8e80941Smrg{ 220b8e80941Smrg if (batch->next + num_dwords * 4 > batch->end) { 221b8e80941Smrg VkResult result = batch->extend_cb(batch, batch->user_data); 222b8e80941Smrg if (result != VK_SUCCESS) { 223b8e80941Smrg anv_batch_set_error(batch, result); 224b8e80941Smrg return NULL; 225b8e80941Smrg } 226b8e80941Smrg } 227b8e80941Smrg 228b8e80941Smrg void *p = batch->next; 229b8e80941Smrg 230b8e80941Smrg batch->next += num_dwords * 4; 231b8e80941Smrg assert(batch->next <= batch->end); 232b8e80941Smrg 233b8e80941Smrg return p; 234b8e80941Smrg} 235b8e80941Smrg 236b8e80941Smrguint64_t 237b8e80941Smrganv_batch_emit_reloc(struct anv_batch *batch, 238b8e80941Smrg void *location, struct anv_bo *bo, uint32_t delta) 239b8e80941Smrg{ 240b8e80941Smrg VkResult result = anv_reloc_list_add(batch->relocs, batch->alloc, 241b8e80941Smrg location - batch->start, bo, delta); 242b8e80941Smrg if (result != VK_SUCCESS) { 243b8e80941Smrg anv_batch_set_error(batch, result); 244b8e80941Smrg return 0; 245b8e80941Smrg } 246b8e80941Smrg 247b8e80941Smrg return bo->offset + delta; 248b8e80941Smrg} 249b8e80941Smrg 250b8e80941Smrgvoid 251b8e80941Smrganv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) 252b8e80941Smrg{ 253b8e80941Smrg uint32_t size, offset; 254b8e80941Smrg 255b8e80941Smrg size = other->next - other->start; 256b8e80941Smrg assert(size % 4 == 0); 257b8e80941Smrg 258b8e80941Smrg if (batch->next + size > batch->end) { 259b8e80941Smrg VkResult result = batch->extend_cb(batch, batch->user_data); 260b8e80941Smrg if (result != VK_SUCCESS) { 261b8e80941Smrg anv_batch_set_error(batch, result); 262b8e80941Smrg return; 263b8e80941Smrg } 264b8e80941Smrg } 265b8e80941Smrg 266b8e80941Smrg assert(batch->next + size <= batch->end); 267b8e80941Smrg 268b8e80941Smrg VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); 269b8e80941Smrg memcpy(batch->next, other->start, size); 
270b8e80941Smrg 271b8e80941Smrg offset = batch->next - batch->start; 272b8e80941Smrg VkResult result = anv_reloc_list_append(batch->relocs, batch->alloc, 273b8e80941Smrg other->relocs, offset); 274b8e80941Smrg if (result != VK_SUCCESS) { 275b8e80941Smrg anv_batch_set_error(batch, result); 276b8e80941Smrg return; 277b8e80941Smrg } 278b8e80941Smrg 279b8e80941Smrg batch->next += size; 280b8e80941Smrg} 281b8e80941Smrg 282b8e80941Smrg/*-----------------------------------------------------------------------* 283b8e80941Smrg * Functions related to anv_batch_bo 284b8e80941Smrg *-----------------------------------------------------------------------*/ 285b8e80941Smrg 286b8e80941Smrgstatic VkResult 287b8e80941Smrganv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, 288b8e80941Smrg struct anv_batch_bo **bbo_out) 289b8e80941Smrg{ 290b8e80941Smrg VkResult result; 291b8e80941Smrg 292b8e80941Smrg struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), 293b8e80941Smrg 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 294b8e80941Smrg if (bbo == NULL) 295b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 296b8e80941Smrg 297b8e80941Smrg result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, 298b8e80941Smrg ANV_CMD_BUFFER_BATCH_SIZE); 299b8e80941Smrg if (result != VK_SUCCESS) 300b8e80941Smrg goto fail_alloc; 301b8e80941Smrg 302b8e80941Smrg result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); 303b8e80941Smrg if (result != VK_SUCCESS) 304b8e80941Smrg goto fail_bo_alloc; 305b8e80941Smrg 306b8e80941Smrg *bbo_out = bbo; 307b8e80941Smrg 308b8e80941Smrg return VK_SUCCESS; 309b8e80941Smrg 310b8e80941Smrg fail_bo_alloc: 311b8e80941Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); 312b8e80941Smrg fail_alloc: 313b8e80941Smrg vk_free(&cmd_buffer->pool->alloc, bbo); 314b8e80941Smrg 315b8e80941Smrg return result; 316b8e80941Smrg} 317b8e80941Smrg 318b8e80941Smrgstatic VkResult 319b8e80941Smrganv_batch_bo_clone(struct 
anv_cmd_buffer *cmd_buffer, 320b8e80941Smrg const struct anv_batch_bo *other_bbo, 321b8e80941Smrg struct anv_batch_bo **bbo_out) 322b8e80941Smrg{ 323b8e80941Smrg VkResult result; 324b8e80941Smrg 325b8e80941Smrg struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), 326b8e80941Smrg 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 327b8e80941Smrg if (bbo == NULL) 328b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 329b8e80941Smrg 330b8e80941Smrg result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, &bbo->bo, 331b8e80941Smrg other_bbo->bo.size); 332b8e80941Smrg if (result != VK_SUCCESS) 333b8e80941Smrg goto fail_alloc; 334b8e80941Smrg 335b8e80941Smrg result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, 336b8e80941Smrg &other_bbo->relocs); 337b8e80941Smrg if (result != VK_SUCCESS) 338b8e80941Smrg goto fail_bo_alloc; 339b8e80941Smrg 340b8e80941Smrg bbo->length = other_bbo->length; 341b8e80941Smrg memcpy(bbo->bo.map, other_bbo->bo.map, other_bbo->length); 342b8e80941Smrg 343b8e80941Smrg *bbo_out = bbo; 344b8e80941Smrg 345b8e80941Smrg return VK_SUCCESS; 346b8e80941Smrg 347b8e80941Smrg fail_bo_alloc: 348b8e80941Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); 349b8e80941Smrg fail_alloc: 350b8e80941Smrg vk_free(&cmd_buffer->pool->alloc, bbo); 351b8e80941Smrg 352b8e80941Smrg return result; 353b8e80941Smrg} 354b8e80941Smrg 355b8e80941Smrgstatic void 356b8e80941Smrganv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, 357b8e80941Smrg size_t batch_padding) 358b8e80941Smrg{ 359b8e80941Smrg batch->next = batch->start = bbo->bo.map; 360b8e80941Smrg batch->end = bbo->bo.map + bbo->bo.size - batch_padding; 361b8e80941Smrg batch->relocs = &bbo->relocs; 362b8e80941Smrg bbo->relocs.num_relocs = 0; 363b8e80941Smrg _mesa_set_clear(bbo->relocs.deps, NULL); 364b8e80941Smrg} 365b8e80941Smrg 366b8e80941Smrgstatic void 367b8e80941Smrganv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, 
368b8e80941Smrg size_t batch_padding) 369b8e80941Smrg{ 370b8e80941Smrg batch->start = bbo->bo.map; 371b8e80941Smrg batch->next = bbo->bo.map + bbo->length; 372b8e80941Smrg batch->end = bbo->bo.map + bbo->bo.size - batch_padding; 373b8e80941Smrg batch->relocs = &bbo->relocs; 374b8e80941Smrg} 375b8e80941Smrg 376b8e80941Smrgstatic void 377b8e80941Smrganv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) 378b8e80941Smrg{ 379b8e80941Smrg assert(batch->start == bbo->bo.map); 380b8e80941Smrg bbo->length = batch->next - batch->start; 381b8e80941Smrg VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); 382b8e80941Smrg} 383b8e80941Smrg 384b8e80941Smrgstatic VkResult 385b8e80941Smrganv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo, 386b8e80941Smrg struct anv_batch *batch, size_t aditional, 387b8e80941Smrg size_t batch_padding) 388b8e80941Smrg{ 389b8e80941Smrg assert(batch->start == bbo->bo.map); 390b8e80941Smrg bbo->length = batch->next - batch->start; 391b8e80941Smrg 392b8e80941Smrg size_t new_size = bbo->bo.size; 393b8e80941Smrg while (new_size <= bbo->length + aditional + batch_padding) 394b8e80941Smrg new_size *= 2; 395b8e80941Smrg 396b8e80941Smrg if (new_size == bbo->bo.size) 397b8e80941Smrg return VK_SUCCESS; 398b8e80941Smrg 399b8e80941Smrg struct anv_bo new_bo; 400b8e80941Smrg VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, 401b8e80941Smrg &new_bo, new_size); 402b8e80941Smrg if (result != VK_SUCCESS) 403b8e80941Smrg return result; 404b8e80941Smrg 405b8e80941Smrg memcpy(new_bo.map, bbo->bo.map, bbo->length); 406b8e80941Smrg 407b8e80941Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); 408b8e80941Smrg 409b8e80941Smrg bbo->bo = new_bo; 410b8e80941Smrg anv_batch_bo_continue(bbo, batch, batch_padding); 411b8e80941Smrg 412b8e80941Smrg return VK_SUCCESS; 413b8e80941Smrg} 414b8e80941Smrg 415b8e80941Smrgstatic void 416b8e80941Smrganv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer, 
417b8e80941Smrg struct anv_batch_bo *prev_bbo, 418b8e80941Smrg struct anv_batch_bo *next_bbo, 419b8e80941Smrg uint32_t next_bbo_offset) 420b8e80941Smrg{ 421b8e80941Smrg MAYBE_UNUSED const uint32_t bb_start_offset = 422b8e80941Smrg prev_bbo->length - GEN8_MI_BATCH_BUFFER_START_length * 4; 423b8e80941Smrg MAYBE_UNUSED const uint32_t *bb_start = prev_bbo->bo.map + bb_start_offset; 424b8e80941Smrg 425b8e80941Smrg /* Make sure we're looking at a MI_BATCH_BUFFER_START */ 426b8e80941Smrg assert(((*bb_start >> 29) & 0x07) == 0); 427b8e80941Smrg assert(((*bb_start >> 23) & 0x3f) == 49); 428b8e80941Smrg 429b8e80941Smrg if (cmd_buffer->device->instance->physicalDevice.use_softpin) { 430b8e80941Smrg assert(prev_bbo->bo.flags & EXEC_OBJECT_PINNED); 431b8e80941Smrg assert(next_bbo->bo.flags & EXEC_OBJECT_PINNED); 432b8e80941Smrg 433b8e80941Smrg write_reloc(cmd_buffer->device, 434b8e80941Smrg prev_bbo->bo.map + bb_start_offset + 4, 435b8e80941Smrg next_bbo->bo.offset + next_bbo_offset, true); 436b8e80941Smrg } else { 437b8e80941Smrg uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1; 438b8e80941Smrg assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4); 439b8e80941Smrg 440b8e80941Smrg prev_bbo->relocs.reloc_bos[reloc_idx] = &next_bbo->bo; 441b8e80941Smrg prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset; 442b8e80941Smrg 443b8e80941Smrg /* Use a bogus presumed offset to force a relocation */ 444b8e80941Smrg prev_bbo->relocs.relocs[reloc_idx].presumed_offset = -1; 445b8e80941Smrg } 446b8e80941Smrg} 447b8e80941Smrg 448b8e80941Smrgstatic void 449b8e80941Smrganv_batch_bo_destroy(struct anv_batch_bo *bbo, 450b8e80941Smrg struct anv_cmd_buffer *cmd_buffer) 451b8e80941Smrg{ 452b8e80941Smrg anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); 453b8e80941Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, &bbo->bo); 454b8e80941Smrg vk_free(&cmd_buffer->pool->alloc, bbo); 455b8e80941Smrg} 456b8e80941Smrg 457b8e80941Smrgstatic VkResult 
458b8e80941Smrganv_batch_bo_list_clone(const struct list_head *list, 459b8e80941Smrg struct anv_cmd_buffer *cmd_buffer, 460b8e80941Smrg struct list_head *new_list) 461b8e80941Smrg{ 462b8e80941Smrg VkResult result = VK_SUCCESS; 463b8e80941Smrg 464b8e80941Smrg list_inithead(new_list); 465b8e80941Smrg 466b8e80941Smrg struct anv_batch_bo *prev_bbo = NULL; 467b8e80941Smrg list_for_each_entry(struct anv_batch_bo, bbo, list, link) { 468b8e80941Smrg struct anv_batch_bo *new_bbo = NULL; 469b8e80941Smrg result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); 470b8e80941Smrg if (result != VK_SUCCESS) 471b8e80941Smrg break; 472b8e80941Smrg list_addtail(&new_bbo->link, new_list); 473b8e80941Smrg 474b8e80941Smrg if (prev_bbo) 475b8e80941Smrg anv_batch_bo_link(cmd_buffer, prev_bbo, new_bbo, 0); 476b8e80941Smrg 477b8e80941Smrg prev_bbo = new_bbo; 478b8e80941Smrg } 479b8e80941Smrg 480b8e80941Smrg if (result != VK_SUCCESS) { 481b8e80941Smrg list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) 482b8e80941Smrg anv_batch_bo_destroy(bbo, cmd_buffer); 483b8e80941Smrg } 484b8e80941Smrg 485b8e80941Smrg return result; 486b8e80941Smrg} 487b8e80941Smrg 488b8e80941Smrg/*-----------------------------------------------------------------------* 489b8e80941Smrg * Functions related to anv_batch_bo 490b8e80941Smrg *-----------------------------------------------------------------------*/ 491b8e80941Smrg 492b8e80941Smrgstatic struct anv_batch_bo * 493b8e80941Smrganv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) 494b8e80941Smrg{ 495b8e80941Smrg return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); 496b8e80941Smrg} 497b8e80941Smrg 498b8e80941Smrgstruct anv_address 499b8e80941Smrganv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) 500b8e80941Smrg{ 501b8e80941Smrg struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states); 502b8e80941Smrg return (struct anv_address) { 503b8e80941Smrg .bo = 
anv_binding_table_pool(cmd_buffer->device)->block_pool.bo, 504b8e80941Smrg .offset = bt_block->offset, 505b8e80941Smrg }; 506b8e80941Smrg} 507b8e80941Smrg 508b8e80941Smrgstatic void 509b8e80941Smrgemit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, 510b8e80941Smrg struct anv_bo *bo, uint32_t offset) 511b8e80941Smrg{ 512b8e80941Smrg /* In gen8+ the address field grew to two dwords to accomodate 48 bit 513b8e80941Smrg * offsets. The high 16 bits are in the last dword, so we can use the gen8 514b8e80941Smrg * version in either case, as long as we set the instruction length in the 515b8e80941Smrg * header accordingly. This means that we always emit three dwords here 516b8e80941Smrg * and all the padding and adjustment we do in this file works for all 517b8e80941Smrg * gens. 518b8e80941Smrg */ 519b8e80941Smrg 520b8e80941Smrg#define GEN7_MI_BATCH_BUFFER_START_length 2 521b8e80941Smrg#define GEN7_MI_BATCH_BUFFER_START_length_bias 2 522b8e80941Smrg 523b8e80941Smrg const uint32_t gen7_length = 524b8e80941Smrg GEN7_MI_BATCH_BUFFER_START_length - GEN7_MI_BATCH_BUFFER_START_length_bias; 525b8e80941Smrg const uint32_t gen8_length = 526b8e80941Smrg GEN8_MI_BATCH_BUFFER_START_length - GEN8_MI_BATCH_BUFFER_START_length_bias; 527b8e80941Smrg 528b8e80941Smrg anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_START, bbs) { 529b8e80941Smrg bbs.DWordLength = cmd_buffer->device->info.gen < 8 ? 
530b8e80941Smrg gen7_length : gen8_length; 531b8e80941Smrg bbs.SecondLevelBatchBuffer = Firstlevelbatch; 532b8e80941Smrg bbs.AddressSpaceIndicator = ASI_PPGTT; 533b8e80941Smrg bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset }; 534b8e80941Smrg } 535b8e80941Smrg} 536b8e80941Smrg 537b8e80941Smrgstatic void 538b8e80941Smrgcmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, 539b8e80941Smrg struct anv_batch_bo *bbo) 540b8e80941Smrg{ 541b8e80941Smrg struct anv_batch *batch = &cmd_buffer->batch; 542b8e80941Smrg struct anv_batch_bo *current_bbo = 543b8e80941Smrg anv_cmd_buffer_current_batch_bo(cmd_buffer); 544b8e80941Smrg 545b8e80941Smrg /* We set the end of the batch a little short so we would be sure we 546b8e80941Smrg * have room for the chaining command. Since we're about to emit the 547b8e80941Smrg * chaining command, let's set it back where it should go. 548b8e80941Smrg */ 549b8e80941Smrg batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4; 550b8e80941Smrg assert(batch->end == current_bbo->bo.map + current_bbo->bo.size); 551b8e80941Smrg 552b8e80941Smrg emit_batch_buffer_start(cmd_buffer, &bbo->bo, 0); 553b8e80941Smrg 554b8e80941Smrg anv_batch_bo_finish(current_bbo, batch); 555b8e80941Smrg} 556b8e80941Smrg 557b8e80941Smrgstatic VkResult 558b8e80941Smrganv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) 559b8e80941Smrg{ 560b8e80941Smrg struct anv_cmd_buffer *cmd_buffer = _data; 561b8e80941Smrg struct anv_batch_bo *new_bbo; 562b8e80941Smrg 563b8e80941Smrg VkResult result = anv_batch_bo_create(cmd_buffer, &new_bbo); 564b8e80941Smrg if (result != VK_SUCCESS) 565b8e80941Smrg return result; 566b8e80941Smrg 567b8e80941Smrg struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos); 568b8e80941Smrg if (seen_bbo == NULL) { 569b8e80941Smrg anv_batch_bo_destroy(new_bbo, cmd_buffer); 570b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 571b8e80941Smrg } 572b8e80941Smrg *seen_bbo = new_bbo; 573b8e80941Smrg 574b8e80941Smrg 
cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); 575b8e80941Smrg 576b8e80941Smrg list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); 577b8e80941Smrg 578b8e80941Smrg anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4); 579b8e80941Smrg 580b8e80941Smrg return VK_SUCCESS; 581b8e80941Smrg} 582b8e80941Smrg 583b8e80941Smrgstatic VkResult 584b8e80941Smrganv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data) 585b8e80941Smrg{ 586b8e80941Smrg struct anv_cmd_buffer *cmd_buffer = _data; 587b8e80941Smrg struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); 588b8e80941Smrg 589b8e80941Smrg anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096, 590b8e80941Smrg GEN8_MI_BATCH_BUFFER_START_length * 4); 591b8e80941Smrg 592b8e80941Smrg return VK_SUCCESS; 593b8e80941Smrg} 594b8e80941Smrg 595b8e80941Smrg/** Allocate a binding table 596b8e80941Smrg * 597b8e80941Smrg * This function allocates a binding table. This is a bit more complicated 598b8e80941Smrg * than one would think due to a combination of Vulkan driver design and some 599b8e80941Smrg * unfortunate hardware restrictions. 600b8e80941Smrg * 601b8e80941Smrg * The 3DSTATE_BINDING_TABLE_POINTERS_* packets only have a 16-bit field for 602b8e80941Smrg * the binding table pointer which means that all binding tables need to live 603b8e80941Smrg * in the bottom 64k of surface state base address. The way the GL driver has 604b8e80941Smrg * classically dealt with this restriction is to emit all surface states 605b8e80941Smrg * on-the-fly into the batch and have a batch buffer smaller than 64k. This 606b8e80941Smrg * isn't really an option in Vulkan for a couple of reasons: 607b8e80941Smrg * 608b8e80941Smrg * 1) In Vulkan, we have growing (or chaining) batches so surface states have 609b8e80941Smrg * to live in their own buffer and we have to be able to re-emit 610b8e80941Smrg * STATE_BASE_ADDRESS as needed which requires a full pipeline stall. 
In 611b8e80941Smrg * order to avoid emitting STATE_BASE_ADDRESS any more often than needed 612b8e80941Smrg * (it's not that hard to hit 64k of just binding tables), we allocate 613b8e80941Smrg * surface state objects up-front when VkImageView is created. In order 614b8e80941Smrg * for this to work, surface state objects need to be allocated from a 615b8e80941Smrg * global buffer. 616b8e80941Smrg * 617b8e80941Smrg * 2) We tried to design the surface state system in such a way that it's 618b8e80941Smrg * already ready for bindless texturing. The way bindless texturing works 619b8e80941Smrg * on our hardware is that you have a big pool of surface state objects 620b8e80941Smrg * (with its own state base address) and the bindless handles are simply 621b8e80941Smrg * offsets into that pool. With the architecture we chose, we already 622b8e80941Smrg * have that pool and it's exactly the same pool that we use for regular 623b8e80941Smrg * surface states so we should already be ready for bindless. 624b8e80941Smrg * 625b8e80941Smrg * 3) For render targets, we need to be able to fill out the surface states 626b8e80941Smrg * later in vkBeginRenderPass so that we can assign clear colors 627b8e80941Smrg * correctly. One way to do this would be to just create the surface 628b8e80941Smrg * state data and then repeatedly copy it into the surface state BO every 629b8e80941Smrg * time we have to re-emit STATE_BASE_ADDRESS. While this works, it's 630b8e80941Smrg * rather annoying and just being able to allocate them up-front and 631b8e80941Smrg * re-use them for the entire render pass. 632b8e80941Smrg * 633b8e80941Smrg * While none of these are technically blockers for emitting state on the fly 634b8e80941Smrg * like we do in GL, the ability to have a single surface state pool is 635b8e80941Smrg * simplifies things greatly. Unfortunately, it comes at a cost... 
636b8e80941Smrg * 637b8e80941Smrg * Because of the 64k limitation of 3DSTATE_BINDING_TABLE_POINTERS_*, we can't 638b8e80941Smrg * place the binding tables just anywhere in surface state base address. 639b8e80941Smrg * Because 64k isn't a whole lot of space, we can't simply restrict the 640b8e80941Smrg * surface state buffer to 64k, we have to be more clever. The solution we've 641b8e80941Smrg * chosen is to have a block pool with a maximum size of 2G that starts at 642b8e80941Smrg * zero and grows in both directions. All surface states are allocated from 643b8e80941Smrg * the top of the pool (positive offsets) and we allocate blocks (< 64k) of 644b8e80941Smrg * binding tables from the bottom of the pool (negative offsets). Every time 645b8e80941Smrg * we allocate a new binding table block, we set surface state base address to 646b8e80941Smrg * point to the bottom of the binding table block. This way all of the 647b8e80941Smrg * binding tables in the block are in the bottom 64k of surface state base 648b8e80941Smrg * address. When we fill out the binding table, we add the distance between 649b8e80941Smrg * the bottom of our binding table block and zero of the block pool to the 650b8e80941Smrg * surface state offsets so that they are correct relative to out new surface 651b8e80941Smrg * state base address at the bottom of the binding table block. 652b8e80941Smrg * 653b8e80941Smrg * \see adjust_relocations_from_block_pool() 654b8e80941Smrg * \see adjust_relocations_too_block_pool() 655b8e80941Smrg * 656b8e80941Smrg * \param[in] entries The number of surface state entries the binding 657b8e80941Smrg * table should be able to hold. 658b8e80941Smrg * 659b8e80941Smrg * \param[out] state_offset The offset surface surface state base address 660b8e80941Smrg * where the surface states live. This must be 661b8e80941Smrg * added to the surface state offset when it is 662b8e80941Smrg * written into the binding table entry. 
663b8e80941Smrg * 664b8e80941Smrg * \return An anv_state representing the binding table 665b8e80941Smrg */ 666b8e80941Smrgstruct anv_state 667b8e80941Smrganv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer, 668b8e80941Smrg uint32_t entries, uint32_t *state_offset) 669b8e80941Smrg{ 670b8e80941Smrg struct anv_device *device = cmd_buffer->device; 671b8e80941Smrg struct anv_state_pool *state_pool = &device->surface_state_pool; 672b8e80941Smrg struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states); 673b8e80941Smrg struct anv_state state; 674b8e80941Smrg 675b8e80941Smrg state.alloc_size = align_u32(entries * 4, 32); 676b8e80941Smrg 677b8e80941Smrg if (cmd_buffer->bt_next + state.alloc_size > state_pool->block_size) 678b8e80941Smrg return (struct anv_state) { 0 }; 679b8e80941Smrg 680b8e80941Smrg state.offset = cmd_buffer->bt_next; 681b8e80941Smrg state.map = anv_block_pool_map(&anv_binding_table_pool(device)->block_pool, 682b8e80941Smrg bt_block->offset + state.offset); 683b8e80941Smrg 684b8e80941Smrg cmd_buffer->bt_next += state.alloc_size; 685b8e80941Smrg 686b8e80941Smrg if (device->instance->physicalDevice.use_softpin) { 687b8e80941Smrg assert(bt_block->offset >= 0); 688b8e80941Smrg *state_offset = device->surface_state_pool.block_pool.start_address - 689b8e80941Smrg device->binding_table_pool.block_pool.start_address - bt_block->offset; 690b8e80941Smrg } else { 691b8e80941Smrg assert(bt_block->offset < 0); 692b8e80941Smrg *state_offset = -bt_block->offset; 693b8e80941Smrg } 694b8e80941Smrg 695b8e80941Smrg return state; 696b8e80941Smrg} 697b8e80941Smrg 698b8e80941Smrgstruct anv_state 699b8e80941Smrganv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer) 700b8e80941Smrg{ 701b8e80941Smrg struct isl_device *isl_dev = &cmd_buffer->device->isl_dev; 702b8e80941Smrg return anv_state_stream_alloc(&cmd_buffer->surface_state_stream, 703b8e80941Smrg isl_dev->ss.size, isl_dev->ss.align); 704b8e80941Smrg} 705b8e80941Smrg 
706b8e80941Smrgstruct anv_state 707b8e80941Smrganv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer, 708b8e80941Smrg uint32_t size, uint32_t alignment) 709b8e80941Smrg{ 710b8e80941Smrg return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, 711b8e80941Smrg size, alignment); 712b8e80941Smrg} 713b8e80941Smrg 714b8e80941SmrgVkResult 715b8e80941Smrganv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer) 716b8e80941Smrg{ 717b8e80941Smrg struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_block_states); 718b8e80941Smrg if (bt_block == NULL) { 719b8e80941Smrg anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY); 720b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 721b8e80941Smrg } 722b8e80941Smrg 723b8e80941Smrg *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device); 724b8e80941Smrg cmd_buffer->bt_next = 0; 725b8e80941Smrg 726b8e80941Smrg return VK_SUCCESS; 727b8e80941Smrg} 728b8e80941Smrg 729b8e80941SmrgVkResult 730b8e80941Smrganv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) 731b8e80941Smrg{ 732b8e80941Smrg struct anv_batch_bo *batch_bo; 733b8e80941Smrg VkResult result; 734b8e80941Smrg 735b8e80941Smrg list_inithead(&cmd_buffer->batch_bos); 736b8e80941Smrg 737b8e80941Smrg result = anv_batch_bo_create(cmd_buffer, &batch_bo); 738b8e80941Smrg if (result != VK_SUCCESS) 739b8e80941Smrg return result; 740b8e80941Smrg 741b8e80941Smrg list_addtail(&batch_bo->link, &cmd_buffer->batch_bos); 742b8e80941Smrg 743b8e80941Smrg cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc; 744b8e80941Smrg cmd_buffer->batch.user_data = cmd_buffer; 745b8e80941Smrg 746b8e80941Smrg if (cmd_buffer->device->can_chain_batches) { 747b8e80941Smrg cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch; 748b8e80941Smrg } else { 749b8e80941Smrg cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch; 750b8e80941Smrg } 751b8e80941Smrg 752b8e80941Smrg anv_batch_bo_start(batch_bo, &cmd_buffer->batch, 
753b8e80941Smrg GEN8_MI_BATCH_BUFFER_START_length * 4); 754b8e80941Smrg 755b8e80941Smrg int success = u_vector_init(&cmd_buffer->seen_bbos, 756b8e80941Smrg sizeof(struct anv_bo *), 757b8e80941Smrg 8 * sizeof(struct anv_bo *)); 758b8e80941Smrg if (!success) 759b8e80941Smrg goto fail_batch_bo; 760b8e80941Smrg 761b8e80941Smrg *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = batch_bo; 762b8e80941Smrg 763b8e80941Smrg /* u_vector requires power-of-two size elements */ 764b8e80941Smrg unsigned pow2_state_size = util_next_power_of_two(sizeof(struct anv_state)); 765b8e80941Smrg success = u_vector_init(&cmd_buffer->bt_block_states, 766b8e80941Smrg pow2_state_size, 8 * pow2_state_size); 767b8e80941Smrg if (!success) 768b8e80941Smrg goto fail_seen_bbos; 769b8e80941Smrg 770b8e80941Smrg result = anv_reloc_list_init(&cmd_buffer->surface_relocs, 771b8e80941Smrg &cmd_buffer->pool->alloc); 772b8e80941Smrg if (result != VK_SUCCESS) 773b8e80941Smrg goto fail_bt_blocks; 774b8e80941Smrg cmd_buffer->last_ss_pool_center = 0; 775b8e80941Smrg 776b8e80941Smrg result = anv_cmd_buffer_new_binding_table_block(cmd_buffer); 777b8e80941Smrg if (result != VK_SUCCESS) 778b8e80941Smrg goto fail_bt_blocks; 779b8e80941Smrg 780b8e80941Smrg return VK_SUCCESS; 781b8e80941Smrg 782b8e80941Smrg fail_bt_blocks: 783b8e80941Smrg u_vector_finish(&cmd_buffer->bt_block_states); 784b8e80941Smrg fail_seen_bbos: 785b8e80941Smrg u_vector_finish(&cmd_buffer->seen_bbos); 786b8e80941Smrg fail_batch_bo: 787b8e80941Smrg anv_batch_bo_destroy(batch_bo, cmd_buffer); 788b8e80941Smrg 789b8e80941Smrg return result; 790b8e80941Smrg} 791b8e80941Smrg 792b8e80941Smrgvoid 793b8e80941Smrganv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) 794b8e80941Smrg{ 795b8e80941Smrg struct anv_state *bt_block; 796b8e80941Smrg u_vector_foreach(bt_block, &cmd_buffer->bt_block_states) 797b8e80941Smrg anv_binding_table_pool_free(cmd_buffer->device, *bt_block); 798b8e80941Smrg 
u_vector_finish(&cmd_buffer->bt_block_states); 799b8e80941Smrg 800b8e80941Smrg anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc); 801b8e80941Smrg 802b8e80941Smrg u_vector_finish(&cmd_buffer->seen_bbos); 803b8e80941Smrg 804b8e80941Smrg /* Destroy all of the batch buffers */ 805b8e80941Smrg list_for_each_entry_safe(struct anv_batch_bo, bbo, 806b8e80941Smrg &cmd_buffer->batch_bos, link) { 807b8e80941Smrg anv_batch_bo_destroy(bbo, cmd_buffer); 808b8e80941Smrg } 809b8e80941Smrg} 810b8e80941Smrg 811b8e80941Smrgvoid 812b8e80941Smrganv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer) 813b8e80941Smrg{ 814b8e80941Smrg /* Delete all but the first batch bo */ 815b8e80941Smrg assert(!list_empty(&cmd_buffer->batch_bos)); 816b8e80941Smrg while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) { 817b8e80941Smrg struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); 818b8e80941Smrg list_del(&bbo->link); 819b8e80941Smrg anv_batch_bo_destroy(bbo, cmd_buffer); 820b8e80941Smrg } 821b8e80941Smrg assert(!list_empty(&cmd_buffer->batch_bos)); 822b8e80941Smrg 823b8e80941Smrg anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer), 824b8e80941Smrg &cmd_buffer->batch, 825b8e80941Smrg GEN8_MI_BATCH_BUFFER_START_length * 4); 826b8e80941Smrg 827b8e80941Smrg while (u_vector_length(&cmd_buffer->bt_block_states) > 1) { 828b8e80941Smrg struct anv_state *bt_block = u_vector_remove(&cmd_buffer->bt_block_states); 829b8e80941Smrg anv_binding_table_pool_free(cmd_buffer->device, *bt_block); 830b8e80941Smrg } 831b8e80941Smrg assert(u_vector_length(&cmd_buffer->bt_block_states) == 1); 832b8e80941Smrg cmd_buffer->bt_next = 0; 833b8e80941Smrg 834b8e80941Smrg cmd_buffer->surface_relocs.num_relocs = 0; 835b8e80941Smrg _mesa_set_clear(cmd_buffer->surface_relocs.deps, NULL); 836b8e80941Smrg cmd_buffer->last_ss_pool_center = 0; 837b8e80941Smrg 838b8e80941Smrg /* Reset the list of seen buffers */ 839b8e80941Smrg cmd_buffer->seen_bbos.head 
= 0; 840b8e80941Smrg cmd_buffer->seen_bbos.tail = 0; 841b8e80941Smrg 842b8e80941Smrg *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = 843b8e80941Smrg anv_cmd_buffer_current_batch_bo(cmd_buffer); 844b8e80941Smrg} 845b8e80941Smrg 846b8e80941Smrgvoid 847b8e80941Smrganv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer) 848b8e80941Smrg{ 849b8e80941Smrg struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer); 850b8e80941Smrg 851b8e80941Smrg if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) { 852b8e80941Smrg /* When we start a batch buffer, we subtract a certain amount of 853b8e80941Smrg * padding from the end to ensure that we always have room to emit a 854b8e80941Smrg * BATCH_BUFFER_START to chain to the next BO. We need to remove 855b8e80941Smrg * that padding before we end the batch; otherwise, we may end up 856b8e80941Smrg * with our BATCH_BUFFER_END in another BO. 857b8e80941Smrg */ 858b8e80941Smrg cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; 859b8e80941Smrg assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); 860b8e80941Smrg 861b8e80941Smrg anv_batch_emit(&cmd_buffer->batch, GEN8_MI_BATCH_BUFFER_END, bbe); 862b8e80941Smrg 863b8e80941Smrg /* Round batch up to an even number of dwords. */ 864b8e80941Smrg if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4) 865b8e80941Smrg anv_batch_emit(&cmd_buffer->batch, GEN8_MI_NOOP, noop); 866b8e80941Smrg 867b8e80941Smrg cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY; 868b8e80941Smrg } else { 869b8e80941Smrg assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY); 870b8e80941Smrg /* If this is a secondary command buffer, we need to determine the 871b8e80941Smrg * mode in which it will be executed with vkExecuteCommands. We 872b8e80941Smrg * determine this statically here so that this stays in sync with the 873b8e80941Smrg * actual ExecuteCommands implementation. 
874b8e80941Smrg */ 875b8e80941Smrg const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start; 876b8e80941Smrg if (!cmd_buffer->device->can_chain_batches) { 877b8e80941Smrg cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT; 878b8e80941Smrg } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) && 879b8e80941Smrg (length < ANV_CMD_BUFFER_BATCH_SIZE / 2)) { 880b8e80941Smrg /* If the secondary has exactly one batch buffer in its list *and* 881b8e80941Smrg * that batch buffer is less than half of the maximum size, we're 882b8e80941Smrg * probably better of simply copying it into our batch. 883b8e80941Smrg */ 884b8e80941Smrg cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT; 885b8e80941Smrg } else if (!(cmd_buffer->usage_flags & 886b8e80941Smrg VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) { 887b8e80941Smrg cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN; 888b8e80941Smrg 889b8e80941Smrg /* In order to chain, we need this command buffer to contain an 890b8e80941Smrg * MI_BATCH_BUFFER_START which will jump back to the calling batch. 891b8e80941Smrg * It doesn't matter where it points now so long as has a valid 892b8e80941Smrg * relocation. We'll adjust it later as part of the chaining 893b8e80941Smrg * process. 894b8e80941Smrg * 895b8e80941Smrg * We set the end of the batch a little short so we would be sure we 896b8e80941Smrg * have room for the chaining command. Since we're about to emit the 897b8e80941Smrg * chaining command, let's set it back where it should go. 
898b8e80941Smrg */ 899b8e80941Smrg cmd_buffer->batch.end += GEN8_MI_BATCH_BUFFER_START_length * 4; 900b8e80941Smrg assert(cmd_buffer->batch.start == batch_bo->bo.map); 901b8e80941Smrg assert(cmd_buffer->batch.end == batch_bo->bo.map + batch_bo->bo.size); 902b8e80941Smrg 903b8e80941Smrg emit_batch_buffer_start(cmd_buffer, &batch_bo->bo, 0); 904b8e80941Smrg assert(cmd_buffer->batch.start == batch_bo->bo.map); 905b8e80941Smrg } else { 906b8e80941Smrg cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN; 907b8e80941Smrg } 908b8e80941Smrg } 909b8e80941Smrg 910b8e80941Smrg anv_batch_bo_finish(batch_bo, &cmd_buffer->batch); 911b8e80941Smrg} 912b8e80941Smrg 913b8e80941Smrgstatic VkResult 914b8e80941Smrganv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer, 915b8e80941Smrg struct list_head *list) 916b8e80941Smrg{ 917b8e80941Smrg list_for_each_entry(struct anv_batch_bo, bbo, list, link) { 918b8e80941Smrg struct anv_batch_bo **bbo_ptr = u_vector_add(&cmd_buffer->seen_bbos); 919b8e80941Smrg if (bbo_ptr == NULL) 920b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 921b8e80941Smrg 922b8e80941Smrg *bbo_ptr = bbo; 923b8e80941Smrg } 924b8e80941Smrg 925b8e80941Smrg return VK_SUCCESS; 926b8e80941Smrg} 927b8e80941Smrg 928b8e80941Smrgvoid 929b8e80941Smrganv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary, 930b8e80941Smrg struct anv_cmd_buffer *secondary) 931b8e80941Smrg{ 932b8e80941Smrg switch (secondary->exec_mode) { 933b8e80941Smrg case ANV_CMD_BUFFER_EXEC_MODE_EMIT: 934b8e80941Smrg anv_batch_emit_batch(&primary->batch, &secondary->batch); 935b8e80941Smrg break; 936b8e80941Smrg case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: { 937b8e80941Smrg struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary); 938b8e80941Smrg unsigned length = secondary->batch.end - secondary->batch.start; 939b8e80941Smrg anv_batch_bo_grow(primary, bbo, &primary->batch, length, 940b8e80941Smrg GEN8_MI_BATCH_BUFFER_START_length * 4); 941b8e80941Smrg 
anv_batch_emit_batch(&primary->batch, &secondary->batch); 942b8e80941Smrg break; 943b8e80941Smrg } 944b8e80941Smrg case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { 945b8e80941Smrg struct anv_batch_bo *first_bbo = 946b8e80941Smrg list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); 947b8e80941Smrg struct anv_batch_bo *last_bbo = 948b8e80941Smrg list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); 949b8e80941Smrg 950b8e80941Smrg emit_batch_buffer_start(primary, &first_bbo->bo, 0); 951b8e80941Smrg 952b8e80941Smrg struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); 953b8e80941Smrg assert(primary->batch.start == this_bbo->bo.map); 954b8e80941Smrg uint32_t offset = primary->batch.next - primary->batch.start; 955b8e80941Smrg 956b8e80941Smrg /* Make the tail of the secondary point back to right after the 957b8e80941Smrg * MI_BATCH_BUFFER_START in the primary batch. 958b8e80941Smrg */ 959b8e80941Smrg anv_batch_bo_link(primary, last_bbo, this_bbo, offset); 960b8e80941Smrg 961b8e80941Smrg anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); 962b8e80941Smrg break; 963b8e80941Smrg } 964b8e80941Smrg case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { 965b8e80941Smrg struct list_head copy_list; 966b8e80941Smrg VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, 967b8e80941Smrg secondary, 968b8e80941Smrg ©_list); 969b8e80941Smrg if (result != VK_SUCCESS) 970b8e80941Smrg return; /* FIXME */ 971b8e80941Smrg 972b8e80941Smrg anv_cmd_buffer_add_seen_bbos(primary, ©_list); 973b8e80941Smrg 974b8e80941Smrg struct anv_batch_bo *first_bbo = 975b8e80941Smrg list_first_entry(©_list, struct anv_batch_bo, link); 976b8e80941Smrg struct anv_batch_bo *last_bbo = 977b8e80941Smrg list_last_entry(©_list, struct anv_batch_bo, link); 978b8e80941Smrg 979b8e80941Smrg cmd_buffer_chain_to_batch_bo(primary, first_bbo); 980b8e80941Smrg 981b8e80941Smrg list_splicetail(©_list, &primary->batch_bos); 982b8e80941Smrg 983b8e80941Smrg 
anv_batch_bo_continue(last_bbo, &primary->batch, 984b8e80941Smrg GEN8_MI_BATCH_BUFFER_START_length * 4); 985b8e80941Smrg break; 986b8e80941Smrg } 987b8e80941Smrg default: 988b8e80941Smrg assert(!"Invalid execution mode"); 989b8e80941Smrg } 990b8e80941Smrg 991b8e80941Smrg anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, 992b8e80941Smrg &secondary->surface_relocs, 0); 993b8e80941Smrg} 994b8e80941Smrg 995b8e80941Smrgstruct anv_execbuf { 996b8e80941Smrg struct drm_i915_gem_execbuffer2 execbuf; 997b8e80941Smrg 998b8e80941Smrg struct drm_i915_gem_exec_object2 * objects; 999b8e80941Smrg uint32_t bo_count; 1000b8e80941Smrg struct anv_bo ** bos; 1001b8e80941Smrg 1002b8e80941Smrg /* Allocated length of the 'objects' and 'bos' arrays */ 1003b8e80941Smrg uint32_t array_length; 1004b8e80941Smrg 1005b8e80941Smrg bool has_relocs; 1006b8e80941Smrg 1007b8e80941Smrg uint32_t fence_count; 1008b8e80941Smrg uint32_t fence_array_length; 1009b8e80941Smrg struct drm_i915_gem_exec_fence * fences; 1010b8e80941Smrg struct anv_syncobj ** syncobjs; 1011b8e80941Smrg}; 1012b8e80941Smrg 1013b8e80941Smrgstatic void 1014b8e80941Smrganv_execbuf_init(struct anv_execbuf *exec) 1015b8e80941Smrg{ 1016b8e80941Smrg memset(exec, 0, sizeof(*exec)); 1017b8e80941Smrg} 1018b8e80941Smrg 1019b8e80941Smrgstatic void 1020b8e80941Smrganv_execbuf_finish(struct anv_execbuf *exec, 1021b8e80941Smrg const VkAllocationCallbacks *alloc) 1022b8e80941Smrg{ 1023b8e80941Smrg vk_free(alloc, exec->objects); 1024b8e80941Smrg vk_free(alloc, exec->bos); 1025b8e80941Smrg vk_free(alloc, exec->fences); 1026b8e80941Smrg vk_free(alloc, exec->syncobjs); 1027b8e80941Smrg} 1028b8e80941Smrg 1029b8e80941Smrgstatic int 1030b8e80941Smrg_compare_bo_handles(const void *_bo1, const void *_bo2) 1031b8e80941Smrg{ 1032b8e80941Smrg struct anv_bo * const *bo1 = _bo1; 1033b8e80941Smrg struct anv_bo * const *bo2 = _bo2; 1034b8e80941Smrg 1035b8e80941Smrg return (*bo1)->gem_handle - (*bo2)->gem_handle; 1036b8e80941Smrg} 
1037b8e80941Smrg 1038b8e80941Smrgstatic VkResult 1039b8e80941Smrganv_execbuf_add_bo_set(struct anv_execbuf *exec, 1040b8e80941Smrg struct set *deps, 1041b8e80941Smrg uint32_t extra_flags, 1042b8e80941Smrg const VkAllocationCallbacks *alloc); 1043b8e80941Smrg 1044b8e80941Smrgstatic VkResult 1045b8e80941Smrganv_execbuf_add_bo(struct anv_execbuf *exec, 1046b8e80941Smrg struct anv_bo *bo, 1047b8e80941Smrg struct anv_reloc_list *relocs, 1048b8e80941Smrg uint32_t extra_flags, 1049b8e80941Smrg const VkAllocationCallbacks *alloc) 1050b8e80941Smrg{ 1051b8e80941Smrg struct drm_i915_gem_exec_object2 *obj = NULL; 1052b8e80941Smrg 1053b8e80941Smrg if (bo->index < exec->bo_count && exec->bos[bo->index] == bo) 1054b8e80941Smrg obj = &exec->objects[bo->index]; 1055b8e80941Smrg 1056b8e80941Smrg if (obj == NULL) { 1057b8e80941Smrg /* We've never seen this one before. Add it to the list and assign 1058b8e80941Smrg * an id that we can use later. 1059b8e80941Smrg */ 1060b8e80941Smrg if (exec->bo_count >= exec->array_length) { 1061b8e80941Smrg uint32_t new_len = exec->objects ? 
exec->array_length * 2 : 64; 1062b8e80941Smrg 1063b8e80941Smrg struct drm_i915_gem_exec_object2 *new_objects = 1064b8e80941Smrg vk_alloc(alloc, new_len * sizeof(*new_objects), 1065b8e80941Smrg 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1066b8e80941Smrg if (new_objects == NULL) 1067b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1068b8e80941Smrg 1069b8e80941Smrg struct anv_bo **new_bos = 1070b8e80941Smrg vk_alloc(alloc, new_len * sizeof(*new_bos), 1071b8e80941Smrg 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1072b8e80941Smrg if (new_bos == NULL) { 1073b8e80941Smrg vk_free(alloc, new_objects); 1074b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1075b8e80941Smrg } 1076b8e80941Smrg 1077b8e80941Smrg if (exec->objects) { 1078b8e80941Smrg memcpy(new_objects, exec->objects, 1079b8e80941Smrg exec->bo_count * sizeof(*new_objects)); 1080b8e80941Smrg memcpy(new_bos, exec->bos, 1081b8e80941Smrg exec->bo_count * sizeof(*new_bos)); 1082b8e80941Smrg } 1083b8e80941Smrg 1084b8e80941Smrg vk_free(alloc, exec->objects); 1085b8e80941Smrg vk_free(alloc, exec->bos); 1086b8e80941Smrg 1087b8e80941Smrg exec->objects = new_objects; 1088b8e80941Smrg exec->bos = new_bos; 1089b8e80941Smrg exec->array_length = new_len; 1090b8e80941Smrg } 1091b8e80941Smrg 1092b8e80941Smrg assert(exec->bo_count < exec->array_length); 1093b8e80941Smrg 1094b8e80941Smrg bo->index = exec->bo_count++; 1095b8e80941Smrg obj = &exec->objects[bo->index]; 1096b8e80941Smrg exec->bos[bo->index] = bo; 1097b8e80941Smrg 1098b8e80941Smrg obj->handle = bo->gem_handle; 1099b8e80941Smrg obj->relocation_count = 0; 1100b8e80941Smrg obj->relocs_ptr = 0; 1101b8e80941Smrg obj->alignment = 0; 1102b8e80941Smrg obj->offset = bo->offset; 1103b8e80941Smrg obj->flags = (bo->flags & ~ANV_BO_FLAG_MASK) | extra_flags; 1104b8e80941Smrg obj->rsvd1 = 0; 1105b8e80941Smrg obj->rsvd2 = 0; 1106b8e80941Smrg } 1107b8e80941Smrg 1108b8e80941Smrg if (relocs != NULL) { 1109b8e80941Smrg assert(obj->relocation_count == 0); 1110b8e80941Smrg 
1111b8e80941Smrg if (relocs->num_relocs > 0) { 1112b8e80941Smrg /* This is the first time we've ever seen a list of relocations for 1113b8e80941Smrg * this BO. Go ahead and set the relocations and then walk the list 1114b8e80941Smrg * of relocations and add them all. 1115b8e80941Smrg */ 1116b8e80941Smrg exec->has_relocs = true; 1117b8e80941Smrg obj->relocation_count = relocs->num_relocs; 1118b8e80941Smrg obj->relocs_ptr = (uintptr_t) relocs->relocs; 1119b8e80941Smrg 1120b8e80941Smrg for (size_t i = 0; i < relocs->num_relocs; i++) { 1121b8e80941Smrg VkResult result; 1122b8e80941Smrg 1123b8e80941Smrg /* A quick sanity check on relocations */ 1124b8e80941Smrg assert(relocs->relocs[i].offset < bo->size); 1125b8e80941Smrg result = anv_execbuf_add_bo(exec, relocs->reloc_bos[i], NULL, 1126b8e80941Smrg extra_flags, alloc); 1127b8e80941Smrg 1128b8e80941Smrg if (result != VK_SUCCESS) 1129b8e80941Smrg return result; 1130b8e80941Smrg } 1131b8e80941Smrg } 1132b8e80941Smrg 1133b8e80941Smrg return anv_execbuf_add_bo_set(exec, relocs->deps, extra_flags, alloc); 1134b8e80941Smrg } 1135b8e80941Smrg 1136b8e80941Smrg return VK_SUCCESS; 1137b8e80941Smrg} 1138b8e80941Smrg 1139b8e80941Smrg/* Add BO dependencies to execbuf */ 1140b8e80941Smrgstatic VkResult 1141b8e80941Smrganv_execbuf_add_bo_set(struct anv_execbuf *exec, 1142b8e80941Smrg struct set *deps, 1143b8e80941Smrg uint32_t extra_flags, 1144b8e80941Smrg const VkAllocationCallbacks *alloc) 1145b8e80941Smrg{ 1146b8e80941Smrg if (!deps || deps->entries <= 0) 1147b8e80941Smrg return VK_SUCCESS; 1148b8e80941Smrg 1149b8e80941Smrg const uint32_t entries = deps->entries; 1150b8e80941Smrg struct anv_bo **bos = 1151b8e80941Smrg vk_alloc(alloc, entries * sizeof(*bos), 1152b8e80941Smrg 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1153b8e80941Smrg if (bos == NULL) 1154b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1155b8e80941Smrg 1156b8e80941Smrg struct anv_bo **bo = bos; 1157b8e80941Smrg set_foreach(deps, entry) { 1158b8e80941Smrg 
*bo++ = (void *)entry->key; 1159b8e80941Smrg } 1160b8e80941Smrg 1161b8e80941Smrg qsort(bos, entries, sizeof(struct anv_bo*), _compare_bo_handles); 1162b8e80941Smrg 1163b8e80941Smrg VkResult result = VK_SUCCESS; 1164b8e80941Smrg for (bo = bos; bo < bos + entries; bo++) { 1165b8e80941Smrg result = anv_execbuf_add_bo(exec, *bo, NULL, extra_flags, alloc); 1166b8e80941Smrg if (result != VK_SUCCESS) 1167b8e80941Smrg break; 1168b8e80941Smrg } 1169b8e80941Smrg 1170b8e80941Smrg vk_free(alloc, bos); 1171b8e80941Smrg 1172b8e80941Smrg return result; 1173b8e80941Smrg} 1174b8e80941Smrg 1175b8e80941Smrgstatic VkResult 1176b8e80941Smrganv_execbuf_add_syncobj(struct anv_execbuf *exec, 1177b8e80941Smrg uint32_t handle, uint32_t flags, 1178b8e80941Smrg const VkAllocationCallbacks *alloc) 1179b8e80941Smrg{ 1180b8e80941Smrg assert(flags != 0); 1181b8e80941Smrg 1182b8e80941Smrg if (exec->fence_count >= exec->fence_array_length) { 1183b8e80941Smrg uint32_t new_len = MAX2(exec->fence_array_length * 2, 64); 1184b8e80941Smrg 1185b8e80941Smrg exec->fences = vk_realloc(alloc, exec->fences, 1186b8e80941Smrg new_len * sizeof(*exec->fences), 1187b8e80941Smrg 8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND); 1188b8e80941Smrg if (exec->fences == NULL) 1189b8e80941Smrg return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1190b8e80941Smrg 1191b8e80941Smrg exec->fence_array_length = new_len; 1192b8e80941Smrg } 1193b8e80941Smrg 1194b8e80941Smrg exec->fences[exec->fence_count] = (struct drm_i915_gem_exec_fence) { 1195b8e80941Smrg .handle = handle, 1196b8e80941Smrg .flags = flags, 1197b8e80941Smrg }; 1198b8e80941Smrg 1199b8e80941Smrg exec->fence_count++; 1200b8e80941Smrg 1201b8e80941Smrg return VK_SUCCESS; 1202b8e80941Smrg} 1203b8e80941Smrg 1204b8e80941Smrgstatic void 1205b8e80941Smrganv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer, 1206b8e80941Smrg struct anv_reloc_list *list) 1207b8e80941Smrg{ 1208b8e80941Smrg for (size_t i = 0; i < list->num_relocs; i++) 1209b8e80941Smrg list->relocs[i].target_handle = 
list->reloc_bos[i]->index; 1210b8e80941Smrg} 1211b8e80941Smrg 1212b8e80941Smrgstatic void 1213b8e80941Smrgadjust_relocations_from_state_pool(struct anv_state_pool *pool, 1214b8e80941Smrg struct anv_reloc_list *relocs, 1215b8e80941Smrg uint32_t last_pool_center_bo_offset) 1216b8e80941Smrg{ 1217b8e80941Smrg assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset); 1218b8e80941Smrg uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset; 1219b8e80941Smrg 1220b8e80941Smrg for (size_t i = 0; i < relocs->num_relocs; i++) { 1221b8e80941Smrg /* All of the relocations from this block pool to other BO's should 1222b8e80941Smrg * have been emitted relative to the surface block pool center. We 1223b8e80941Smrg * need to add the center offset to make them relative to the 1224b8e80941Smrg * beginning of the actual GEM bo. 1225b8e80941Smrg */ 1226b8e80941Smrg relocs->relocs[i].offset += delta; 1227b8e80941Smrg } 1228b8e80941Smrg} 1229b8e80941Smrg 1230b8e80941Smrgstatic void 1231b8e80941Smrgadjust_relocations_to_state_pool(struct anv_state_pool *pool, 1232b8e80941Smrg struct anv_bo *from_bo, 1233b8e80941Smrg struct anv_reloc_list *relocs, 1234b8e80941Smrg uint32_t last_pool_center_bo_offset) 1235b8e80941Smrg{ 1236b8e80941Smrg assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset); 1237b8e80941Smrg uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset; 1238b8e80941Smrg 1239b8e80941Smrg /* When we initially emit relocations into a block pool, we don't 1240b8e80941Smrg * actually know what the final center_bo_offset will be so we just emit 1241b8e80941Smrg * it as if center_bo_offset == 0. Now that we know what the center 1242b8e80941Smrg * offset is, we need to walk the list of relocations and adjust any 1243b8e80941Smrg * relocations that point to the pool bo with the correct offset. 
1244b8e80941Smrg */ 1245b8e80941Smrg for (size_t i = 0; i < relocs->num_relocs; i++) { 1246b8e80941Smrg if (relocs->reloc_bos[i] == pool->block_pool.bo) { 1247b8e80941Smrg /* Adjust the delta value in the relocation to correctly 1248b8e80941Smrg * correspond to the new delta. Initially, this value may have 1249b8e80941Smrg * been negative (if treated as unsigned), but we trust in 1250b8e80941Smrg * uint32_t roll-over to fix that for us at this point. 1251b8e80941Smrg */ 1252b8e80941Smrg relocs->relocs[i].delta += delta; 1253b8e80941Smrg 1254b8e80941Smrg /* Since the delta has changed, we need to update the actual 1255b8e80941Smrg * relocated value with the new presumed value. This function 1256b8e80941Smrg * should only be called on batch buffers, so we know it isn't in 1257b8e80941Smrg * use by the GPU at the moment. 1258b8e80941Smrg */ 1259b8e80941Smrg assert(relocs->relocs[i].offset < from_bo->size); 1260b8e80941Smrg write_reloc(pool->block_pool.device, 1261b8e80941Smrg from_bo->map + relocs->relocs[i].offset, 1262b8e80941Smrg relocs->relocs[i].presumed_offset + 1263b8e80941Smrg relocs->relocs[i].delta, false); 1264b8e80941Smrg } 1265b8e80941Smrg } 1266b8e80941Smrg} 1267b8e80941Smrg 1268b8e80941Smrgstatic void 1269b8e80941Smrganv_reloc_list_apply(struct anv_device *device, 1270b8e80941Smrg struct anv_reloc_list *list, 1271b8e80941Smrg struct anv_bo *bo, 1272b8e80941Smrg bool always_relocate) 1273b8e80941Smrg{ 1274b8e80941Smrg for (size_t i = 0; i < list->num_relocs; i++) { 1275b8e80941Smrg struct anv_bo *target_bo = list->reloc_bos[i]; 1276b8e80941Smrg if (list->relocs[i].presumed_offset == target_bo->offset && 1277b8e80941Smrg !always_relocate) 1278b8e80941Smrg continue; 1279b8e80941Smrg 1280b8e80941Smrg void *p = bo->map + list->relocs[i].offset; 1281b8e80941Smrg write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true); 1282b8e80941Smrg list->relocs[i].presumed_offset = target_bo->offset; 1283b8e80941Smrg } 1284b8e80941Smrg} 1285b8e80941Smrg 
/**
 * This function applies the relocation for a command buffer and writes the
 * actual addresses into the buffers as per what we were told by the kernel on
 * the previous execbuf2 call.  This should be safe to do because, for each
 * relocated address, we have two cases:
 *
 *  1) The target BO is inactive (as seen by the kernel).  In this case, it is
 *     not in use by the GPU so updating the address is 100% ok.  It won't be
 *     in-use by the GPU (from our context) again until the next execbuf2
 *     happens.  If the kernel decides to move it in the next execbuf2, it
 *     will have to do the relocations itself, but that's ok because it should
 *     have all of the information needed to do so.
 *
 *  2) The target BO is active (as seen by the kernel).  In this case, it
 *     hasn't moved since the last execbuffer2 call because GTT shuffling
 *     *only* happens when the BO is idle. (From our perspective, it only
 *     happens inside the execbuffer2 ioctl, but the shuffling may be
 *     triggered by another ioctl, with full-ppgtt this is limited to only
 *     execbuffer2 ioctls on the same context, or memory pressure.)  Since the
 *     target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT
 *     address and the relocated value we are writing into the BO will be the
 *     same as the value that is already there.
 *
 *     There is also a possibility that the target BO is active but the exact
 *     RENDER_SURFACE_STATE object we are writing the relocation into isn't in
 *     use.  In this case, the address currently in the RENDER_SURFACE_STATE
 *     may be stale but it's still safe to write the relocation because that
 *     particular RENDER_SURFACE_STATE object isn't in-use by the GPU and
 *     won't be until the next execbuf2 call.
 *
 * By doing relocations on the CPU, we can tell the kernel that it doesn't
 * need to bother.  We want to do this because the surface state buffer is
 * used by every command buffer so, if the kernel does the relocations, it
 * will always be busy and the kernel will always stall.  This is also
 * probably the fastest mechanism for doing relocations since the kernel would
 * have to make a full copy of all the relocations lists.
 *
 * \return true if userspace relocation succeeded and the kernel can skip
 *         relocating; false if the kernel must process the relocations.
 */
static bool
relocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer,
                    struct anv_execbuf *exec)
{
   /* Nothing to relocate (e.g. softpin). */
   if (!exec->has_relocs)
      return true;

   /* ANV_USERSPACE_RELOCS (default true) can force kernel-side relocation
    * for debugging; the env var is read once and cached.
    */
   static int userspace_relocs = -1;
   if (userspace_relocs < 0)
      userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true);
   if (!userspace_relocs)
      return false;

   /* First, we have to check to see whether or not we can even do the
    * relocation.  New buffers which have never been submitted to the kernel
    * don't have a valid offset so we need to let the kernel do relocations so
    * that we can get offsets for them.  On future execbuf2 calls, those
    * buffers will have offsets and we will be able to skip relocating.
    * Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1.
    */
   for (uint32_t i = 0; i < exec->bo_count; i++) {
      if (exec->bos[i]->offset == (uint64_t)-1)
         return false;
   }

   /* Since surface states are shared between command buffers and we don't
    * know what order they will be submitted to the kernel, we don't know
    * what address is actually written in the surface state object at any
    * given time.  The only option is to always relocate them.
    */
   anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs,
                        cmd_buffer->device->surface_state_pool.block_pool.bo,
                        true /* always relocate surface states */);

   /* Since we own all of the batch buffers, we know what values are stored
    * in the relocated addresses and only have to update them if the offsets
    * have changed.
    */
   struct anv_batch_bo **bbo;
   u_vector_foreach(bbo, &cmd_buffer->seen_bbos) {
      anv_reloc_list_apply(cmd_buffer->device,
                           &(*bbo)->relocs, &(*bbo)->bo, false);
   }

   /* Tell the kernel the offsets we presumed so it can skip relocating. */
   for (uint32_t i = 0; i < exec->bo_count; i++)
      exec->objects[i].offset = exec->bos[i]->offset;

   return true;
}

/* Build the execbuf validation list for a command buffer: every BO it
 * touches plus (for softpin) all pinned state pools and memory objects.
 * NOTE: this function continues beyond this chunk of the file.
 */
static VkResult
setup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf,
                             struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch *batch = &cmd_buffer->batch;
   struct anv_state_pool *ss_pool =
      &cmd_buffer->device->surface_state_pool;

   adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs,
                                      cmd_buffer->last_ss_pool_center);
   VkResult result;
   struct anv_bo *bo;
   if (cmd_buffer->device->instance->physicalDevice.use_softpin) {
      /* Softpin: no relocations, so every BO the GPU might touch must be
       * added to the validation list explicitly.
       */
      anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) {
         result = anv_execbuf_add_bo(execbuf, bo, NULL, 0,
                                     &cmd_buffer->device->alloc);
         if (result != VK_SUCCESS)
            return result;
      }
      /* Add surface dependencies (BOs) to the execbuf */
      anv_execbuf_add_bo_set(execbuf, cmd_buffer->surface_relocs.deps, 0,
                             &cmd_buffer->device->alloc);

      /* Add the BOs for all memory objects */
      list_for_each_entry(struct anv_device_memory, mem,
                          &cmd_buffer->device->memory_objects, link) {
         result = anv_execbuf_add_bo(execbuf, mem->bo, NULL, 0,
                                     &cmd_buffer->device->alloc);
         if (result != VK_SUCCESS)
1402b8e80941Smrg return result; 1403b8e80941Smrg } 1404b8e80941Smrg 1405b8e80941Smrg struct anv_block_pool *pool; 1406b8e80941Smrg pool = &cmd_buffer->device->dynamic_state_pool.block_pool; 1407b8e80941Smrg anv_block_pool_foreach_bo(bo, pool) { 1408b8e80941Smrg result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, 1409b8e80941Smrg &cmd_buffer->device->alloc); 1410b8e80941Smrg if (result != VK_SUCCESS) 1411b8e80941Smrg return result; 1412b8e80941Smrg } 1413b8e80941Smrg 1414b8e80941Smrg pool = &cmd_buffer->device->instruction_state_pool.block_pool; 1415b8e80941Smrg anv_block_pool_foreach_bo(bo, pool) { 1416b8e80941Smrg result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, 1417b8e80941Smrg &cmd_buffer->device->alloc); 1418b8e80941Smrg if (result != VK_SUCCESS) 1419b8e80941Smrg return result; 1420b8e80941Smrg } 1421b8e80941Smrg 1422b8e80941Smrg pool = &cmd_buffer->device->binding_table_pool.block_pool; 1423b8e80941Smrg anv_block_pool_foreach_bo(bo, pool) { 1424b8e80941Smrg result = anv_execbuf_add_bo(execbuf, bo, NULL, 0, 1425b8e80941Smrg &cmd_buffer->device->alloc); 1426b8e80941Smrg if (result != VK_SUCCESS) 1427b8e80941Smrg return result; 1428b8e80941Smrg } 1429b8e80941Smrg } else { 1430b8e80941Smrg /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs 1431b8e80941Smrg * will get added automatically by processing relocations on the batch 1432b8e80941Smrg * buffer. We have to add the surface state BO manually because it has 1433b8e80941Smrg * relocations of its own that we need to be sure are processsed. 1434b8e80941Smrg */ 1435b8e80941Smrg result = anv_execbuf_add_bo(execbuf, ss_pool->block_pool.bo, 1436b8e80941Smrg &cmd_buffer->surface_relocs, 0, 1437b8e80941Smrg &cmd_buffer->device->alloc); 1438b8e80941Smrg if (result != VK_SUCCESS) 1439b8e80941Smrg return result; 1440b8e80941Smrg } 1441b8e80941Smrg 1442b8e80941Smrg /* First, we walk over all of the bos we've seen and add them and their 1443b8e80941Smrg * relocations to the validate list. 
1444b8e80941Smrg */ 1445b8e80941Smrg struct anv_batch_bo **bbo; 1446b8e80941Smrg u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { 1447b8e80941Smrg adjust_relocations_to_state_pool(ss_pool, &(*bbo)->bo, &(*bbo)->relocs, 1448b8e80941Smrg cmd_buffer->last_ss_pool_center); 1449b8e80941Smrg 1450b8e80941Smrg result = anv_execbuf_add_bo(execbuf, &(*bbo)->bo, &(*bbo)->relocs, 0, 1451b8e80941Smrg &cmd_buffer->device->alloc); 1452b8e80941Smrg if (result != VK_SUCCESS) 1453b8e80941Smrg return result; 1454b8e80941Smrg } 1455b8e80941Smrg 1456b8e80941Smrg /* Now that we've adjusted all of the surface state relocations, we need to 1457b8e80941Smrg * record the surface state pool center so future executions of the command 1458b8e80941Smrg * buffer can adjust correctly. 1459b8e80941Smrg */ 1460b8e80941Smrg cmd_buffer->last_ss_pool_center = ss_pool->block_pool.center_bo_offset; 1461b8e80941Smrg 1462b8e80941Smrg struct anv_batch_bo *first_batch_bo = 1463b8e80941Smrg list_first_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); 1464b8e80941Smrg 1465b8e80941Smrg /* The kernel requires that the last entry in the validation list be the 1466b8e80941Smrg * batch buffer to execute. We can simply swap the element 1467b8e80941Smrg * corresponding to the first batch_bo in the chain with the last 1468b8e80941Smrg * element in the list. 
1469b8e80941Smrg */ 1470b8e80941Smrg if (first_batch_bo->bo.index != execbuf->bo_count - 1) { 1471b8e80941Smrg uint32_t idx = first_batch_bo->bo.index; 1472b8e80941Smrg uint32_t last_idx = execbuf->bo_count - 1; 1473b8e80941Smrg 1474b8e80941Smrg struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; 1475b8e80941Smrg assert(execbuf->bos[idx] == &first_batch_bo->bo); 1476b8e80941Smrg 1477b8e80941Smrg execbuf->objects[idx] = execbuf->objects[last_idx]; 1478b8e80941Smrg execbuf->bos[idx] = execbuf->bos[last_idx]; 1479b8e80941Smrg execbuf->bos[idx]->index = idx; 1480b8e80941Smrg 1481b8e80941Smrg execbuf->objects[last_idx] = tmp_obj; 1482b8e80941Smrg execbuf->bos[last_idx] = &first_batch_bo->bo; 1483b8e80941Smrg first_batch_bo->bo.index = last_idx; 1484b8e80941Smrg } 1485b8e80941Smrg 1486b8e80941Smrg /* If we are pinning our BOs, we shouldn't have to relocate anything */ 1487b8e80941Smrg if (cmd_buffer->device->instance->physicalDevice.use_softpin) 1488b8e80941Smrg assert(!execbuf->has_relocs); 1489b8e80941Smrg 1490b8e80941Smrg /* Now we go through and fixup all of the relocation lists to point to 1491b8e80941Smrg * the correct indices in the object array. We have to do this after we 1492b8e80941Smrg * reorder the list above as some of the indices may have changed. 
1493b8e80941Smrg */ 1494b8e80941Smrg if (execbuf->has_relocs) { 1495b8e80941Smrg u_vector_foreach(bbo, &cmd_buffer->seen_bbos) 1496b8e80941Smrg anv_cmd_buffer_process_relocs(cmd_buffer, &(*bbo)->relocs); 1497b8e80941Smrg 1498b8e80941Smrg anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs); 1499b8e80941Smrg } 1500b8e80941Smrg 1501b8e80941Smrg if (!cmd_buffer->device->info.has_llc) { 1502b8e80941Smrg __builtin_ia32_mfence(); 1503b8e80941Smrg u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { 1504b8e80941Smrg for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) 1505b8e80941Smrg __builtin_ia32_clflush((*bbo)->bo.map + i); 1506b8e80941Smrg } 1507b8e80941Smrg } 1508b8e80941Smrg 1509b8e80941Smrg execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { 1510b8e80941Smrg .buffers_ptr = (uintptr_t) execbuf->objects, 1511b8e80941Smrg .buffer_count = execbuf->bo_count, 1512b8e80941Smrg .batch_start_offset = 0, 1513b8e80941Smrg .batch_len = batch->next - batch->start, 1514b8e80941Smrg .cliprects_ptr = 0, 1515b8e80941Smrg .num_cliprects = 0, 1516b8e80941Smrg .DR1 = 0, 1517b8e80941Smrg .DR4 = 0, 1518b8e80941Smrg .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, 1519b8e80941Smrg .rsvd1 = cmd_buffer->device->context_id, 1520b8e80941Smrg .rsvd2 = 0, 1521b8e80941Smrg }; 1522b8e80941Smrg 1523b8e80941Smrg if (relocate_cmd_buffer(cmd_buffer, execbuf)) { 1524b8e80941Smrg /* If we were able to successfully relocate everything, tell the kernel 1525b8e80941Smrg * that it can skip doing relocations. The requirement for using 1526b8e80941Smrg * NO_RELOC is: 1527b8e80941Smrg * 1528b8e80941Smrg * 1) The addresses written in the objects must match the corresponding 1529b8e80941Smrg * reloc.presumed_offset which in turn must match the corresponding 1530b8e80941Smrg * execobject.offset. 1531b8e80941Smrg * 1532b8e80941Smrg * 2) To avoid stalling, execobject.offset should match the current 1533b8e80941Smrg * address of that object within the active context. 
1534b8e80941Smrg * 1535b8e80941Smrg * In order to satisfy all of the invariants that make userspace 1536b8e80941Smrg * relocations to be safe (see relocate_cmd_buffer()), we need to 1537b8e80941Smrg * further ensure that the addresses we use match those used by the 1538b8e80941Smrg * kernel for the most recent execbuf2. 1539b8e80941Smrg * 1540b8e80941Smrg * The kernel may still choose to do relocations anyway if something has 1541b8e80941Smrg * moved in the GTT. In this case, the relocation list still needs to be 1542b8e80941Smrg * valid. All relocations on the batch buffers are already valid and 1543b8e80941Smrg * kept up-to-date. For surface state relocations, by applying the 1544b8e80941Smrg * relocations in relocate_cmd_buffer, we ensured that the address in 1545b8e80941Smrg * the RENDER_SURFACE_STATE matches presumed_offset, so it should be 1546b8e80941Smrg * safe for the kernel to relocate them as needed. 1547b8e80941Smrg */ 1548b8e80941Smrg execbuf->execbuf.flags |= I915_EXEC_NO_RELOC; 1549b8e80941Smrg } else { 1550b8e80941Smrg /* In the case where we fall back to doing kernel relocations, we need 1551b8e80941Smrg * to ensure that the relocation list is valid. All relocations on the 1552b8e80941Smrg * batch buffers are already valid and kept up-to-date. Since surface 1553b8e80941Smrg * states are shared between command buffers and we don't know what 1554b8e80941Smrg * order they will be submitted to the kernel, we don't know what 1555b8e80941Smrg * address is actually written in the surface state object at any given 1556b8e80941Smrg * time. The only option is to set a bogus presumed offset and let the 1557b8e80941Smrg * kernel relocate them. 
1558b8e80941Smrg */ 1559b8e80941Smrg for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++) 1560b8e80941Smrg cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1; 1561b8e80941Smrg } 1562b8e80941Smrg 1563b8e80941Smrg return VK_SUCCESS; 1564b8e80941Smrg} 1565b8e80941Smrg 1566b8e80941Smrgstatic VkResult 1567b8e80941Smrgsetup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_device *device) 1568b8e80941Smrg{ 1569b8e80941Smrg VkResult result = anv_execbuf_add_bo(execbuf, &device->trivial_batch_bo, 1570b8e80941Smrg NULL, 0, &device->alloc); 1571b8e80941Smrg if (result != VK_SUCCESS) 1572b8e80941Smrg return result; 1573b8e80941Smrg 1574b8e80941Smrg execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { 1575b8e80941Smrg .buffers_ptr = (uintptr_t) execbuf->objects, 1576b8e80941Smrg .buffer_count = execbuf->bo_count, 1577b8e80941Smrg .batch_start_offset = 0, 1578b8e80941Smrg .batch_len = 8, /* GEN7_MI_BATCH_BUFFER_END and NOOP */ 1579b8e80941Smrg .flags = I915_EXEC_HANDLE_LUT | I915_EXEC_RENDER, 1580b8e80941Smrg .rsvd1 = device->context_id, 1581b8e80941Smrg .rsvd2 = 0, 1582b8e80941Smrg }; 1583b8e80941Smrg 1584b8e80941Smrg return VK_SUCCESS; 1585b8e80941Smrg} 1586b8e80941Smrg 1587b8e80941SmrgVkResult 1588b8e80941Smrganv_cmd_buffer_execbuf(struct anv_device *device, 1589b8e80941Smrg struct anv_cmd_buffer *cmd_buffer, 1590b8e80941Smrg const VkSemaphore *in_semaphores, 1591b8e80941Smrg uint32_t num_in_semaphores, 1592b8e80941Smrg const VkSemaphore *out_semaphores, 1593b8e80941Smrg uint32_t num_out_semaphores, 1594b8e80941Smrg VkFence _fence) 1595b8e80941Smrg{ 1596b8e80941Smrg ANV_FROM_HANDLE(anv_fence, fence, _fence); 1597b8e80941Smrg 1598b8e80941Smrg struct anv_execbuf execbuf; 1599b8e80941Smrg anv_execbuf_init(&execbuf); 1600b8e80941Smrg 1601b8e80941Smrg int in_fence = -1; 1602b8e80941Smrg VkResult result = VK_SUCCESS; 1603b8e80941Smrg for (uint32_t i = 0; i < num_in_semaphores; i++) { 1604b8e80941Smrg ANV_FROM_HANDLE(anv_semaphore, semaphore, 
in_semaphores[i]); 1605b8e80941Smrg struct anv_semaphore_impl *impl = 1606b8e80941Smrg semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ? 1607b8e80941Smrg &semaphore->temporary : &semaphore->permanent; 1608b8e80941Smrg 1609b8e80941Smrg switch (impl->type) { 1610b8e80941Smrg case ANV_SEMAPHORE_TYPE_BO: 1611b8e80941Smrg result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL, 1612b8e80941Smrg 0, &device->alloc); 1613b8e80941Smrg if (result != VK_SUCCESS) 1614b8e80941Smrg return result; 1615b8e80941Smrg break; 1616b8e80941Smrg 1617b8e80941Smrg case ANV_SEMAPHORE_TYPE_SYNC_FILE: 1618b8e80941Smrg if (in_fence == -1) { 1619b8e80941Smrg in_fence = impl->fd; 1620b8e80941Smrg } else { 1621b8e80941Smrg int merge = anv_gem_sync_file_merge(device, in_fence, impl->fd); 1622b8e80941Smrg if (merge == -1) 1623b8e80941Smrg return vk_error(VK_ERROR_INVALID_EXTERNAL_HANDLE); 1624b8e80941Smrg 1625b8e80941Smrg close(impl->fd); 1626b8e80941Smrg close(in_fence); 1627b8e80941Smrg in_fence = merge; 1628b8e80941Smrg } 1629b8e80941Smrg 1630b8e80941Smrg impl->fd = -1; 1631b8e80941Smrg break; 1632b8e80941Smrg 1633b8e80941Smrg case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: 1634b8e80941Smrg result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj, 1635b8e80941Smrg I915_EXEC_FENCE_WAIT, 1636b8e80941Smrg &device->alloc); 1637b8e80941Smrg if (result != VK_SUCCESS) 1638b8e80941Smrg return result; 1639b8e80941Smrg break; 1640b8e80941Smrg 1641b8e80941Smrg default: 1642b8e80941Smrg break; 1643b8e80941Smrg } 1644b8e80941Smrg } 1645b8e80941Smrg 1646b8e80941Smrg bool need_out_fence = false; 1647b8e80941Smrg for (uint32_t i = 0; i < num_out_semaphores; i++) { 1648b8e80941Smrg ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]); 1649b8e80941Smrg 1650b8e80941Smrg /* Under most circumstances, out fences won't be temporary. However, 1651b8e80941Smrg * the spec does allow it for opaque_fd. 
From the Vulkan 1.0.53 spec: 1652b8e80941Smrg * 1653b8e80941Smrg * "If the import is temporary, the implementation must restore the 1654b8e80941Smrg * semaphore to its prior permanent state after submitting the next 1655b8e80941Smrg * semaphore wait operation." 1656b8e80941Smrg * 1657b8e80941Smrg * The spec says nothing whatsoever about signal operations on 1658b8e80941Smrg * temporarily imported semaphores so it appears they are allowed. 1659b8e80941Smrg * There are also CTS tests that require this to work. 1660b8e80941Smrg */ 1661b8e80941Smrg struct anv_semaphore_impl *impl = 1662b8e80941Smrg semaphore->temporary.type != ANV_SEMAPHORE_TYPE_NONE ? 1663b8e80941Smrg &semaphore->temporary : &semaphore->permanent; 1664b8e80941Smrg 1665b8e80941Smrg switch (impl->type) { 1666b8e80941Smrg case ANV_SEMAPHORE_TYPE_BO: 1667b8e80941Smrg result = anv_execbuf_add_bo(&execbuf, impl->bo, NULL, 1668b8e80941Smrg EXEC_OBJECT_WRITE, &device->alloc); 1669b8e80941Smrg if (result != VK_SUCCESS) 1670b8e80941Smrg return result; 1671b8e80941Smrg break; 1672b8e80941Smrg 1673b8e80941Smrg case ANV_SEMAPHORE_TYPE_SYNC_FILE: 1674b8e80941Smrg need_out_fence = true; 1675b8e80941Smrg break; 1676b8e80941Smrg 1677b8e80941Smrg case ANV_SEMAPHORE_TYPE_DRM_SYNCOBJ: 1678b8e80941Smrg result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj, 1679b8e80941Smrg I915_EXEC_FENCE_SIGNAL, 1680b8e80941Smrg &device->alloc); 1681b8e80941Smrg if (result != VK_SUCCESS) 1682b8e80941Smrg return result; 1683b8e80941Smrg break; 1684b8e80941Smrg 1685b8e80941Smrg default: 1686b8e80941Smrg break; 1687b8e80941Smrg } 1688b8e80941Smrg } 1689b8e80941Smrg 1690b8e80941Smrg if (fence) { 1691b8e80941Smrg /* Under most circumstances, out fences won't be temporary. However, 1692b8e80941Smrg * the spec does allow it for opaque_fd. 
From the Vulkan 1.0.53 spec: 1693b8e80941Smrg * 1694b8e80941Smrg * "If the import is temporary, the implementation must restore the 1695b8e80941Smrg * semaphore to its prior permanent state after submitting the next 1696b8e80941Smrg * semaphore wait operation." 1697b8e80941Smrg * 1698b8e80941Smrg * The spec says nothing whatsoever about signal operations on 1699b8e80941Smrg * temporarily imported semaphores so it appears they are allowed. 1700b8e80941Smrg * There are also CTS tests that require this to work. 1701b8e80941Smrg */ 1702b8e80941Smrg struct anv_fence_impl *impl = 1703b8e80941Smrg fence->temporary.type != ANV_FENCE_TYPE_NONE ? 1704b8e80941Smrg &fence->temporary : &fence->permanent; 1705b8e80941Smrg 1706b8e80941Smrg switch (impl->type) { 1707b8e80941Smrg case ANV_FENCE_TYPE_BO: 1708b8e80941Smrg result = anv_execbuf_add_bo(&execbuf, &impl->bo.bo, NULL, 1709b8e80941Smrg EXEC_OBJECT_WRITE, &device->alloc); 1710b8e80941Smrg if (result != VK_SUCCESS) 1711b8e80941Smrg return result; 1712b8e80941Smrg break; 1713b8e80941Smrg 1714b8e80941Smrg case ANV_FENCE_TYPE_SYNCOBJ: 1715b8e80941Smrg result = anv_execbuf_add_syncobj(&execbuf, impl->syncobj, 1716b8e80941Smrg I915_EXEC_FENCE_SIGNAL, 1717b8e80941Smrg &device->alloc); 1718b8e80941Smrg if (result != VK_SUCCESS) 1719b8e80941Smrg return result; 1720b8e80941Smrg break; 1721b8e80941Smrg 1722b8e80941Smrg default: 1723b8e80941Smrg unreachable("Invalid fence type"); 1724b8e80941Smrg } 1725b8e80941Smrg } 1726b8e80941Smrg 1727b8e80941Smrg if (cmd_buffer) { 1728b8e80941Smrg if (unlikely(INTEL_DEBUG & DEBUG_BATCH)) { 1729b8e80941Smrg struct anv_batch_bo **bo = u_vector_head(&cmd_buffer->seen_bbos); 1730b8e80941Smrg 1731b8e80941Smrg device->cmd_buffer_being_decoded = cmd_buffer; 1732b8e80941Smrg gen_print_batch(&device->decoder_ctx, (*bo)->bo.map, 1733b8e80941Smrg (*bo)->bo.size, (*bo)->bo.offset, false); 1734b8e80941Smrg device->cmd_buffer_being_decoded = NULL; 1735b8e80941Smrg } 1736b8e80941Smrg 1737b8e80941Smrg result = 
setup_execbuf_for_cmd_buffer(&execbuf, cmd_buffer); 1738b8e80941Smrg } else { 1739b8e80941Smrg result = setup_empty_execbuf(&execbuf, device); 1740b8e80941Smrg } 1741b8e80941Smrg 1742b8e80941Smrg if (result != VK_SUCCESS) 1743b8e80941Smrg return result; 1744b8e80941Smrg 1745b8e80941Smrg if (execbuf.fence_count > 0) { 1746b8e80941Smrg assert(device->instance->physicalDevice.has_syncobj); 1747b8e80941Smrg execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY; 1748b8e80941Smrg execbuf.execbuf.num_cliprects = execbuf.fence_count; 1749b8e80941Smrg execbuf.execbuf.cliprects_ptr = (uintptr_t) execbuf.fences; 1750b8e80941Smrg } 1751b8e80941Smrg 1752b8e80941Smrg if (in_fence != -1) { 1753b8e80941Smrg execbuf.execbuf.flags |= I915_EXEC_FENCE_IN; 1754b8e80941Smrg execbuf.execbuf.rsvd2 |= (uint32_t)in_fence; 1755b8e80941Smrg } 1756b8e80941Smrg 1757b8e80941Smrg if (need_out_fence) 1758b8e80941Smrg execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT; 1759b8e80941Smrg 1760b8e80941Smrg result = anv_device_execbuf(device, &execbuf.execbuf, execbuf.bos); 1761b8e80941Smrg 1762b8e80941Smrg /* Execbuf does not consume the in_fence. It's our job to close it. */ 1763b8e80941Smrg if (in_fence != -1) 1764b8e80941Smrg close(in_fence); 1765b8e80941Smrg 1766b8e80941Smrg for (uint32_t i = 0; i < num_in_semaphores; i++) { 1767b8e80941Smrg ANV_FROM_HANDLE(anv_semaphore, semaphore, in_semaphores[i]); 1768b8e80941Smrg /* From the Vulkan 1.0.53 spec: 1769b8e80941Smrg * 1770b8e80941Smrg * "If the import is temporary, the implementation must restore the 1771b8e80941Smrg * semaphore to its prior permanent state after submitting the next 1772b8e80941Smrg * semaphore wait operation." 1773b8e80941Smrg * 1774b8e80941Smrg * This has to happen after the execbuf in case we close any syncobjs in 1775b8e80941Smrg * the process. 
1776b8e80941Smrg */ 1777b8e80941Smrg anv_semaphore_reset_temporary(device, semaphore); 1778b8e80941Smrg } 1779b8e80941Smrg 1780b8e80941Smrg if (fence && fence->permanent.type == ANV_FENCE_TYPE_BO) { 1781b8e80941Smrg /* BO fences can't be shared, so they can't be temporary. */ 1782b8e80941Smrg assert(fence->temporary.type == ANV_FENCE_TYPE_NONE); 1783b8e80941Smrg 1784b8e80941Smrg /* Once the execbuf has returned, we need to set the fence state to 1785b8e80941Smrg * SUBMITTED. We can't do this before calling execbuf because 1786b8e80941Smrg * anv_GetFenceStatus does take the global device lock before checking 1787b8e80941Smrg * fence->state. 1788b8e80941Smrg * 1789b8e80941Smrg * We set the fence state to SUBMITTED regardless of whether or not the 1790b8e80941Smrg * execbuf succeeds because we need to ensure that vkWaitForFences() and 1791b8e80941Smrg * vkGetFenceStatus() return a valid result (VK_ERROR_DEVICE_LOST or 1792b8e80941Smrg * VK_SUCCESS) in a finite amount of time even if execbuf fails. 1793b8e80941Smrg */ 1794b8e80941Smrg fence->permanent.bo.state = ANV_BO_FENCE_STATE_SUBMITTED; 1795b8e80941Smrg } 1796b8e80941Smrg 1797b8e80941Smrg if (result == VK_SUCCESS && need_out_fence) { 1798b8e80941Smrg int out_fence = execbuf.execbuf.rsvd2 >> 32; 1799b8e80941Smrg for (uint32_t i = 0; i < num_out_semaphores; i++) { 1800b8e80941Smrg ANV_FROM_HANDLE(anv_semaphore, semaphore, out_semaphores[i]); 1801b8e80941Smrg /* Out fences can't have temporary state because that would imply 1802b8e80941Smrg * that we imported a sync file and are trying to signal it. 
1803b8e80941Smrg */ 1804b8e80941Smrg assert(semaphore->temporary.type == ANV_SEMAPHORE_TYPE_NONE); 1805b8e80941Smrg struct anv_semaphore_impl *impl = &semaphore->permanent; 1806b8e80941Smrg 1807b8e80941Smrg if (impl->type == ANV_SEMAPHORE_TYPE_SYNC_FILE) { 1808b8e80941Smrg assert(impl->fd == -1); 1809b8e80941Smrg impl->fd = dup(out_fence); 1810b8e80941Smrg } 1811b8e80941Smrg } 1812b8e80941Smrg close(out_fence); 1813b8e80941Smrg } 1814b8e80941Smrg 1815b8e80941Smrg anv_execbuf_finish(&execbuf, &device->alloc); 1816b8e80941Smrg 1817b8e80941Smrg return result; 1818b8e80941Smrg} 1819