101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 
2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include <assert.h> 2501e04c3fSmrg#include <stdbool.h> 2601e04c3fSmrg#include <string.h> 2701e04c3fSmrg#include <unistd.h> 2801e04c3fSmrg#include <fcntl.h> 2901e04c3fSmrg 3001e04c3fSmrg#include "anv_private.h" 317ec681f3Smrg#include "anv_measure.h" 3201e04c3fSmrg 3301e04c3fSmrg#include "genxml/gen8_pack.h" 347ec681f3Smrg#include "genxml/genX_bits.h" 357ec681f3Smrg#include "perf/intel_perf.h" 3601e04c3fSmrg 3701e04c3fSmrg#include "util/debug.h" 3801e04c3fSmrg 3901e04c3fSmrg/** \file anv_batch_chain.c 4001e04c3fSmrg * 4101e04c3fSmrg * This file contains functions related to anv_cmd_buffer as a data 4201e04c3fSmrg * structure. This involves everything required to create and destroy 4301e04c3fSmrg * the actual batch buffers as well as link them together and handle 4401e04c3fSmrg * relocations and surface state. It specifically does *not* contain any 4501e04c3fSmrg * handling of actual vkCmd calls beyond vkCmdExecuteCommands. 4601e04c3fSmrg */ 4701e04c3fSmrg 4801e04c3fSmrg/*-----------------------------------------------------------------------* 4901e04c3fSmrg * Functions related to anv_reloc_list 5001e04c3fSmrg *-----------------------------------------------------------------------*/ 5101e04c3fSmrg 527ec681f3SmrgVkResult 537ec681f3Smrganv_reloc_list_init(struct anv_reloc_list *list, 547ec681f3Smrg const VkAllocationCallbacks *alloc) 557ec681f3Smrg{ 567ec681f3Smrg memset(list, 0, sizeof(*list)); 577ec681f3Smrg return VK_SUCCESS; 587ec681f3Smrg} 597ec681f3Smrg 6001e04c3fSmrgstatic VkResult 6101e04c3fSmrganv_reloc_list_init_clone(struct anv_reloc_list *list, 6201e04c3fSmrg const VkAllocationCallbacks *alloc, 6301e04c3fSmrg const struct anv_reloc_list *other_list) 6401e04c3fSmrg{ 657ec681f3Smrg list->num_relocs = other_list->num_relocs; 667ec681f3Smrg list->array_length = other_list->array_length; 677ec681f3Smrg 687ec681f3Smrg if (list->num_relocs > 0) { 697ec681f3Smrg list->relocs = 707ec681f3Smrg vk_alloc(alloc, 
list->array_length * sizeof(*list->relocs), 8, 717ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 727ec681f3Smrg if (list->relocs == NULL) 737ec681f3Smrg return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 747ec681f3Smrg 757ec681f3Smrg list->reloc_bos = 767ec681f3Smrg vk_alloc(alloc, list->array_length * sizeof(*list->reloc_bos), 8, 777ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 787ec681f3Smrg if (list->reloc_bos == NULL) { 797ec681f3Smrg vk_free(alloc, list->relocs); 807ec681f3Smrg return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 817ec681f3Smrg } 8201e04c3fSmrg 8301e04c3fSmrg memcpy(list->relocs, other_list->relocs, 8401e04c3fSmrg list->array_length * sizeof(*list->relocs)); 8501e04c3fSmrg memcpy(list->reloc_bos, other_list->reloc_bos, 8601e04c3fSmrg list->array_length * sizeof(*list->reloc_bos)); 877ec681f3Smrg } else { 887ec681f3Smrg list->relocs = NULL; 897ec681f3Smrg list->reloc_bos = NULL; 9001e04c3fSmrg } 9101e04c3fSmrg 927ec681f3Smrg list->dep_words = other_list->dep_words; 9301e04c3fSmrg 947ec681f3Smrg if (list->dep_words > 0) { 957ec681f3Smrg list->deps = 967ec681f3Smrg vk_alloc(alloc, list->dep_words * sizeof(BITSET_WORD), 8, 977ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 987ec681f3Smrg memcpy(list->deps, other_list->deps, 997ec681f3Smrg list->dep_words * sizeof(BITSET_WORD)); 1007ec681f3Smrg } else { 1017ec681f3Smrg list->deps = NULL; 1027ec681f3Smrg } 1037ec681f3Smrg 1047ec681f3Smrg return VK_SUCCESS; 10501e04c3fSmrg} 10601e04c3fSmrg 10701e04c3fSmrgvoid 10801e04c3fSmrganv_reloc_list_finish(struct anv_reloc_list *list, 10901e04c3fSmrg const VkAllocationCallbacks *alloc) 11001e04c3fSmrg{ 11101e04c3fSmrg vk_free(alloc, list->relocs); 11201e04c3fSmrg vk_free(alloc, list->reloc_bos); 1137ec681f3Smrg vk_free(alloc, list->deps); 11401e04c3fSmrg} 11501e04c3fSmrg 11601e04c3fSmrgstatic VkResult 11701e04c3fSmrganv_reloc_list_grow(struct anv_reloc_list *list, 11801e04c3fSmrg const VkAllocationCallbacks *alloc, 11901e04c3fSmrg size_t 
num_additional_relocs) 12001e04c3fSmrg{ 12101e04c3fSmrg if (list->num_relocs + num_additional_relocs <= list->array_length) 12201e04c3fSmrg return VK_SUCCESS; 12301e04c3fSmrg 1247ec681f3Smrg size_t new_length = MAX2(16, list->array_length * 2); 12501e04c3fSmrg while (new_length < list->num_relocs + num_additional_relocs) 12601e04c3fSmrg new_length *= 2; 12701e04c3fSmrg 12801e04c3fSmrg struct drm_i915_gem_relocation_entry *new_relocs = 1297ec681f3Smrg vk_realloc(alloc, list->relocs, 1307ec681f3Smrg new_length * sizeof(*list->relocs), 8, 1317ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 13201e04c3fSmrg if (new_relocs == NULL) 1337ec681f3Smrg return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 1347ec681f3Smrg list->relocs = new_relocs; 13501e04c3fSmrg 13601e04c3fSmrg struct anv_bo **new_reloc_bos = 1377ec681f3Smrg vk_realloc(alloc, list->reloc_bos, 1387ec681f3Smrg new_length * sizeof(*list->reloc_bos), 8, 1397ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1407ec681f3Smrg if (new_reloc_bos == NULL) 1417ec681f3Smrg return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 1427ec681f3Smrg list->reloc_bos = new_reloc_bos; 14301e04c3fSmrg 1447ec681f3Smrg list->array_length = new_length; 14501e04c3fSmrg 1467ec681f3Smrg return VK_SUCCESS; 1477ec681f3Smrg} 14801e04c3fSmrg 1497ec681f3Smrgstatic VkResult 1507ec681f3Smrganv_reloc_list_grow_deps(struct anv_reloc_list *list, 1517ec681f3Smrg const VkAllocationCallbacks *alloc, 1527ec681f3Smrg uint32_t min_num_words) 1537ec681f3Smrg{ 1547ec681f3Smrg if (min_num_words <= list->dep_words) 1557ec681f3Smrg return VK_SUCCESS; 1567ec681f3Smrg 1577ec681f3Smrg uint32_t new_length = MAX2(32, list->dep_words * 2); 1587ec681f3Smrg while (new_length < min_num_words) 1597ec681f3Smrg new_length *= 2; 1607ec681f3Smrg 1617ec681f3Smrg BITSET_WORD *new_deps = 1627ec681f3Smrg vk_realloc(alloc, list->deps, new_length * sizeof(BITSET_WORD), 8, 1637ec681f3Smrg VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1647ec681f3Smrg if (new_deps == NULL) 1657ec681f3Smrg return 
vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY); 1667ec681f3Smrg list->deps = new_deps; 1677ec681f3Smrg 1687ec681f3Smrg /* Zero out the new data */ 1697ec681f3Smrg memset(list->deps + list->dep_words, 0, 1707ec681f3Smrg (new_length - list->dep_words) * sizeof(BITSET_WORD)); 1717ec681f3Smrg list->dep_words = new_length; 1727ec681f3Smrg 1737ec681f3Smrg return VK_SUCCESS; 1747ec681f3Smrg} 1757ec681f3Smrg 1767ec681f3Smrg#define READ_ONCE(x) (*(volatile __typeof__(x) *)&(x)) 1777ec681f3Smrg 1787ec681f3SmrgVkResult 1797ec681f3Smrganv_reloc_list_add_bo(struct anv_reloc_list *list, 1807ec681f3Smrg const VkAllocationCallbacks *alloc, 1817ec681f3Smrg struct anv_bo *target_bo) 1827ec681f3Smrg{ 1837ec681f3Smrg assert(!target_bo->is_wrapper); 1847ec681f3Smrg assert(target_bo->flags & EXEC_OBJECT_PINNED); 1857ec681f3Smrg 1867ec681f3Smrg uint32_t idx = target_bo->gem_handle; 1877ec681f3Smrg VkResult result = anv_reloc_list_grow_deps(list, alloc, 1887ec681f3Smrg (idx / BITSET_WORDBITS) + 1); 1897ec681f3Smrg if (unlikely(result != VK_SUCCESS)) 1907ec681f3Smrg return result; 1917ec681f3Smrg 1927ec681f3Smrg BITSET_SET(list->deps, idx); 19301e04c3fSmrg 19401e04c3fSmrg return VK_SUCCESS; 19501e04c3fSmrg} 19601e04c3fSmrg 19701e04c3fSmrgVkResult 19801e04c3fSmrganv_reloc_list_add(struct anv_reloc_list *list, 19901e04c3fSmrg const VkAllocationCallbacks *alloc, 2007ec681f3Smrg uint32_t offset, struct anv_bo *target_bo, uint32_t delta, 2017ec681f3Smrg uint64_t *address_u64_out) 20201e04c3fSmrg{ 20301e04c3fSmrg struct drm_i915_gem_relocation_entry *entry; 20401e04c3fSmrg int index; 20501e04c3fSmrg 2067ec681f3Smrg struct anv_bo *unwrapped_target_bo = anv_bo_unwrap(target_bo); 2077ec681f3Smrg uint64_t target_bo_offset = READ_ONCE(unwrapped_target_bo->offset); 2087ec681f3Smrg if (address_u64_out) 2097ec681f3Smrg *address_u64_out = target_bo_offset + delta; 2107ec681f3Smrg 2117ec681f3Smrg assert(unwrapped_target_bo->gem_handle > 0); 2127ec681f3Smrg assert(unwrapped_target_bo->refcount > 0); 
2137ec681f3Smrg 2147ec681f3Smrg if (unwrapped_target_bo->flags & EXEC_OBJECT_PINNED) 2157ec681f3Smrg return anv_reloc_list_add_bo(list, alloc, unwrapped_target_bo); 21601e04c3fSmrg 21701e04c3fSmrg VkResult result = anv_reloc_list_grow(list, alloc, 1); 21801e04c3fSmrg if (result != VK_SUCCESS) 21901e04c3fSmrg return result; 22001e04c3fSmrg 22101e04c3fSmrg /* XXX: Can we use I915_EXEC_HANDLE_LUT? */ 22201e04c3fSmrg index = list->num_relocs++; 22301e04c3fSmrg list->reloc_bos[index] = target_bo; 22401e04c3fSmrg entry = &list->relocs[index]; 2257ec681f3Smrg entry->target_handle = -1; /* See also anv_cmd_buffer_process_relocs() */ 22601e04c3fSmrg entry->delta = delta; 22701e04c3fSmrg entry->offset = offset; 2287ec681f3Smrg entry->presumed_offset = target_bo_offset; 22901e04c3fSmrg entry->read_domains = 0; 23001e04c3fSmrg entry->write_domain = 0; 23101e04c3fSmrg VG(VALGRIND_CHECK_MEM_IS_DEFINED(entry, sizeof(*entry))); 23201e04c3fSmrg 23301e04c3fSmrg return VK_SUCCESS; 23401e04c3fSmrg} 23501e04c3fSmrg 2367ec681f3Smrgstatic void 2377ec681f3Smrganv_reloc_list_clear(struct anv_reloc_list *list) 2387ec681f3Smrg{ 2397ec681f3Smrg list->num_relocs = 0; 2407ec681f3Smrg if (list->dep_words > 0) 2417ec681f3Smrg memset(list->deps, 0, list->dep_words * sizeof(BITSET_WORD)); 2427ec681f3Smrg} 2437ec681f3Smrg 24401e04c3fSmrgstatic VkResult 24501e04c3fSmrganv_reloc_list_append(struct anv_reloc_list *list, 24601e04c3fSmrg const VkAllocationCallbacks *alloc, 24701e04c3fSmrg struct anv_reloc_list *other, uint32_t offset) 24801e04c3fSmrg{ 24901e04c3fSmrg VkResult result = anv_reloc_list_grow(list, alloc, other->num_relocs); 25001e04c3fSmrg if (result != VK_SUCCESS) 25101e04c3fSmrg return result; 25201e04c3fSmrg 2537ec681f3Smrg if (other->num_relocs > 0) { 2547ec681f3Smrg memcpy(&list->relocs[list->num_relocs], &other->relocs[0], 2557ec681f3Smrg other->num_relocs * sizeof(other->relocs[0])); 2567ec681f3Smrg memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0], 2577ec681f3Smrg 
other->num_relocs * sizeof(other->reloc_bos[0])); 25801e04c3fSmrg 2597ec681f3Smrg for (uint32_t i = 0; i < other->num_relocs; i++) 2607ec681f3Smrg list->relocs[i + list->num_relocs].offset += offset; 26101e04c3fSmrg 2627ec681f3Smrg list->num_relocs += other->num_relocs; 26301e04c3fSmrg } 26401e04c3fSmrg 2657ec681f3Smrg anv_reloc_list_grow_deps(list, alloc, other->dep_words); 2667ec681f3Smrg for (uint32_t w = 0; w < other->dep_words; w++) 2677ec681f3Smrg list->deps[w] |= other->deps[w]; 2687ec681f3Smrg 26901e04c3fSmrg return VK_SUCCESS; 27001e04c3fSmrg} 27101e04c3fSmrg 27201e04c3fSmrg/*-----------------------------------------------------------------------* 27301e04c3fSmrg * Functions related to anv_batch 27401e04c3fSmrg *-----------------------------------------------------------------------*/ 27501e04c3fSmrg 27601e04c3fSmrgvoid * 27701e04c3fSmrganv_batch_emit_dwords(struct anv_batch *batch, int num_dwords) 27801e04c3fSmrg{ 27901e04c3fSmrg if (batch->next + num_dwords * 4 > batch->end) { 28001e04c3fSmrg VkResult result = batch->extend_cb(batch, batch->user_data); 28101e04c3fSmrg if (result != VK_SUCCESS) { 28201e04c3fSmrg anv_batch_set_error(batch, result); 28301e04c3fSmrg return NULL; 28401e04c3fSmrg } 28501e04c3fSmrg } 28601e04c3fSmrg 28701e04c3fSmrg void *p = batch->next; 28801e04c3fSmrg 28901e04c3fSmrg batch->next += num_dwords * 4; 29001e04c3fSmrg assert(batch->next <= batch->end); 29101e04c3fSmrg 29201e04c3fSmrg return p; 29301e04c3fSmrg} 29401e04c3fSmrg 2957ec681f3Smrgstruct anv_address 2967ec681f3Smrganv_batch_address(struct anv_batch *batch, void *batch_location) 29701e04c3fSmrg{ 2987ec681f3Smrg assert(batch->start < batch_location); 29901e04c3fSmrg 3007ec681f3Smrg /* Allow a jump at the current location of the batch. 
*/ 3017ec681f3Smrg assert(batch->next >= batch_location); 3027ec681f3Smrg 3037ec681f3Smrg return anv_address_add(batch->start_addr, batch_location - batch->start); 30401e04c3fSmrg} 30501e04c3fSmrg 30601e04c3fSmrgvoid 30701e04c3fSmrganv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other) 30801e04c3fSmrg{ 30901e04c3fSmrg uint32_t size, offset; 31001e04c3fSmrg 31101e04c3fSmrg size = other->next - other->start; 31201e04c3fSmrg assert(size % 4 == 0); 31301e04c3fSmrg 31401e04c3fSmrg if (batch->next + size > batch->end) { 31501e04c3fSmrg VkResult result = batch->extend_cb(batch, batch->user_data); 31601e04c3fSmrg if (result != VK_SUCCESS) { 31701e04c3fSmrg anv_batch_set_error(batch, result); 31801e04c3fSmrg return; 31901e04c3fSmrg } 32001e04c3fSmrg } 32101e04c3fSmrg 32201e04c3fSmrg assert(batch->next + size <= batch->end); 32301e04c3fSmrg 32401e04c3fSmrg VG(VALGRIND_CHECK_MEM_IS_DEFINED(other->start, size)); 32501e04c3fSmrg memcpy(batch->next, other->start, size); 32601e04c3fSmrg 32701e04c3fSmrg offset = batch->next - batch->start; 32801e04c3fSmrg VkResult result = anv_reloc_list_append(batch->relocs, batch->alloc, 32901e04c3fSmrg other->relocs, offset); 33001e04c3fSmrg if (result != VK_SUCCESS) { 33101e04c3fSmrg anv_batch_set_error(batch, result); 33201e04c3fSmrg return; 33301e04c3fSmrg } 33401e04c3fSmrg 33501e04c3fSmrg batch->next += size; 33601e04c3fSmrg} 33701e04c3fSmrg 33801e04c3fSmrg/*-----------------------------------------------------------------------* 33901e04c3fSmrg * Functions related to anv_batch_bo 34001e04c3fSmrg *-----------------------------------------------------------------------*/ 34101e04c3fSmrg 34201e04c3fSmrgstatic VkResult 34301e04c3fSmrganv_batch_bo_create(struct anv_cmd_buffer *cmd_buffer, 3447ec681f3Smrg uint32_t size, 34501e04c3fSmrg struct anv_batch_bo **bbo_out) 34601e04c3fSmrg{ 34701e04c3fSmrg VkResult result; 34801e04c3fSmrg 34901e04c3fSmrg struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), 
35001e04c3fSmrg 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 35101e04c3fSmrg if (bbo == NULL) 3527ec681f3Smrg return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); 35301e04c3fSmrg 3547ec681f3Smrg result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, 3557ec681f3Smrg size, &bbo->bo); 35601e04c3fSmrg if (result != VK_SUCCESS) 35701e04c3fSmrg goto fail_alloc; 35801e04c3fSmrg 35901e04c3fSmrg result = anv_reloc_list_init(&bbo->relocs, &cmd_buffer->pool->alloc); 36001e04c3fSmrg if (result != VK_SUCCESS) 36101e04c3fSmrg goto fail_bo_alloc; 36201e04c3fSmrg 36301e04c3fSmrg *bbo_out = bbo; 36401e04c3fSmrg 36501e04c3fSmrg return VK_SUCCESS; 36601e04c3fSmrg 36701e04c3fSmrg fail_bo_alloc: 3687ec681f3Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); 36901e04c3fSmrg fail_alloc: 37001e04c3fSmrg vk_free(&cmd_buffer->pool->alloc, bbo); 37101e04c3fSmrg 37201e04c3fSmrg return result; 37301e04c3fSmrg} 37401e04c3fSmrg 37501e04c3fSmrgstatic VkResult 37601e04c3fSmrganv_batch_bo_clone(struct anv_cmd_buffer *cmd_buffer, 37701e04c3fSmrg const struct anv_batch_bo *other_bbo, 37801e04c3fSmrg struct anv_batch_bo **bbo_out) 37901e04c3fSmrg{ 38001e04c3fSmrg VkResult result; 38101e04c3fSmrg 38201e04c3fSmrg struct anv_batch_bo *bbo = vk_alloc(&cmd_buffer->pool->alloc, sizeof(*bbo), 38301e04c3fSmrg 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 38401e04c3fSmrg if (bbo == NULL) 3857ec681f3Smrg return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); 38601e04c3fSmrg 3877ec681f3Smrg result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, 3887ec681f3Smrg other_bbo->bo->size, &bbo->bo); 38901e04c3fSmrg if (result != VK_SUCCESS) 39001e04c3fSmrg goto fail_alloc; 39101e04c3fSmrg 39201e04c3fSmrg result = anv_reloc_list_init_clone(&bbo->relocs, &cmd_buffer->pool->alloc, 39301e04c3fSmrg &other_bbo->relocs); 39401e04c3fSmrg if (result != VK_SUCCESS) 39501e04c3fSmrg goto fail_bo_alloc; 39601e04c3fSmrg 39701e04c3fSmrg bbo->length = other_bbo->length; 3987ec681f3Smrg memcpy(bbo->bo->map, 
other_bbo->bo->map, other_bbo->length); 39901e04c3fSmrg *bbo_out = bbo; 40001e04c3fSmrg 40101e04c3fSmrg return VK_SUCCESS; 40201e04c3fSmrg 40301e04c3fSmrg fail_bo_alloc: 4047ec681f3Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); 40501e04c3fSmrg fail_alloc: 40601e04c3fSmrg vk_free(&cmd_buffer->pool->alloc, bbo); 40701e04c3fSmrg 40801e04c3fSmrg return result; 40901e04c3fSmrg} 41001e04c3fSmrg 41101e04c3fSmrgstatic void 41201e04c3fSmrganv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch, 41301e04c3fSmrg size_t batch_padding) 41401e04c3fSmrg{ 4157ec681f3Smrg anv_batch_set_storage(batch, (struct anv_address) { .bo = bbo->bo, }, 4167ec681f3Smrg bbo->bo->map, bbo->bo->size - batch_padding); 41701e04c3fSmrg batch->relocs = &bbo->relocs; 4187ec681f3Smrg anv_reloc_list_clear(&bbo->relocs); 41901e04c3fSmrg} 42001e04c3fSmrg 42101e04c3fSmrgstatic void 42201e04c3fSmrganv_batch_bo_continue(struct anv_batch_bo *bbo, struct anv_batch *batch, 42301e04c3fSmrg size_t batch_padding) 42401e04c3fSmrg{ 4257ec681f3Smrg batch->start_addr = (struct anv_address) { .bo = bbo->bo, }; 4267ec681f3Smrg batch->start = bbo->bo->map; 4277ec681f3Smrg batch->next = bbo->bo->map + bbo->length; 4287ec681f3Smrg batch->end = bbo->bo->map + bbo->bo->size - batch_padding; 42901e04c3fSmrg batch->relocs = &bbo->relocs; 43001e04c3fSmrg} 43101e04c3fSmrg 43201e04c3fSmrgstatic void 43301e04c3fSmrganv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch) 43401e04c3fSmrg{ 4357ec681f3Smrg assert(batch->start == bbo->bo->map); 43601e04c3fSmrg bbo->length = batch->next - batch->start; 43701e04c3fSmrg VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length)); 43801e04c3fSmrg} 43901e04c3fSmrg 44001e04c3fSmrgstatic VkResult 44101e04c3fSmrganv_batch_bo_grow(struct anv_cmd_buffer *cmd_buffer, struct anv_batch_bo *bbo, 44201e04c3fSmrg struct anv_batch *batch, size_t aditional, 44301e04c3fSmrg size_t batch_padding) 44401e04c3fSmrg{ 4457ec681f3Smrg assert(batch->start == 
bbo->bo->map); 44601e04c3fSmrg bbo->length = batch->next - batch->start; 44701e04c3fSmrg 4487ec681f3Smrg size_t new_size = bbo->bo->size; 44901e04c3fSmrg while (new_size <= bbo->length + aditional + batch_padding) 45001e04c3fSmrg new_size *= 2; 45101e04c3fSmrg 4527ec681f3Smrg if (new_size == bbo->bo->size) 45301e04c3fSmrg return VK_SUCCESS; 45401e04c3fSmrg 4557ec681f3Smrg struct anv_bo *new_bo; 45601e04c3fSmrg VkResult result = anv_bo_pool_alloc(&cmd_buffer->device->batch_bo_pool, 4577ec681f3Smrg new_size, &new_bo); 45801e04c3fSmrg if (result != VK_SUCCESS) 45901e04c3fSmrg return result; 46001e04c3fSmrg 4617ec681f3Smrg memcpy(new_bo->map, bbo->bo->map, bbo->length); 46201e04c3fSmrg 4637ec681f3Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); 46401e04c3fSmrg 46501e04c3fSmrg bbo->bo = new_bo; 46601e04c3fSmrg anv_batch_bo_continue(bbo, batch, batch_padding); 46701e04c3fSmrg 46801e04c3fSmrg return VK_SUCCESS; 46901e04c3fSmrg} 47001e04c3fSmrg 47101e04c3fSmrgstatic void 47201e04c3fSmrganv_batch_bo_link(struct anv_cmd_buffer *cmd_buffer, 47301e04c3fSmrg struct anv_batch_bo *prev_bbo, 47401e04c3fSmrg struct anv_batch_bo *next_bbo, 47501e04c3fSmrg uint32_t next_bbo_offset) 47601e04c3fSmrg{ 4777ec681f3Smrg const uint32_t bb_start_offset = 4787ec681f3Smrg prev_bbo->length - GFX8_MI_BATCH_BUFFER_START_length * 4; 4797ec681f3Smrg ASSERTED const uint32_t *bb_start = prev_bbo->bo->map + bb_start_offset; 48001e04c3fSmrg 48101e04c3fSmrg /* Make sure we're looking at a MI_BATCH_BUFFER_START */ 48201e04c3fSmrg assert(((*bb_start >> 29) & 0x07) == 0); 48301e04c3fSmrg assert(((*bb_start >> 23) & 0x3f) == 49); 48401e04c3fSmrg 4857ec681f3Smrg if (cmd_buffer->device->physical->use_softpin) { 4867ec681f3Smrg assert(prev_bbo->bo->flags & EXEC_OBJECT_PINNED); 4877ec681f3Smrg assert(next_bbo->bo->flags & EXEC_OBJECT_PINNED); 48801e04c3fSmrg 48901e04c3fSmrg write_reloc(cmd_buffer->device, 4907ec681f3Smrg prev_bbo->bo->map + bb_start_offset + 4, 4917ec681f3Smrg 
next_bbo->bo->offset + next_bbo_offset, true); 49201e04c3fSmrg } else { 49301e04c3fSmrg uint32_t reloc_idx = prev_bbo->relocs.num_relocs - 1; 49401e04c3fSmrg assert(prev_bbo->relocs.relocs[reloc_idx].offset == bb_start_offset + 4); 49501e04c3fSmrg 4967ec681f3Smrg prev_bbo->relocs.reloc_bos[reloc_idx] = next_bbo->bo; 49701e04c3fSmrg prev_bbo->relocs.relocs[reloc_idx].delta = next_bbo_offset; 49801e04c3fSmrg 49901e04c3fSmrg /* Use a bogus presumed offset to force a relocation */ 50001e04c3fSmrg prev_bbo->relocs.relocs[reloc_idx].presumed_offset = -1; 50101e04c3fSmrg } 50201e04c3fSmrg} 50301e04c3fSmrg 50401e04c3fSmrgstatic void 50501e04c3fSmrganv_batch_bo_destroy(struct anv_batch_bo *bbo, 50601e04c3fSmrg struct anv_cmd_buffer *cmd_buffer) 50701e04c3fSmrg{ 50801e04c3fSmrg anv_reloc_list_finish(&bbo->relocs, &cmd_buffer->pool->alloc); 5097ec681f3Smrg anv_bo_pool_free(&cmd_buffer->device->batch_bo_pool, bbo->bo); 51001e04c3fSmrg vk_free(&cmd_buffer->pool->alloc, bbo); 51101e04c3fSmrg} 51201e04c3fSmrg 51301e04c3fSmrgstatic VkResult 51401e04c3fSmrganv_batch_bo_list_clone(const struct list_head *list, 51501e04c3fSmrg struct anv_cmd_buffer *cmd_buffer, 51601e04c3fSmrg struct list_head *new_list) 51701e04c3fSmrg{ 51801e04c3fSmrg VkResult result = VK_SUCCESS; 51901e04c3fSmrg 52001e04c3fSmrg list_inithead(new_list); 52101e04c3fSmrg 52201e04c3fSmrg struct anv_batch_bo *prev_bbo = NULL; 52301e04c3fSmrg list_for_each_entry(struct anv_batch_bo, bbo, list, link) { 52401e04c3fSmrg struct anv_batch_bo *new_bbo = NULL; 52501e04c3fSmrg result = anv_batch_bo_clone(cmd_buffer, bbo, &new_bbo); 52601e04c3fSmrg if (result != VK_SUCCESS) 52701e04c3fSmrg break; 52801e04c3fSmrg list_addtail(&new_bbo->link, new_list); 52901e04c3fSmrg 53001e04c3fSmrg if (prev_bbo) 53101e04c3fSmrg anv_batch_bo_link(cmd_buffer, prev_bbo, new_bbo, 0); 53201e04c3fSmrg 53301e04c3fSmrg prev_bbo = new_bbo; 53401e04c3fSmrg } 53501e04c3fSmrg 53601e04c3fSmrg if (result != VK_SUCCESS) { 5377ec681f3Smrg 
list_for_each_entry_safe(struct anv_batch_bo, bbo, new_list, link) { 5387ec681f3Smrg list_del(&bbo->link); 53901e04c3fSmrg anv_batch_bo_destroy(bbo, cmd_buffer); 5407ec681f3Smrg } 54101e04c3fSmrg } 54201e04c3fSmrg 54301e04c3fSmrg return result; 54401e04c3fSmrg} 54501e04c3fSmrg 54601e04c3fSmrg/*-----------------------------------------------------------------------* 54701e04c3fSmrg * Functions related to anv_batch_bo 54801e04c3fSmrg *-----------------------------------------------------------------------*/ 54901e04c3fSmrg 55001e04c3fSmrgstatic struct anv_batch_bo * 55101e04c3fSmrganv_cmd_buffer_current_batch_bo(struct anv_cmd_buffer *cmd_buffer) 55201e04c3fSmrg{ 55301e04c3fSmrg return LIST_ENTRY(struct anv_batch_bo, cmd_buffer->batch_bos.prev, link); 55401e04c3fSmrg} 55501e04c3fSmrg 55601e04c3fSmrgstruct anv_address 55701e04c3fSmrganv_cmd_buffer_surface_base_address(struct anv_cmd_buffer *cmd_buffer) 55801e04c3fSmrg{ 5597ec681f3Smrg struct anv_state_pool *pool = anv_binding_table_pool(cmd_buffer->device); 56001e04c3fSmrg struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states); 56101e04c3fSmrg return (struct anv_address) { 5627ec681f3Smrg .bo = pool->block_pool.bo, 5637ec681f3Smrg .offset = bt_block->offset - pool->start_offset, 56401e04c3fSmrg }; 56501e04c3fSmrg} 56601e04c3fSmrg 56701e04c3fSmrgstatic void 56801e04c3fSmrgemit_batch_buffer_start(struct anv_cmd_buffer *cmd_buffer, 56901e04c3fSmrg struct anv_bo *bo, uint32_t offset) 57001e04c3fSmrg{ 5717ec681f3Smrg /* In gfx8+ the address field grew to two dwords to accomodate 48 bit 5727ec681f3Smrg * offsets. The high 16 bits are in the last dword, so we can use the gfx8 57301e04c3fSmrg * version in either case, as long as we set the instruction length in the 57401e04c3fSmrg * header accordingly. This means that we always emit three dwords here 57501e04c3fSmrg * and all the padding and adjustment we do in this file works for all 57601e04c3fSmrg * gens. 
57701e04c3fSmrg */ 57801e04c3fSmrg 5797ec681f3Smrg#define GFX7_MI_BATCH_BUFFER_START_length 2 5807ec681f3Smrg#define GFX7_MI_BATCH_BUFFER_START_length_bias 2 58101e04c3fSmrg 5827ec681f3Smrg const uint32_t gfx7_length = 5837ec681f3Smrg GFX7_MI_BATCH_BUFFER_START_length - GFX7_MI_BATCH_BUFFER_START_length_bias; 5847ec681f3Smrg const uint32_t gfx8_length = 5857ec681f3Smrg GFX8_MI_BATCH_BUFFER_START_length - GFX8_MI_BATCH_BUFFER_START_length_bias; 58601e04c3fSmrg 5877ec681f3Smrg anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_START, bbs) { 5887ec681f3Smrg bbs.DWordLength = cmd_buffer->device->info.ver < 8 ? 5897ec681f3Smrg gfx7_length : gfx8_length; 59001e04c3fSmrg bbs.SecondLevelBatchBuffer = Firstlevelbatch; 59101e04c3fSmrg bbs.AddressSpaceIndicator = ASI_PPGTT; 59201e04c3fSmrg bbs.BatchBufferStartAddress = (struct anv_address) { bo, offset }; 59301e04c3fSmrg } 59401e04c3fSmrg} 59501e04c3fSmrg 59601e04c3fSmrgstatic void 59701e04c3fSmrgcmd_buffer_chain_to_batch_bo(struct anv_cmd_buffer *cmd_buffer, 59801e04c3fSmrg struct anv_batch_bo *bbo) 59901e04c3fSmrg{ 60001e04c3fSmrg struct anv_batch *batch = &cmd_buffer->batch; 60101e04c3fSmrg struct anv_batch_bo *current_bbo = 60201e04c3fSmrg anv_cmd_buffer_current_batch_bo(cmd_buffer); 60301e04c3fSmrg 60401e04c3fSmrg /* We set the end of the batch a little short so we would be sure we 60501e04c3fSmrg * have room for the chaining command. Since we're about to emit the 60601e04c3fSmrg * chaining command, let's set it back where it should go. 
60701e04c3fSmrg */ 6087ec681f3Smrg batch->end += GFX8_MI_BATCH_BUFFER_START_length * 4; 6097ec681f3Smrg assert(batch->end == current_bbo->bo->map + current_bbo->bo->size); 61001e04c3fSmrg 6117ec681f3Smrg emit_batch_buffer_start(cmd_buffer, bbo->bo, 0); 61201e04c3fSmrg 61301e04c3fSmrg anv_batch_bo_finish(current_bbo, batch); 61401e04c3fSmrg} 61501e04c3fSmrg 6167ec681f3Smrgstatic void 6177ec681f3Smrganv_cmd_buffer_record_chain_submit(struct anv_cmd_buffer *cmd_buffer_from, 6187ec681f3Smrg struct anv_cmd_buffer *cmd_buffer_to) 6197ec681f3Smrg{ 6207ec681f3Smrg assert(cmd_buffer_from->device->physical->use_softpin); 6217ec681f3Smrg 6227ec681f3Smrg uint32_t *bb_start = cmd_buffer_from->batch_end; 6237ec681f3Smrg 6247ec681f3Smrg struct anv_batch_bo *last_bbo = 6257ec681f3Smrg list_last_entry(&cmd_buffer_from->batch_bos, struct anv_batch_bo, link); 6267ec681f3Smrg struct anv_batch_bo *first_bbo = 6277ec681f3Smrg list_first_entry(&cmd_buffer_to->batch_bos, struct anv_batch_bo, link); 6287ec681f3Smrg 6297ec681f3Smrg struct GFX8_MI_BATCH_BUFFER_START gen_bb_start = { 6307ec681f3Smrg __anv_cmd_header(GFX8_MI_BATCH_BUFFER_START), 6317ec681f3Smrg .SecondLevelBatchBuffer = Firstlevelbatch, 6327ec681f3Smrg .AddressSpaceIndicator = ASI_PPGTT, 6337ec681f3Smrg .BatchBufferStartAddress = (struct anv_address) { first_bbo->bo, 0 }, 6347ec681f3Smrg }; 6357ec681f3Smrg struct anv_batch local_batch = { 6367ec681f3Smrg .start = last_bbo->bo->map, 6377ec681f3Smrg .end = last_bbo->bo->map + last_bbo->bo->size, 6387ec681f3Smrg .relocs = &last_bbo->relocs, 6397ec681f3Smrg .alloc = &cmd_buffer_from->pool->alloc, 6407ec681f3Smrg }; 6417ec681f3Smrg 6427ec681f3Smrg __anv_cmd_pack(GFX8_MI_BATCH_BUFFER_START)(&local_batch, bb_start, &gen_bb_start); 6437ec681f3Smrg 6447ec681f3Smrg last_bbo->chained = true; 6457ec681f3Smrg} 6467ec681f3Smrg 6477ec681f3Smrgstatic void 6487ec681f3Smrganv_cmd_buffer_record_end_submit(struct anv_cmd_buffer *cmd_buffer) 6497ec681f3Smrg{ 6507ec681f3Smrg 
assert(cmd_buffer->device->physical->use_softpin); 6517ec681f3Smrg 6527ec681f3Smrg struct anv_batch_bo *last_bbo = 6537ec681f3Smrg list_last_entry(&cmd_buffer->batch_bos, struct anv_batch_bo, link); 6547ec681f3Smrg last_bbo->chained = false; 6557ec681f3Smrg 6567ec681f3Smrg uint32_t *batch = cmd_buffer->batch_end; 6577ec681f3Smrg anv_pack_struct(batch, GFX8_MI_BATCH_BUFFER_END, 6587ec681f3Smrg __anv_cmd_header(GFX8_MI_BATCH_BUFFER_END)); 6597ec681f3Smrg} 6607ec681f3Smrg 66101e04c3fSmrgstatic VkResult 66201e04c3fSmrganv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data) 66301e04c3fSmrg{ 66401e04c3fSmrg struct anv_cmd_buffer *cmd_buffer = _data; 66501e04c3fSmrg struct anv_batch_bo *new_bbo; 6667ec681f3Smrg /* Cap reallocation to chunk. */ 6677ec681f3Smrg uint32_t alloc_size = MIN2(cmd_buffer->total_batch_size, 6687ec681f3Smrg ANV_MAX_CMD_BUFFER_BATCH_SIZE); 66901e04c3fSmrg 6707ec681f3Smrg VkResult result = anv_batch_bo_create(cmd_buffer, alloc_size, &new_bbo); 67101e04c3fSmrg if (result != VK_SUCCESS) 67201e04c3fSmrg return result; 67301e04c3fSmrg 6747ec681f3Smrg cmd_buffer->total_batch_size += alloc_size; 6757ec681f3Smrg 67601e04c3fSmrg struct anv_batch_bo **seen_bbo = u_vector_add(&cmd_buffer->seen_bbos); 67701e04c3fSmrg if (seen_bbo == NULL) { 67801e04c3fSmrg anv_batch_bo_destroy(new_bbo, cmd_buffer); 6797ec681f3Smrg return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY); 68001e04c3fSmrg } 68101e04c3fSmrg *seen_bbo = new_bbo; 68201e04c3fSmrg 68301e04c3fSmrg cmd_buffer_chain_to_batch_bo(cmd_buffer, new_bbo); 68401e04c3fSmrg 68501e04c3fSmrg list_addtail(&new_bbo->link, &cmd_buffer->batch_bos); 68601e04c3fSmrg 6877ec681f3Smrg anv_batch_bo_start(new_bbo, batch, GFX8_MI_BATCH_BUFFER_START_length * 4); 68801e04c3fSmrg 68901e04c3fSmrg return VK_SUCCESS; 69001e04c3fSmrg} 69101e04c3fSmrg 69201e04c3fSmrgstatic VkResult 69301e04c3fSmrganv_cmd_buffer_grow_batch(struct anv_batch *batch, void *_data) 69401e04c3fSmrg{ 69501e04c3fSmrg struct anv_cmd_buffer *cmd_buffer 
= _data; 69601e04c3fSmrg struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer); 69701e04c3fSmrg 69801e04c3fSmrg anv_batch_bo_grow(cmd_buffer, bbo, &cmd_buffer->batch, 4096, 6997ec681f3Smrg GFX8_MI_BATCH_BUFFER_START_length * 4); 70001e04c3fSmrg 70101e04c3fSmrg return VK_SUCCESS; 70201e04c3fSmrg} 70301e04c3fSmrg 70401e04c3fSmrg/** Allocate a binding table 70501e04c3fSmrg * 70601e04c3fSmrg * This function allocates a binding table. This is a bit more complicated 70701e04c3fSmrg * than one would think due to a combination of Vulkan driver design and some 70801e04c3fSmrg * unfortunate hardware restrictions. 70901e04c3fSmrg * 71001e04c3fSmrg * The 3DSTATE_BINDING_TABLE_POINTERS_* packets only have a 16-bit field for 71101e04c3fSmrg * the binding table pointer which means that all binding tables need to live 71201e04c3fSmrg * in the bottom 64k of surface state base address. The way the GL driver has 71301e04c3fSmrg * classically dealt with this restriction is to emit all surface states 71401e04c3fSmrg * on-the-fly into the batch and have a batch buffer smaller than 64k. This 71501e04c3fSmrg * isn't really an option in Vulkan for a couple of reasons: 71601e04c3fSmrg * 71701e04c3fSmrg * 1) In Vulkan, we have growing (or chaining) batches so surface states have 71801e04c3fSmrg * to live in their own buffer and we have to be able to re-emit 71901e04c3fSmrg * STATE_BASE_ADDRESS as needed which requires a full pipeline stall. In 72001e04c3fSmrg * order to avoid emitting STATE_BASE_ADDRESS any more often than needed 72101e04c3fSmrg * (it's not that hard to hit 64k of just binding tables), we allocate 72201e04c3fSmrg * surface state objects up-front when VkImageView is created. In order 72301e04c3fSmrg * for this to work, surface state objects need to be allocated from a 72401e04c3fSmrg * global buffer. 
 *
 *  2) We tried to design the surface state system in such a way that it's
 *     already ready for bindless texturing.  The way bindless texturing works
 *     on our hardware is that you have a big pool of surface state objects
 *     (with its own state base address) and the bindless handles are simply
 *     offsets into that pool.  With the architecture we chose, we already
 *     have that pool and it's exactly the same pool that we use for regular
 *     surface states so we should already be ready for bindless.
 *
 *  3) For render targets, we need to be able to fill out the surface states
 *     later in vkBeginRenderPass so that we can assign clear colors
 *     correctly.  One way to do this would be to just create the surface
 *     state data and then repeatedly copy it into the surface state BO every
 *     time we have to re-emit STATE_BASE_ADDRESS.  While this works, it's
 *     rather annoying; being able to allocate them up-front and re-use them
 *     for the entire render pass is much nicer.
 *
 * While none of these are technically blockers for emitting state on the fly
 * like we do in GL, the ability to have a single surface state pool
 * simplifies things greatly.  Unfortunately, it comes at a cost...
 *
 * Because of the 64k limitation of 3DSTATE_BINDING_TABLE_POINTERS_*, we can't
 * place the binding tables just anywhere in surface state base address.
 * Because 64k isn't a whole lot of space, we can't simply restrict the
 * surface state buffer to 64k, we have to be more clever.  The solution we've
 * chosen is to have a block pool with a maximum size of 2G that starts at
 * zero and grows in both directions.  All surface states are allocated from
 * the top of the pool (positive offsets) and we allocate blocks (< 64k) of
 * binding tables from the bottom of the pool (negative offsets).  Every time
 * we allocate a new binding table block, we set surface state base address to
 * point to the bottom of the binding table block.  This way all of the
 * binding tables in the block are in the bottom 64k of surface state base
 * address.  When we fill out the binding table, we add the distance between
 * the bottom of our binding table block and zero of the block pool to the
 * surface state offsets so that they are correct relative to out new surface
 * state base address at the bottom of the binding table block.
 *
 * \see adjust_relocations_from_state_pool()
 * \see adjust_relocations_to_state_pool()
 *
 * \param[in]  entries        The number of surface state entries the binding
 *                            table should be able to hold.
 *
 * \param[out] state_offset   The offset from surface state base address
 *                            where the surface states live.  This must be
 *                            added to the surface state offset when it is
 *                            written into the binding table entry.
 *
 * \return An anv_state representing the binding table
 */
struct anv_state
anv_cmd_buffer_alloc_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t entries, uint32_t *state_offset)
{
   struct anv_state *bt_block = u_vector_head(&cmd_buffer->bt_block_states);

   /* Each binding table entry is a 4-byte surface state offset; pad the
    * table size up to a multiple of 32 bytes.
    */
   uint32_t bt_size = align_u32(entries * 4, 32);

   /* Suballocate from the rolling bt_next state; a zeroed anv_state is the
    * out-of-space signal to the caller.
    */
   struct anv_state state = cmd_buffer->bt_next;
   if (bt_size > state.alloc_size)
      return (struct anv_state) { 0 };

   state.alloc_size = bt_size;
   cmd_buffer->bt_next.offset += bt_size;
   cmd_buffer->bt_next.map += bt_size;
   cmd_buffer->bt_next.alloc_size -= bt_size;

   /* Binding table blocks come from the bottom (negative offsets) of the
    * block pool, so the distance back to pool-zero is -offset.
    */
   assert(bt_block->offset < 0);
   *state_offset = -bt_block->offset;

   return state;
}

/* Allocate one surface state from the command buffer's surface state
 * stream, sized and aligned per the ISL device parameters.
 */
struct anv_state
anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer)
{
   struct isl_device *isl_dev = &cmd_buffer->device->isl_dev;
   return anv_state_stream_alloc(&cmd_buffer->surface_state_stream,
                                 isl_dev->ss.size, isl_dev->ss.align);
}

/* Allocate caller-sized dynamic state from the command buffer's dynamic
 * state stream.
 */
struct anv_state
anv_cmd_buffer_alloc_dynamic_state(struct anv_cmd_buffer *cmd_buffer,
                                   uint32_t size, uint32_t alignment)
{
   return anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
                                 size, alignment);
}

/* Switch the command buffer to a fresh binding table block from the
 * device's binding table pool and reset the rolling bt_next allocator to
 * its start.
 */
VkResult
anv_cmd_buffer_new_binding_table_block(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_state *bt_block = u_vector_add(&cmd_buffer->bt_block_states);
   if (bt_block == NULL) {
      anv_batch_set_error(&cmd_buffer->batch, VK_ERROR_OUT_OF_HOST_MEMORY);
      return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *bt_block = anv_binding_table_pool_alloc(cmd_buffer->device);

   /* The bt_next state is a rolling state (we update it as we suballocate
    * from it) which is relative to the start of the binding table block.
    */
   cmd_buffer->bt_next = *bt_block;
   cmd_buffer->bt_next.offset = 0;

   return VK_SUCCESS;
}

/* Set up all per-command-buffer batch state: the first batch BO, the
 * batch's allocator and extend callback (chain vs. grow depending on
 * device->can_chain_batches), the seen_bbos and bt_block_states vectors,
 * the surface reloc list, and the first binding table block.  Unwinds with
 * a goto cleanup chain on failure.
 */
VkResult
anv_cmd_buffer_init_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo;
   VkResult result;

   list_inithead(&cmd_buffer->batch_bos);

   cmd_buffer->total_batch_size = ANV_MIN_CMD_BUFFER_BATCH_SIZE;

   result = anv_batch_bo_create(cmd_buffer,
                                cmd_buffer->total_batch_size,
                                &batch_bo);
   if (result != VK_SUCCESS)
      return result;

   list_addtail(&batch_bo->link, &cmd_buffer->batch_bos);

   cmd_buffer->batch.alloc = &cmd_buffer->pool->alloc;
   cmd_buffer->batch.user_data = cmd_buffer;

   /* When the batch runs out of space, either chain to a new BO or grow the
    * existing one in place, depending on what the device supports.
    */
   if (cmd_buffer->device->can_chain_batches) {
      cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
   } else {
      cmd_buffer->batch.extend_cb = anv_cmd_buffer_grow_batch;
   }

   anv_batch_bo_start(batch_bo, &cmd_buffer->batch,
                      GFX8_MI_BATCH_BUFFER_START_length * 4);

   int success = u_vector_init_pow2(&cmd_buffer->seen_bbos, 8,
                                    sizeof(struct anv_bo *));
   if (!success)
      goto fail_batch_bo;

   /* NOTE(review): this u_vector_add() result is dereferenced without a
    * NULL check; an 8-element pow2 vector freshly initialized presumably
    * cannot fail its first add — confirm against u_vector semantics.
    */
   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos) = batch_bo;

   success = u_vector_init(&cmd_buffer->bt_block_states, 8,
                           sizeof(struct anv_state));
   if (!success)
      goto fail_seen_bbos;

   result = anv_reloc_list_init(&cmd_buffer->surface_relocs,
                                &cmd_buffer->pool->alloc);
   if (result != VK_SUCCESS)
      goto fail_bt_blocks;
   cmd_buffer->last_ss_pool_center = 0;

   result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
   if (result != VK_SUCCESS)
      goto fail_bt_blocks;

   return VK_SUCCESS;

 fail_bt_blocks:
   u_vector_finish(&cmd_buffer->bt_block_states);
 fail_seen_bbos:
   u_vector_finish(&cmd_buffer->seen_bbos);
 fail_batch_bo:
   anv_batch_bo_destroy(batch_bo, cmd_buffer);

   return result;
}

/* Tear down everything anv_cmd_buffer_init_batch_bo_chain created: return
 * all binding table blocks to the pool, free the reloc list and tracking
 * vectors, and destroy every batch BO in the chain.
 */
void
anv_cmd_buffer_fini_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_state *bt_block;
   u_vector_foreach(bt_block, &cmd_buffer->bt_block_states)
      anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
   u_vector_finish(&cmd_buffer->bt_block_states);

   anv_reloc_list_finish(&cmd_buffer->surface_relocs, &cmd_buffer->pool->alloc);

   u_vector_finish(&cmd_buffer->seen_bbos);

   /* Destroy all of the batch buffers */
   list_for_each_entry_safe(struct
anv_batch_bo, bbo,
                            &cmd_buffer->batch_bos, link) {
      list_del(&bbo->link);
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }
}

/* Return the command buffer to its freshly-initialized state without
 * reallocating: keep only the first batch BO and the first binding table
 * block, clear the surface relocs, and reset the seen-BO tracking.
 */
void
anv_cmd_buffer_reset_batch_bo_chain(struct anv_cmd_buffer *cmd_buffer)
{
   /* Delete all but the first batch bo */
   assert(!list_is_empty(&cmd_buffer->batch_bos));
   while (cmd_buffer->batch_bos.next != cmd_buffer->batch_bos.prev) {
      struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
      list_del(&bbo->link);
      anv_batch_bo_destroy(bbo, cmd_buffer);
   }
   assert(!list_is_empty(&cmd_buffer->batch_bos));

   anv_batch_bo_start(anv_cmd_buffer_current_batch_bo(cmd_buffer),
                      &cmd_buffer->batch,
                      GFX8_MI_BATCH_BUFFER_START_length * 4);

   /* Free all but the first binding table block back to the pool. */
   while (u_vector_length(&cmd_buffer->bt_block_states) > 1) {
      struct anv_state *bt_block = u_vector_remove(&cmd_buffer->bt_block_states);
      anv_binding_table_pool_free(cmd_buffer->device, *bt_block);
   }
   assert(u_vector_length(&cmd_buffer->bt_block_states) == 1);
   cmd_buffer->bt_next = *(struct anv_state *)u_vector_head(&cmd_buffer->bt_block_states);
   cmd_buffer->bt_next.offset = 0;

   anv_reloc_list_clear(&cmd_buffer->surface_relocs);
   cmd_buffer->last_ss_pool_center = 0;

   /* Reset the list of seen buffers */
   cmd_buffer->seen_bbos.head = 0;
   cmd_buffer->seen_bbos.tail = 0;

   struct anv_batch_bo *first_bbo = anv_cmd_buffer_current_batch_bo(cmd_buffer);

   /* NOTE(review): u_vector_add() result is dereferenced without a NULL
    * check; after the head/tail reset above the vector's storage is still
    * allocated, so this presumably cannot fail — confirm.
    */
   *(struct anv_batch_bo **)u_vector_add(&cmd_buffer->seen_bbos)
      = first_bbo;

   assert(!cmd_buffer->device->can_chain_batches ||
          first_bbo->bo->size == ANV_MIN_CMD_BUFFER_BATCH_SIZE);
   cmd_buffer->total_batch_size = first_bbo->bo->size;
}

/* Finish the command buffer's batch: terminate a primary with
 * MI_BATCH_BUFFER_END (or a chainable jump placeholder) and, for a
 * secondary, statically pick the vkCmdExecuteCommands execution mode.
 */
void
anv_cmd_buffer_end_batch_buffer(struct anv_cmd_buffer *cmd_buffer)
{
   struct anv_batch_bo *batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);

   if (cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY) {
      /* When we start a batch buffer, we subtract a certain amount of
       * padding from the end to ensure that we always have room to emit a
       * BATCH_BUFFER_START to chain to the next BO.  We need to remove
       * that padding before we end the batch; otherwise, we may end up
       * with our BATCH_BUFFER_END in another BO.
       */
      cmd_buffer->batch.end += GFX8_MI_BATCH_BUFFER_START_length * 4;
      assert(cmd_buffer->batch.start == batch_bo->bo->map);
      assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);

      /* Save end instruction location to override it later. */
      cmd_buffer->batch_end = cmd_buffer->batch.next;

      /* If we can chain this command buffer to another one, leave some place
       * for the jump instruction.
       */
      batch_bo->chained = anv_cmd_buffer_is_chainable(cmd_buffer);
      if (batch_bo->chained)
         emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0);
      else
         anv_batch_emit(&cmd_buffer->batch, GFX8_MI_BATCH_BUFFER_END, bbe);

      /* Round batch up to an even number of dwords.
       */
      /* A dword is 4 bytes, so bit 2 of the byte length set means the batch
       * currently holds an odd number of dwords; pad with one MI_NOOP.
       */
      if ((cmd_buffer->batch.next - cmd_buffer->batch.start) & 4)
         anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);

      cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_PRIMARY;
   } else {
      assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_SECONDARY);
      /* If this is a secondary command buffer, we need to determine the
       * mode in which it will be executed with vkExecuteCommands.  We
       * determine this statically here so that this stays in sync with the
       * actual ExecuteCommands implementation.
       */
      const uint32_t length = cmd_buffer->batch.next - cmd_buffer->batch.start;
      if (!cmd_buffer->device->can_chain_batches) {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT;
      } else if (cmd_buffer->device->physical->use_call_secondary) {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN;
         /* If the secondary command buffer begins & ends in the same BO and
          * its length is less than the length of CS prefetch, add some NOOP
          * instructions so the last MI_BATCH_BUFFER_START is outside the CS
          * prefetch.
          */
         if (cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) {
            const struct intel_device_info *devinfo = &cmd_buffer->device->info;
            /* Careful to have everything in signed integer. */
            int32_t prefetch_len = devinfo->cs_prefetch_size;
            int32_t batch_len =
               cmd_buffer->batch.next - cmd_buffer->batch.start;

            for (int32_t i = 0; i < (prefetch_len - batch_len); i += 4)
               anv_batch_emit(&cmd_buffer->batch, GFX8_MI_NOOP, noop);
         }

         /* Emit the return jump placeholder; its target address field will
          * be patched by the primary at execute time via return_addr.
          */
         void *jump_addr =
            anv_batch_emitn(&cmd_buffer->batch,
                            GFX8_MI_BATCH_BUFFER_START_length,
                            GFX8_MI_BATCH_BUFFER_START,
                            .AddressSpaceIndicator = ASI_PPGTT,
                            .SecondLevelBatchBuffer = Firstlevelbatch) +
            (GFX8_MI_BATCH_BUFFER_START_BatchBufferStartAddress_start / 8);
         cmd_buffer->return_addr = anv_batch_address(&cmd_buffer->batch, jump_addr);

         /* The emit above may have caused us to chain batch buffers which
          * would mean that batch_bo is no longer valid.
          */
         batch_bo = anv_cmd_buffer_current_batch_bo(cmd_buffer);
      } else if ((cmd_buffer->batch_bos.next == cmd_buffer->batch_bos.prev) &&
                 (length < ANV_MIN_CMD_BUFFER_BATCH_SIZE / 2)) {
         /* If the secondary has exactly one batch buffer in its list *and*
          * that batch buffer is less than half of the maximum size, we're
          * probably better off simply copying it into our batch.
          */
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_EMIT;
      } else if (!(cmd_buffer->usage_flags &
                   VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_CHAIN;

         /* In order to chain, we need this command buffer to contain an
          * MI_BATCH_BUFFER_START which will jump back to the calling batch.
          * It doesn't matter where it points now so long as it has a valid
          * relocation.  We'll adjust it later as part of the chaining
          * process.
          *
          * We set the end of the batch a little short so we would be sure we
          * have room for the chaining command.  Since we're about to emit the
          * chaining command, let's set it back where it should go.
          */
         cmd_buffer->batch.end += GFX8_MI_BATCH_BUFFER_START_length * 4;
         assert(cmd_buffer->batch.start == batch_bo->bo->map);
         assert(cmd_buffer->batch.end == batch_bo->bo->map + batch_bo->bo->size);

         emit_batch_buffer_start(cmd_buffer, batch_bo->bo, 0);
         assert(cmd_buffer->batch.start == batch_bo->bo->map);
      } else {
         cmd_buffer->exec_mode = ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN;
      }
   }

   anv_batch_bo_finish(batch_bo, &cmd_buffer->batch);
}

/* Append every batch BO in `list` to the command buffer's seen_bbos vector.
 *
 * Returns VK_ERROR_OUT_OF_HOST_MEMORY if the vector cannot grow.
 */
static VkResult
anv_cmd_buffer_add_seen_bbos(struct anv_cmd_buffer *cmd_buffer,
                             struct list_head *list)
{
   list_for_each_entry(struct anv_batch_bo, bbo, list, link) {
      struct anv_batch_bo **bbo_ptr = u_vector_add(&cmd_buffer->seen_bbos);
      if (bbo_ptr == NULL)
         return vk_error(cmd_buffer, VK_ERROR_OUT_OF_HOST_MEMORY);

      *bbo_ptr = bbo;
   }

   return VK_SUCCESS;
}

/* Record the execution of a secondary command buffer inside a primary,
 * dispatching on the exec_mode chosen in anv_cmd_buffer_end_batch_buffer.
 */
void
anv_cmd_buffer_add_secondary(struct anv_cmd_buffer *primary,
                             struct anv_cmd_buffer *secondary)
{
   anv_measure_add_secondary(primary, secondary);
   switch
(secondary->exec_mode) { 108901e04c3fSmrg case ANV_CMD_BUFFER_EXEC_MODE_EMIT: 109001e04c3fSmrg anv_batch_emit_batch(&primary->batch, &secondary->batch); 109101e04c3fSmrg break; 109201e04c3fSmrg case ANV_CMD_BUFFER_EXEC_MODE_GROW_AND_EMIT: { 109301e04c3fSmrg struct anv_batch_bo *bbo = anv_cmd_buffer_current_batch_bo(primary); 109401e04c3fSmrg unsigned length = secondary->batch.end - secondary->batch.start; 109501e04c3fSmrg anv_batch_bo_grow(primary, bbo, &primary->batch, length, 10967ec681f3Smrg GFX8_MI_BATCH_BUFFER_START_length * 4); 109701e04c3fSmrg anv_batch_emit_batch(&primary->batch, &secondary->batch); 109801e04c3fSmrg break; 109901e04c3fSmrg } 110001e04c3fSmrg case ANV_CMD_BUFFER_EXEC_MODE_CHAIN: { 110101e04c3fSmrg struct anv_batch_bo *first_bbo = 110201e04c3fSmrg list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); 110301e04c3fSmrg struct anv_batch_bo *last_bbo = 110401e04c3fSmrg list_last_entry(&secondary->batch_bos, struct anv_batch_bo, link); 110501e04c3fSmrg 11067ec681f3Smrg emit_batch_buffer_start(primary, first_bbo->bo, 0); 110701e04c3fSmrg 110801e04c3fSmrg struct anv_batch_bo *this_bbo = anv_cmd_buffer_current_batch_bo(primary); 11097ec681f3Smrg assert(primary->batch.start == this_bbo->bo->map); 111001e04c3fSmrg uint32_t offset = primary->batch.next - primary->batch.start; 111101e04c3fSmrg 111201e04c3fSmrg /* Make the tail of the secondary point back to right after the 111301e04c3fSmrg * MI_BATCH_BUFFER_START in the primary batch. 
111401e04c3fSmrg */ 111501e04c3fSmrg anv_batch_bo_link(primary, last_bbo, this_bbo, offset); 111601e04c3fSmrg 111701e04c3fSmrg anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); 111801e04c3fSmrg break; 111901e04c3fSmrg } 112001e04c3fSmrg case ANV_CMD_BUFFER_EXEC_MODE_COPY_AND_CHAIN: { 112101e04c3fSmrg struct list_head copy_list; 112201e04c3fSmrg VkResult result = anv_batch_bo_list_clone(&secondary->batch_bos, 112301e04c3fSmrg secondary, 112401e04c3fSmrg ©_list); 112501e04c3fSmrg if (result != VK_SUCCESS) 112601e04c3fSmrg return; /* FIXME */ 112701e04c3fSmrg 112801e04c3fSmrg anv_cmd_buffer_add_seen_bbos(primary, ©_list); 112901e04c3fSmrg 113001e04c3fSmrg struct anv_batch_bo *first_bbo = 113101e04c3fSmrg list_first_entry(©_list, struct anv_batch_bo, link); 113201e04c3fSmrg struct anv_batch_bo *last_bbo = 113301e04c3fSmrg list_last_entry(©_list, struct anv_batch_bo, link); 113401e04c3fSmrg 113501e04c3fSmrg cmd_buffer_chain_to_batch_bo(primary, first_bbo); 113601e04c3fSmrg 113701e04c3fSmrg list_splicetail(©_list, &primary->batch_bos); 113801e04c3fSmrg 113901e04c3fSmrg anv_batch_bo_continue(last_bbo, &primary->batch, 11407ec681f3Smrg GFX8_MI_BATCH_BUFFER_START_length * 4); 11417ec681f3Smrg break; 11427ec681f3Smrg } 11437ec681f3Smrg case ANV_CMD_BUFFER_EXEC_MODE_CALL_AND_RETURN: { 11447ec681f3Smrg struct anv_batch_bo *first_bbo = 11457ec681f3Smrg list_first_entry(&secondary->batch_bos, struct anv_batch_bo, link); 11467ec681f3Smrg 11477ec681f3Smrg uint64_t *write_return_addr = 11487ec681f3Smrg anv_batch_emitn(&primary->batch, 11497ec681f3Smrg GFX8_MI_STORE_DATA_IMM_length + 1 /* QWord write */, 11507ec681f3Smrg GFX8_MI_STORE_DATA_IMM, 11517ec681f3Smrg .Address = secondary->return_addr) 11527ec681f3Smrg + (GFX8_MI_STORE_DATA_IMM_ImmediateData_start / 8); 11537ec681f3Smrg 11547ec681f3Smrg emit_batch_buffer_start(primary, first_bbo->bo, 0); 11557ec681f3Smrg 11567ec681f3Smrg *write_return_addr = 11577ec681f3Smrg 
anv_address_physical(anv_batch_address(&primary->batch, 11587ec681f3Smrg primary->batch.next)); 11597ec681f3Smrg 11607ec681f3Smrg anv_cmd_buffer_add_seen_bbos(primary, &secondary->batch_bos); 116101e04c3fSmrg break; 116201e04c3fSmrg } 116301e04c3fSmrg default: 116401e04c3fSmrg assert(!"Invalid execution mode"); 116501e04c3fSmrg } 116601e04c3fSmrg 116701e04c3fSmrg anv_reloc_list_append(&primary->surface_relocs, &primary->pool->alloc, 116801e04c3fSmrg &secondary->surface_relocs, 0); 116901e04c3fSmrg} 117001e04c3fSmrg 117101e04c3fSmrgstruct anv_execbuf { 117201e04c3fSmrg struct drm_i915_gem_execbuffer2 execbuf; 117301e04c3fSmrg 11747ec681f3Smrg struct drm_i915_gem_execbuffer_ext_timeline_fences timeline_fences; 11757ec681f3Smrg 117601e04c3fSmrg struct drm_i915_gem_exec_object2 * objects; 117701e04c3fSmrg uint32_t bo_count; 117801e04c3fSmrg struct anv_bo ** bos; 117901e04c3fSmrg 118001e04c3fSmrg /* Allocated length of the 'objects' and 'bos' arrays */ 118101e04c3fSmrg uint32_t array_length; 118201e04c3fSmrg 11837ec681f3Smrg /* List of relocations for surface states, only used with platforms not 11847ec681f3Smrg * using softpin. 11857ec681f3Smrg */ 11867ec681f3Smrg void * surface_states_relocs; 11877ec681f3Smrg 11887ec681f3Smrg /* Indicates whether any of the command buffers have relocations. This 11897ec681f3Smrg * doesn't not necessarily mean we'll need the kernel to process them. It 11907ec681f3Smrg * might be that a previous execbuf has already placed things in the VMA 11917ec681f3Smrg * and we can make i915 skip the relocations. 
    */
   bool                                      has_relocs;

   const VkAllocationCallbacks *             alloc;
   VkSystemAllocationScope                   alloc_scope;

   int                                       perf_query_pass;
};

/* Zero-initialize an execbuf.  memset (rather than an initializer) also
 * clears padding bytes in the embedded kernel structs.
 */
static void
anv_execbuf_init(struct anv_execbuf *exec)
{
   memset(exec, 0, sizeof(*exec));
}

/* Free the execbuf's heap allocations.  vk_free(NULL) is a no-op, so this
 * is safe on a partially-built execbuf.
 */
static void
anv_execbuf_finish(struct anv_execbuf *exec)
{
   vk_free(exec->alloc, exec->surface_states_relocs);
   vk_free(exec->alloc, exec->objects);
   vk_free(exec->alloc, exec->bos);
}

/* Append an i915_user_extension to the execbuf's extension chain.
 *
 * With I915_EXEC_USE_EXTENSIONS set, cliprects_ptr is repurposed by the
 * kernel as the head of a singly-linked extension list; walk to the tail
 * and hook the new extension on.
 */
static void
anv_execbuf_add_ext(struct anv_execbuf *exec,
                    uint32_t ext_name,
                    struct i915_user_extension *ext)
{
   __u64 *iter = &exec->execbuf.cliprects_ptr;

   exec->execbuf.flags |= I915_EXEC_USE_EXTENSIONS;

   while (*iter != 0) {
      iter = (__u64 *) &((struct i915_user_extension *)(uintptr_t)*iter)->next_extension;
   }

   ext->name = ext_name;

   *iter = (uintptr_t) ext;
}

static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
                          struct anv_execbuf *exec,
                          uint32_t dep_words,
                          BITSET_WORD *deps,
                          uint32_t extra_flags);

/* Add a BO (and, recursively, every BO its relocations reference) to the
 * execbuf's object list.
 *
 * A BO already present (identified via its cached bo->index) is not added
 * twice, but extra_flags such as EXEC_OBJECT_WRITE are still merged into
 * its exec object.  The objects/bos arrays grow by doubling (initial
 * capacity 64).
 */
static VkResult
anv_execbuf_add_bo(struct anv_device *device,
                   struct anv_execbuf *exec,
                   struct anv_bo *bo,
                   struct anv_reloc_list *relocs,
                   uint32_t extra_flags)
{
   struct drm_i915_gem_exec_object2 *obj = NULL;

   bo = anv_bo_unwrap(bo);

   if (bo->index < exec->bo_count && exec->bos[bo->index] == bo)
      obj = &exec->objects[bo->index];

   if (obj == NULL) {
      /* We've never seen this one before.  Add it to the list and assign
       * an id that we can use later.
       */
      if (exec->bo_count >= exec->array_length) {
         uint32_t new_len = exec->objects ? exec->array_length * 2 : 64;

         struct drm_i915_gem_exec_object2 *new_objects =
            vk_alloc(exec->alloc, new_len * sizeof(*new_objects), 8, exec->alloc_scope);
         if (new_objects == NULL)
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);

         struct anv_bo **new_bos =
            vk_alloc(exec->alloc, new_len * sizeof(*new_bos), 8, exec->alloc_scope);
         if (new_bos == NULL) {
            vk_free(exec->alloc, new_objects);
            return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
         }

         if (exec->objects) {
            memcpy(new_objects, exec->objects,
                   exec->bo_count * sizeof(*new_objects));
            memcpy(new_bos, exec->bos,
                   exec->bo_count * sizeof(*new_bos));
         }

         vk_free(exec->alloc, exec->objects);
         vk_free(exec->alloc, exec->bos);

         exec->objects = new_objects;
         exec->bos = new_bos;
         exec->array_length = new_len;
      }

      assert(exec->bo_count < exec->array_length);

      bo->index = exec->bo_count++;
      obj = &exec->objects[bo->index];
      exec->bos[bo->index] = bo;

      obj->handle = bo->gem_handle;
      obj->relocation_count = 0;
      obj->relocs_ptr = 0;
      obj->alignment = 0;
      obj->offset = bo->offset;
      obj->flags = bo->flags | extra_flags;
      obj->rsvd1 = 0;
      obj->rsvd2 = 0;
   }

   /* A write hazard also disables async execution for this object. */
   if (extra_flags & EXEC_OBJECT_WRITE) {
      obj->flags |= EXEC_OBJECT_WRITE;
      obj->flags &= ~EXEC_OBJECT_ASYNC;
   }

   if (relocs != NULL) {
      assert(obj->relocation_count == 0);

      if (relocs->num_relocs > 0) {
         /* This is the first time we've ever seen a list of relocations for
          * this BO.  Go ahead and set the relocations and then walk the list
          * of relocations and add them all.
          */
         exec->has_relocs = true;
         obj->relocation_count = relocs->num_relocs;
         obj->relocs_ptr = (uintptr_t) relocs->relocs;

         for (size_t i = 0; i < relocs->num_relocs; i++) {
            VkResult result;

            /* A quick sanity check on relocations */
            assert(relocs->relocs[i].offset < bo->size);
            result = anv_execbuf_add_bo(device, exec, relocs->reloc_bos[i],
                                        NULL, extra_flags);
            if (result != VK_SUCCESS)
               return result;
         }
      }

      return anv_execbuf_add_bo_bitset(device, exec, relocs->dep_words,
                                       relocs->deps, extra_flags);
   }

   return VK_SUCCESS;
}

/* Add BO dependencies to execbuf */
/* Add every BO whose GEM handle is set in the dependency bitset.
 *
 * Bit (w * BITSET_WORDBITS + i) being set means GEM handle
 * w * BITSET_WORDBITS + i is a dependency; each is looked up on the device
 * and added to the execbuf with no reloc list of its own.
 */
static VkResult
anv_execbuf_add_bo_bitset(struct anv_device *device,
                          struct anv_execbuf *exec,
                          uint32_t dep_words,
                          BITSET_WORD *deps,
                          uint32_t extra_flags)
{
   for (uint32_t w = 0; w < dep_words; w++) {
      BITSET_WORD mask = deps[w];
      while (mask) {
         int i = u_bit_scan(&mask);
         uint32_t gem_handle = w * BITSET_WORDBITS + i;
         struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
         assert(bo->refcount > 0);
         VkResult result =
            anv_execbuf_add_bo(device, exec, bo, NULL, extra_flags);
         if (result != VK_SUCCESS)
            return result;
      }
   }

   return VK_SUCCESS;
}

/* Fill in each relocation's target_handle with the execbuf index its target
 * BO was assigned by anv_execbuf_add_bo.
 */
static void
anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
                              struct anv_reloc_list *list)
{
   for (size_t i = 0; i < list->num_relocs; i++)
      list->relocs[i].target_handle = anv_bo_unwrap(list->reloc_bos[i])->index;
}

/* Rebase relocation offsets after the state pool's center_bo_offset moved
 * (the pool grows downward, so the center only ever increases).
 */
static void
adjust_relocations_from_state_pool(struct anv_state_pool *pool,
                                   struct anv_reloc_list *relocs,
                                   uint32_t last_pool_center_bo_offset)
{
   assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
   uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;

   for (size_t i = 0; i < relocs->num_relocs; i++) {
      /* All of the relocations from this block pool to other BO's should
       * have been emitted relative to the surface block pool center.  We
       * need to add the center offset to make them relative to the
       * beginning of the actual GEM bo.
       */
      relocs->relocs[i].offset += delta;
   }
}

/* Rebase relocations in `from_bo` that target the state pool's BO, and
 * rewrite the relocated values in the mapped batch accordingly.
 */
static void
adjust_relocations_to_state_pool(struct anv_state_pool *pool,
                                 struct anv_bo *from_bo,
                                 struct anv_reloc_list *relocs,
                                 uint32_t last_pool_center_bo_offset)
{
   assert(!from_bo->is_wrapper);
   assert(last_pool_center_bo_offset <= pool->block_pool.center_bo_offset);
   uint32_t delta = pool->block_pool.center_bo_offset - last_pool_center_bo_offset;

   /* When we initially emit relocations into a block pool, we don't
    * actually know what the final center_bo_offset will be so we just emit
    * it as if center_bo_offset == 0.  Now that we know what the center
    * offset is, we need to walk the list of relocations and adjust any
    * relocations that point to the pool bo with the correct offset.
    */
   for (size_t i = 0; i < relocs->num_relocs; i++) {
      if (relocs->reloc_bos[i] == pool->block_pool.bo) {
         /* Adjust the delta value in the relocation to correctly
          * correspond to the new delta.  Initially, this value may have
          * been negative (if treated as unsigned), but we trust in
          * uint32_t roll-over to fix that for us at this point.
          */
         relocs->relocs[i].delta += delta;

         /* Since the delta has changed, we need to update the actual
          * relocated value with the new presumed value.  This function
          * should only be called on batch buffers, so we know it isn't in
          * use by the GPU at the moment.
          */
         assert(relocs->relocs[i].offset < from_bo->size);
         write_reloc(pool->block_pool.device,
                     from_bo->map + relocs->relocs[i].offset,
                     relocs->relocs[i].presumed_offset +
                     relocs->relocs[i].delta, false);
      }
   }
}

/* Apply a relocation list on the CPU: write each target's current offset
 * (plus delta) into `bo`'s map and update presumed_offset.  Relocations
 * whose presumed offset already matches are skipped unless always_relocate.
 */
static void
anv_reloc_list_apply(struct anv_device *device,
                     struct anv_reloc_list *list,
                     struct anv_bo *bo,
                     bool always_relocate)
{
   bo = anv_bo_unwrap(bo);

   for (size_t i = 0; i < list->num_relocs; i++) {
      struct anv_bo *target_bo = anv_bo_unwrap(list->reloc_bos[i]);
      if (list->relocs[i].presumed_offset == target_bo->offset &&
          !always_relocate)
         continue;

      void *p = bo->map + list->relocs[i].offset;
      write_reloc(device, p, target_bo->offset + list->relocs[i].delta, true);
      list->relocs[i].presumed_offset = target_bo->offset;
   }
}

/**
 * This function applies the relocation for a command buffer and writes the
 * actual addresses into the buffers as per what we were told by the kernel on
 * the previous execbuf2 call.  This should be safe to do because, for each
 * relocated address, we have two cases:
 *
 *  1) The target BO is inactive (as seen by the kernel).  In this case, it is
 *     not in use by the GPU so updating the address is 100% ok.  It won't be
 *     in-use by the GPU (from our context) again until the next execbuf2
 *     happens.  If the kernel decides to move it in the next execbuf2, it
 *     will have to do the relocations itself, but that's ok because it should
 *     have all of the information needed to do so.
 *
 *  2) The target BO is active (as seen by the kernel).  In this case, it
 *     hasn't moved since the last execbuffer2 call because GTT shuffling
 *     *only* happens when the BO is idle. (From our perspective, it only
 *     happens inside the execbuffer2 ioctl, but the shuffling may be
 *     triggered by another ioctl, with full-ppgtt this is limited to only
 *     execbuffer2 ioctls on the same context, or memory pressure.)  Since the
 *     target BO hasn't moved, our anv_bo::offset exactly matches the BO's GTT
 *     address and the relocated value we are writing into the BO will be the
 *     same as the value that is already there.
 *
 *     There is also a possibility that the target BO is active but the exact
 *     RENDER_SURFACE_STATE object we are writing the relocation into isn't in
 *     use.  In this case, the address currently in the RENDER_SURFACE_STATE
 *     may be stale but it's still safe to write the relocation because that
 *     particular RENDER_SURFACE_STATE object isn't in-use by the GPU and
 *     won't be until the next execbuf2 call.
 *
 * By doing relocations on the CPU, we can tell the kernel that it doesn't
 * need to bother.
We want to do this because the surface state buffer is 148201e04c3fSmrg * used by every command buffer so, if the kernel does the relocations, it 148301e04c3fSmrg * will always be busy and the kernel will always stall. This is also 148401e04c3fSmrg * probably the fastest mechanism for doing relocations since the kernel would 148501e04c3fSmrg * have to make a full copy of all the relocations lists. 148601e04c3fSmrg */ 148701e04c3fSmrgstatic bool 14887ec681f3Smrgexecbuf_can_skip_relocations(struct anv_execbuf *exec) 148901e04c3fSmrg{ 149001e04c3fSmrg if (!exec->has_relocs) 149101e04c3fSmrg return true; 149201e04c3fSmrg 149301e04c3fSmrg static int userspace_relocs = -1; 149401e04c3fSmrg if (userspace_relocs < 0) 149501e04c3fSmrg userspace_relocs = env_var_as_boolean("ANV_USERSPACE_RELOCS", true); 149601e04c3fSmrg if (!userspace_relocs) 149701e04c3fSmrg return false; 149801e04c3fSmrg 149901e04c3fSmrg /* First, we have to check to see whether or not we can even do the 150001e04c3fSmrg * relocation. New buffers which have never been submitted to the kernel 150101e04c3fSmrg * don't have a valid offset so we need to let the kernel do relocations so 150201e04c3fSmrg * that we can get offsets for them. On future execbuf2 calls, those 150301e04c3fSmrg * buffers will have offsets and we will be able to skip relocating. 150401e04c3fSmrg * Invalid offsets are indicated by anv_bo::offset == (uint64_t)-1. 
150501e04c3fSmrg */ 150601e04c3fSmrg for (uint32_t i = 0; i < exec->bo_count; i++) { 15077ec681f3Smrg assert(!exec->bos[i]->is_wrapper); 150801e04c3fSmrg if (exec->bos[i]->offset == (uint64_t)-1) 150901e04c3fSmrg return false; 151001e04c3fSmrg } 151101e04c3fSmrg 15127ec681f3Smrg return true; 15137ec681f3Smrg} 15147ec681f3Smrg 15157ec681f3Smrgstatic void 15167ec681f3Smrgrelocate_cmd_buffer(struct anv_cmd_buffer *cmd_buffer, 15177ec681f3Smrg struct anv_execbuf *exec) 15187ec681f3Smrg{ 151901e04c3fSmrg /* Since surface states are shared between command buffers and we don't 152001e04c3fSmrg * know what order they will be submitted to the kernel, we don't know 152101e04c3fSmrg * what address is actually written in the surface state object at any 152201e04c3fSmrg * given time. The only option is to always relocate them. 152301e04c3fSmrg */ 15247ec681f3Smrg struct anv_bo *surface_state_bo = 15257ec681f3Smrg anv_bo_unwrap(cmd_buffer->device->surface_state_pool.block_pool.bo); 152601e04c3fSmrg anv_reloc_list_apply(cmd_buffer->device, &cmd_buffer->surface_relocs, 15277ec681f3Smrg surface_state_bo, 152801e04c3fSmrg true /* always relocate surface states */); 152901e04c3fSmrg 153001e04c3fSmrg /* Since we own all of the batch buffers, we know what values are stored 153101e04c3fSmrg * in the relocated addresses and only have to update them if the offsets 153201e04c3fSmrg * have changed. 
153301e04c3fSmrg */ 153401e04c3fSmrg struct anv_batch_bo **bbo; 153501e04c3fSmrg u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { 153601e04c3fSmrg anv_reloc_list_apply(cmd_buffer->device, 15377ec681f3Smrg &(*bbo)->relocs, (*bbo)->bo, false); 153801e04c3fSmrg } 153901e04c3fSmrg 154001e04c3fSmrg for (uint32_t i = 0; i < exec->bo_count; i++) 154101e04c3fSmrg exec->objects[i].offset = exec->bos[i]->offset; 15427ec681f3Smrg} 154301e04c3fSmrg 15447ec681f3Smrgstatic void 15457ec681f3Smrgreset_cmd_buffer_surface_offsets(struct anv_cmd_buffer *cmd_buffer) 15467ec681f3Smrg{ 15477ec681f3Smrg /* In the case where we fall back to doing kernel relocations, we need to 15487ec681f3Smrg * ensure that the relocation list is valid. All relocations on the batch 15497ec681f3Smrg * buffers are already valid and kept up-to-date. Since surface states are 15507ec681f3Smrg * shared between command buffers and we don't know what order they will be 15517ec681f3Smrg * submitted to the kernel, we don't know what address is actually written 15527ec681f3Smrg * in the surface state object at any given time. The only option is to set 15537ec681f3Smrg * a bogus presumed offset and let the kernel relocate them. 
15547ec681f3Smrg */ 15557ec681f3Smrg for (size_t i = 0; i < cmd_buffer->surface_relocs.num_relocs; i++) 15567ec681f3Smrg cmd_buffer->surface_relocs.relocs[i].presumed_offset = -1; 155701e04c3fSmrg} 155801e04c3fSmrg 155901e04c3fSmrgstatic VkResult 156001e04c3fSmrgsetup_execbuf_for_cmd_buffer(struct anv_execbuf *execbuf, 156101e04c3fSmrg struct anv_cmd_buffer *cmd_buffer) 156201e04c3fSmrg{ 156301e04c3fSmrg struct anv_state_pool *ss_pool = 156401e04c3fSmrg &cmd_buffer->device->surface_state_pool; 156501e04c3fSmrg 156601e04c3fSmrg adjust_relocations_from_state_pool(ss_pool, &cmd_buffer->surface_relocs, 156701e04c3fSmrg cmd_buffer->last_ss_pool_center); 15689f464c52Smaya VkResult result; 15697ec681f3Smrg if (cmd_buffer->device->physical->use_softpin) { 15707ec681f3Smrg /* Add surface dependencies (BOs) to the execbuf */ 15717ec681f3Smrg anv_execbuf_add_bo_bitset(cmd_buffer->device, execbuf, 15727ec681f3Smrg cmd_buffer->surface_relocs.dep_words, 15737ec681f3Smrg cmd_buffer->surface_relocs.deps, 0); 15747ec681f3Smrg } else { 15757ec681f3Smrg /* Since we aren't in the softpin case, all of our STATE_BASE_ADDRESS BOs 15767ec681f3Smrg * will get added automatically by processing relocations on the batch 15777ec681f3Smrg * buffer. We have to add the surface state BO manually because it has 15787ec681f3Smrg * relocations of its own that we need to be sure are processsed. 15797ec681f3Smrg */ 15807ec681f3Smrg result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, 15817ec681f3Smrg ss_pool->block_pool.bo, 15827ec681f3Smrg &cmd_buffer->surface_relocs, 0); 15837ec681f3Smrg if (result != VK_SUCCESS) 15847ec681f3Smrg return result; 15857ec681f3Smrg } 15867ec681f3Smrg 15877ec681f3Smrg /* First, we walk over all of the bos we've seen and add them and their 15887ec681f3Smrg * relocations to the validate list. 
15897ec681f3Smrg */ 15907ec681f3Smrg struct anv_batch_bo **bbo; 15917ec681f3Smrg u_vector_foreach(bbo, &cmd_buffer->seen_bbos) { 15927ec681f3Smrg adjust_relocations_to_state_pool(ss_pool, (*bbo)->bo, &(*bbo)->relocs, 15937ec681f3Smrg cmd_buffer->last_ss_pool_center); 15947ec681f3Smrg 15957ec681f3Smrg result = anv_execbuf_add_bo(cmd_buffer->device, execbuf, 15967ec681f3Smrg (*bbo)->bo, &(*bbo)->relocs, 0); 15977ec681f3Smrg if (result != VK_SUCCESS) 15987ec681f3Smrg return result; 15997ec681f3Smrg } 16007ec681f3Smrg 16017ec681f3Smrg /* Now that we've adjusted all of the surface state relocations, we need to 16027ec681f3Smrg * record the surface state pool center so future executions of the command 16037ec681f3Smrg * buffer can adjust correctly. 16047ec681f3Smrg */ 16057ec681f3Smrg cmd_buffer->last_ss_pool_center = ss_pool->block_pool.center_bo_offset; 16067ec681f3Smrg 16077ec681f3Smrg return VK_SUCCESS; 16087ec681f3Smrg} 16097ec681f3Smrg 16107ec681f3Smrgstatic void 16117ec681f3Smrgchain_command_buffers(struct anv_cmd_buffer **cmd_buffers, 16127ec681f3Smrg uint32_t num_cmd_buffers) 16137ec681f3Smrg{ 16147ec681f3Smrg if (!anv_cmd_buffer_is_chainable(cmd_buffers[0])) { 16157ec681f3Smrg assert(num_cmd_buffers == 1); 16167ec681f3Smrg return; 16177ec681f3Smrg } 16187ec681f3Smrg 16197ec681f3Smrg /* Chain the N-1 first batch buffers */ 16207ec681f3Smrg for (uint32_t i = 0; i < (num_cmd_buffers - 1); i++) 16217ec681f3Smrg anv_cmd_buffer_record_chain_submit(cmd_buffers[i], cmd_buffers[i + 1]); 16227ec681f3Smrg 16237ec681f3Smrg /* Put an end to the last one */ 16247ec681f3Smrg anv_cmd_buffer_record_end_submit(cmd_buffers[num_cmd_buffers - 1]); 16257ec681f3Smrg} 16267ec681f3Smrg 16277ec681f3Smrgstatic VkResult 16287ec681f3Smrgsetup_execbuf_for_cmd_buffers(struct anv_execbuf *execbuf, 16297ec681f3Smrg struct anv_queue *queue, 16307ec681f3Smrg struct anv_cmd_buffer **cmd_buffers, 16317ec681f3Smrg uint32_t num_cmd_buffers) 16327ec681f3Smrg{ 16337ec681f3Smrg struct anv_device 
*device = queue->device; 16347ec681f3Smrg struct anv_state_pool *ss_pool = &device->surface_state_pool; 16357ec681f3Smrg VkResult result; 16367ec681f3Smrg 16377ec681f3Smrg /* Edit the tail of the command buffers to chain them all together if they 16387ec681f3Smrg * can be. 16397ec681f3Smrg */ 16407ec681f3Smrg chain_command_buffers(cmd_buffers, num_cmd_buffers); 16417ec681f3Smrg 16427ec681f3Smrg for (uint32_t i = 0; i < num_cmd_buffers; i++) { 16437ec681f3Smrg result = setup_execbuf_for_cmd_buffer(execbuf, cmd_buffers[i]); 16447ec681f3Smrg if (result != VK_SUCCESS) 16457ec681f3Smrg return result; 16467ec681f3Smrg } 16477ec681f3Smrg 16487ec681f3Smrg /* Add all the global BOs to the object list for softpin case. */ 16497ec681f3Smrg if (device->physical->use_softpin) { 16509f464c52Smaya anv_block_pool_foreach_bo(bo, &ss_pool->block_pool) { 16517ec681f3Smrg result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); 16529f464c52Smaya if (result != VK_SUCCESS) 16539f464c52Smaya return result; 16549f464c52Smaya } 16559f464c52Smaya 16567ec681f3Smrg struct anv_block_pool *pool; 16577ec681f3Smrg pool = &device->dynamic_state_pool.block_pool; 16587ec681f3Smrg anv_block_pool_foreach_bo(bo, pool) { 16597ec681f3Smrg result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); 16609f464c52Smaya if (result != VK_SUCCESS) 16619f464c52Smaya return result; 16629f464c52Smaya } 16639f464c52Smaya 16647ec681f3Smrg pool = &device->general_state_pool.block_pool; 16659f464c52Smaya anv_block_pool_foreach_bo(bo, pool) { 16667ec681f3Smrg result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); 16679f464c52Smaya if (result != VK_SUCCESS) 16689f464c52Smaya return result; 16699f464c52Smaya } 16709f464c52Smaya 16717ec681f3Smrg pool = &device->instruction_state_pool.block_pool; 16729f464c52Smaya anv_block_pool_foreach_bo(bo, pool) { 16737ec681f3Smrg result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); 16749f464c52Smaya if (result != VK_SUCCESS) 16759f464c52Smaya return result; 
16769f464c52Smaya } 16779f464c52Smaya 16787ec681f3Smrg pool = &device->binding_table_pool.block_pool; 16799f464c52Smaya anv_block_pool_foreach_bo(bo, pool) { 16807ec681f3Smrg result = anv_execbuf_add_bo(device, execbuf, bo, NULL, 0); 16817ec681f3Smrg if (result != VK_SUCCESS) 16827ec681f3Smrg return result; 16837ec681f3Smrg } 16847ec681f3Smrg 16857ec681f3Smrg /* Add the BOs for all user allocated memory objects because we can't 16867ec681f3Smrg * track after binding updates of VK_EXT_descriptor_indexing. 16877ec681f3Smrg */ 16887ec681f3Smrg list_for_each_entry(struct anv_device_memory, mem, 16897ec681f3Smrg &device->memory_objects, link) { 16907ec681f3Smrg result = anv_execbuf_add_bo(device, execbuf, mem->bo, NULL, 0); 16919f464c52Smaya if (result != VK_SUCCESS) 16929f464c52Smaya return result; 16939f464c52Smaya } 16949f464c52Smaya } else { 16957ec681f3Smrg /* We do not support chaining primary command buffers without 16967ec681f3Smrg * softpin. 16979f464c52Smaya */ 16987ec681f3Smrg assert(num_cmd_buffers == 1); 16999f464c52Smaya } 170001e04c3fSmrg 17017ec681f3Smrg bool no_reloc = true; 17027ec681f3Smrg if (execbuf->has_relocs) { 17037ec681f3Smrg no_reloc = execbuf_can_skip_relocations(execbuf); 17047ec681f3Smrg if (no_reloc) { 17057ec681f3Smrg /* If we were able to successfully relocate everything, tell the 17067ec681f3Smrg * kernel that it can skip doing relocations. The requirement for 17077ec681f3Smrg * using NO_RELOC is: 17087ec681f3Smrg * 17097ec681f3Smrg * 1) The addresses written in the objects must match the 17107ec681f3Smrg * corresponding reloc.presumed_offset which in turn must match 17117ec681f3Smrg * the corresponding execobject.offset. 17127ec681f3Smrg * 17137ec681f3Smrg * 2) To avoid stalling, execobject.offset should match the current 17147ec681f3Smrg * address of that object within the active context. 
17157ec681f3Smrg * 17167ec681f3Smrg * In order to satisfy all of the invariants that make userspace 17177ec681f3Smrg * relocations to be safe (see relocate_cmd_buffer()), we need to 17187ec681f3Smrg * further ensure that the addresses we use match those used by the 17197ec681f3Smrg * kernel for the most recent execbuf2. 17207ec681f3Smrg * 17217ec681f3Smrg * The kernel may still choose to do relocations anyway if something 17227ec681f3Smrg * has moved in the GTT. In this case, the relocation list still 17237ec681f3Smrg * needs to be valid. All relocations on the batch buffers are 17247ec681f3Smrg * already valid and kept up-to-date. For surface state relocations, 17257ec681f3Smrg * by applying the relocations in relocate_cmd_buffer, we ensured 17267ec681f3Smrg * that the address in the RENDER_SURFACE_STATE matches 17277ec681f3Smrg * presumed_offset, so it should be safe for the kernel to relocate 17287ec681f3Smrg * them as needed. 17297ec681f3Smrg */ 17307ec681f3Smrg for (uint32_t i = 0; i < num_cmd_buffers; i++) { 17317ec681f3Smrg relocate_cmd_buffer(cmd_buffers[i], execbuf); 173201e04c3fSmrg 17337ec681f3Smrg anv_reloc_list_apply(device, &cmd_buffers[i]->surface_relocs, 17347ec681f3Smrg device->surface_state_pool.block_pool.bo, 17357ec681f3Smrg true /* always relocate surface states */); 17367ec681f3Smrg } 17377ec681f3Smrg } else { 17387ec681f3Smrg /* In the case where we fall back to doing kernel relocations, we 17397ec681f3Smrg * need to ensure that the relocation list is valid. All relocations 17407ec681f3Smrg * on the batch buffers are already valid and kept up-to-date. Since 17417ec681f3Smrg * surface states are shared between command buffers and we don't 17427ec681f3Smrg * know what order they will be submitted to the kernel, we don't 17437ec681f3Smrg * know what address is actually written in the surface state object 17447ec681f3Smrg * at any given time. The only option is to set a bogus presumed 17457ec681f3Smrg * offset and let the kernel relocate them. 
17467ec681f3Smrg */ 17477ec681f3Smrg for (uint32_t i = 0; i < num_cmd_buffers; i++) 17487ec681f3Smrg reset_cmd_buffer_surface_offsets(cmd_buffers[i]); 17497ec681f3Smrg } 175001e04c3fSmrg } 175101e04c3fSmrg 175201e04c3fSmrg struct anv_batch_bo *first_batch_bo = 17537ec681f3Smrg list_first_entry(&cmd_buffers[0]->batch_bos, struct anv_batch_bo, link); 175401e04c3fSmrg 175501e04c3fSmrg /* The kernel requires that the last entry in the validation list be the 175601e04c3fSmrg * batch buffer to execute. We can simply swap the element 175701e04c3fSmrg * corresponding to the first batch_bo in the chain with the last 175801e04c3fSmrg * element in the list. 175901e04c3fSmrg */ 17607ec681f3Smrg if (first_batch_bo->bo->index != execbuf->bo_count - 1) { 17617ec681f3Smrg uint32_t idx = first_batch_bo->bo->index; 176201e04c3fSmrg uint32_t last_idx = execbuf->bo_count - 1; 176301e04c3fSmrg 176401e04c3fSmrg struct drm_i915_gem_exec_object2 tmp_obj = execbuf->objects[idx]; 17657ec681f3Smrg assert(execbuf->bos[idx] == first_batch_bo->bo); 176601e04c3fSmrg 176701e04c3fSmrg execbuf->objects[idx] = execbuf->objects[last_idx]; 176801e04c3fSmrg execbuf->bos[idx] = execbuf->bos[last_idx]; 176901e04c3fSmrg execbuf->bos[idx]->index = idx; 177001e04c3fSmrg 177101e04c3fSmrg execbuf->objects[last_idx] = tmp_obj; 17727ec681f3Smrg execbuf->bos[last_idx] = first_batch_bo->bo; 17737ec681f3Smrg first_batch_bo->bo->index = last_idx; 177401e04c3fSmrg } 177501e04c3fSmrg 177601e04c3fSmrg /* If we are pinning our BOs, we shouldn't have to relocate anything */ 17777ec681f3Smrg if (device->physical->use_softpin) 177801e04c3fSmrg assert(!execbuf->has_relocs); 177901e04c3fSmrg 17807ec681f3Smrg /* Now we go through and fixup all of the relocation lists to point to the 17817ec681f3Smrg * correct indices in the object array (I915_EXEC_HANDLE_LUT). We have to 17827ec681f3Smrg * do this after we reorder the list above as some of the indices may have 17837ec681f3Smrg * changed. 
178401e04c3fSmrg */ 17857ec681f3Smrg struct anv_batch_bo **bbo; 178601e04c3fSmrg if (execbuf->has_relocs) { 17877ec681f3Smrg assert(num_cmd_buffers == 1); 17887ec681f3Smrg u_vector_foreach(bbo, &cmd_buffers[0]->seen_bbos) 17897ec681f3Smrg anv_cmd_buffer_process_relocs(cmd_buffers[0], &(*bbo)->relocs); 179001e04c3fSmrg 17917ec681f3Smrg anv_cmd_buffer_process_relocs(cmd_buffers[0], &cmd_buffers[0]->surface_relocs); 179201e04c3fSmrg } 179301e04c3fSmrg 17947ec681f3Smrg if (!device->info.has_llc) { 179501e04c3fSmrg __builtin_ia32_mfence(); 17967ec681f3Smrg for (uint32_t i = 0; i < num_cmd_buffers; i++) { 17977ec681f3Smrg u_vector_foreach(bbo, &cmd_buffers[i]->seen_bbos) { 17987ec681f3Smrg for (uint32_t i = 0; i < (*bbo)->length; i += CACHELINE_SIZE) 17997ec681f3Smrg __builtin_ia32_clflush((*bbo)->bo->map + i); 18007ec681f3Smrg } 180101e04c3fSmrg } 180201e04c3fSmrg } 180301e04c3fSmrg 18047ec681f3Smrg struct anv_batch *batch = &cmd_buffers[0]->batch; 180501e04c3fSmrg execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { 180601e04c3fSmrg .buffers_ptr = (uintptr_t) execbuf->objects, 180701e04c3fSmrg .buffer_count = execbuf->bo_count, 180801e04c3fSmrg .batch_start_offset = 0, 18097ec681f3Smrg /* On platforms that cannot chain batch buffers because of the i915 18107ec681f3Smrg * command parser, we have to provide the batch length. Everywhere else 18117ec681f3Smrg * we'll chain batches so no point in passing a length. 18127ec681f3Smrg */ 18137ec681f3Smrg .batch_len = device->can_chain_batches ? 0 : batch->next - batch->start, 181401e04c3fSmrg .cliprects_ptr = 0, 181501e04c3fSmrg .num_cliprects = 0, 181601e04c3fSmrg .DR1 = 0, 181701e04c3fSmrg .DR4 = 0, 18187ec681f3Smrg .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | (no_reloc ? 
I915_EXEC_NO_RELOC : 0), 18197ec681f3Smrg .rsvd1 = device->context_id, 182001e04c3fSmrg .rsvd2 = 0, 182101e04c3fSmrg }; 182201e04c3fSmrg 182301e04c3fSmrg return VK_SUCCESS; 182401e04c3fSmrg} 182501e04c3fSmrg 182601e04c3fSmrgstatic VkResult 18277ec681f3Smrgsetup_empty_execbuf(struct anv_execbuf *execbuf, struct anv_queue *queue) 182801e04c3fSmrg{ 18297ec681f3Smrg struct anv_device *device = queue->device; 18307ec681f3Smrg VkResult result = anv_execbuf_add_bo(device, execbuf, 18317ec681f3Smrg device->trivial_batch_bo, 18327ec681f3Smrg NULL, 0); 183301e04c3fSmrg if (result != VK_SUCCESS) 183401e04c3fSmrg return result; 183501e04c3fSmrg 183601e04c3fSmrg execbuf->execbuf = (struct drm_i915_gem_execbuffer2) { 183701e04c3fSmrg .buffers_ptr = (uintptr_t) execbuf->objects, 183801e04c3fSmrg .buffer_count = execbuf->bo_count, 183901e04c3fSmrg .batch_start_offset = 0, 18407ec681f3Smrg .batch_len = 8, /* GFX7_MI_BATCH_BUFFER_END and NOOP */ 18417ec681f3Smrg .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, 184201e04c3fSmrg .rsvd1 = device->context_id, 184301e04c3fSmrg .rsvd2 = 0, 184401e04c3fSmrg }; 184501e04c3fSmrg 184601e04c3fSmrg return VK_SUCCESS; 184701e04c3fSmrg} 184801e04c3fSmrg 18497ec681f3Smrg/* We lock around execbuf for three main reasons: 18507ec681f3Smrg * 18517ec681f3Smrg * 1) When a block pool is resized, we create a new gem handle with a 18527ec681f3Smrg * different size and, in the case of surface states, possibly a different 18537ec681f3Smrg * center offset but we re-use the same anv_bo struct when we do so. If 18547ec681f3Smrg * this happens in the middle of setting up an execbuf, we could end up 18557ec681f3Smrg * with our list of BOs out of sync with our list of gem handles. 18567ec681f3Smrg * 18577ec681f3Smrg * 2) The algorithm we use for building the list of unique buffers isn't 18587ec681f3Smrg * thread-safe. 
While the client is supposed to syncronize around 18597ec681f3Smrg * QueueSubmit, this would be extremely difficult to debug if it ever came 18607ec681f3Smrg * up in the wild due to a broken app. It's better to play it safe and 18617ec681f3Smrg * just lock around QueueSubmit. 18627ec681f3Smrg * 18637ec681f3Smrg * 3) The anv_cmd_buffer_execbuf function may perform relocations in 18647ec681f3Smrg * userspace. Due to the fact that the surface state buffer is shared 18657ec681f3Smrg * between batches, we can't afford to have that happen from multiple 18667ec681f3Smrg * threads at the same time. Even though the user is supposed to ensure 18677ec681f3Smrg * this doesn't happen, we play it safe as in (2) above. 18687ec681f3Smrg * 18697ec681f3Smrg * Since the only other things that ever take the device lock such as block 18707ec681f3Smrg * pool resize only rarely happen, this will almost never be contended so 18717ec681f3Smrg * taking a lock isn't really an expensive operation in this case. 18727ec681f3Smrg */ 187301e04c3fSmrgVkResult 18747ec681f3Smrganv_queue_execbuf_locked(struct anv_queue *queue, 18757ec681f3Smrg struct anv_queue_submit *submit) 187601e04c3fSmrg{ 18777ec681f3Smrg struct anv_device *device = queue->device; 187801e04c3fSmrg struct anv_execbuf execbuf; 187901e04c3fSmrg anv_execbuf_init(&execbuf); 18807ec681f3Smrg execbuf.alloc = submit->alloc; 18817ec681f3Smrg execbuf.alloc_scope = submit->alloc_scope; 18827ec681f3Smrg execbuf.perf_query_pass = submit->perf_query_pass; 188301e04c3fSmrg 18847ec681f3Smrg /* Always add the workaround BO as it includes a driver identifier for the 18857ec681f3Smrg * error_state. 
18867ec681f3Smrg */ 18877ec681f3Smrg VkResult result = 18887ec681f3Smrg anv_execbuf_add_bo(device, &execbuf, device->workaround_bo, NULL, 0); 18897ec681f3Smrg if (result != VK_SUCCESS) 18907ec681f3Smrg goto error; 189101e04c3fSmrg 18927ec681f3Smrg for (uint32_t i = 0; i < submit->fence_bo_count; i++) { 18937ec681f3Smrg int signaled; 18947ec681f3Smrg struct anv_bo *bo = anv_unpack_ptr(submit->fence_bos[i], 1, &signaled); 189501e04c3fSmrg 18967ec681f3Smrg result = anv_execbuf_add_bo(device, &execbuf, bo, NULL, 18977ec681f3Smrg signaled ? EXEC_OBJECT_WRITE : 0); 18987ec681f3Smrg if (result != VK_SUCCESS) 18997ec681f3Smrg goto error; 19007ec681f3Smrg } 190101e04c3fSmrg 19027ec681f3Smrg if (submit->cmd_buffer_count) { 19037ec681f3Smrg result = setup_execbuf_for_cmd_buffers(&execbuf, queue, 19047ec681f3Smrg submit->cmd_buffers, 19057ec681f3Smrg submit->cmd_buffer_count); 19067ec681f3Smrg } else if (submit->simple_bo) { 19077ec681f3Smrg result = anv_execbuf_add_bo(device, &execbuf, submit->simple_bo, NULL, 0); 19087ec681f3Smrg if (result != VK_SUCCESS) 19097ec681f3Smrg goto error; 19107ec681f3Smrg 19117ec681f3Smrg execbuf.execbuf = (struct drm_i915_gem_execbuffer2) { 19127ec681f3Smrg .buffers_ptr = (uintptr_t) execbuf.objects, 19137ec681f3Smrg .buffer_count = execbuf.bo_count, 19147ec681f3Smrg .batch_start_offset = 0, 19157ec681f3Smrg .batch_len = submit->simple_bo_size, 19167ec681f3Smrg .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags | I915_EXEC_NO_RELOC, 19177ec681f3Smrg .rsvd1 = device->context_id, 19187ec681f3Smrg .rsvd2 = 0, 19197ec681f3Smrg }; 19207ec681f3Smrg } else { 19217ec681f3Smrg result = setup_empty_execbuf(&execbuf, queue); 192201e04c3fSmrg } 192301e04c3fSmrg 19247ec681f3Smrg if (result != VK_SUCCESS) 19257ec681f3Smrg goto error; 192601e04c3fSmrg 19277ec681f3Smrg const bool has_perf_query = 19287ec681f3Smrg submit->perf_query_pass >= 0 && 19297ec681f3Smrg submit->cmd_buffer_count && 19307ec681f3Smrg submit->perf_query_pool; 193101e04c3fSmrg 
19327ec681f3Smrg if (INTEL_DEBUG(DEBUG_SUBMIT)) { 19337ec681f3Smrg fprintf(stderr, "Batch offset=0x%x len=0x%x on queue 0\n", 19347ec681f3Smrg execbuf.execbuf.batch_start_offset, execbuf.execbuf.batch_len); 19357ec681f3Smrg for (uint32_t i = 0; i < execbuf.bo_count; i++) { 19367ec681f3Smrg const struct anv_bo *bo = execbuf.bos[i]; 193701e04c3fSmrg 19387ec681f3Smrg fprintf(stderr, " BO: addr=0x%016"PRIx64" size=%010"PRIx64" handle=%05u name=%s\n", 19397ec681f3Smrg bo->offset, bo->size, bo->gem_handle, bo->name); 194001e04c3fSmrg } 194101e04c3fSmrg } 194201e04c3fSmrg 19437ec681f3Smrg if (INTEL_DEBUG(DEBUG_BATCH)) { 19447ec681f3Smrg fprintf(stderr, "Batch on queue %d\n", (int)(queue - device->queues)); 19457ec681f3Smrg if (submit->cmd_buffer_count) { 19467ec681f3Smrg if (has_perf_query) { 19477ec681f3Smrg struct anv_query_pool *query_pool = submit->perf_query_pool; 19487ec681f3Smrg struct anv_bo *pass_batch_bo = query_pool->bo; 19497ec681f3Smrg uint64_t pass_batch_offset = 19507ec681f3Smrg khr_perf_query_preamble_offset(query_pool, 19517ec681f3Smrg submit->perf_query_pass); 19527ec681f3Smrg 19537ec681f3Smrg intel_print_batch(&device->decoder_ctx, 19547ec681f3Smrg pass_batch_bo->map + pass_batch_offset, 64, 19557ec681f3Smrg pass_batch_bo->offset + pass_batch_offset, false); 19567ec681f3Smrg } 195701e04c3fSmrg 19587ec681f3Smrg for (uint32_t i = 0; i < submit->cmd_buffer_count; i++) { 19597ec681f3Smrg struct anv_batch_bo **bo = 19607ec681f3Smrg u_vector_tail(&submit->cmd_buffers[i]->seen_bbos); 19617ec681f3Smrg device->cmd_buffer_being_decoded = submit->cmd_buffers[i]; 19627ec681f3Smrg intel_print_batch(&device->decoder_ctx, (*bo)->bo->map, 19637ec681f3Smrg (*bo)->bo->size, (*bo)->bo->offset, false); 19647ec681f3Smrg device->cmd_buffer_being_decoded = NULL; 19657ec681f3Smrg } 19667ec681f3Smrg } else if (submit->simple_bo) { 19677ec681f3Smrg intel_print_batch(&device->decoder_ctx, submit->simple_bo->map, 19687ec681f3Smrg submit->simple_bo->size, submit->simple_bo->offset, 
false); 19697ec681f3Smrg } else { 19707ec681f3Smrg intel_print_batch(&device->decoder_ctx, 19717ec681f3Smrg device->trivial_batch_bo->map, 19727ec681f3Smrg device->trivial_batch_bo->size, 19737ec681f3Smrg device->trivial_batch_bo->offset, false); 197401e04c3fSmrg } 197501e04c3fSmrg } 197601e04c3fSmrg 19777ec681f3Smrg if (submit->fence_count > 0) { 19787ec681f3Smrg if (device->has_thread_submit) { 19797ec681f3Smrg execbuf.timeline_fences.fence_count = submit->fence_count; 19807ec681f3Smrg execbuf.timeline_fences.handles_ptr = (uintptr_t)submit->fences; 19817ec681f3Smrg execbuf.timeline_fences.values_ptr = (uintptr_t)submit->fence_values; 19827ec681f3Smrg anv_execbuf_add_ext(&execbuf, 19837ec681f3Smrg DRM_I915_GEM_EXECBUFFER_EXT_TIMELINE_FENCES, 19847ec681f3Smrg &execbuf.timeline_fences.base); 19857ec681f3Smrg } else { 19867ec681f3Smrg execbuf.execbuf.flags |= I915_EXEC_FENCE_ARRAY; 19877ec681f3Smrg execbuf.execbuf.num_cliprects = submit->fence_count; 19887ec681f3Smrg execbuf.execbuf.cliprects_ptr = (uintptr_t)submit->fences; 19899f464c52Smaya } 19909f464c52Smaya } 199101e04c3fSmrg 19927ec681f3Smrg if (submit->in_fence != -1) { 19937ec681f3Smrg assert(!device->has_thread_submit); 199401e04c3fSmrg execbuf.execbuf.flags |= I915_EXEC_FENCE_IN; 19957ec681f3Smrg execbuf.execbuf.rsvd2 |= (uint32_t)submit->in_fence; 199601e04c3fSmrg } 199701e04c3fSmrg 19987ec681f3Smrg if (submit->need_out_fence) { 19997ec681f3Smrg assert(!device->has_thread_submit); 200001e04c3fSmrg execbuf.execbuf.flags |= I915_EXEC_FENCE_OUT; 200101e04c3fSmrg } 200201e04c3fSmrg 20037ec681f3Smrg if (has_perf_query) { 20047ec681f3Smrg struct anv_query_pool *query_pool = submit->perf_query_pool; 20057ec681f3Smrg assert(submit->perf_query_pass < query_pool->n_passes); 20067ec681f3Smrg struct intel_perf_query_info *query_info = 20077ec681f3Smrg query_pool->pass_query[submit->perf_query_pass]; 20087ec681f3Smrg 20097ec681f3Smrg /* Some performance queries just the pipeline statistic HW, no need for 
20107ec681f3Smrg * OA in that case, so no need to reconfigure. 201101e04c3fSmrg */ 20127ec681f3Smrg if (!INTEL_DEBUG(DEBUG_NO_OACONFIG) && 20137ec681f3Smrg (query_info->kind == INTEL_PERF_QUERY_TYPE_OA || 20147ec681f3Smrg query_info->kind == INTEL_PERF_QUERY_TYPE_RAW)) { 20157ec681f3Smrg int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG, 20167ec681f3Smrg (void *)(uintptr_t) query_info->oa_metrics_set_id); 20177ec681f3Smrg if (ret < 0) { 20187ec681f3Smrg result = anv_device_set_lost(device, 20197ec681f3Smrg "i915-perf config failed: %s", 20207ec681f3Smrg strerror(errno)); 20217ec681f3Smrg } 20227ec681f3Smrg } 20237ec681f3Smrg 20247ec681f3Smrg struct anv_bo *pass_batch_bo = query_pool->bo; 20257ec681f3Smrg 20267ec681f3Smrg struct drm_i915_gem_exec_object2 query_pass_object = { 20277ec681f3Smrg .handle = pass_batch_bo->gem_handle, 20287ec681f3Smrg .offset = pass_batch_bo->offset, 20297ec681f3Smrg .flags = pass_batch_bo->flags, 20307ec681f3Smrg }; 20317ec681f3Smrg struct drm_i915_gem_execbuffer2 query_pass_execbuf = { 20327ec681f3Smrg .buffers_ptr = (uintptr_t) &query_pass_object, 20337ec681f3Smrg .buffer_count = 1, 20347ec681f3Smrg .batch_start_offset = khr_perf_query_preamble_offset(query_pool, 20357ec681f3Smrg submit->perf_query_pass), 20367ec681f3Smrg .flags = I915_EXEC_HANDLE_LUT | queue->exec_flags, 20377ec681f3Smrg .rsvd1 = device->context_id, 20387ec681f3Smrg }; 20397ec681f3Smrg 20407ec681f3Smrg int ret = queue->device->info.no_hw ? 0 : 20417ec681f3Smrg anv_gem_execbuffer(queue->device, &query_pass_execbuf); 20427ec681f3Smrg if (ret) 20437ec681f3Smrg result = anv_queue_set_lost(queue, "execbuf2 failed: %m"); 204401e04c3fSmrg } 204501e04c3fSmrg 20467ec681f3Smrg int ret = queue->device->info.no_hw ? 
0 : 20477ec681f3Smrg anv_gem_execbuffer(queue->device, &execbuf.execbuf); 20487ec681f3Smrg if (ret) 20497ec681f3Smrg result = anv_queue_set_lost(queue, "execbuf2 failed: %m"); 205001e04c3fSmrg 20517ec681f3Smrg struct drm_i915_gem_exec_object2 *objects = execbuf.objects; 20527ec681f3Smrg for (uint32_t k = 0; k < execbuf.bo_count; k++) { 20537ec681f3Smrg if (execbuf.bos[k]->flags & EXEC_OBJECT_PINNED) 20547ec681f3Smrg assert(execbuf.bos[k]->offset == objects[k].offset); 20557ec681f3Smrg execbuf.bos[k]->offset = objects[k].offset; 205601e04c3fSmrg } 205701e04c3fSmrg 20587ec681f3Smrg if (result == VK_SUCCESS && submit->need_out_fence) 20597ec681f3Smrg submit->out_fence = execbuf.execbuf.rsvd2 >> 32; 20607ec681f3Smrg 20617ec681f3Smrg error: 20627ec681f3Smrg pthread_cond_broadcast(&device->queue_submit); 20637ec681f3Smrg 20647ec681f3Smrg anv_execbuf_finish(&execbuf); 206501e04c3fSmrg 206601e04c3fSmrg return result; 206701e04c3fSmrg} 2068