/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdlib.h>
#include <unistd.h>
#include <limits.h>
#include <assert.h>
#include <sys/mman.h>

#include "anv_private.h"

#include "common/intel_aux_map.h"
#include "util/anon_file.h"

#ifdef HAVE_VALGRIND
#define VG_NOACCESS_READ(__ptr) ({                          \
   VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr)));    \
   __typeof(*(__ptr)) __val = *(__ptr);                     \
   VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));   \
   __val;                                                   \
})
#define VG_NOACCESS_WRITE(__ptr, __val) ({                  \
   VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr)));  \
   *(__ptr) = (__val);                                      \
   VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));   \
})
#else
#define VG_NOACCESS_READ(__ptr) (*(__ptr))
#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val))
#endif

#ifndef MAP_POPULATE
#define MAP_POPULATE 0
#endif

/* Design goals:
 *
 * - Lock free (except when resizing underlying bos)
 *
 * - Constant time allocation with typically only one atomic
 *
 * - Multiple allocation sizes without fragmentation
 *
 * - Can grow while keeping addresses and offsets of contents stable
 *
 * - All allocations within one bo so we can point one of the
 *   STATE_BASE_ADDRESS pointers at it.
 *
 * The overall design is a two-level allocator: the top level is a fixed-size,
 * big block (8k) allocator, which operates out of a bo.  Allocation is done
 * by either pulling a block from the free list or growing the used range of
 * the bo.  Growing the range may run out of space in the bo, which we then
 * need to grow.  Growing the bo is tricky in a multi-threaded, lockless
 * environment: we need to keep all pointers and contents in the old map
 * valid.  GEM bos in general can't grow, but we use a trick: we create a
 * memfd and use ftruncate to grow it as necessary.  We mmap the new size and
 * then create a gem bo for it using the new gem userptr ioctl.  Without
 * heavy-handed locking around our allocation fast-path, there isn't really a
 * way to munmap the old mmap, so we just keep it around until garbage
 * collection time.  While the block allocator is lockless for normal
 * operations, we block other threads trying to allocate while we're growing
 * the map.  It shouldn't happen often, and growing is fast anyway.
 *
 * At the next level we can use various sub-allocators.  The state pool is a
 * pool of smaller, fixed size objects, which operates much like the block
 * pool.  It uses a free list for freeing objects, but when it runs out of
 * space it just allocates a new block from the block pool.  This allocator is
 * intended for longer lived state objects such as SURFACE_STATE and most
 * other persistent state objects in the API.  We may need to track more info
 * with these objects and a pointer back to the CPU object (e.g. VkImage).  In
 * those cases we just allocate a slightly bigger object and put the extra
 * state after the GPU state object.
 *
 * The state stream allocator works similarly to how the i965 DRI driver
 * streams all its state.  Even with Vulkan, we need to emit transient state
 * (whether surface state base or dynamic state base), and for that we can
 * just get a block and fill it up.  These cases are local to a command buffer
 * and the sub-allocator need not be thread safe.  The streaming allocator
 * gets a new block when it runs out of space and chains them together so they
 * can be easily freed.
 */
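
/* A minimal usage sketch of the stack described above.  This is illustrative
 * only: "device" stands for a fully initialized anv_device, and the base
 * address, block sizes, and alignments are made-up example values, not
 * something this driver requires:
 *
 *    struct anv_state_pool pool;
 *    anv_state_pool_init(&pool, device, "example pool",
 *                        0x100000000ULL, 0, 16384);
 *
 *    struct anv_state surf = anv_state_pool_alloc(&pool, 64, 64);
 *
 *    struct anv_state_stream stream;
 *    anv_state_stream_init(&stream, &pool, 16384);
 *    struct anv_state tmp = anv_state_stream_alloc(&stream, 128, 64);
 *
 *    anv_state_stream_finish(&stream);
 *    anv_state_pool_free(&pool, surf);
 *    anv_state_pool_finish(&pool);
 */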

/* Allocations are always at least 64 byte aligned, so UINT32_MAX can never be
 * a valid allocation offset.  We use it to indicate the free list is empty.
 */
#define EMPTY UINT32_MAX

/* On FreeBSD, PAGE_SIZE is already defined in
 * /usr/include/machine/param.h, which is indirectly
 * included here.
 */
#ifndef PAGE_SIZE
#define PAGE_SIZE 4096
#endif

struct anv_mmap_cleanup {
   void *map;
   size_t size;
};

static inline uint32_t
ilog2_round_up(uint32_t value)
{
   assert(value != 0);
   return 32 - __builtin_clz(value - 1);
}

static inline uint32_t
round_to_power_of_two(uint32_t value)
{
   return 1 << ilog2_round_up(value);
}
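
/* Worked examples for the helpers above: ilog2_round_up(4) == 2,
 * ilog2_round_up(5) == 3, so round_to_power_of_two(5) == 8 while
 * round_to_power_of_two(4096) == 4096; powers of two come back unchanged
 * because __builtin_clz(value - 1) counts the leading zeros of the value
 * just below the input.  Note that value == 1 would evaluate
 * __builtin_clz(0), which is undefined behavior; in practice the callers
 * in this file pass larger sizes.
 */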

struct anv_state_table_cleanup {
   void *map;
   size_t size;
};

#define ANV_STATE_TABLE_CLEANUP_INIT ((struct anv_state_table_cleanup){0})
#define ANV_STATE_ENTRY_SIZE (sizeof(struct anv_free_entry))

static VkResult
anv_state_table_expand_range(struct anv_state_table *table, uint32_t size);

VkResult
anv_state_table_init(struct anv_state_table *table,
                     struct anv_device *device,
                     uint32_t initial_entries)
{
   VkResult result;

   table->device = device;

   /* Just make it 2GB up-front.  The Linux kernel won't actually back it
    * with pages until we either map and fault on one of them or we use
    * userptr and send a chunk of it off to the GPU.
    */
   table->fd = os_create_anonymous_file(BLOCK_POOL_MEMFD_SIZE, "state table");
   if (table->fd == -1)
      return vk_error(device, VK_ERROR_INITIALIZATION_FAILED);

   if (!u_vector_init(&table->cleanups, 8,
                      sizeof(struct anv_state_table_cleanup))) {
      result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
      goto fail_fd;
   }

   table->state.next = 0;
   table->state.end = 0;
   table->size = 0;

   uint32_t initial_size = initial_entries * ANV_STATE_ENTRY_SIZE;
   result = anv_state_table_expand_range(table, initial_size);
   if (result != VK_SUCCESS)
      goto fail_cleanups;

   return VK_SUCCESS;

 fail_cleanups:
   u_vector_finish(&table->cleanups);
 fail_fd:
   close(table->fd);

   return result;
}

static VkResult
anv_state_table_expand_range(struct anv_state_table *table, uint32_t size)
{
   void *map;
   struct anv_state_table_cleanup *cleanup;

   /* Assert that we only ever grow the table */
   assert(size >= table->state.end);

   /* Make sure that we don't go outside the bounds of the memfd */
   if (size > BLOCK_POOL_MEMFD_SIZE)
      return vk_error(table->device, VK_ERROR_OUT_OF_HOST_MEMORY);

   cleanup = u_vector_add(&table->cleanups);
   if (!cleanup)
      return vk_error(table->device, VK_ERROR_OUT_OF_HOST_MEMORY);

   *cleanup = ANV_STATE_TABLE_CLEANUP_INIT;

   /* Just leak the old map until we destroy the pool.  We can't munmap it
    * without races or imposing locking on the block allocate fast path.  On
    * the whole, the leaked maps add up to less than the size of the current
    * map.  MAP_POPULATE seems like the right thing to do, but we should try
    * to get some numbers.
    */
   map = mmap(NULL, size, PROT_READ | PROT_WRITE,
              MAP_SHARED | MAP_POPULATE, table->fd, 0);
   if (map == MAP_FAILED) {
      return vk_errorf(table->device, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "mmap failed: %m");
   }

   cleanup->map = map;
   cleanup->size = size;

   table->map = map;
   table->size = size;

   return VK_SUCCESS;
}
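
/* The growth scheme above, reduced to its core (a sketch, not driver API;
 * "fd" is the anonymous file created in anv_state_table_init()):
 *
 *    void *old_map = table->map;    readers of old entries keep using this
 *    void *new_map = mmap(NULL, new_size, PROT_READ | PROT_WRITE,
 *                         MAP_SHARED | MAP_POPULATE, fd, 0);
 *    table->map = new_map;          new allocations go through this
 *
 * Both mappings alias the same file pages, so entries written through the
 * old map stay visible through the new one; the old mapping is only torn
 * down in anv_state_table_finish().
 */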

static VkResult
anv_state_table_grow(struct anv_state_table *table)
{
   VkResult result = VK_SUCCESS;

   uint32_t used = align_u32(table->state.next * ANV_STATE_ENTRY_SIZE,
                             PAGE_SIZE);
   uint32_t old_size = table->size;

   /* The state table is always initialized to a nonzero size and this
    * function is always called after initialization.
    */
   assert(old_size > 0);

   uint32_t required = MAX2(used, old_size);
   if (used * 2 <= required) {
      /* If we're in this case then this isn't the first allocation and we
       * already have enough space to hold double what we have allocated.
       * There's nothing for us to do.
       */
      goto done;
   }

   uint32_t size = old_size * 2;
   while (size < required)
      size *= 2;

   assert(size > table->size);

   result = anv_state_table_expand_range(table, size);

 done:
   return result;
}

void
anv_state_table_finish(struct anv_state_table *table)
{
   struct anv_state_table_cleanup *cleanup;

   u_vector_foreach(cleanup, &table->cleanups) {
      if (cleanup->map)
         munmap(cleanup->map, cleanup->size);
   }

   u_vector_finish(&table->cleanups);

   close(table->fd);
}
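
/* anv_state_table_add() below leans on the anv_block_state union from
 * anv_private.h, which (as assumed here) overlays { uint32_t next, end; }
 * with a single uint64_t u64.  Since `next` sits in the low 32 bits on the
 * little-endian CPUs this driver runs on, one atomic both claims a range
 * and snapshots the limit:
 *
 *    state.u64 = __sync_fetch_and_add(&table->state.u64, count);
 *
 * After this, state.next is the start of our range and state.end is the
 * limit that was in force at that instant, so the bounds check needs no
 * lock.
 */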

VkResult
anv_state_table_add(struct anv_state_table *table, uint32_t *idx,
                    uint32_t count)
{
   struct anv_block_state state, old, new;
   VkResult result;

   assert(idx);

   while (1) {
      state.u64 = __sync_fetch_and_add(&table->state.u64, count);
      if (state.next + count <= state.end) {
         assert(table->map);
         struct anv_free_entry *entry = &table->map[state.next];
         for (int i = 0; i < count; i++) {
            entry[i].state.idx = state.next + i;
         }
         *idx = state.next;
         return VK_SUCCESS;
      } else if (state.next <= state.end) {
         /* We allocated the first block outside the pool so we have to grow
          * the pool.  pool_state->next acts as a mutex: threads who try to
          * allocate now will get block indexes above the current limit and
          * hit futex_wait below.
          */
         new.next = state.next + count;
         do {
            result = anv_state_table_grow(table);
            if (result != VK_SUCCESS)
               return result;
            new.end = table->size / ANV_STATE_ENTRY_SIZE;
         } while (new.end < new.next);

         old.u64 = __sync_lock_test_and_set(&table->state.u64, new.u64);
         if (old.next != state.next)
            futex_wake(&table->state.end, INT_MAX);
      } else {
         futex_wait(&table->state.end, state.end, NULL);
         continue;
      }
   }
}

void
anv_free_list_push(union anv_free_list *list,
                   struct anv_state_table *table,
                   uint32_t first, uint32_t count)
{
   union anv_free_list current, old, new;
   uint32_t last = first;

   for (uint32_t i = 1; i < count; i++, last++)
      table->map[last].next = last + 1;

   old.u64 = list->u64;
   do {
      current = old;
      table->map[last].next = current.offset;
      new.offset = first;
      new.count = current.count + 1;
      old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
   } while (old.u64 != current.u64);
}

struct anv_state *
anv_free_list_pop(union anv_free_list *list,
                  struct anv_state_table *table)
{
   union anv_free_list current, new, old;

   current.u64 = list->u64;
   while (current.offset != EMPTY) {
      __sync_synchronize();
      new.offset = table->map[current.offset].next;
      new.count = current.count + 1;
      old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
      if (old.u64 == current.u64) {
         struct anv_free_entry *entry = &table->map[current.offset];
         return &entry->state;
      }
      current = old;
   }

   return NULL;
}
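
/* The push/pop pair above is a classic lock-free LIFO.  The `count` field
 * exists to defeat the ABA problem: if the CAS compared only `offset`, a
 * concurrent pop-then-push could reinsert the same offset with a different
 * `next` link and the stale swap would still succeed.  Bumping a counter
 * alongside the offset in the same 64-bit CAS word,
 *
 *    new.offset = table->map[current.offset].next;
 *    new.count  = current.count + 1;
 *    old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
 *
 * guarantees the word changes whenever any push or pop intervened, even if
 * the head offset happens to repeat.
 */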

static VkResult
anv_block_pool_expand_range(struct anv_block_pool *pool,
                            uint32_t center_bo_offset, uint32_t size);

VkResult
anv_block_pool_init(struct anv_block_pool *pool,
                    struct anv_device *device,
                    const char *name,
                    uint64_t start_address,
                    uint32_t initial_size)
{
   VkResult result;

   pool->name = name;
   pool->device = device;
   pool->use_softpin = device->physical->use_softpin;
   pool->nbos = 0;
   pool->size = 0;
   pool->center_bo_offset = 0;
   pool->start_address = intel_canonical_address(start_address);
   pool->map = NULL;

   if (pool->use_softpin) {
      pool->bo = NULL;
      pool->fd = -1;
   } else {
      /* Just make it 2GB up-front.  The Linux kernel won't actually back it
       * with pages until we either map and fault on one of them or we use
       * userptr and send a chunk of it off to the GPU.
       */
      pool->fd = os_create_anonymous_file(BLOCK_POOL_MEMFD_SIZE, "block pool");
      if (pool->fd == -1)
         return vk_error(device, VK_ERROR_INITIALIZATION_FAILED);

      pool->wrapper_bo = (struct anv_bo) {
         .refcount = 1,
         .offset = -1,
         .is_wrapper = true,
      };
      pool->bo = &pool->wrapper_bo;
   }

   if (!u_vector_init(&pool->mmap_cleanups, 8,
                      sizeof(struct anv_mmap_cleanup))) {
      result = vk_error(device, VK_ERROR_INITIALIZATION_FAILED);
      goto fail_fd;
   }

   pool->state.next = 0;
   pool->state.end = 0;
   pool->back_state.next = 0;
   pool->back_state.end = 0;

   result = anv_block_pool_expand_range(pool, 0, initial_size);
   if (result != VK_SUCCESS)
      goto fail_mmap_cleanups;

   /* Make the entire pool available in the front of the pool.  If back
    * allocation needs to use this space, the "ends" will be re-arranged.
    */
   pool->state.end = pool->size;

   return VK_SUCCESS;

 fail_mmap_cleanups:
   u_vector_finish(&pool->mmap_cleanups);
 fail_fd:
   if (pool->fd >= 0)
      close(pool->fd);

   return result;
}

void
anv_block_pool_finish(struct anv_block_pool *pool)
{
   anv_block_pool_foreach_bo(bo, pool) {
      if (bo->map)
         anv_gem_munmap(pool->device, bo->map, bo->size);
      anv_gem_close(pool->device, bo->gem_handle);
   }

   struct anv_mmap_cleanup *cleanup;
   u_vector_foreach(cleanup, &pool->mmap_cleanups)
      munmap(cleanup->map, cleanup->size);
   u_vector_finish(&pool->mmap_cleanups);

   if (pool->fd >= 0)
      close(pool->fd);
}
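
/* For the non-softpin path below, the memfd is laid out around a fixed
 * center so the pool can grow in both directions.  With illustrative
 * numbers, say center_bo_offset = 64KB and size = 1MB:
 * anv_block_pool_expand_range() maps `size` bytes starting at file offset
 * BLOCK_POOL_MEMFD_CENTER - 64KB, then sets pool->map = map + 64KB, so
 * pool->map always corresponds to file offset BLOCK_POOL_MEMFD_CENTER no
 * matter how often the pool is re-centered.  Back allocations sit at
 * negative offsets from pool->map, front allocations at positive ones.
 */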

static VkResult
anv_block_pool_expand_range(struct anv_block_pool *pool,
                            uint32_t center_bo_offset, uint32_t size)
{
   /* Assert that we only ever grow the pool */
   assert(center_bo_offset >= pool->back_state.end);
   assert(size - center_bo_offset >= pool->state.end);

   /* Assert that we don't go outside the bounds of the memfd */
   assert(center_bo_offset <= BLOCK_POOL_MEMFD_CENTER);
   assert(pool->use_softpin ||
          size - center_bo_offset <=
          BLOCK_POOL_MEMFD_SIZE - BLOCK_POOL_MEMFD_CENTER);

   /* For state pool BOs we have to be a bit careful about where we place them
    * in the GTT.  There are two documented workarounds for state base address
    * placement: Wa32bitGeneralStateOffset and Wa32bitInstructionBaseOffset
    * which state that those two base addresses do not support 48-bit
    * addresses and need to be placed in the bottom 32-bit range.
    * Unfortunately, this is not quite accurate.
    *
    * The real problem is that we always set the size of our state pools in
    * STATE_BASE_ADDRESS to 0xfffff (the maximum) even though the BO is most
    * likely significantly smaller.  We do this because we do not know at the
    * time we emit STATE_BASE_ADDRESS whether or not we will need to expand
    * the pool during command buffer building so we don't actually have a
    * valid final size.  If the address + size, as seen by STATE_BASE_ADDRESS,
    * overflows 48 bits, the GPU appears to treat all accesses to the buffer
    * as being out of bounds and returns zero.  For dynamic state, this
    * usually just leads to rendering corruptions, but shaders that are all
    * zero hang the GPU immediately.
    *
    * The easiest solution is to do exactly what the bogus workarounds say to
    * do: restrict these buffers to 32-bit addresses.  We could also pin the
    * BO to some particular location of our choosing, but that's significantly
    * more work than just not setting a flag.  So, we explicitly DO NOT set
    * the EXEC_OBJECT_SUPPORTS_48B_ADDRESS flag and the kernel does all of the
    * hard work for us.  When using softpin, we're in control and the fixed
    * addresses we choose are fine for base addresses.
    */
   enum anv_bo_alloc_flags bo_alloc_flags = ANV_BO_ALLOC_CAPTURE;
   if (!pool->use_softpin)
      bo_alloc_flags |= ANV_BO_ALLOC_32BIT_ADDRESS;

   if (pool->use_softpin) {
      uint32_t new_bo_size = size - pool->size;
      struct anv_bo *new_bo;
      assert(center_bo_offset == 0);
      VkResult result = anv_device_alloc_bo(pool->device,
                                            pool->name,
                                            new_bo_size,
                                            bo_alloc_flags |
                                            ANV_BO_ALLOC_LOCAL_MEM |
                                            ANV_BO_ALLOC_FIXED_ADDRESS |
                                            ANV_BO_ALLOC_MAPPED |
                                            ANV_BO_ALLOC_SNOOPED,
                                            pool->start_address + pool->size,
                                            &new_bo);
      if (result != VK_SUCCESS)
         return result;

      pool->bos[pool->nbos++] = new_bo;

      /* This pointer will always point to the first BO in the list */
      pool->bo = pool->bos[0];
   } else {
      /* Just leak the old map until we destroy the pool.  We can't munmap it
       * without races or imposing locking on the block allocate fast path.
       * On the whole, the leaked maps add up to less than the size of the
       * current map.  MAP_POPULATE seems like the right thing to do, but we
       * should try to get some numbers.
       */
      void *map = mmap(NULL, size, PROT_READ | PROT_WRITE,
                       MAP_SHARED | MAP_POPULATE, pool->fd,
                       BLOCK_POOL_MEMFD_CENTER - center_bo_offset);
      if (map == MAP_FAILED)
         return vk_errorf(pool->device, VK_ERROR_MEMORY_MAP_FAILED,
                          "mmap failed: %m");

      struct anv_bo *new_bo;
      VkResult result = anv_device_import_bo_from_host_ptr(pool->device,
                                                           map, size,
                                                           bo_alloc_flags,
                                                           0 /* client_address */,
                                                           &new_bo);
      if (result != VK_SUCCESS) {
         munmap(map, size);
         return result;
      }

      struct anv_mmap_cleanup *cleanup = u_vector_add(&pool->mmap_cleanups);
      if (!cleanup) {
         munmap(map, size);
         anv_device_release_bo(pool->device, new_bo);
         return vk_error(pool->device, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      cleanup->map = map;
      cleanup->size = size;

      /* Now that we mapped the new memory, we can write the new
       * center_bo_offset back into pool and update pool->map.
       */
      pool->center_bo_offset = center_bo_offset;
      pool->map = map + center_bo_offset;

      pool->bos[pool->nbos++] = new_bo;
      pool->wrapper_bo.map = new_bo;
   }

   assert(pool->nbos < ANV_MAX_BLOCK_POOL_BOS);
   pool->size = size;

   return VK_SUCCESS;
}

/** Returns current memory map of the block pool.
 *
 * The returned pointer points to the map for the memory at the specified
 * offset.  The offset parameter is relative to the "center" of the block pool
 * rather than the start of the block pool BO map.
 */
void*
anv_block_pool_map(struct anv_block_pool *pool, int32_t offset, uint32_t size)
{
   if (pool->use_softpin) {
      struct anv_bo *bo = NULL;
      int32_t bo_offset = 0;
      anv_block_pool_foreach_bo(iter_bo, pool) {
         if (offset < bo_offset + iter_bo->size) {
            bo = iter_bo;
            break;
         }
         bo_offset += iter_bo->size;
      }
      assert(bo != NULL);
      assert(offset >= bo_offset);
      assert((offset - bo_offset) + size <= bo->size);

      return bo->map + (offset - bo_offset);
   } else {
      return pool->map + offset;
   }
}
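
/* Example lookup (illustrative; real offsets come from
 * anv_block_pool_alloc() and anv_block_pool_alloc_back()): a block that was
 * allocated from the back at offset -8192 is reached with
 *
 *    void *addr = anv_block_pool_map(pool, -8192, 64);
 *
 * On the non-softpin path this is plain pointer arithmetic off pool->map.
 * On the softpin path offsets are never negative (there is no back
 * allocation there) and the loop above walks the BO list to find the one
 * containing [offset, offset + size).
 */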

/** Grows and re-centers the block pool.
 *
 * We grow the block pool in one or both directions in such a way that the
 * following conditions are met:
 *
 * 1) The size of the entire pool is always a power of two.
 *
 * 2) The pool only grows on both ends.  Neither end can get
 *    shortened.
 *
 * 3) At the end of the allocation, we have about twice as much space
 *    allocated for each end as we have used.  This way the pool doesn't
 *    grow too far in one direction or the other.
 *
 * 4) If the _alloc_back() has never been called, then the back portion of
 *    the pool retains a size of zero.  (This makes it easier for users of
 *    the block pool that only want a one-sided pool.)
 *
 * 5) We have enough space allocated for at least one more block in
 *    whichever side `state` points to.
 *
 * 6) The center of the pool is always aligned to both the block_size of
 *    the pool and a 4K CPU page.
 */
static uint32_t
anv_block_pool_grow(struct anv_block_pool *pool, struct anv_block_state *state,
                    uint32_t contiguous_size)
{
   VkResult result = VK_SUCCESS;

   pthread_mutex_lock(&pool->device->mutex);

   assert(state == &pool->state || state == &pool->back_state);

   /* Gather a little usage information on the pool.  Since we may have
    * threads waiting in queue to get some storage while we resize, it's
    * actually possible that total_used will be larger than old_size.  In
    * particular, block_pool_alloc() increments state->next prior to
    * calling block_pool_grow, so this ensures that we get enough space for
    * whichever side tries to grow the pool.
    *
    * We align to a page size because it makes it easier to do our
    * calculations later in such a way that we stay page-aligned.
    */
   uint32_t back_used = align_u32(pool->back_state.next, PAGE_SIZE);
   uint32_t front_used = align_u32(pool->state.next, PAGE_SIZE);
   uint32_t total_used = front_used + back_used;

   assert(state == &pool->state || back_used > 0);

   uint32_t old_size = pool->size;

   /* The block pool is always initialized to a nonzero size and this function
    * is always called after initialization.
    */
   assert(old_size > 0);

   const uint32_t old_back = pool->center_bo_offset;
   const uint32_t old_front = old_size - pool->center_bo_offset;

   /* The back_used and front_used may actually be smaller than the actual
    * requirement because they are based on the next pointers which are
    * updated prior to calling this function.
    */
   uint32_t back_required = MAX2(back_used, old_back);
   uint32_t front_required = MAX2(front_used, old_front);

   if (pool->use_softpin) {
      /* With softpin, the pool is made up of a bunch of buffers with separate
       * maps.  Make sure we have enough contiguous space that we can get a
       * properly contiguous map for the next chunk.
       */
      assert(old_back == 0);
      front_required = MAX2(front_required, old_front + contiguous_size);
   }

   if (back_used * 2 <= back_required && front_used * 2 <= front_required) {
      /* If we're in this case then this isn't the first allocation and we
       * already have enough space on both sides to hold double what we
       * have allocated.  There's nothing for us to do.
       */
      goto done;
   }

   uint32_t size = old_size * 2;
   while (size < back_required + front_required)
      size *= 2;

   assert(size > pool->size);

   /* We compute a new center_bo_offset such that, when we double the size
    * of the pool, we maintain the ratio of how much is used by each side.
    * This way things should remain more-or-less balanced.
    */
   uint32_t center_bo_offset;
   if (back_used == 0) {
      /* If we're in this case then we have never called alloc_back().  In
       * this case, we want to keep the offset at 0 to make things as simple
       * as possible for users that don't care about back allocations.
       */
      center_bo_offset = 0;
   } else {
      /* Try to "center" the allocation based on how much is currently in
       * use on each side of the center line.
       */
      center_bo_offset = ((uint64_t)size * back_used) / total_used;

      /* Align down to a multiple of the page size */
      center_bo_offset &= ~(PAGE_SIZE - 1);

      assert(center_bo_offset >= back_used);

      /* Make sure we don't shrink the back end of the pool */
      if (center_bo_offset < back_required)
         center_bo_offset = back_required;

      /* Make sure that we don't shrink the front end of the pool */
      if (size - center_bo_offset < front_required)
         center_bo_offset = size - front_required;
   }

   assert(center_bo_offset % PAGE_SIZE == 0);

   result = anv_block_pool_expand_range(pool, center_bo_offset, size);

done:
   pthread_mutex_unlock(&pool->device->mutex);

   if (result == VK_SUCCESS) {
      /* Return the appropriate new size.  This function never actually
       * updates state->next.  Instead, we let the caller do that because it
       * needs to do so in order to maintain its concurrency model.
       */
      if (state == &pool->state) {
         return pool->size - pool->center_bo_offset;
      } else {
         assert(pool->center_bo_offset > 0);
         return pool->center_bo_offset;
      }
   } else {
      return 0;
   }
}

static uint32_t
anv_block_pool_alloc_new(struct anv_block_pool *pool,
                         struct anv_block_state *pool_state,
                         uint32_t block_size, uint32_t *padding)
{
   struct anv_block_state state, old, new;

   /* Most allocations won't generate any padding */
   if (padding)
      *padding = 0;

   while (1) {
      state.u64 = __sync_fetch_and_add(&pool_state->u64, block_size);
      if (state.next + block_size <= state.end) {
         return state.next;
      } else if (state.next <= state.end) {
         if (pool->use_softpin && state.next < state.end) {
            /* We need to grow the block pool, but still have some leftover
             * space that can't be used by that particular allocation.  So we
             * add that as "padding", and return it.
             */
            uint32_t leftover = state.end - state.next;

            /* If there is some leftover space in the pool, the caller must
             * deal with it.
             */
            assert(leftover == 0 || padding);
            if (padding)
               *padding = leftover;
            state.next += leftover;
         }

         /* We allocated the first block outside the pool so we have to grow
          * the pool.  pool_state->next acts as a mutex: threads who try to
          * allocate now will get block indexes above the current limit and
          * hit futex_wait below.
          */
         new.next = state.next + block_size;
         do {
            new.end = anv_block_pool_grow(pool, pool_state, block_size);
         } while (new.end < new.next);

         old.u64 = __sync_lock_test_and_set(&pool_state->u64, new.u64);
         if (old.next != state.next)
            futex_wake(&pool_state->end, INT_MAX);
         return state.next;
      } else {
         futex_wait(&pool_state->end, state.end, NULL);
         continue;
      }
   }
}

int32_t
anv_block_pool_alloc(struct anv_block_pool *pool,
                     uint32_t block_size, uint32_t *padding)
{
   uint32_t offset;

   offset = anv_block_pool_alloc_new(pool, &pool->state, block_size, padding);

   return offset;
}
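
/* Callers that pass a non-NULL `padding` are expected to recycle those
 * bytes; the state pool does exactly this in anv_state_pool_alloc_no_vg()
 * further below:
 *
 *    uint32_t padding;
 *    int32_t offset = anv_block_pool_alloc(block_pool, block_size, &padding);
 *    if (padding > 0)
 *       anv_state_pool_return_chunk(pool, offset - padding, padding, 0);
 *
 * so the space stranded at the end of the old memory range goes back on a
 * free list instead of leaking.
 */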

/* Allocates a block out of the back of the block pool.
 *
 * This will allocate a block earlier than the "start" of the block pool.
 * The offsets returned from this function will be negative but will still
 * be correct relative to the block pool's map pointer.
 *
 * If you ever use anv_block_pool_alloc_back, then you will have to do
 * gymnastics with the block pool's BO when doing relocations.
 */
int32_t
anv_block_pool_alloc_back(struct anv_block_pool *pool,
                          uint32_t block_size)
{
   int32_t offset = anv_block_pool_alloc_new(pool, &pool->back_state,
                                             block_size, NULL);

   /* The offset we get out of anv_block_pool_alloc_new() is actually the
    * number of bytes downwards from the middle to the end of the block.
    * We need to turn it into a (negative) offset from the middle to the
    * start of the block.
    */
   assert(offset >= 0);
   return -(offset + block_size);
}

VkResult
anv_state_pool_init(struct anv_state_pool *pool,
                    struct anv_device *device,
                    const char *name,
                    uint64_t base_address,
                    int32_t start_offset,
                    uint32_t block_size)
{
   /* We don't want to ever see signed overflow */
   assert(start_offset < INT32_MAX - (int32_t)BLOCK_POOL_MEMFD_SIZE);

   VkResult result = anv_block_pool_init(&pool->block_pool, device, name,
                                         base_address + start_offset,
                                         block_size * 16);
   if (result != VK_SUCCESS)
      return result;

   pool->start_offset = start_offset;

   result = anv_state_table_init(&pool->table, device, 64);
   if (result != VK_SUCCESS) {
      anv_block_pool_finish(&pool->block_pool);
      return result;
   }

   assert(util_is_power_of_two_or_zero(block_size));
   pool->block_size = block_size;
   pool->back_alloc_free_list = ANV_FREE_LIST_EMPTY;
   for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
      pool->buckets[i].free_list = ANV_FREE_LIST_EMPTY;
      pool->buckets[i].block.next = 0;
      pool->buckets[i].block.end = 0;
   }
   VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));

   return VK_SUCCESS;
}

void
anv_state_pool_finish(struct anv_state_pool *pool)
{
   VG(VALGRIND_DESTROY_MEMPOOL(pool));
   anv_state_table_finish(&pool->table);
   anv_block_pool_finish(&pool->block_pool);
}

static uint32_t
anv_fixed_size_state_pool_alloc_new(struct anv_fixed_size_state_pool *pool,
                                    struct anv_block_pool *block_pool,
                                    uint32_t state_size,
                                    uint32_t block_size,
                                    uint32_t *padding)
{
   struct anv_block_state block, old, new;
   uint32_t offset;

   /* We don't always use anv_block_pool_alloc(), which would set *padding to
    * zero for us.  So if we have a pointer to padding, we must zero it out
    * ourselves here, to make sure we always return some sensible value.
    */
   if (padding)
      *padding = 0;

   /* If our state is large, we don't need any sub-allocation from a block.
    * Instead, we just grab whole (potentially large) blocks.
    */
   if (state_size >= block_size)
      return anv_block_pool_alloc(block_pool, state_size, padding);

 restart:
   block.u64 = __sync_fetch_and_add(&pool->block.u64, state_size);

   if (block.next < block.end) {
      return block.next;
   } else if (block.next == block.end) {
      offset = anv_block_pool_alloc(block_pool, block_size, padding);
      new.next = offset + state_size;
      new.end = offset + block_size;
      old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
      if (old.next != block.next)
         futex_wake(&pool->block.end, INT_MAX);
      return offset;
   } else {
      futex_wait(&pool->block.end, block.end, NULL);
      goto restart;
   }
}

static uint32_t
anv_state_pool_get_bucket(uint32_t size)
{
   unsigned size_log2 = ilog2_round_up(size);
   assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2);
   if (size_log2 < ANV_MIN_STATE_SIZE_LOG2)
      size_log2 = ANV_MIN_STATE_SIZE_LOG2;
   return size_log2 - ANV_MIN_STATE_SIZE_LOG2;
}

static uint32_t
anv_state_pool_get_bucket_size(uint32_t bucket)
{
   uint32_t size_log2 = bucket + ANV_MIN_STATE_SIZE_LOG2;
   return 1 << size_log2;
}
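
/* Bucket math example, assuming ANV_MIN_STATE_SIZE_LOG2 == 6 (64-byte
 * minimum; the real constant lives in anv_private.h): a 96-byte request
 * has ilog2_round_up(96) == 7, so it lands in bucket 1 and is served from
 * anv_state_pool_get_bucket_size(1) == 128-byte slots, while anything of
 * 64 bytes or less shares bucket 0.
 */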

/** Helper to push a chunk into the state table.
 *
 * It creates 'count' entries in the state table and updates their sizes,
 * offsets and maps, also pushing them as "free" states.
 */
static void
anv_state_pool_return_blocks(struct anv_state_pool *pool,
                             uint32_t chunk_offset, uint32_t count,
                             uint32_t block_size)
{
   /* Disallow returning 0 chunks */
   assert(count != 0);

   /* Make sure we always return chunks aligned to the block_size */
   assert(chunk_offset % block_size == 0);

   uint32_t st_idx;
   UNUSED VkResult result = anv_state_table_add(&pool->table, &st_idx, count);
   assert(result == VK_SUCCESS);
   for (int i = 0; i < count; i++) {
      /* update states that were added back to the state table */
      struct anv_state *state_i = anv_state_table_get(&pool->table,
                                                      st_idx + i);
      state_i->alloc_size = block_size;
      state_i->offset = pool->start_offset + chunk_offset + block_size * i;
      state_i->map = anv_block_pool_map(&pool->block_pool,
                                        state_i->offset,
                                        state_i->alloc_size);
   }

   uint32_t block_bucket = anv_state_pool_get_bucket(block_size);
   anv_free_list_push(&pool->buckets[block_bucket].free_list,
                      &pool->table, st_idx, count);
}

/** Returns a chunk of memory back to the state pool.
 *
 * Do a two-level split.  If chunk_size is bigger than divisor
 * (pool->block_size), we return as many divisor sized blocks as we can, from
 * the end of the chunk.
 *
 * The remainder is then split into smaller blocks (starting at small_size if
 * it is non-zero), with larger blocks always being taken from the end of the
 * chunk.
 */
static void
anv_state_pool_return_chunk(struct anv_state_pool *pool,
                            uint32_t chunk_offset, uint32_t chunk_size,
                            uint32_t small_size)
{
   uint32_t divisor = pool->block_size;
   uint32_t nblocks = chunk_size / divisor;
   uint32_t rest = chunk_size - nblocks * divisor;

   if (nblocks > 0) {
      /* First return divisor aligned and sized chunks.  We start returning
       * larger blocks from the end of the chunk, since they should already be
       * aligned to divisor.  Also, anv_state_pool_return_blocks() only
       * accepts aligned chunks.
       */
      uint32_t offset = chunk_offset + rest;
      anv_state_pool_return_blocks(pool, offset, nblocks, divisor);
   }

   chunk_size = rest;
   divisor /= 2;

   if (small_size > 0 && small_size < divisor)
      divisor = small_size;

   uint32_t min_size = 1 << ANV_MIN_STATE_SIZE_LOG2;

   /* Just as before, return larger divisor aligned blocks from the end of the
    * chunk first.
    */
   while (chunk_size > 0 && divisor >= min_size) {
      nblocks = chunk_size / divisor;
      rest = chunk_size - nblocks * divisor;
      if (nblocks > 0) {
         anv_state_pool_return_blocks(pool, chunk_offset + rest,
                                      nblocks, divisor);
         chunk_size = rest;
      }
      divisor /= 2;
   }
}
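
/* Worked example of the two-level split above, assuming
 * pool->block_size == 8192 and a 64-byte minimum state size: returning a
 * 20480-byte chunk with small_size == 0 first gives back two 8192-byte
 * blocks taken from the end of the chunk, leaving rest == 4096; the loop
 * then returns that remainder as one 4096-byte block and nothing smaller
 * is left over.  A non-zero small_size only caps the first divisor used
 * when splitting the remainder.
 */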

static struct anv_state
anv_state_pool_alloc_no_vg(struct anv_state_pool *pool,
                           uint32_t size, uint32_t align)
{
   uint32_t bucket = anv_state_pool_get_bucket(MAX2(size, align));

   struct anv_state *state;
   uint32_t alloc_size = anv_state_pool_get_bucket_size(bucket);
   int32_t offset;

   /* Try free list first. */
   state = anv_free_list_pop(&pool->buckets[bucket].free_list,
                             &pool->table);
   if (state) {
      assert(state->offset >= pool->start_offset);
      goto done;
   }

   /* Try to grab a chunk from some larger bucket and split it up */
   for (unsigned b = bucket + 1; b < ANV_STATE_BUCKETS; b++) {
      state = anv_free_list_pop(&pool->buckets[b].free_list, &pool->table);
      if (state) {
         unsigned chunk_size = anv_state_pool_get_bucket_size(b);
         int32_t chunk_offset = state->offset;

         /* First let's update the state we got to its new size.  offset and
          * map remain the same.
          */
         state->alloc_size = alloc_size;

         /* Now return the unused part of the chunk back to the pool as free
          * blocks.
          *
          * There are a couple of options as to what we do with it:
          *
          * 1) We could fully split the chunk into state.alloc_size sized
          *    pieces.  However, this would mean that allocating a 16B
          *    state could potentially split a 2MB chunk into 512K smaller
          *    chunks.  This would lead to unnecessary fragmentation.
          *
          * 2) The classic "buddy allocator" method would have us split the
          *    chunk in half and return one half.  Then we would split the
          *    remaining half in half and return one half, and repeat as
          *    needed until we get down to the size we want.  However, if
          *    you are allocating a bunch of the same size state (which is
          *    the common case), this means that every other allocation has
          *    to go up a level and every fourth goes up two levels, etc.
          *    This is not nearly as efficient as it could be if we did a
          *    little more work up-front.
          *
          * 3) Split the difference between (1) and (2) by doing a
          *    two-level split.  If it's bigger than some fixed block_size,
          *    we split it into block_size sized chunks and return all but
          *    one of them.  Then we split what remains into
          *    state.alloc_size sized chunks and return them.
          *
          * We choose something close to option (3), which is implemented
          * with anv_state_pool_return_chunk().  That is done by returning
          * the remainder of the chunk, with alloc_size as a hint of the
          * size that we want the smaller chunk split into.
          */
         anv_state_pool_return_chunk(pool, chunk_offset + alloc_size,
                                     chunk_size - alloc_size, alloc_size);
         goto done;
      }
   }

   uint32_t padding;
   offset = anv_fixed_size_state_pool_alloc_new(&pool->buckets[bucket],
                                                &pool->block_pool,
                                                alloc_size,
                                                pool->block_size,
                                                &padding);
   /* Every time we allocate a new state, add it to the state pool */
   uint32_t idx;
   UNUSED VkResult result = anv_state_table_add(&pool->table, &idx, 1);
   assert(result == VK_SUCCESS);

   state = anv_state_table_get(&pool->table, idx);
   state->offset = pool->start_offset + offset;
   state->alloc_size = alloc_size;
   state->map = anv_block_pool_map(&pool->block_pool, offset, alloc_size);

   if (padding > 0) {
      uint32_t return_offset = offset - padding;
      anv_state_pool_return_chunk(pool, return_offset, padding, 0);
   }

done:
   return *state;
}

struct anv_state
anv_state_pool_alloc(struct anv_state_pool *pool, uint32_t size, uint32_t align)
{
   if (size == 0)
      return ANV_STATE_NULL;

   struct anv_state state = anv_state_pool_alloc_no_vg(pool, size, align);
   VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size));
   return state;
}

struct anv_state
anv_state_pool_alloc_back(struct anv_state_pool *pool)
{
   struct anv_state *state;
   uint32_t alloc_size = pool->block_size;

   /* This function is only used with pools where start_offset == 0 */
   assert(pool->start_offset == 0);

   state = anv_free_list_pop(&pool->back_alloc_free_list, &pool->table);
   if (state) {
      assert(state->offset < pool->start_offset);
      goto done;
   }

   int32_t offset;
   offset = anv_block_pool_alloc_back(&pool->block_pool,
                                      pool->block_size);
   uint32_t idx;
   UNUSED VkResult result = anv_state_table_add(&pool->table, &idx, 1);
   assert(result == VK_SUCCESS);

   state = anv_state_table_get(&pool->table, idx);
   state->offset = pool->start_offset + offset;
   state->alloc_size = alloc_size;
   state->map = anv_block_pool_map(&pool->block_pool, offset, alloc_size);

done:
   VG(VALGRIND_MEMPOOL_ALLOC(pool, state->map, state->alloc_size));
   return *state;
}
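
/* Round-trip sketch with illustrative sizes: anv_state_pool_alloc(pool, 48,
 * 16) rounds the request up to the smallest bucket that fits it (64 bytes
 * with the minimum bucket assumed above), so the returned anv_state reports
 * alloc_size == 64 even though only 48 bytes were asked for.  Handing that
 * same struct to anv_state_pool_free() pushes entry state.idx back onto
 * that bucket's free list, where the next allocation of a similar size (or
 * a larger-chunk split) can pick it up without touching the block pool.
 */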

static void
anv_state_pool_free_no_vg(struct anv_state_pool *pool, struct anv_state state)
{
   assert(util_is_power_of_two_or_zero(state.alloc_size));
   unsigned bucket = anv_state_pool_get_bucket(state.alloc_size);

   if (state.offset < pool->start_offset) {
      assert(state.alloc_size == pool->block_size);
      anv_free_list_push(&pool->back_alloc_free_list,
                         &pool->table, state.idx, 1);
   } else {
      anv_free_list_push(&pool->buckets[bucket].free_list,
                         &pool->table, state.idx, 1);
   }
}

void
anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state)
{
   if (state.alloc_size == 0)
      return;

   VG(VALGRIND_MEMPOOL_FREE(pool, state.map));
   anv_state_pool_free_no_vg(pool, state);
}

struct anv_state_stream_block {
   struct anv_state block;

   /* The next block */
   struct anv_state_stream_block *next;

#ifdef HAVE_VALGRIND
   /* A pointer to the first user-allocated thing in this block.  This is
    * what valgrind sees as the start of the block.
    */
   void *_vg_ptr;
#endif
};

/* The state stream allocator is a one-shot, single threaded allocator for
 * variable sized blocks.  We use it for allocating dynamic state.
 */
void
anv_state_stream_init(struct anv_state_stream *stream,
                      struct anv_state_pool *state_pool,
                      uint32_t block_size)
{
   stream->state_pool = state_pool;
   stream->block_size = block_size;

   stream->block = ANV_STATE_NULL;

   /* Ensure that next + whatever > block_size.  This way the first call to
    * state_stream_alloc fetches a new block.
    */
   stream->next = block_size;

   util_dynarray_init(&stream->all_blocks, NULL);

   VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false));
}

void
anv_state_stream_finish(struct anv_state_stream *stream)
{
   util_dynarray_foreach(&stream->all_blocks, struct anv_state, block) {
      VG(VALGRIND_MEMPOOL_FREE(stream, block->map));
      VG(VALGRIND_MAKE_MEM_NOACCESS(block->map, block->alloc_size));
      anv_state_pool_free_no_vg(stream->state_pool, *block);
   }
   util_dynarray_fini(&stream->all_blocks);

   VG(VALGRIND_DESTROY_MEMPOOL(stream));
}
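
/* Typical stream lifetime, sketched (names are illustrative; in the driver
 * the stream usually belongs to a command buffer and the block size is a
 * fixed constant):
 *
 *    struct anv_state_stream stream;
 *    anv_state_stream_init(&stream, &device->dynamic_state_pool, 16384);
 *    for each draw:
 *       struct anv_state s = anv_state_stream_alloc(&stream, 256, 64);
 *       ... fill s.map with packed state ...
 *    anv_state_stream_finish(&stream);   returns every block to the pool
 */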
struct anv_state
anv_state_stream_alloc(struct anv_state_stream *stream,
                       uint32_t size, uint32_t alignment)
{
   if (size == 0)
      return ANV_STATE_NULL;

   assert(alignment <= PAGE_SIZE);

   uint32_t offset = align_u32(stream->next, alignment);
   if (offset + size > stream->block.alloc_size) {
      uint32_t block_size = stream->block_size;
      if (block_size < size)
         block_size = round_to_power_of_two(size);

      stream->block = anv_state_pool_alloc_no_vg(stream->state_pool,
                                                 block_size, PAGE_SIZE);
      util_dynarray_append(&stream->all_blocks,
                           struct anv_state, stream->block);
      VG(VALGRIND_MAKE_MEM_NOACCESS(stream->block.map, block_size));

      /* Reset back to the start */
      stream->next = offset = 0;
      assert(offset + size <= stream->block.alloc_size);
   }
   const bool new_block = stream->next == 0;

   struct anv_state state = stream->block;
   state.offset += offset;
   state.alloc_size = size;
   state.map += offset;

   stream->next = offset + size;

   if (new_block) {
      assert(state.map == stream->block.map);
      VG(VALGRIND_MEMPOOL_ALLOC(stream, state.map, size));
   } else {
      /* This only updates the mempool. The newly allocated chunk is still
       * marked as NOACCESS.
       */
      VG(VALGRIND_MEMPOOL_CHANGE(stream, stream->block.map, stream->block.map,
                                 stream->next));
      /* Mark the newly allocated chunk as undefined */
      VG(VALGRIND_MAKE_MEM_UNDEFINED(state.map, state.alloc_size));
   }

   return state;
}
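
/* Worked example of the fast path above (numbers are illustrative): with
 * block_size = 16384, a stream that has already handed out 1000 bytes
 * (stream->next == 1000) gets an allocation of size 256, alignment 64:
 *
 *    offset = align_u32(1000, 64) = 1024
 *    1024 + 256 <= 16384, so no new block is fetched
 *    state.offset = block.offset + 1024
 *    stream->next = 1280
 *
 * Only when offset + size overflows the current block do we pay for a
 * state-pool allocation.
 */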

void
anv_state_reserved_pool_init(struct anv_state_reserved_pool *pool,
                             struct anv_state_pool *parent,
                             uint32_t count, uint32_t size, uint32_t alignment)
{
   pool->pool = parent;
   pool->reserved_blocks = ANV_FREE_LIST_EMPTY;
   pool->count = count;

   for (unsigned i = 0; i < count; i++) {
      struct anv_state state = anv_state_pool_alloc(pool->pool, size, alignment);
      anv_free_list_push(&pool->reserved_blocks, &pool->pool->table, state.idx, 1);
   }
}

void
anv_state_reserved_pool_finish(struct anv_state_reserved_pool *pool)
{
   struct anv_state *state;

   while ((state = anv_free_list_pop(&pool->reserved_blocks, &pool->pool->table))) {
      anv_state_pool_free(pool->pool, *state);
      pool->count--;
   }
   assert(pool->count == 0);
}

struct anv_state
anv_state_reserved_pool_alloc(struct anv_state_reserved_pool *pool)
{
   return *anv_free_list_pop(&pool->reserved_blocks, &pool->pool->table);
}

void
anv_state_reserved_pool_free(struct anv_state_reserved_pool *pool,
                             struct anv_state state)
{
   anv_free_list_push(&pool->reserved_blocks, &pool->pool->table, state.idx, 1);
}
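
/* The reserved pool exists so that a fixed number of states can be set
 * aside at init time and handed out later in contexts where failure is not
 * an option: alloc is just a free-list pop, which cannot fail as long as
 * fewer than count states are outstanding. A hypothetical usage sketch
 * (pool choice and sizes illustrative):
 *
 *    struct anv_state_reserved_pool rpool;
 *    anv_state_reserved_pool_init(&rpool, &device->surface_state_pool,
 *                                 8, 64, 64);
 *    struct anv_state s = anv_state_reserved_pool_alloc(&rpool);
 *    ...
 *    anv_state_reserved_pool_free(&rpool, s);
 *    anv_state_reserved_pool_finish(&rpool);
 */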

void
anv_bo_pool_init(struct anv_bo_pool *pool, struct anv_device *device,
                 const char *name)
{
   pool->name = name;
   pool->device = device;
   for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
      util_sparse_array_free_list_init(&pool->free_list[i],
                                       &device->bo_cache.bo_map, 0,
                                       offsetof(struct anv_bo, free_index));
   }

   VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
}

void
anv_bo_pool_finish(struct anv_bo_pool *pool)
{
   for (unsigned i = 0; i < ARRAY_SIZE(pool->free_list); i++) {
      while (1) {
         struct anv_bo *bo =
            util_sparse_array_free_list_pop_elem(&pool->free_list[i]);
         if (bo == NULL)
            break;

         /* anv_device_release_bo is going to "free" it */
         VG(VALGRIND_MALLOCLIKE_BLOCK(bo->map, bo->size, 0, 1));
         anv_device_release_bo(pool->device, bo);
      }
   }

   VG(VALGRIND_DESTROY_MEMPOOL(pool));
}

VkResult
anv_bo_pool_alloc(struct anv_bo_pool *pool, uint32_t size,
                  struct anv_bo **bo_out)
{
   const unsigned size_log2 = size < 4096 ? 12 : ilog2_round_up(size);
   const unsigned pow2_size = 1 << size_log2;
   const unsigned bucket = size_log2 - 12;
   assert(bucket < ARRAY_SIZE(pool->free_list));

   struct anv_bo *bo =
      util_sparse_array_free_list_pop_elem(&pool->free_list[bucket]);
   if (bo != NULL) {
      VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));
      *bo_out = bo;
      return VK_SUCCESS;
   }

   VkResult result = anv_device_alloc_bo(pool->device,
                                         pool->name,
                                         pow2_size,
                                         ANV_BO_ALLOC_LOCAL_MEM |
                                         ANV_BO_ALLOC_MAPPED |
                                         ANV_BO_ALLOC_SNOOPED |
                                         ANV_BO_ALLOC_CAPTURE,
                                         0 /* explicit_address */,
                                         &bo);
   if (result != VK_SUCCESS)
      return result;

   /* We want it to look like it came from this pool */
   VG(VALGRIND_FREELIKE_BLOCK(bo->map, 0));
   VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, size));

   *bo_out = bo;

   return VK_SUCCESS;
}

void
anv_bo_pool_free(struct anv_bo_pool *pool, struct anv_bo *bo)
{
   VG(VALGRIND_MEMPOOL_FREE(pool, bo->map));

   assert(util_is_power_of_two_or_zero(bo->size));
   const unsigned size_log2 = ilog2_round_up(bo->size);
   const unsigned bucket = size_log2 - 12;
   assert(bucket < ARRAY_SIZE(pool->free_list));

   assert(util_sparse_array_get(&pool->device->bo_cache.bo_map,
                                bo->gem_handle) == bo);
   util_sparse_array_free_list_push(&pool->free_list[bucket],
                                    &bo->gem_handle, 1);
}
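
/* Bucket math, for illustration: sizes are rounded up to a power of two of
 * at least one page, and the bucket index is that exponent minus 12 (log2
 * of 4096). For example:
 *
 *    requested size   pow2_size   bucket
 *    1000             4096        0
 *    4096             4096        0
 *    10000            16384       2
 *
 * A BO freed into bucket N is therefore always big enough to satisfy any
 * later request that maps to bucket N.
 */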

// Scratch pool

void
anv_scratch_pool_init(struct anv_device *device, struct anv_scratch_pool *pool)
{
   memset(pool, 0, sizeof(*pool));
}

void
anv_scratch_pool_finish(struct anv_device *device, struct anv_scratch_pool *pool)
{
   for (unsigned s = 0; s < ARRAY_SIZE(pool->bos[0]); s++) {
      for (unsigned i = 0; i < 16; i++) {
         if (pool->bos[i][s] != NULL)
            anv_device_release_bo(device, pool->bos[i][s]);
      }
   }

   for (unsigned i = 0; i < 16; i++) {
      if (pool->surf_states[i].map != NULL) {
         anv_state_pool_free(&device->surface_state_pool,
                             pool->surf_states[i]);
      }
   }
}

struct anv_bo *
anv_scratch_pool_alloc(struct anv_device *device, struct anv_scratch_pool *pool,
                       gl_shader_stage stage, unsigned per_thread_scratch)
{
   if (per_thread_scratch == 0)
      return NULL;

   unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
   assert(scratch_size_log2 < 16);

   assert(stage < ARRAY_SIZE(pool->bos));

   const struct intel_device_info *devinfo = &device->info;

   /* On GFX version 12.5, scratch access changed to a surface-based model.
    * Instead of each shader type having its own layout based on IDs passed
    * from the relevant fixed-function unit, all scratch access is based on
    * thread IDs like it always has been for compute.
    */
   if (devinfo->verx10 >= 125)
      stage = MESA_SHADER_COMPUTE;

   struct anv_bo *bo = p_atomic_read(&pool->bos[scratch_size_log2][stage]);

   if (bo != NULL)
      return bo;

   assert(stage < ARRAY_SIZE(devinfo->max_scratch_ids));
   uint32_t size = per_thread_scratch * devinfo->max_scratch_ids[stage];

   /* Even though the Scratch base pointers in 3DSTATE_*S are 64 bits, they
    * are still relative to the general state base address. When we emit
    * STATE_BASE_ADDRESS, we set general state base address to 0 and the size
    * to the maximum (1 page under 4GB). This allows us to just place the
    * scratch buffers anywhere we wish in the bottom 32 bits of address space
    * and just set the scratch base pointer in 3DSTATE_*S using a relocation.
    * However, in order to do so, we need to ensure that the kernel does not
    * place the scratch BO above the 32-bit boundary.
    *
    * NOTE: Technically, it can't go "anywhere" because the top page is off
    * limits. However, when EXEC_OBJECT_SUPPORTS_48B_ADDRESS is set, the
    * kernel allocates space using
    *
    *    end = min_t(u64, end, (1ULL << 32) - I915_GTT_PAGE_SIZE);
    *
    * so nothing will ever touch the top page.
    */
   VkResult result = anv_device_alloc_bo(device, "scratch", size,
                                         ANV_BO_ALLOC_32BIT_ADDRESS |
                                         ANV_BO_ALLOC_LOCAL_MEM,
                                         0 /* explicit_address */,
                                         &bo);
   if (result != VK_SUCCESS)
      return NULL; /* TODO */

   struct anv_bo *current_bo =
      p_atomic_cmpxchg(&pool->bos[scratch_size_log2][stage], NULL, bo);
   if (current_bo) {
      anv_device_release_bo(device, bo);
      return current_bo;
   } else {
      return bo;
   }
}
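
/* The p_atomic_cmpxchg above is the usual lock-free publication pattern:
 * every racing thread allocates its own BO, exactly one wins the
 * compare-and-swap of the NULL slot, and the losers release their BO and
 * adopt the winner's. In the worst case we briefly allocate one BO per
 * racing thread, but the steady state is a single shared scratch BO per
 * (size, stage) slot.
 */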

uint32_t
anv_scratch_pool_get_surf(struct anv_device *device,
                          struct anv_scratch_pool *pool,
                          unsigned per_thread_scratch)
{
   if (per_thread_scratch == 0)
      return 0;

   unsigned scratch_size_log2 = ffs(per_thread_scratch / 2048);
   assert(scratch_size_log2 < 16);

   uint32_t surf = p_atomic_read(&pool->surfs[scratch_size_log2]);
   if (surf > 0)
      return surf;

   struct anv_bo *bo =
      anv_scratch_pool_alloc(device, pool, MESA_SHADER_COMPUTE,
                             per_thread_scratch);
   struct anv_address addr = { .bo = bo };

   struct anv_state state =
      anv_state_pool_alloc(&device->surface_state_pool,
                           device->isl_dev.ss.size, 64);

   isl_buffer_fill_state(&device->isl_dev, state.map,
                         .address = anv_address_physical(addr),
                         .size_B = bo->size,
                         .mocs = anv_mocs(device, bo, 0),
                         .format = ISL_FORMAT_RAW,
                         .swizzle = ISL_SWIZZLE_IDENTITY,
                         .stride_B = per_thread_scratch,
                         .is_scratch = true);

   uint32_t current = p_atomic_cmpxchg(&pool->surfs[scratch_size_log2],
                                       0, state.offset);
   if (current) {
      anv_state_pool_free(&device->surface_state_pool, state);
      return current;
   } else {
      pool->surf_states[scratch_size_log2] = state;
      return state.offset;
   }
}
16407ec681f3Smrg */ 16417ec681f3Smrg size = align_u64(size, 64 * 1024); 16427ec681f3Smrg 16437ec681f3Smrg /* See anv_bo::_ccs_size */ 16447ec681f3Smrg ccs_size = align_u64(DIV_ROUND_UP(size, INTEL_AUX_MAP_GFX12_CCS_SCALE), 4096); 164501e04c3fSmrg } 164601e04c3fSmrg 16477ec681f3Smrg uint32_t gem_handle; 164801e04c3fSmrg 16497ec681f3Smrg /* If we have vram size, we have multiple memory regions and should choose 16507ec681f3Smrg * one of them. 16517ec681f3Smrg */ 16527ec681f3Smrg if (device->physical->vram.size > 0) { 16537ec681f3Smrg struct drm_i915_gem_memory_class_instance regions[2]; 16547ec681f3Smrg uint32_t nregions = 0; 16557ec681f3Smrg 16567ec681f3Smrg if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM) { 16577ec681f3Smrg /* For vram allocation, still use system memory as a fallback. */ 16587ec681f3Smrg regions[nregions++] = device->physical->vram.region; 16597ec681f3Smrg regions[nregions++] = device->physical->sys.region; 16607ec681f3Smrg } else { 16617ec681f3Smrg regions[nregions++] = device->physical->sys.region; 16627ec681f3Smrg } 16637ec681f3Smrg 16647ec681f3Smrg gem_handle = anv_gem_create_regions(device, size + ccs_size, 16657ec681f3Smrg nregions, regions); 16667ec681f3Smrg } else { 16677ec681f3Smrg gem_handle = anv_gem_create(device, size + ccs_size); 166801e04c3fSmrg } 166901e04c3fSmrg 16707ec681f3Smrg if (gem_handle == 0) 16717ec681f3Smrg return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY); 16727ec681f3Smrg 16737ec681f3Smrg struct anv_bo new_bo = { 16747ec681f3Smrg .name = name, 16757ec681f3Smrg .gem_handle = gem_handle, 16767ec681f3Smrg .refcount = 1, 16777ec681f3Smrg .offset = -1, 16787ec681f3Smrg .size = size, 16797ec681f3Smrg ._ccs_size = ccs_size, 16807ec681f3Smrg .flags = bo_flags, 16817ec681f3Smrg .is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL), 16827ec681f3Smrg .has_client_visible_address = 16837ec681f3Smrg (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0, 16847ec681f3Smrg .has_implicit_ccs = ccs_size > 0, 16857ec681f3Smrg }; 168601e04c3fSmrg 16877ec681f3Smrg if (alloc_flags & ANV_BO_ALLOC_MAPPED) { 16887ec681f3Smrg new_bo.map = anv_gem_mmap(device, new_bo.gem_handle, 0, size, 0); 16897ec681f3Smrg if (new_bo.map == MAP_FAILED) { 16907ec681f3Smrg anv_gem_close(device, new_bo.gem_handle); 16917ec681f3Smrg return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY, 16927ec681f3Smrg "mmap failed: %m"); 16937ec681f3Smrg } 16947ec681f3Smrg } 169501e04c3fSmrg 16967ec681f3Smrg if (alloc_flags & ANV_BO_ALLOC_SNOOPED) { 16977ec681f3Smrg assert(alloc_flags & ANV_BO_ALLOC_MAPPED); 16987ec681f3Smrg /* We don't want to change these defaults if it's going to be shared 16997ec681f3Smrg * with another process. 17007ec681f3Smrg */ 17017ec681f3Smrg assert(!(alloc_flags & ANV_BO_ALLOC_EXTERNAL)); 17027ec681f3Smrg 17037ec681f3Smrg /* Regular objects are created I915_CACHING_CACHED on LLC platforms and 17047ec681f3Smrg * I915_CACHING_NONE on non-LLC platforms. For many internal state 17057ec681f3Smrg * objects, we'd rather take the snooping overhead than risk forgetting 17067ec681f3Smrg * a CLFLUSH somewhere. Userptr objects are always created as 17077ec681f3Smrg * I915_CACHING_CACHED, which on non-LLC means snooped so there's no 17087ec681f3Smrg * need to do this there. 

#define ANV_BO_CACHE_SUPPORTED_FLAGS \
   (EXEC_OBJECT_WRITE | \
    EXEC_OBJECT_ASYNC | \
    EXEC_OBJECT_SUPPORTS_48B_ADDRESS | \
    EXEC_OBJECT_PINNED | \
    EXEC_OBJECT_CAPTURE)

static uint32_t
anv_bo_alloc_flags_to_bo_flags(struct anv_device *device,
                               enum anv_bo_alloc_flags alloc_flags)
{
   struct anv_physical_device *pdevice = device->physical;

   uint64_t bo_flags = 0;
   if (!(alloc_flags & ANV_BO_ALLOC_32BIT_ADDRESS) &&
       pdevice->supports_48bit_addresses)
      bo_flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS;

   if ((alloc_flags & ANV_BO_ALLOC_CAPTURE) && pdevice->has_exec_capture)
      bo_flags |= EXEC_OBJECT_CAPTURE;

   if (alloc_flags & ANV_BO_ALLOC_IMPLICIT_WRITE) {
      assert(alloc_flags & ANV_BO_ALLOC_IMPLICIT_SYNC);
      bo_flags |= EXEC_OBJECT_WRITE;
   }

   if (!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_SYNC) && pdevice->has_exec_async)
      bo_flags |= EXEC_OBJECT_ASYNC;

   if (pdevice->use_softpin)
      bo_flags |= EXEC_OBJECT_PINNED;

   return bo_flags;
}

static uint32_t
anv_device_get_bo_align(struct anv_device *device,
                        enum anv_bo_alloc_flags alloc_flags)
{
   /* Gfx12 CCS surface addresses need to be 64K aligned. */
   if (device->info.ver >= 12 && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS))
      return 64 * 1024;

   return 4096;
}
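
/* Putting the two helpers above together, a hypothetical allocation with
 * alloc_flags = ANV_BO_ALLOC_MAPPED | ANV_BO_ALLOC_SNOOPED on a softpin,
 * 48-bit capable device with async execution support would come out as:
 *
 *    bo_flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS  (no 32BIT_ADDRESS)
 *             | EXEC_OBJECT_ASYNC                 (no IMPLICIT_SYNC)
 *             | EXEC_OBJECT_PINNED                (softpin)
 *    align    = 4096                              (no IMPLICIT_CCS)
 */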

VkResult
anv_device_alloc_bo(struct anv_device *device,
                    const char *name,
                    uint64_t size,
                    enum anv_bo_alloc_flags alloc_flags,
                    uint64_t explicit_address,
                    struct anv_bo **bo_out)
{
   if (!(alloc_flags & ANV_BO_ALLOC_LOCAL_MEM))
      anv_perf_warn(VK_LOG_NO_OBJS(&device->physical->instance->vk.base),
                    "system memory used");

   if (!device->physical->has_implicit_ccs)
      assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS));

   const uint32_t bo_flags =
      anv_bo_alloc_flags_to_bo_flags(device, alloc_flags);
   assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));

   /* The kernel is going to give us whole pages anyway */
   size = align_u64(size, 4096);

   const uint32_t align = anv_device_get_bo_align(device, alloc_flags);

   uint64_t ccs_size = 0;
   if (device->info.has_aux_map && (alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS)) {
      /* Align the size up to the next multiple of 64K so we don't have any
       * AUX-TT entries pointing from a 64K page to itself.
       */
      size = align_u64(size, 64 * 1024);

      /* See anv_bo::_ccs_size */
      ccs_size = align_u64(DIV_ROUND_UP(size, INTEL_AUX_MAP_GFX12_CCS_SCALE), 4096);
   }

   uint32_t gem_handle;

   /* If we have vram size, we have multiple memory regions and should choose
    * one of them.
    */
   if (device->physical->vram.size > 0) {
      struct drm_i915_gem_memory_class_instance regions[2];
      uint32_t nregions = 0;

      if (alloc_flags & ANV_BO_ALLOC_LOCAL_MEM) {
         /* For vram allocations, still use system memory as a fallback. */
         regions[nregions++] = device->physical->vram.region;
         regions[nregions++] = device->physical->sys.region;
      } else {
         regions[nregions++] = device->physical->sys.region;
      }

      gem_handle = anv_gem_create_regions(device, size + ccs_size,
                                          nregions, regions);
   } else {
      gem_handle = anv_gem_create(device, size + ccs_size);
   }

   if (gem_handle == 0)
      return vk_error(device, VK_ERROR_OUT_OF_DEVICE_MEMORY);

   struct anv_bo new_bo = {
      .name = name,
      .gem_handle = gem_handle,
      .refcount = 1,
      .offset = -1,
      .size = size,
      ._ccs_size = ccs_size,
      .flags = bo_flags,
      .is_external = (alloc_flags & ANV_BO_ALLOC_EXTERNAL),
      .has_client_visible_address =
         (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
      .has_implicit_ccs = ccs_size > 0,
   };

   if (alloc_flags & ANV_BO_ALLOC_MAPPED) {
      new_bo.map = anv_gem_mmap(device, new_bo.gem_handle, 0, size, 0);
      if (new_bo.map == MAP_FAILED) {
         anv_gem_close(device, new_bo.gem_handle);
         return vk_errorf(device, VK_ERROR_OUT_OF_HOST_MEMORY,
                          "mmap failed: %m");
      }
   }

   if (alloc_flags & ANV_BO_ALLOC_SNOOPED) {
      assert(alloc_flags & ANV_BO_ALLOC_MAPPED);
      /* We don't want to change these defaults if it's going to be shared
       * with another process.
       */
      assert(!(alloc_flags & ANV_BO_ALLOC_EXTERNAL));

      /* Regular objects are created I915_CACHING_CACHED on LLC platforms and
       * I915_CACHING_NONE on non-LLC platforms. For many internal state
       * objects, we'd rather take the snooping overhead than risk forgetting
       * a CLFLUSH somewhere. Userptr objects are always created as
       * I915_CACHING_CACHED, which on non-LLC means snooped so there's no
       * need to do this there.
       */
      if (!device->info.has_llc) {
         anv_gem_set_caching(device, new_bo.gem_handle,
                             I915_CACHING_CACHED);
      }
   }

   if (alloc_flags & ANV_BO_ALLOC_FIXED_ADDRESS) {
      new_bo.has_fixed_address = true;
      new_bo.offset = explicit_address;
   } else if (new_bo.flags & EXEC_OBJECT_PINNED) {
      new_bo.offset = anv_vma_alloc(device, new_bo.size + new_bo._ccs_size,
                                    align, alloc_flags, explicit_address);
      if (new_bo.offset == 0) {
         if (new_bo.map)
            anv_gem_munmap(device, new_bo.map, size);
         anv_gem_close(device, new_bo.gem_handle);
         return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                          "failed to allocate virtual address for BO");
      }
   } else {
      assert(!new_bo.has_client_visible_address);
   }

   if (new_bo._ccs_size > 0) {
      assert(device->info.has_aux_map);
      intel_aux_map_add_mapping(device->aux_map_ctx,
                                intel_canonical_address(new_bo.offset),
                                intel_canonical_address(new_bo.offset + new_bo.size),
                                new_bo.size, 0 /* format_bits */);
   }

   assert(new_bo.gem_handle);

   /* If we just got this gem_handle from anv_bo_init_new then we know no one
    * else is touching this BO at the moment so we don't need to lock here.
    */
   struct anv_bo *bo = anv_device_lookup_bo(device, new_bo.gem_handle);
   *bo = new_bo;

   *bo_out = bo;

   return VK_SUCCESS;
}
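
/* A hypothetical internal caller (names and sizes illustrative; compare
 * anv_bo_pool_alloc above for a real one):
 *
 *    struct anv_bo *bo;
 *    VkResult result = anv_device_alloc_bo(device, "my-buffer", 8192,
 *                                          ANV_BO_ALLOC_MAPPED,
 *                                          0, &bo);
 *    if (result == VK_SUCCESS) {
 *       ... write through bo->map ...
 *       anv_device_release_bo(device, bo);
 *    }
 */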

VkResult
anv_device_import_bo_from_host_ptr(struct anv_device *device,
                                   void *host_ptr, uint32_t size,
                                   enum anv_bo_alloc_flags alloc_flags,
                                   uint64_t client_address,
                                   struct anv_bo **bo_out)
{
   assert(!(alloc_flags & (ANV_BO_ALLOC_MAPPED |
                           ANV_BO_ALLOC_SNOOPED |
                           ANV_BO_ALLOC_FIXED_ADDRESS)));

   /* We can't do implicit CCS with an aux table on shared memory */
   if (!device->physical->has_implicit_ccs || device->info.has_aux_map)
      assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS));

   struct anv_bo_cache *cache = &device->bo_cache;
   const uint32_t bo_flags =
      anv_bo_alloc_flags_to_bo_flags(device, alloc_flags);
   assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));

   uint32_t gem_handle = anv_gem_userptr(device, host_ptr, size);
   if (!gem_handle)
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   pthread_mutex_lock(&cache->mutex);

   struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
   if (bo->refcount > 0) {
      /* VK_EXT_external_memory_host doesn't require handling importing the
       * same pointer twice at the same time, but we don't get in the way.
       * If the kernel gives us the same gem_handle, only succeed if the
       * flags match.
       */
      assert(bo->gem_handle == gem_handle);
      if (bo_flags != bo->flags) {
         pthread_mutex_unlock(&cache->mutex);
         return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                          "same host pointer imported two different ways");
      }

      if (bo->has_client_visible_address !=
          ((alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0)) {
         pthread_mutex_unlock(&cache->mutex);
         return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                          "The same BO was imported with and without buffer "
                          "device address");
      }

      if (client_address && client_address != intel_48b_address(bo->offset)) {
         pthread_mutex_unlock(&cache->mutex);
         return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                          "The same BO was imported at two different "
                          "addresses");
      }

      __sync_fetch_and_add(&bo->refcount, 1);
   } else {
      struct anv_bo new_bo = {
         .name = "host-ptr",
         .gem_handle = gem_handle,
         .refcount = 1,
         .offset = -1,
         .size = size,
         .map = host_ptr,
         .flags = bo_flags,
         .is_external = true,
         .from_host_ptr = true,
         .has_client_visible_address =
            (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
      };

      assert(client_address == intel_48b_address(client_address));
      if (new_bo.flags & EXEC_OBJECT_PINNED) {
         assert(new_bo._ccs_size == 0);
         new_bo.offset = anv_vma_alloc(device, new_bo.size,
                                       anv_device_get_bo_align(device,
                                                               alloc_flags),
                                       alloc_flags, client_address);
         if (new_bo.offset == 0) {
            anv_gem_close(device, new_bo.gem_handle);
            pthread_mutex_unlock(&cache->mutex);
            return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                             "failed to allocate virtual address for BO");
         }
      } else {
         assert(!new_bo.has_client_visible_address);
      }

      *bo = new_bo;
   }

   pthread_mutex_unlock(&cache->mutex);
   *bo_out = bo;

   return VK_SUCCESS;
}
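
/* Note on double imports: the path above only deduplicates when the kernel
 * hands back the same GEM handle for the same host pointer. In that case
 * the existing anv_bo just gains a reference, so each import must still be
 * balanced by a call to anv_device_release_bo, and imports with mismatched
 * flags or addresses are rejected rather than silently aliased.
 */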

VkResult
anv_device_import_bo(struct anv_device *device,
                     int fd,
                     enum anv_bo_alloc_flags alloc_flags,
                     uint64_t client_address,
                     struct anv_bo **bo_out)
{
   assert(!(alloc_flags & (ANV_BO_ALLOC_MAPPED |
                           ANV_BO_ALLOC_SNOOPED |
                           ANV_BO_ALLOC_FIXED_ADDRESS)));

   /* We can't do implicit CCS with an aux table on shared memory */
   if (!device->physical->has_implicit_ccs || device->info.has_aux_map)
      assert(!(alloc_flags & ANV_BO_ALLOC_IMPLICIT_CCS));

   struct anv_bo_cache *cache = &device->bo_cache;
   const uint32_t bo_flags =
      anv_bo_alloc_flags_to_bo_flags(device, alloc_flags);
   assert(bo_flags == (bo_flags & ANV_BO_CACHE_SUPPORTED_FLAGS));

   pthread_mutex_lock(&cache->mutex);

   uint32_t gem_handle = anv_gem_fd_to_handle(device, fd);
   if (!gem_handle) {
      pthread_mutex_unlock(&cache->mutex);
      return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }

   struct anv_bo *bo = anv_device_lookup_bo(device, gem_handle);
   if (bo->refcount > 0) {
      /* We have to be careful how we combine flags so that it makes sense.
       * Really, though, if we get to this case and it actually matters, the
       * client has imported a BO twice in different ways and they get what
       * they have coming.
       */
      uint64_t new_flags = 0;
      new_flags |= (bo->flags | bo_flags) & EXEC_OBJECT_WRITE;
      new_flags |= (bo->flags & bo_flags) & EXEC_OBJECT_ASYNC;
      new_flags |= (bo->flags & bo_flags) & EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
      new_flags |= (bo->flags | bo_flags) & EXEC_OBJECT_PINNED;
      new_flags |= (bo->flags | bo_flags) & EXEC_OBJECT_CAPTURE;

      /* It's theoretically possible for a BO to get imported such that it's
       * both pinned and not pinned. The only way this can happen is if it
       * gets imported as both a semaphore and a memory object and that would
       * be an application error. Just fail out in that case.
       */
      if ((bo->flags & EXEC_OBJECT_PINNED) !=
          (bo_flags & EXEC_OBJECT_PINNED)) {
         pthread_mutex_unlock(&cache->mutex);
         return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                          "The same BO was imported two different ways");
      }

      /* It's also theoretically possible that someone could export a BO from
       * one heap and import it into another or to import the same BO into two
       * different heaps. If this happens, we could potentially end up both
       * allowing and disallowing 48-bit addresses. There's not much we can
       * do about it if we're pinning so we just throw an error and hope no
       * app is actually that stupid.
       */
      if ((new_flags & EXEC_OBJECT_PINNED) &&
          (bo->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) !=
          (bo_flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS)) {
         pthread_mutex_unlock(&cache->mutex);
         return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                          "The same BO was imported on two different heaps");
      }

      if (bo->has_client_visible_address !=
          ((alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0)) {
         pthread_mutex_unlock(&cache->mutex);
         return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                          "The same BO was imported with and without buffer "
                          "device address");
      }

      if (client_address && client_address != intel_48b_address(bo->offset)) {
         pthread_mutex_unlock(&cache->mutex);
         return vk_errorf(device, VK_ERROR_INVALID_EXTERNAL_HANDLE,
                          "The same BO was imported at two different "
                          "addresses");
      }

      bo->flags = new_flags;

      __sync_fetch_and_add(&bo->refcount, 1);
   } else {
      off_t size = lseek(fd, 0, SEEK_END);
      if (size == (off_t)-1) {
         anv_gem_close(device, gem_handle);
         pthread_mutex_unlock(&cache->mutex);
         return vk_error(device, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      }

      struct anv_bo new_bo = {
         .name = "imported",
         .gem_handle = gem_handle,
         .refcount = 1,
         .offset = -1,
         .size = size,
         .flags = bo_flags,
         .is_external = true,
         .has_client_visible_address =
            (alloc_flags & ANV_BO_ALLOC_CLIENT_VISIBLE_ADDRESS) != 0,
      };

      assert(client_address == intel_48b_address(client_address));
      if (new_bo.flags & EXEC_OBJECT_PINNED) {
         assert(new_bo._ccs_size == 0);
         new_bo.offset = anv_vma_alloc(device, new_bo.size,
                                       anv_device_get_bo_align(device,
                                                               alloc_flags),
                                       alloc_flags, client_address);
         if (new_bo.offset == 0) {
            anv_gem_close(device, new_bo.gem_handle);
            pthread_mutex_unlock(&cache->mutex);
            return vk_errorf(device, VK_ERROR_OUT_OF_DEVICE_MEMORY,
                             "failed to allocate virtual address for BO");
         }
      } else {
         assert(!new_bo.has_client_visible_address);
      }

      *bo = new_bo;
   }

   pthread_mutex_unlock(&cache->mutex);
   *bo_out = bo;

   return VK_SUCCESS;
}
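
/* The flag-combining rules above, in summary: WRITE, PINNED, and CAPTURE
 * are OR'd (if either import needed the property, the BO keeps it), while
 * ASYNC and SUPPORTS_48B_ADDRESS are AND'd (both imports must allow it).
 * For example, importing a BO first with ASYNC and then without leaves the
 * cached BO without ASYNC, forcing implicit synchronization for everyone
 * sharing it.
 */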

VkResult
anv_device_export_bo(struct anv_device *device,
                     struct anv_bo *bo, int *fd_out)
{
   assert(anv_device_lookup_bo(device, bo->gem_handle) == bo);

   /* This BO must have been flagged external in order for us to be able
    * to export it. This is done based on external options passed into
    * anv_AllocateMemory.
    */
   assert(bo->is_external);

   int fd = anv_gem_handle_to_fd(device, bo->gem_handle);
   if (fd < 0)
      return vk_error(device, VK_ERROR_TOO_MANY_OBJECTS);

   *fd_out = fd;

   return VK_SUCCESS;
}

static bool
atomic_dec_not_one(uint32_t *counter)
{
   uint32_t old, val;

   val = *counter;
   while (1) {
      if (val == 1)
         return false;

      old = __sync_val_compare_and_swap(counter, val, val - 1);
      if (old == val)
         return true;

      val = old;
   }
}
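
/* Example trace of atomic_dec_not_one with a counter of 3 and two racing
 * callers: both read val == 3; one CAS(3 -> 2) succeeds and returns true,
 * the other's CAS fails and returns old == 2, so it retries with val == 2
 * and CAS(2 -> 1) succeeds. A caller that reads val == 1 returns false
 * without writing, which is exactly the "don't go below one" behavior
 * anv_device_release_bo relies on below.
 */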

void
anv_device_release_bo(struct anv_device *device,
                      struct anv_bo *bo)
{
   struct anv_bo_cache *cache = &device->bo_cache;
   assert(anv_device_lookup_bo(device, bo->gem_handle) == bo);

   /* Try to decrement the counter but don't go below one. If this succeeds
    * then the refcount has been decremented and we are not the last
    * reference.
    */
   if (atomic_dec_not_one(&bo->refcount))
      return;

   pthread_mutex_lock(&cache->mutex);

   /* We are probably the last reference since our attempt to decrement above
    * failed. However, we can't actually know until we are inside the mutex.
    * Otherwise, someone could import the BO between the decrement and our
    * taking the mutex.
    */
   if (unlikely(__sync_sub_and_fetch(&bo->refcount, 1) > 0)) {
      /* Turns out we're not the last reference. Unlock and bail. */
      pthread_mutex_unlock(&cache->mutex);
      return;
   }
   assert(bo->refcount == 0);

   if (bo->map && !bo->from_host_ptr)
      anv_gem_munmap(device, bo->map, bo->size);

   if (bo->_ccs_size > 0) {
      assert(device->physical->has_implicit_ccs);
      assert(device->info.has_aux_map);
      assert(bo->has_implicit_ccs);
      intel_aux_map_unmap_range(device->aux_map_ctx,
                                intel_canonical_address(bo->offset),
                                bo->size);
   }

   if ((bo->flags & EXEC_OBJECT_PINNED) && !bo->has_fixed_address)
      anv_vma_free(device, bo->offset, bo->size + bo->_ccs_size);

   uint32_t gem_handle = bo->gem_handle;

   /* Memset the BO just in case. The refcount being zero should be enough to
    * prevent someone from assuming the data is valid but it's safer to just
    * stomp to zero just in case. We explicitly do this *before* we close the
    * GEM handle to ensure that if anyone allocates something and gets the
    * same GEM handle, the memset has already happened and won't stomp all
    * over any data they may write in this BO.
    */
   memset(bo, 0, sizeof(*bo));

   anv_gem_close(device, gem_handle);

   /* Don't unlock until we've actually closed the BO. The whole point of
    * the BO cache is to ensure that we correctly handle races with creating
    * and releasing GEM handles and we don't want to let someone import the BO
    * again between mutex unlock and closing the GEM handle.
    */
   pthread_mutex_unlock(&cache->mutex);
}