19f464c52Smaya/* 29f464c52Smaya * Copyright © 2017 Intel Corporation 39f464c52Smaya * 49f464c52Smaya * Permission is hereby granted, free of charge, to any person obtaining a 59f464c52Smaya * copy of this software and associated documentation files (the "Software"), 69f464c52Smaya * to deal in the Software without restriction, including without limitation 79f464c52Smaya * the rights to use, copy, modify, merge, publish, distribute, sublicense, 89f464c52Smaya * and/or sell copies of the Software, and to permit persons to whom the 99f464c52Smaya * Software is furnished to do so, subject to the following conditions: 109f464c52Smaya * 119f464c52Smaya * The above copyright notice and this permission notice shall be included 129f464c52Smaya * in all copies or substantial portions of the Software. 139f464c52Smaya * 149f464c52Smaya * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 159f464c52Smaya * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 169f464c52Smaya * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 179f464c52Smaya * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 189f464c52Smaya * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 199f464c52Smaya * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 209f464c52Smaya * DEALINGS IN THE SOFTWARE. 219f464c52Smaya */ 229f464c52Smaya 239f464c52Smaya/** 249f464c52Smaya * @file iris_bufmgr.c 259f464c52Smaya * 269f464c52Smaya * The Iris buffer manager. 
279f464c52Smaya * 289f464c52Smaya * XXX: write better comments 299f464c52Smaya * - BOs 309f464c52Smaya * - Explain BO cache 319f464c52Smaya * - main interface to GEM in the kernel 329f464c52Smaya */ 339f464c52Smaya 349f464c52Smaya#include <xf86drm.h> 359f464c52Smaya#include <util/u_atomic.h> 369f464c52Smaya#include <fcntl.h> 379f464c52Smaya#include <stdio.h> 389f464c52Smaya#include <stdlib.h> 399f464c52Smaya#include <string.h> 409f464c52Smaya#include <unistd.h> 419f464c52Smaya#include <assert.h> 429f464c52Smaya#include <sys/ioctl.h> 439f464c52Smaya#include <sys/mman.h> 449f464c52Smaya#include <sys/stat.h> 459f464c52Smaya#include <sys/types.h> 469f464c52Smaya#include <stdbool.h> 479f464c52Smaya#include <time.h> 487ec681f3Smrg#include <unistd.h> 499f464c52Smaya 509f464c52Smaya#include "errno.h" 517ec681f3Smrg#include "common/intel_aux_map.h" 527ec681f3Smrg#include "common/intel_clflush.h" 537ec681f3Smrg#include "dev/intel_debug.h" 547ec681f3Smrg#include "common/intel_gem.h" 557ec681f3Smrg#include "dev/intel_device_info.h" 567ec681f3Smrg#include "isl/isl.h" 579f464c52Smaya#include "main/macros.h" 587ec681f3Smrg#include "os/os_mman.h" 599f464c52Smaya#include "util/debug.h" 609f464c52Smaya#include "util/macros.h" 619f464c52Smaya#include "util/hash_table.h" 629f464c52Smaya#include "util/list.h" 637ec681f3Smrg#include "util/os_file.h" 649f464c52Smaya#include "util/u_dynarray.h" 659f464c52Smaya#include "util/vma.h" 669f464c52Smaya#include "iris_bufmgr.h" 679f464c52Smaya#include "iris_context.h" 689f464c52Smaya#include "string.h" 699f464c52Smaya 709f464c52Smaya#include "drm-uapi/i915_drm.h" 719f464c52Smaya 729f464c52Smaya#ifdef HAVE_VALGRIND 739f464c52Smaya#include <valgrind.h> 749f464c52Smaya#include <memcheck.h> 759f464c52Smaya#define VG(x) x 769f464c52Smaya#else 779f464c52Smaya#define VG(x) 789f464c52Smaya#endif 799f464c52Smaya 809f464c52Smaya/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier 819f464c52Smaya * VALGRIND_MALLOCLIKE_BLOCK but 
instead leaves vg convinced the memory is 829f464c52Smaya * leaked. All because it does not call VG(cli_free) from its 839f464c52Smaya * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like 849f464c52Smaya * an allocation, we mark it available for use upon mmapping and remove 859f464c52Smaya * it upon unmapping. 869f464c52Smaya */ 879f464c52Smaya#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size)) 889f464c52Smaya#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size)) 899f464c52Smaya 907ec681f3Smrg/* On FreeBSD PAGE_SIZE is already defined in 917ec681f3Smrg * /usr/include/machine/param.h that is indirectly 927ec681f3Smrg * included here. 937ec681f3Smrg */ 947ec681f3Smrg#ifndef PAGE_SIZE 959f464c52Smaya#define PAGE_SIZE 4096 967ec681f3Smrg#endif 977ec681f3Smrg 987ec681f3Smrg#define WARN_ONCE(cond, fmt...) do { \ 997ec681f3Smrg if (unlikely(cond)) { \ 1007ec681f3Smrg static bool _warned = false; \ 1017ec681f3Smrg if (!_warned) { \ 1027ec681f3Smrg fprintf(stderr, "WARNING: "); \ 1037ec681f3Smrg fprintf(stderr, fmt); \ 1047ec681f3Smrg _warned = true; \ 1057ec681f3Smrg } \ 1067ec681f3Smrg } \ 1077ec681f3Smrg} while (0) 1089f464c52Smaya 1099f464c52Smaya#define FILE_DEBUG_FLAG DEBUG_BUFMGR 1109f464c52Smaya 1119f464c52Smaya/** 1127ec681f3Smrg * For debugging purposes, this returns a time in seconds. 
1139f464c52Smaya */ 1147ec681f3Smrgstatic double 1157ec681f3Smrgget_time(void) 1169f464c52Smaya{ 1177ec681f3Smrg struct timespec tp; 1189f464c52Smaya 1197ec681f3Smrg clock_gettime(CLOCK_MONOTONIC, &tp); 1207ec681f3Smrg 1217ec681f3Smrg return tp.tv_sec + tp.tv_nsec / 1000000000.0; 1229f464c52Smaya} 1239f464c52Smaya 1249f464c52Smayastatic inline int 1259f464c52Smayaatomic_add_unless(int *v, int add, int unless) 1269f464c52Smaya{ 1279f464c52Smaya int c, old; 1289f464c52Smaya c = p_atomic_read(v); 1299f464c52Smaya while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c) 1309f464c52Smaya c = old; 1319f464c52Smaya return c == unless; 1329f464c52Smaya} 1339f464c52Smaya 1349f464c52Smayastatic const char * 1359f464c52Smayamemzone_name(enum iris_memory_zone memzone) 1369f464c52Smaya{ 1379f464c52Smaya const char *names[] = { 1387ec681f3Smrg [IRIS_MEMZONE_SHADER] = "shader", 1397ec681f3Smrg [IRIS_MEMZONE_BINDER] = "binder", 1407ec681f3Smrg [IRIS_MEMZONE_BINDLESS] = "scratchsurf", 1417ec681f3Smrg [IRIS_MEMZONE_SURFACE] = "surface", 1427ec681f3Smrg [IRIS_MEMZONE_DYNAMIC] = "dynamic", 1437ec681f3Smrg [IRIS_MEMZONE_OTHER] = "other", 1449f464c52Smaya [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor", 1459f464c52Smaya }; 1469f464c52Smaya assert(memzone < ARRAY_SIZE(names)); 1479f464c52Smaya return names[memzone]; 1489f464c52Smaya} 1499f464c52Smaya 1509f464c52Smayastruct bo_cache_bucket { 1519f464c52Smaya /** List of cached BOs. */ 1529f464c52Smaya struct list_head head; 1539f464c52Smaya 1549f464c52Smaya /** Size of this bucket, in bytes. */ 1559f464c52Smaya uint64_t size; 1569f464c52Smaya}; 1579f464c52Smaya 1587ec681f3Smrgstruct bo_export { 1597ec681f3Smrg /** File descriptor associated with a handle export. 
*/ 1607ec681f3Smrg int drm_fd; 1617ec681f3Smrg 1627ec681f3Smrg /** GEM handle in drm_fd */ 1637ec681f3Smrg uint32_t gem_handle; 1647ec681f3Smrg 1657ec681f3Smrg struct list_head link; 1667ec681f3Smrg}; 1677ec681f3Smrg 1687ec681f3Smrgstruct iris_memregion { 1697ec681f3Smrg struct drm_i915_gem_memory_class_instance region; 1707ec681f3Smrg uint64_t size; 1717ec681f3Smrg}; 1727ec681f3Smrg 1737ec681f3Smrg#define NUM_SLAB_ALLOCATORS 3 1747ec681f3Smrg 1757ec681f3Smrgenum iris_heap { 1767ec681f3Smrg IRIS_HEAP_SYSTEM_MEMORY, 1777ec681f3Smrg IRIS_HEAP_DEVICE_LOCAL, 1787ec681f3Smrg IRIS_HEAP_MAX, 1797ec681f3Smrg}; 1807ec681f3Smrg 1817ec681f3Smrgstruct iris_slab { 1827ec681f3Smrg struct pb_slab base; 1837ec681f3Smrg 1847ec681f3Smrg unsigned entry_size; 1857ec681f3Smrg 1867ec681f3Smrg /** The BO representing the entire slab */ 1877ec681f3Smrg struct iris_bo *bo; 1887ec681f3Smrg 1897ec681f3Smrg /** Array of iris_bo structs representing BOs allocated out of this slab */ 1907ec681f3Smrg struct iris_bo *entries; 1917ec681f3Smrg}; 1927ec681f3Smrg 1939f464c52Smayastruct iris_bufmgr { 1947ec681f3Smrg /** 1957ec681f3Smrg * List into the list of bufmgr. 
1967ec681f3Smrg */ 1977ec681f3Smrg struct list_head link; 1987ec681f3Smrg 1997ec681f3Smrg uint32_t refcount; 2007ec681f3Smrg 2019f464c52Smaya int fd; 2029f464c52Smaya 2037ec681f3Smrg simple_mtx_t lock; 2047ec681f3Smrg simple_mtx_t bo_deps_lock; 2059f464c52Smaya 2069f464c52Smaya /** Array of lists of cached gem objects of power-of-two sizes */ 2079f464c52Smaya struct bo_cache_bucket cache_bucket[14 * 4]; 2089f464c52Smaya int num_buckets; 2097ec681f3Smrg 2107ec681f3Smrg /** Same as cache_bucket, but for local memory gem objects */ 2117ec681f3Smrg struct bo_cache_bucket local_cache_bucket[14 * 4]; 2127ec681f3Smrg int num_local_buckets; 2137ec681f3Smrg 2149f464c52Smaya time_t time; 2159f464c52Smaya 2169f464c52Smaya struct hash_table *name_table; 2179f464c52Smaya struct hash_table *handle_table; 2189f464c52Smaya 2197ec681f3Smrg /** 2207ec681f3Smrg * List of BOs which we've effectively freed, but are hanging on to 2217ec681f3Smrg * until they're idle before closing and returning the VMA. 2227ec681f3Smrg */ 2237ec681f3Smrg struct list_head zombie_list; 2247ec681f3Smrg 2259f464c52Smaya struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT]; 2269f464c52Smaya 2277ec681f3Smrg uint64_t vma_min_align; 2287ec681f3Smrg struct iris_memregion vram, sys; 2297ec681f3Smrg 2307ec681f3Smrg int next_screen_id; 2317ec681f3Smrg 2329f464c52Smaya bool has_llc:1; 2337ec681f3Smrg bool has_local_mem:1; 2347ec681f3Smrg bool has_mmap_offset:1; 2357ec681f3Smrg bool has_tiling_uapi:1; 2367ec681f3Smrg bool has_userptr_probe:1; 2379f464c52Smaya bool bo_reuse:1; 2389f464c52Smaya 2397ec681f3Smrg struct intel_aux_map_context *aux_map_ctx; 2409f464c52Smaya 2417ec681f3Smrg struct pb_slabs bo_slabs[NUM_SLAB_ALLOCATORS]; 2427ec681f3Smrg}; 2439f464c52Smaya 2447ec681f3Smrgstatic simple_mtx_t global_bufmgr_list_mutex = _SIMPLE_MTX_INITIALIZER_NP; 2457ec681f3Smrgstatic struct list_head global_bufmgr_list = { 2467ec681f3Smrg .next = &global_bufmgr_list, 2477ec681f3Smrg .prev = &global_bufmgr_list, 
2487ec681f3Smrg}; 2499f464c52Smaya 2507ec681f3Smrgstatic void bo_free(struct iris_bo *bo); 2519f464c52Smaya 2529f464c52Smayastatic struct iris_bo * 2537ec681f3Smrgfind_and_ref_external_bo(struct hash_table *ht, unsigned int key) 2549f464c52Smaya{ 2559f464c52Smaya struct hash_entry *entry = _mesa_hash_table_search(ht, &key); 2567ec681f3Smrg struct iris_bo *bo = entry ? entry->data : NULL; 2577ec681f3Smrg 2587ec681f3Smrg if (bo) { 2597ec681f3Smrg assert(iris_bo_is_external(bo)); 2607ec681f3Smrg assert(iris_bo_is_real(bo)); 2617ec681f3Smrg assert(!bo->real.reusable); 2627ec681f3Smrg 2637ec681f3Smrg /* Being non-reusable, the BO cannot be in the cache lists, but it 2647ec681f3Smrg * may be in the zombie list if it had reached zero references, but 2657ec681f3Smrg * we hadn't yet closed it...and then reimported the same BO. If it 2667ec681f3Smrg * is, then remove it since it's now been resurrected. 2677ec681f3Smrg */ 2687ec681f3Smrg if (list_is_linked(&bo->head)) 2697ec681f3Smrg list_del(&bo->head); 2707ec681f3Smrg 2717ec681f3Smrg iris_bo_reference(bo); 2727ec681f3Smrg } 2737ec681f3Smrg 2747ec681f3Smrg return bo; 2759f464c52Smaya} 2769f464c52Smaya 2779f464c52Smaya/** 2789f464c52Smaya * This function finds the correct bucket fit for the input size. 2799f464c52Smaya * The function works with O(1) complexity when the requested size 2809f464c52Smaya * was queried instead of iterating the size through all the buckets. 2819f464c52Smaya */ 2829f464c52Smayastatic struct bo_cache_bucket * 2837ec681f3Smrgbucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size, bool local) 2849f464c52Smaya{ 2859f464c52Smaya /* Calculating the pages and rounding up to the page size. 
*/ 2869f464c52Smaya const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; 2879f464c52Smaya 2889f464c52Smaya /* Row Bucket sizes clz((x-1) | 3) Row Column 2899f464c52Smaya * in pages stride size 2909f464c52Smaya * 0: 1 2 3 4 -> 30 30 30 30 4 1 2919f464c52Smaya * 1: 5 6 7 8 -> 29 29 29 29 4 1 2929f464c52Smaya * 2: 10 12 14 16 -> 28 28 28 28 8 2 2939f464c52Smaya * 3: 20 24 28 32 -> 27 27 27 27 16 4 2949f464c52Smaya */ 2959f464c52Smaya const unsigned row = 30 - __builtin_clz((pages - 1) | 3); 2969f464c52Smaya const unsigned row_max_pages = 4 << row; 2979f464c52Smaya 2989f464c52Smaya /* The '& ~2' is the special case for row 1. In row 1, max pages / 2999f464c52Smaya * 2 is 2, but the previous row maximum is zero (because there is 3009f464c52Smaya * no previous row). All row maximum sizes are power of 2, so that 3019f464c52Smaya * is the only case where that bit will be set. 3029f464c52Smaya */ 3039f464c52Smaya const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2; 3049f464c52Smaya int col_size_log2 = row - 1; 3059f464c52Smaya col_size_log2 += (col_size_log2 < 0); 3069f464c52Smaya 3079f464c52Smaya const unsigned col = (pages - prev_row_max_pages + 3089f464c52Smaya ((1 << col_size_log2) - 1)) >> col_size_log2; 3099f464c52Smaya 3109f464c52Smaya /* Calculating the index based on the row and column. */ 3119f464c52Smaya const unsigned index = (row * 4) + (col - 1); 3129f464c52Smaya 3137ec681f3Smrg int num_buckets = local ? bufmgr->num_local_buckets : bufmgr->num_buckets; 3147ec681f3Smrg struct bo_cache_bucket *buckets = local ? 3157ec681f3Smrg bufmgr->local_cache_bucket : bufmgr->cache_bucket; 3167ec681f3Smrg 3177ec681f3Smrg return (index < num_buckets) ? 
&buckets[index] : NULL; 3189f464c52Smaya} 3199f464c52Smaya 3209f464c52Smayaenum iris_memory_zone 3219f464c52Smayairis_memzone_for_address(uint64_t address) 3229f464c52Smaya{ 3237ec681f3Smrg STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START); 3247ec681f3Smrg STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START); 3257ec681f3Smrg STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_BINDLESS_START); 3267ec681f3Smrg STATIC_ASSERT(IRIS_MEMZONE_BINDLESS_START > IRIS_MEMZONE_BINDER_START); 3277ec681f3Smrg STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START); 3289f464c52Smaya STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START); 3299f464c52Smaya 3309f464c52Smaya if (address >= IRIS_MEMZONE_OTHER_START) 3319f464c52Smaya return IRIS_MEMZONE_OTHER; 3329f464c52Smaya 3339f464c52Smaya if (address == IRIS_BORDER_COLOR_POOL_ADDRESS) 3349f464c52Smaya return IRIS_MEMZONE_BORDER_COLOR_POOL; 3359f464c52Smaya 3369f464c52Smaya if (address > IRIS_MEMZONE_DYNAMIC_START) 3379f464c52Smaya return IRIS_MEMZONE_DYNAMIC; 3389f464c52Smaya 3399f464c52Smaya if (address >= IRIS_MEMZONE_SURFACE_START) 3409f464c52Smaya return IRIS_MEMZONE_SURFACE; 3419f464c52Smaya 3427ec681f3Smrg if (address >= IRIS_MEMZONE_BINDLESS_START) 3437ec681f3Smrg return IRIS_MEMZONE_BINDLESS; 3447ec681f3Smrg 3459f464c52Smaya if (address >= IRIS_MEMZONE_BINDER_START) 3469f464c52Smaya return IRIS_MEMZONE_BINDER; 3479f464c52Smaya 3489f464c52Smaya return IRIS_MEMZONE_SHADER; 3499f464c52Smaya} 3509f464c52Smaya 3519f464c52Smaya/** 3529f464c52Smaya * Allocate a section of virtual memory for a buffer, assigning an address. 3539f464c52Smaya * 3549f464c52Smaya * This uses either the bucket allocator for the given size, or the large 3559f464c52Smaya * object allocator (util_vma). 
3569f464c52Smaya */ 3579f464c52Smayastatic uint64_t 3589f464c52Smayavma_alloc(struct iris_bufmgr *bufmgr, 3599f464c52Smaya enum iris_memory_zone memzone, 3609f464c52Smaya uint64_t size, 3619f464c52Smaya uint64_t alignment) 3629f464c52Smaya{ 3637ec681f3Smrg /* Force minimum alignment based on device requirements */ 3647ec681f3Smrg assert((alignment & (alignment - 1)) == 0); 3657ec681f3Smrg alignment = MAX2(alignment, bufmgr->vma_min_align); 3669f464c52Smaya 3679f464c52Smaya if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL) 3689f464c52Smaya return IRIS_BORDER_COLOR_POOL_ADDRESS; 3699f464c52Smaya 3709f464c52Smaya /* The binder handles its own allocations. Return non-zero here. */ 3719f464c52Smaya if (memzone == IRIS_MEMZONE_BINDER) 3729f464c52Smaya return IRIS_MEMZONE_BINDER_START; 3739f464c52Smaya 3749f464c52Smaya uint64_t addr = 3759f464c52Smaya util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment); 3769f464c52Smaya 3779f464c52Smaya assert((addr >> 48ull) == 0); 3789f464c52Smaya assert((addr % alignment) == 0); 3799f464c52Smaya 3807ec681f3Smrg return intel_canonical_address(addr); 3819f464c52Smaya} 3829f464c52Smaya 3839f464c52Smayastatic void 3849f464c52Smayavma_free(struct iris_bufmgr *bufmgr, 3859f464c52Smaya uint64_t address, 3869f464c52Smaya uint64_t size) 3879f464c52Smaya{ 3889f464c52Smaya if (address == IRIS_BORDER_COLOR_POOL_ADDRESS) 3899f464c52Smaya return; 3909f464c52Smaya 3919f464c52Smaya /* Un-canonicalize the address. */ 3927ec681f3Smrg address = intel_48b_address(address); 3939f464c52Smaya 3949f464c52Smaya if (address == 0ull) 3959f464c52Smaya return; 3969f464c52Smaya 3979f464c52Smaya enum iris_memory_zone memzone = iris_memzone_for_address(address); 3989f464c52Smaya 3999f464c52Smaya /* The binder handles its own allocations. 
*/ 4009f464c52Smaya if (memzone == IRIS_MEMZONE_BINDER) 4019f464c52Smaya return; 4029f464c52Smaya 4037ec681f3Smrg assert(memzone < ARRAY_SIZE(bufmgr->vma_allocator)); 4047ec681f3Smrg 4059f464c52Smaya util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size); 4069f464c52Smaya} 4079f464c52Smaya 4087ec681f3Smrgstatic bool 4097ec681f3Smrgiris_bo_busy_gem(struct iris_bo *bo) 4109f464c52Smaya{ 4117ec681f3Smrg assert(iris_bo_is_real(bo)); 4127ec681f3Smrg 4139f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 4149f464c52Smaya struct drm_i915_gem_busy busy = { .handle = bo->gem_handle }; 4159f464c52Smaya 4167ec681f3Smrg int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 4179f464c52Smaya if (ret == 0) { 4189f464c52Smaya return busy.busy; 4199f464c52Smaya } 4209f464c52Smaya return false; 4219f464c52Smaya} 4229f464c52Smaya 4237ec681f3Smrg/* A timeout of 0 just checks for busyness. */ 4247ec681f3Smrgstatic int 4257ec681f3Smrgiris_bo_wait_syncobj(struct iris_bo *bo, int64_t timeout_ns) 4267ec681f3Smrg{ 4277ec681f3Smrg int ret = 0; 4287ec681f3Smrg struct iris_bufmgr *bufmgr = bo->bufmgr; 4297ec681f3Smrg 4307ec681f3Smrg /* If we know it's idle, don't bother with the kernel round trip */ 4317ec681f3Smrg if (bo->idle) 4327ec681f3Smrg return 0; 4337ec681f3Smrg 4347ec681f3Smrg simple_mtx_lock(&bufmgr->bo_deps_lock); 4357ec681f3Smrg 4367ec681f3Smrg uint32_t handles[bo->deps_size * IRIS_BATCH_COUNT * 2]; 4377ec681f3Smrg int handle_count = 0; 4387ec681f3Smrg 4397ec681f3Smrg for (int d = 0; d < bo->deps_size; d++) { 4407ec681f3Smrg for (int b = 0; b < IRIS_BATCH_COUNT; b++) { 4417ec681f3Smrg struct iris_syncobj *r = bo->deps[d].read_syncobjs[b]; 4427ec681f3Smrg struct iris_syncobj *w = bo->deps[d].write_syncobjs[b]; 4437ec681f3Smrg if (r) 4447ec681f3Smrg handles[handle_count++] = r->handle; 4457ec681f3Smrg if (w) 4467ec681f3Smrg handles[handle_count++] = w->handle; 4477ec681f3Smrg } 4487ec681f3Smrg } 4497ec681f3Smrg 4507ec681f3Smrg if (handle_count == 0) 
4517ec681f3Smrg goto out; 4527ec681f3Smrg 4537ec681f3Smrg /* Unlike the gem wait, negative values are not infinite here. */ 4547ec681f3Smrg int64_t timeout_abs = os_time_get_absolute_timeout(timeout_ns); 4557ec681f3Smrg if (timeout_abs < 0) 4567ec681f3Smrg timeout_abs = INT64_MAX; 4577ec681f3Smrg 4587ec681f3Smrg struct drm_syncobj_wait args = { 4597ec681f3Smrg .handles = (uintptr_t) handles, 4607ec681f3Smrg .timeout_nsec = timeout_abs, 4617ec681f3Smrg .count_handles = handle_count, 4627ec681f3Smrg .flags = DRM_SYNCOBJ_WAIT_FLAGS_WAIT_ALL, 4637ec681f3Smrg }; 4647ec681f3Smrg 4657ec681f3Smrg ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_SYNCOBJ_WAIT, &args); 4667ec681f3Smrg if (ret != 0) { 4677ec681f3Smrg ret = -errno; 4687ec681f3Smrg goto out; 4697ec681f3Smrg } 4707ec681f3Smrg 4717ec681f3Smrg /* We just waited everything, so clean all the deps. */ 4727ec681f3Smrg for (int d = 0; d < bo->deps_size; d++) { 4737ec681f3Smrg for (int b = 0; b < IRIS_BATCH_COUNT; b++) { 4747ec681f3Smrg iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL); 4757ec681f3Smrg iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL); 4767ec681f3Smrg } 4777ec681f3Smrg } 4787ec681f3Smrg 4797ec681f3Smrgout: 4807ec681f3Smrg simple_mtx_unlock(&bufmgr->bo_deps_lock); 4817ec681f3Smrg return ret; 4827ec681f3Smrg} 4837ec681f3Smrg 4847ec681f3Smrgstatic bool 4857ec681f3Smrgiris_bo_busy_syncobj(struct iris_bo *bo) 4867ec681f3Smrg{ 4877ec681f3Smrg return iris_bo_wait_syncobj(bo, 0) == -ETIME; 4887ec681f3Smrg} 4897ec681f3Smrg 4907ec681f3Smrgbool 4917ec681f3Smrgiris_bo_busy(struct iris_bo *bo) 4927ec681f3Smrg{ 4937ec681f3Smrg bool busy; 4947ec681f3Smrg if (iris_bo_is_external(bo)) 4957ec681f3Smrg busy = iris_bo_busy_gem(bo); 4967ec681f3Smrg else 4977ec681f3Smrg busy = iris_bo_busy_syncobj(bo); 4987ec681f3Smrg 4997ec681f3Smrg bo->idle = !busy; 5007ec681f3Smrg 5017ec681f3Smrg return busy; 5027ec681f3Smrg} 5037ec681f3Smrg 5049f464c52Smayaint 5059f464c52Smayairis_bo_madvise(struct iris_bo *bo, 
int state) 5069f464c52Smaya{ 5077ec681f3Smrg /* We can't madvise suballocated BOs. */ 5087ec681f3Smrg assert(iris_bo_is_real(bo)); 5097ec681f3Smrg 5109f464c52Smaya struct drm_i915_gem_madvise madv = { 5119f464c52Smaya .handle = bo->gem_handle, 5129f464c52Smaya .madv = state, 5139f464c52Smaya .retained = 1, 5149f464c52Smaya }; 5159f464c52Smaya 5167ec681f3Smrg intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 5179f464c52Smaya 5189f464c52Smaya return madv.retained; 5199f464c52Smaya} 5209f464c52Smaya 5217ec681f3Smrgstatic struct iris_bo * 5227ec681f3Smrgbo_calloc(void) 5237ec681f3Smrg{ 5247ec681f3Smrg struct iris_bo *bo = calloc(1, sizeof(*bo)); 5257ec681f3Smrg if (!bo) 5267ec681f3Smrg return NULL; 5277ec681f3Smrg 5287ec681f3Smrg list_inithead(&bo->real.exports); 5297ec681f3Smrg 5307ec681f3Smrg bo->hash = _mesa_hash_pointer(bo); 5317ec681f3Smrg 5327ec681f3Smrg return bo; 5337ec681f3Smrg} 5347ec681f3Smrg 5359f464c52Smayastatic void 5367ec681f3Smrgbo_unmap(struct iris_bo *bo) 5379f464c52Smaya{ 5387ec681f3Smrg assert(iris_bo_is_real(bo)); 5399f464c52Smaya 5407ec681f3Smrg VG_NOACCESS(bo->real.map, bo->size); 5417ec681f3Smrg os_munmap(bo->real.map, bo->size); 5427ec681f3Smrg bo->real.map = NULL; 5437ec681f3Smrg} 5447ec681f3Smrg 5457ec681f3Smrgstatic struct pb_slabs * 5467ec681f3Smrgget_slabs(struct iris_bufmgr *bufmgr, uint64_t size) 5477ec681f3Smrg{ 5487ec681f3Smrg for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 5497ec681f3Smrg struct pb_slabs *slabs = &bufmgr->bo_slabs[i]; 5507ec681f3Smrg 5517ec681f3Smrg if (size <= 1ull << (slabs->min_order + slabs->num_orders - 1)) 5527ec681f3Smrg return slabs; 5539f464c52Smaya } 5547ec681f3Smrg 5557ec681f3Smrg unreachable("should have found a valid slab for this size"); 5569f464c52Smaya} 5579f464c52Smaya 5587ec681f3Smrg/* Return the power of two size of a slab entry matching the input size. 
*/ 5597ec681f3Smrgstatic unsigned 5607ec681f3Smrgget_slab_pot_entry_size(struct iris_bufmgr *bufmgr, unsigned size) 5619f464c52Smaya{ 5627ec681f3Smrg unsigned entry_size = util_next_power_of_two(size); 5637ec681f3Smrg unsigned min_entry_size = 1 << bufmgr->bo_slabs[0].min_order; 5647ec681f3Smrg 5657ec681f3Smrg return MAX2(entry_size, min_entry_size); 5667ec681f3Smrg} 5677ec681f3Smrg 5687ec681f3Smrg/* Return the slab entry alignment. */ 5697ec681f3Smrgstatic unsigned 5707ec681f3Smrgget_slab_entry_alignment(struct iris_bufmgr *bufmgr, unsigned size) 5717ec681f3Smrg{ 5727ec681f3Smrg unsigned entry_size = get_slab_pot_entry_size(bufmgr, size); 5737ec681f3Smrg 5747ec681f3Smrg if (size <= entry_size * 3 / 4) 5757ec681f3Smrg return entry_size / 4; 5767ec681f3Smrg 5777ec681f3Smrg return entry_size; 5787ec681f3Smrg} 5797ec681f3Smrg 5807ec681f3Smrgstatic bool 5817ec681f3Smrgiris_can_reclaim_slab(void *priv, struct pb_slab_entry *entry) 5827ec681f3Smrg{ 5837ec681f3Smrg struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry); 5847ec681f3Smrg 5857ec681f3Smrg return !iris_bo_busy(bo); 5867ec681f3Smrg} 5877ec681f3Smrg 5887ec681f3Smrgstatic void 5897ec681f3Smrgiris_slab_free(void *priv, struct pb_slab *pslab) 5907ec681f3Smrg{ 5917ec681f3Smrg struct iris_bufmgr *bufmgr = priv; 5927ec681f3Smrg struct iris_slab *slab = (void *) pslab; 5937ec681f3Smrg struct intel_aux_map_context *aux_map_ctx = bufmgr->aux_map_ctx; 5947ec681f3Smrg 5957ec681f3Smrg assert(!slab->bo->aux_map_address); 5967ec681f3Smrg 5977ec681f3Smrg /* Since we're freeing the whole slab, all buffers allocated out of it 5987ec681f3Smrg * must be reclaimable. We require buffers to be idle to be reclaimed 5997ec681f3Smrg * (see iris_can_reclaim_slab()), so we know all entries must be idle. 6007ec681f3Smrg * Therefore, we can safely unmap their aux table entries. 
6017ec681f3Smrg */ 6027ec681f3Smrg for (unsigned i = 0; i < pslab->num_entries; i++) { 6037ec681f3Smrg struct iris_bo *bo = &slab->entries[i]; 6047ec681f3Smrg if (aux_map_ctx && bo->aux_map_address) { 6057ec681f3Smrg intel_aux_map_unmap_range(aux_map_ctx, bo->address, bo->size); 6067ec681f3Smrg bo->aux_map_address = 0; 6077ec681f3Smrg } 6087ec681f3Smrg 6097ec681f3Smrg /* Unref read/write dependency syncobjs and free the array. */ 6107ec681f3Smrg for (int d = 0; d < bo->deps_size; d++) { 6117ec681f3Smrg for (int b = 0; b < IRIS_BATCH_COUNT; b++) { 6127ec681f3Smrg iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL); 6137ec681f3Smrg iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL); 6147ec681f3Smrg } 6157ec681f3Smrg } 6167ec681f3Smrg free(bo->deps); 6177ec681f3Smrg } 6187ec681f3Smrg 6197ec681f3Smrg iris_bo_unreference(slab->bo); 6207ec681f3Smrg 6217ec681f3Smrg free(slab->entries); 6227ec681f3Smrg free(slab); 6237ec681f3Smrg} 6247ec681f3Smrg 6257ec681f3Smrgstatic struct pb_slab * 6267ec681f3Smrgiris_slab_alloc(void *priv, 6277ec681f3Smrg unsigned heap, 6287ec681f3Smrg unsigned entry_size, 6297ec681f3Smrg unsigned group_index) 6307ec681f3Smrg{ 6317ec681f3Smrg struct iris_bufmgr *bufmgr = priv; 6327ec681f3Smrg struct iris_slab *slab = calloc(1, sizeof(struct iris_slab)); 6337ec681f3Smrg unsigned flags = heap == IRIS_HEAP_SYSTEM_MEMORY ? BO_ALLOC_SMEM : 0; 6347ec681f3Smrg unsigned slab_size = 0; 6357ec681f3Smrg /* We only support slab allocation for IRIS_MEMZONE_OTHER */ 6367ec681f3Smrg enum iris_memory_zone memzone = IRIS_MEMZONE_OTHER; 6377ec681f3Smrg 6387ec681f3Smrg if (!slab) 6397ec681f3Smrg return NULL; 6407ec681f3Smrg 6417ec681f3Smrg struct pb_slabs *slabs = bufmgr->bo_slabs; 6427ec681f3Smrg 6437ec681f3Smrg /* Determine the slab buffer size. 
*/ 6447ec681f3Smrg for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 6457ec681f3Smrg unsigned max_entry_size = 6467ec681f3Smrg 1 << (slabs[i].min_order + slabs[i].num_orders - 1); 6477ec681f3Smrg 6487ec681f3Smrg if (entry_size <= max_entry_size) { 6497ec681f3Smrg /* The slab size is twice the size of the largest possible entry. */ 6507ec681f3Smrg slab_size = max_entry_size * 2; 6517ec681f3Smrg 6527ec681f3Smrg if (!util_is_power_of_two_nonzero(entry_size)) { 6537ec681f3Smrg assert(util_is_power_of_two_nonzero(entry_size * 4 / 3)); 6547ec681f3Smrg 6557ec681f3Smrg /* If the entry size is 3/4 of a power of two, we would waste 6567ec681f3Smrg * space and not gain anything if we allocated only twice the 6577ec681f3Smrg * power of two for the backing buffer: 6587ec681f3Smrg * 6597ec681f3Smrg * 2 * 3/4 = 1.5 usable with buffer size 2 6607ec681f3Smrg * 6617ec681f3Smrg * Allocating 5 times the entry size leads us to the next power 6627ec681f3Smrg * of two and results in a much better memory utilization: 6637ec681f3Smrg * 6647ec681f3Smrg * 5 * 3/4 = 3.75 usable with buffer size 4 6657ec681f3Smrg */ 6667ec681f3Smrg if (entry_size * 5 > slab_size) 6677ec681f3Smrg slab_size = util_next_power_of_two(entry_size * 5); 6687ec681f3Smrg } 6697ec681f3Smrg 6707ec681f3Smrg /* The largest slab should have the same size as the PTE fragment 6717ec681f3Smrg * size to get faster address translation. 6727ec681f3Smrg * 6737ec681f3Smrg * TODO: move this to intel_device_info? 
6747ec681f3Smrg */ 6757ec681f3Smrg const unsigned pte_size = 2 * 1024 * 1024; 6767ec681f3Smrg 6777ec681f3Smrg if (i == NUM_SLAB_ALLOCATORS - 1 && slab_size < pte_size) 6787ec681f3Smrg slab_size = pte_size; 6797ec681f3Smrg 6807ec681f3Smrg break; 6817ec681f3Smrg } 6827ec681f3Smrg } 6837ec681f3Smrg assert(slab_size != 0); 6847ec681f3Smrg 6857ec681f3Smrg slab->bo = 6867ec681f3Smrg iris_bo_alloc(bufmgr, "slab", slab_size, slab_size, memzone, flags); 6877ec681f3Smrg if (!slab->bo) 6887ec681f3Smrg goto fail; 6897ec681f3Smrg 6907ec681f3Smrg slab_size = slab->bo->size; 6917ec681f3Smrg 6927ec681f3Smrg slab->base.num_entries = slab_size / entry_size; 6937ec681f3Smrg slab->base.num_free = slab->base.num_entries; 6947ec681f3Smrg slab->entry_size = entry_size; 6957ec681f3Smrg slab->entries = calloc(slab->base.num_entries, sizeof(*slab->entries)); 6967ec681f3Smrg if (!slab->entries) 6977ec681f3Smrg goto fail_bo; 6987ec681f3Smrg 6997ec681f3Smrg list_inithead(&slab->base.free); 7007ec681f3Smrg 7017ec681f3Smrg for (unsigned i = 0; i < slab->base.num_entries; i++) { 7027ec681f3Smrg struct iris_bo *bo = &slab->entries[i]; 7037ec681f3Smrg 7047ec681f3Smrg bo->size = entry_size; 7057ec681f3Smrg bo->bufmgr = bufmgr; 7069f464c52Smaya bo->hash = _mesa_hash_pointer(bo); 7077ec681f3Smrg bo->gem_handle = 0; 7087ec681f3Smrg bo->address = slab->bo->address + i * entry_size; 7097ec681f3Smrg bo->aux_map_address = 0; 7107ec681f3Smrg bo->index = -1; 7117ec681f3Smrg bo->refcount = 0; 7127ec681f3Smrg bo->idle = true; 7137ec681f3Smrg 7147ec681f3Smrg bo->slab.entry.slab = &slab->base; 7157ec681f3Smrg bo->slab.entry.group_index = group_index; 7167ec681f3Smrg bo->slab.entry.entry_size = entry_size; 7177ec681f3Smrg 7187ec681f3Smrg bo->slab.real = iris_get_backing_bo(slab->bo); 7197ec681f3Smrg 7207ec681f3Smrg list_addtail(&bo->slab.entry.head, &slab->base.free); 7219f464c52Smaya } 7227ec681f3Smrg 7237ec681f3Smrg return &slab->base; 7247ec681f3Smrg 7257ec681f3Smrgfail_bo: 7267ec681f3Smrg 
iris_bo_unreference(slab->bo); 7277ec681f3Smrgfail: 7287ec681f3Smrg free(slab); 7297ec681f3Smrg return NULL; 7309f464c52Smaya} 7319f464c52Smaya 7329f464c52Smayastatic struct iris_bo * 7337ec681f3Smrgalloc_bo_from_slabs(struct iris_bufmgr *bufmgr, 7347ec681f3Smrg const char *name, 7357ec681f3Smrg uint64_t size, 7367ec681f3Smrg uint32_t alignment, 7377ec681f3Smrg unsigned flags, 7387ec681f3Smrg bool local) 7399f464c52Smaya{ 7407ec681f3Smrg if (flags & BO_ALLOC_NO_SUBALLOC) 7417ec681f3Smrg return NULL; 7429f464c52Smaya 7437ec681f3Smrg struct pb_slabs *last_slab = &bufmgr->bo_slabs[NUM_SLAB_ALLOCATORS - 1]; 7447ec681f3Smrg unsigned max_slab_entry_size = 7457ec681f3Smrg 1 << (last_slab->min_order + last_slab->num_orders - 1); 7467ec681f3Smrg 7477ec681f3Smrg if (size > max_slab_entry_size) 7487ec681f3Smrg return NULL; 7497ec681f3Smrg 7507ec681f3Smrg struct pb_slab_entry *entry; 7517ec681f3Smrg 7527ec681f3Smrg enum iris_heap heap = 7537ec681f3Smrg local ? IRIS_HEAP_DEVICE_LOCAL : IRIS_HEAP_SYSTEM_MEMORY; 7547ec681f3Smrg 7557ec681f3Smrg unsigned alloc_size = size; 7567ec681f3Smrg 7577ec681f3Smrg /* Always use slabs for sizes less than 4 KB because the kernel aligns 7587ec681f3Smrg * everything to 4 KB. 7597ec681f3Smrg */ 7607ec681f3Smrg if (size < alignment && alignment <= 4 * 1024) 7617ec681f3Smrg alloc_size = alignment; 7629f464c52Smaya 7637ec681f3Smrg if (alignment > get_slab_entry_alignment(bufmgr, alloc_size)) { 7647ec681f3Smrg /* 3/4 allocations can return too small alignment. 7657ec681f3Smrg * Try again with a power of two allocation size. 7667ec681f3Smrg */ 7677ec681f3Smrg unsigned pot_size = get_slab_pot_entry_size(bufmgr, alloc_size); 7687ec681f3Smrg 7697ec681f3Smrg if (alignment <= pot_size) { 7707ec681f3Smrg /* This size works but wastes some memory to fulfill the alignment. 
*/ 7717ec681f3Smrg alloc_size = pot_size; 7727ec681f3Smrg } else { 7737ec681f3Smrg /* can't fulfill alignment requirements */ 7747ec681f3Smrg return NULL; 7757ec681f3Smrg } 7767ec681f3Smrg } 7777ec681f3Smrg 7787ec681f3Smrg struct pb_slabs *slabs = get_slabs(bufmgr, alloc_size); 7797ec681f3Smrg entry = pb_slab_alloc(slabs, alloc_size, heap); 7807ec681f3Smrg if (!entry) { 7817ec681f3Smrg /* Clean up and try again... */ 7827ec681f3Smrg pb_slabs_reclaim(slabs); 7837ec681f3Smrg 7847ec681f3Smrg entry = pb_slab_alloc(slabs, alloc_size, heap); 7859f464c52Smaya } 7867ec681f3Smrg if (!entry) 7877ec681f3Smrg return NULL; 7887ec681f3Smrg 7897ec681f3Smrg struct iris_bo *bo = container_of(entry, struct iris_bo, slab.entry); 7909f464c52Smaya 7917ec681f3Smrg if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) { 7927ec681f3Smrg /* This buffer was associated with an aux-buffer range. We only allow 7937ec681f3Smrg * slab allocated buffers to be reclaimed when idle (not in use by an 7947ec681f3Smrg * executing batch). (See iris_can_reclaim_slab().) So we know that 7957ec681f3Smrg * our previous aux mapping is no longer in use, and we can safely 7967ec681f3Smrg * remove it. 7977ec681f3Smrg */ 7987ec681f3Smrg intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address, 7997ec681f3Smrg bo->size); 8007ec681f3Smrg bo->aux_map_address = 0; 8017ec681f3Smrg } 8029f464c52Smaya 8037ec681f3Smrg p_atomic_set(&bo->refcount, 1); 8047ec681f3Smrg bo->name = name; 8057ec681f3Smrg bo->size = size; 8067ec681f3Smrg 8077ec681f3Smrg /* Zero the contents if necessary. If this fails, fall back to 8087ec681f3Smrg * allocating a fresh BO, which will always be zeroed by the kernel. 
8099f464c52Smaya */ 8107ec681f3Smrg if (flags & BO_ALLOC_ZEROED) { 8117ec681f3Smrg void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); 8127ec681f3Smrg if (map) { 8137ec681f3Smrg memset(map, 0, bo->size); 8147ec681f3Smrg } else { 8157ec681f3Smrg pb_slab_free(slabs, &bo->slab.entry); 8167ec681f3Smrg return NULL; 8177ec681f3Smrg } 8189f464c52Smaya } 8199f464c52Smaya 8207ec681f3Smrg return bo; 8217ec681f3Smrg} 8227ec681f3Smrg 8237ec681f3Smrgstatic struct iris_bo * 8247ec681f3Smrgalloc_bo_from_cache(struct iris_bufmgr *bufmgr, 8257ec681f3Smrg struct bo_cache_bucket *bucket, 8267ec681f3Smrg uint32_t alignment, 8277ec681f3Smrg enum iris_memory_zone memzone, 8287ec681f3Smrg enum iris_mmap_mode mmap_mode, 8297ec681f3Smrg unsigned flags, 8307ec681f3Smrg bool match_zone) 8317ec681f3Smrg{ 8327ec681f3Smrg if (!bucket) 8337ec681f3Smrg return NULL; 8347ec681f3Smrg 8357ec681f3Smrg struct iris_bo *bo = NULL; 8367ec681f3Smrg 8377ec681f3Smrg list_for_each_entry_safe(struct iris_bo, cur, &bucket->head, head) { 8387ec681f3Smrg assert(iris_bo_is_real(cur)); 8397ec681f3Smrg 8407ec681f3Smrg /* Find one that's got the right mapping type. We used to swap maps 8417ec681f3Smrg * around but the kernel doesn't allow this on discrete GPUs. 8429f464c52Smaya */ 8437ec681f3Smrg if (mmap_mode != cur->real.mmap_mode) 8447ec681f3Smrg continue; 8457ec681f3Smrg 8467ec681f3Smrg /* Try a little harder to find one that's already in the right memzone */ 8477ec681f3Smrg if (match_zone && memzone != iris_memzone_for_address(cur->address)) 8487ec681f3Smrg continue; 8497ec681f3Smrg 8507ec681f3Smrg /* If the last BO in the cache is busy, there are no idle BOs. Bail, 8517ec681f3Smrg * either falling back to a non-matching memzone, or if that fails, 8527ec681f3Smrg * allocating a fresh buffer. 8537ec681f3Smrg */ 8547ec681f3Smrg if (iris_bo_busy(cur)) 8557ec681f3Smrg return NULL; 8567ec681f3Smrg 8577ec681f3Smrg list_del(&cur->head); 8587ec681f3Smrg 8597ec681f3Smrg /* Tell the kernel we need this BO. 
If it still exists, we're done! */ 8607ec681f3Smrg if (iris_bo_madvise(cur, I915_MADV_WILLNEED)) { 8617ec681f3Smrg bo = cur; 8627ec681f3Smrg break; 8639f464c52Smaya } 8649f464c52Smaya 8657ec681f3Smrg /* This BO was purged, throw it out and keep looking. */ 8667ec681f3Smrg bo_free(cur); 8677ec681f3Smrg } 8689f464c52Smaya 8697ec681f3Smrg if (!bo) 8707ec681f3Smrg return NULL; 8719f464c52Smaya 8727ec681f3Smrg if (bo->aux_map_address) { 8737ec681f3Smrg /* This buffer was associated with an aux-buffer range. We make sure 8747ec681f3Smrg * that buffers are not reused from the cache while the buffer is (busy) 8757ec681f3Smrg * being used by an executing batch. Since we are here, the buffer is no 8767ec681f3Smrg * longer being used by a batch and the buffer was deleted (in order to 8777ec681f3Smrg * end up in the cache). Therefore its old aux-buffer range can be 8787ec681f3Smrg * removed from the aux-map. 8797ec681f3Smrg */ 8807ec681f3Smrg if (bo->bufmgr->aux_map_ctx) 8817ec681f3Smrg intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address, 8827ec681f3Smrg bo->size); 8837ec681f3Smrg bo->aux_map_address = 0; 8847ec681f3Smrg } 8857ec681f3Smrg 8867ec681f3Smrg /* If the cached BO isn't in the right memory zone, or the alignment 8877ec681f3Smrg * isn't sufficient, free the old memory and assign it a new address. 8887ec681f3Smrg */ 8897ec681f3Smrg if (memzone != iris_memzone_for_address(bo->address) || 8907ec681f3Smrg bo->address % alignment != 0) { 8917ec681f3Smrg vma_free(bufmgr, bo->address, bo->size); 8927ec681f3Smrg bo->address = 0ull; 8937ec681f3Smrg } 8947ec681f3Smrg 8957ec681f3Smrg /* Zero the contents if necessary. If this fails, fall back to 8967ec681f3Smrg * allocating a fresh BO, which will always be zeroed by the kernel. 
8977ec681f3Smrg */ 8987ec681f3Smrg if (flags & BO_ALLOC_ZEROED) { 8997ec681f3Smrg void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); 9007ec681f3Smrg if (map) { 9017ec681f3Smrg memset(map, 0, bo->size); 9027ec681f3Smrg } else { 9037ec681f3Smrg bo_free(bo); 9047ec681f3Smrg return NULL; 9059f464c52Smaya } 9069f464c52Smaya } 9079f464c52Smaya 9087ec681f3Smrg return bo; 9097ec681f3Smrg} 9107ec681f3Smrg 9117ec681f3Smrgstatic struct iris_bo * 9127ec681f3Smrgalloc_fresh_bo(struct iris_bufmgr *bufmgr, uint64_t bo_size, bool local) 9137ec681f3Smrg{ 9147ec681f3Smrg struct iris_bo *bo = bo_calloc(); 9157ec681f3Smrg if (!bo) 9167ec681f3Smrg return NULL; 9177ec681f3Smrg 9187ec681f3Smrg /* If we have vram size, we have multiple memory regions and should choose 9197ec681f3Smrg * one of them. 9207ec681f3Smrg */ 9217ec681f3Smrg if (bufmgr->vram.size > 0) { 9227ec681f3Smrg /* All new BOs we get from the kernel are zeroed, so we don't need to 9237ec681f3Smrg * worry about that here. 9249f464c52Smaya */ 9257ec681f3Smrg struct drm_i915_gem_memory_class_instance regions[2]; 9267ec681f3Smrg uint32_t nregions = 0; 9277ec681f3Smrg if (local) { 9287ec681f3Smrg /* For vram allocations, still use system memory as a fallback. 
*/ 9297ec681f3Smrg regions[nregions++] = bufmgr->vram.region; 9307ec681f3Smrg regions[nregions++] = bufmgr->sys.region; 9317ec681f3Smrg } else { 9327ec681f3Smrg regions[nregions++] = bufmgr->sys.region; 9339f464c52Smaya } 9349f464c52Smaya 9357ec681f3Smrg struct drm_i915_gem_create_ext_memory_regions ext_regions = { 9367ec681f3Smrg .base = { .name = I915_GEM_CREATE_EXT_MEMORY_REGIONS }, 9377ec681f3Smrg .num_regions = nregions, 9387ec681f3Smrg .regions = (uintptr_t)regions, 9397ec681f3Smrg }; 9407ec681f3Smrg 9417ec681f3Smrg struct drm_i915_gem_create_ext create = { 9427ec681f3Smrg .size = bo_size, 9437ec681f3Smrg .extensions = (uintptr_t)&ext_regions, 9447ec681f3Smrg }; 9459f464c52Smaya 9467ec681f3Smrg /* It should be safe to use GEM_CREATE_EXT without checking, since we are 9477ec681f3Smrg * in the side of the branch where discrete memory is available. So we 9487ec681f3Smrg * can assume GEM_CREATE_EXT is supported already. 9497ec681f3Smrg */ 9507ec681f3Smrg if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE_EXT, &create) != 0) { 9517ec681f3Smrg free(bo); 9527ec681f3Smrg return NULL; 9537ec681f3Smrg } 9547ec681f3Smrg bo->gem_handle = create.handle; 9557ec681f3Smrg } else { 9569f464c52Smaya struct drm_i915_gem_create create = { .size = bo_size }; 9579f464c52Smaya 9589f464c52Smaya /* All new BOs we get from the kernel are zeroed, so we don't need to 9599f464c52Smaya * worry about that here. 
9609f464c52Smaya */ 9617ec681f3Smrg if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0) { 9629f464c52Smaya free(bo); 9637ec681f3Smrg return NULL; 9649f464c52Smaya } 9659f464c52Smaya bo->gem_handle = create.handle; 9667ec681f3Smrg } 9679f464c52Smaya 9687ec681f3Smrg bo->bufmgr = bufmgr; 9697ec681f3Smrg bo->size = bo_size; 9707ec681f3Smrg bo->idle = true; 9717ec681f3Smrg bo->real.local = local; 9729f464c52Smaya 9737ec681f3Smrg if (bufmgr->vram.size == 0) { 9749f464c52Smaya /* Calling set_domain() will allocate pages for the BO outside of the 9759f464c52Smaya * struct mutex lock in the kernel, which is more efficient than waiting 9769f464c52Smaya * to create them during the first execbuf that uses the BO. 9779f464c52Smaya */ 9789f464c52Smaya struct drm_i915_gem_set_domain sd = { 9799f464c52Smaya .handle = bo->gem_handle, 9809f464c52Smaya .read_domains = I915_GEM_DOMAIN_CPU, 9819f464c52Smaya .write_domain = 0, 9829f464c52Smaya }; 9839f464c52Smaya 9847ec681f3Smrg intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd); 9857ec681f3Smrg } 9867ec681f3Smrg 9877ec681f3Smrg return bo; 9887ec681f3Smrg} 9897ec681f3Smrg 9907ec681f3Smrgstruct iris_bo * 9917ec681f3Smrgiris_bo_alloc(struct iris_bufmgr *bufmgr, 9927ec681f3Smrg const char *name, 9937ec681f3Smrg uint64_t size, 9947ec681f3Smrg uint32_t alignment, 9957ec681f3Smrg enum iris_memory_zone memzone, 9967ec681f3Smrg unsigned flags) 9977ec681f3Smrg{ 9987ec681f3Smrg struct iris_bo *bo; 9997ec681f3Smrg unsigned int page_size = getpagesize(); 10007ec681f3Smrg bool local = bufmgr->vram.size > 0 && 10017ec681f3Smrg !(flags & BO_ALLOC_COHERENT || flags & BO_ALLOC_SMEM); 10027ec681f3Smrg struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size, local); 10037ec681f3Smrg 10047ec681f3Smrg if (memzone != IRIS_MEMZONE_OTHER || (flags & BO_ALLOC_COHERENT)) 10057ec681f3Smrg flags |= BO_ALLOC_NO_SUBALLOC; 10067ec681f3Smrg 10077ec681f3Smrg bo = alloc_bo_from_slabs(bufmgr, name, size, alignment, flags, local); 
10087ec681f3Smrg 10097ec681f3Smrg if (bo) 10107ec681f3Smrg return bo; 10117ec681f3Smrg 10127ec681f3Smrg /* Round the size up to the bucket size, or if we don't have caching 10137ec681f3Smrg * at this size, a multiple of the page size. 10147ec681f3Smrg */ 10157ec681f3Smrg uint64_t bo_size = 10167ec681f3Smrg bucket ? bucket->size : MAX2(ALIGN(size, page_size), page_size); 10177ec681f3Smrg 10187ec681f3Smrg bool is_coherent = bufmgr->has_llc || 10197ec681f3Smrg (bufmgr->vram.size > 0 && !local) || 10207ec681f3Smrg (flags & BO_ALLOC_COHERENT); 10217ec681f3Smrg bool is_scanout = (flags & BO_ALLOC_SCANOUT) != 0; 10227ec681f3Smrg enum iris_mmap_mode mmap_mode = 10237ec681f3Smrg !local && is_coherent && !is_scanout ? IRIS_MMAP_WB : IRIS_MMAP_WC; 10247ec681f3Smrg 10257ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 10267ec681f3Smrg 10277ec681f3Smrg /* Get a buffer out of the cache if available. First, we try to find 10287ec681f3Smrg * one with a matching memory zone so we can avoid reallocating VMA. 10297ec681f3Smrg */ 10307ec681f3Smrg bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode, 10317ec681f3Smrg flags, true); 10327ec681f3Smrg 10337ec681f3Smrg /* If that fails, we try for any cached BO, without matching memzone. 
*/ 10347ec681f3Smrg if (!bo) { 10357ec681f3Smrg bo = alloc_bo_from_cache(bufmgr, bucket, alignment, memzone, mmap_mode, 10367ec681f3Smrg flags, false); 10377ec681f3Smrg } 10387ec681f3Smrg 10397ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 10407ec681f3Smrg 10417ec681f3Smrg if (!bo) { 10427ec681f3Smrg bo = alloc_fresh_bo(bufmgr, bo_size, local); 10437ec681f3Smrg if (!bo) 10447ec681f3Smrg return NULL; 10457ec681f3Smrg } 10467ec681f3Smrg 10477ec681f3Smrg if (bo->address == 0ull) { 10487ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 10497ec681f3Smrg bo->address = vma_alloc(bufmgr, memzone, bo->size, alignment); 10507ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 10517ec681f3Smrg 10527ec681f3Smrg if (bo->address == 0ull) 10539f464c52Smaya goto err_free; 10549f464c52Smaya } 10559f464c52Smaya 10569f464c52Smaya bo->name = name; 10579f464c52Smaya p_atomic_set(&bo->refcount, 1); 10587ec681f3Smrg bo->real.reusable = bucket && bufmgr->bo_reuse; 10599f464c52Smaya bo->index = -1; 10607ec681f3Smrg bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; 10619f464c52Smaya 10629f464c52Smaya /* By default, capture all driver-internal buffers like shader kernels, 10639f464c52Smaya * surface states, dynamic states, border colors, and so on. 10649f464c52Smaya */ 10659f464c52Smaya if (memzone < IRIS_MEMZONE_OTHER) 10667ec681f3Smrg bo->real.kflags |= EXEC_OBJECT_CAPTURE; 10679f464c52Smaya 10687ec681f3Smrg assert(bo->real.map == NULL || bo->real.mmap_mode == mmap_mode); 10697ec681f3Smrg bo->real.mmap_mode = mmap_mode; 10709f464c52Smaya 10717ec681f3Smrg /* On integrated GPUs, enable snooping to ensure coherency if needed. 10727ec681f3Smrg * For discrete, we instead use SMEM and avoid WB maps for coherency. 
10737ec681f3Smrg */ 10747ec681f3Smrg if ((flags & BO_ALLOC_COHERENT) && 10757ec681f3Smrg !bufmgr->has_llc && bufmgr->vram.size == 0) { 10769f464c52Smaya struct drm_i915_gem_caching arg = { 10779f464c52Smaya .handle = bo->gem_handle, 10789f464c52Smaya .caching = 1, 10799f464c52Smaya }; 10807ec681f3Smrg if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) != 0) 10817ec681f3Smrg goto err_free; 10827ec681f3Smrg 10837ec681f3Smrg bo->real.reusable = false; 10849f464c52Smaya } 10859f464c52Smaya 10867ec681f3Smrg DBG("bo_create: buf %d (%s) (%s memzone) (%s) %llub\n", bo->gem_handle, 10877ec681f3Smrg bo->name, memzone_name(memzone), bo->real.local ? "local" : "system", 10887ec681f3Smrg (unsigned long long) size); 10899f464c52Smaya 10909f464c52Smaya return bo; 10919f464c52Smaya 10929f464c52Smayaerr_free: 10939f464c52Smaya bo_free(bo); 10949f464c52Smaya return NULL; 10959f464c52Smaya} 10969f464c52Smaya 10979f464c52Smayastruct iris_bo * 10989f464c52Smayairis_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name, 10999f464c52Smaya void *ptr, size_t size, 11009f464c52Smaya enum iris_memory_zone memzone) 11019f464c52Smaya{ 11027ec681f3Smrg struct drm_gem_close close = { 0, }; 11039f464c52Smaya struct iris_bo *bo; 11049f464c52Smaya 11059f464c52Smaya bo = bo_calloc(); 11069f464c52Smaya if (!bo) 11079f464c52Smaya return NULL; 11089f464c52Smaya 11099f464c52Smaya struct drm_i915_gem_userptr arg = { 11109f464c52Smaya .user_ptr = (uintptr_t)ptr, 11119f464c52Smaya .user_size = size, 11127ec681f3Smrg .flags = bufmgr->has_userptr_probe ? 
I915_USERPTR_PROBE : 0, 11139f464c52Smaya }; 11147ec681f3Smrg if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) 11159f464c52Smaya goto err_free; 11169f464c52Smaya bo->gem_handle = arg.handle; 11179f464c52Smaya 11187ec681f3Smrg if (!bufmgr->has_userptr_probe) { 11197ec681f3Smrg /* Check the buffer for validity before we try and use it in a batch */ 11207ec681f3Smrg struct drm_i915_gem_set_domain sd = { 11217ec681f3Smrg .handle = bo->gem_handle, 11227ec681f3Smrg .read_domains = I915_GEM_DOMAIN_CPU, 11237ec681f3Smrg }; 11247ec681f3Smrg if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd)) 11257ec681f3Smrg goto err_close; 11267ec681f3Smrg } 11279f464c52Smaya 11289f464c52Smaya bo->name = name; 11299f464c52Smaya bo->size = size; 11307ec681f3Smrg bo->real.map = ptr; 11319f464c52Smaya 11329f464c52Smaya bo->bufmgr = bufmgr; 11337ec681f3Smrg bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; 11347ec681f3Smrg 11357ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 11367ec681f3Smrg bo->address = vma_alloc(bufmgr, memzone, size, 1); 11377ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 11387ec681f3Smrg 11397ec681f3Smrg if (bo->address == 0ull) 11409f464c52Smaya goto err_close; 11419f464c52Smaya 11429f464c52Smaya p_atomic_set(&bo->refcount, 1); 11437ec681f3Smrg bo->real.userptr = true; 11449f464c52Smaya bo->index = -1; 11459f464c52Smaya bo->idle = true; 11467ec681f3Smrg bo->real.mmap_mode = IRIS_MMAP_WB; 11479f464c52Smaya 11489f464c52Smaya return bo; 11499f464c52Smaya 11509f464c52Smayaerr_close: 11517ec681f3Smrg close.handle = bo->gem_handle; 11527ec681f3Smrg intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); 11539f464c52Smayaerr_free: 11549f464c52Smaya free(bo); 11559f464c52Smaya return NULL; 11569f464c52Smaya} 11579f464c52Smaya 11589f464c52Smaya/** 11599f464c52Smaya * Returns a iris_bo wrapping the given buffer object handle. 
11609f464c52Smaya * 11619f464c52Smaya * This can be used when one application needs to pass a buffer object 11629f464c52Smaya * to another. 11639f464c52Smaya */ 11649f464c52Smayastruct iris_bo * 11659f464c52Smayairis_bo_gem_create_from_name(struct iris_bufmgr *bufmgr, 11669f464c52Smaya const char *name, unsigned int handle) 11679f464c52Smaya{ 11689f464c52Smaya struct iris_bo *bo; 11699f464c52Smaya 11709f464c52Smaya /* At the moment most applications only have a few named bo. 11719f464c52Smaya * For instance, in a DRI client only the render buffers passed 11729f464c52Smaya * between X and the client are named. And since X returns the 11739f464c52Smaya * alternating names for the front/back buffer a linear search 11749f464c52Smaya * provides a sufficiently fast match. 11759f464c52Smaya */ 11767ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 11777ec681f3Smrg bo = find_and_ref_external_bo(bufmgr->name_table, handle); 11787ec681f3Smrg if (bo) 11799f464c52Smaya goto out; 11809f464c52Smaya 11819f464c52Smaya struct drm_gem_open open_arg = { .name = handle }; 11827ec681f3Smrg int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg); 11839f464c52Smaya if (ret != 0) { 11849f464c52Smaya DBG("Couldn't reference %s handle 0x%08x: %s\n", 11859f464c52Smaya name, handle, strerror(errno)); 11869f464c52Smaya bo = NULL; 11879f464c52Smaya goto out; 11889f464c52Smaya } 11899f464c52Smaya /* Now see if someone has used a prime handle to get this 11909f464c52Smaya * object from the kernel before by looking through the list 11919f464c52Smaya * again for a matching gem_handle 11929f464c52Smaya */ 11937ec681f3Smrg bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle); 11947ec681f3Smrg if (bo) 11959f464c52Smaya goto out; 11969f464c52Smaya 11979f464c52Smaya bo = bo_calloc(); 11989f464c52Smaya if (!bo) 11999f464c52Smaya goto out; 12009f464c52Smaya 12019f464c52Smaya p_atomic_set(&bo->refcount, 1); 12029f464c52Smaya 12039f464c52Smaya bo->size = open_arg.size; 12049f464c52Smaya 
bo->bufmgr = bufmgr; 12059f464c52Smaya bo->gem_handle = open_arg.handle; 12069f464c52Smaya bo->name = name; 12077ec681f3Smrg bo->real.global_name = handle; 12087ec681f3Smrg bo->real.reusable = false; 12097ec681f3Smrg bo->real.imported = true; 12107ec681f3Smrg bo->real.mmap_mode = IRIS_MMAP_NONE; 12117ec681f3Smrg bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; 12127ec681f3Smrg bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1); 12139f464c52Smaya 12149f464c52Smaya _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); 12157ec681f3Smrg _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo); 12169f464c52Smaya 12179f464c52Smaya DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name); 12189f464c52Smaya 12199f464c52Smayaout: 12207ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 12219f464c52Smaya return bo; 12229f464c52Smaya} 12239f464c52Smaya 12249f464c52Smayastatic void 12257ec681f3Smrgbo_close(struct iris_bo *bo) 12269f464c52Smaya{ 12279f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 12289f464c52Smaya 12297ec681f3Smrg assert(iris_bo_is_real(bo)); 12309f464c52Smaya 12317ec681f3Smrg if (iris_bo_is_external(bo)) { 12329f464c52Smaya struct hash_entry *entry; 12339f464c52Smaya 12347ec681f3Smrg if (bo->real.global_name) { 12357ec681f3Smrg entry = _mesa_hash_table_search(bufmgr->name_table, 12367ec681f3Smrg &bo->real.global_name); 12379f464c52Smaya _mesa_hash_table_remove(bufmgr->name_table, entry); 12389f464c52Smaya } 12399f464c52Smaya 12409f464c52Smaya entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle); 12419f464c52Smaya _mesa_hash_table_remove(bufmgr->handle_table, entry); 12427ec681f3Smrg 12437ec681f3Smrg list_for_each_entry_safe(struct bo_export, export, &bo->real.exports, link) { 12447ec681f3Smrg struct drm_gem_close close = { .handle = export->gem_handle }; 12457ec681f3Smrg intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close); 12467ec681f3Smrg 12477ec681f3Smrg 
list_del(&export->link); 12487ec681f3Smrg free(export); 12497ec681f3Smrg } 12507ec681f3Smrg } else { 12517ec681f3Smrg assert(list_is_empty(&bo->real.exports)); 12529f464c52Smaya } 12539f464c52Smaya 12549f464c52Smaya /* Close this object */ 12559f464c52Smaya struct drm_gem_close close = { .handle = bo->gem_handle }; 12567ec681f3Smrg int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); 12579f464c52Smaya if (ret != 0) { 12589f464c52Smaya DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 12599f464c52Smaya bo->gem_handle, bo->name, strerror(errno)); 12609f464c52Smaya } 12619f464c52Smaya 12627ec681f3Smrg if (bo->aux_map_address && bo->bufmgr->aux_map_ctx) { 12637ec681f3Smrg intel_aux_map_unmap_range(bo->bufmgr->aux_map_ctx, bo->address, 12647ec681f3Smrg bo->size); 12657ec681f3Smrg } 12667ec681f3Smrg 12677ec681f3Smrg /* Return the VMA for reuse */ 12687ec681f3Smrg vma_free(bo->bufmgr, bo->address, bo->size); 12697ec681f3Smrg 12707ec681f3Smrg for (int d = 0; d < bo->deps_size; d++) { 12717ec681f3Smrg for (int b = 0; b < IRIS_BATCH_COUNT; b++) { 12727ec681f3Smrg iris_syncobj_reference(bufmgr, &bo->deps[d].write_syncobjs[b], NULL); 12737ec681f3Smrg iris_syncobj_reference(bufmgr, &bo->deps[d].read_syncobjs[b], NULL); 12747ec681f3Smrg } 12757ec681f3Smrg } 12767ec681f3Smrg free(bo->deps); 12779f464c52Smaya 12789f464c52Smaya free(bo); 12799f464c52Smaya} 12809f464c52Smaya 12817ec681f3Smrgstatic void 12827ec681f3Smrgbo_free(struct iris_bo *bo) 12837ec681f3Smrg{ 12847ec681f3Smrg struct iris_bufmgr *bufmgr = bo->bufmgr; 12857ec681f3Smrg 12867ec681f3Smrg assert(iris_bo_is_real(bo)); 12877ec681f3Smrg 12887ec681f3Smrg if (!bo->real.userptr && bo->real.map) 12897ec681f3Smrg bo_unmap(bo); 12907ec681f3Smrg 12917ec681f3Smrg if (bo->idle) { 12927ec681f3Smrg bo_close(bo); 12937ec681f3Smrg } else { 12947ec681f3Smrg /* Defer closing the GEM BO and returning the VMA for reuse until the 12957ec681f3Smrg * BO is idle. Just move it to the dead list for now. 
12967ec681f3Smrg */ 12977ec681f3Smrg list_addtail(&bo->head, &bufmgr->zombie_list); 12987ec681f3Smrg } 12997ec681f3Smrg} 13007ec681f3Smrg 13019f464c52Smaya/** Frees all cached buffers significantly older than @time. */ 13029f464c52Smayastatic void 13039f464c52Smayacleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time) 13049f464c52Smaya{ 13059f464c52Smaya int i; 13069f464c52Smaya 13079f464c52Smaya if (bufmgr->time == time) 13089f464c52Smaya return; 13099f464c52Smaya 13109f464c52Smaya for (i = 0; i < bufmgr->num_buckets; i++) { 13119f464c52Smaya struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; 13129f464c52Smaya 13139f464c52Smaya list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { 13147ec681f3Smrg if (time - bo->real.free_time <= 1) 13157ec681f3Smrg break; 13167ec681f3Smrg 13177ec681f3Smrg list_del(&bo->head); 13187ec681f3Smrg 13197ec681f3Smrg bo_free(bo); 13207ec681f3Smrg } 13217ec681f3Smrg } 13227ec681f3Smrg 13237ec681f3Smrg for (i = 0; i < bufmgr->num_local_buckets; i++) { 13247ec681f3Smrg struct bo_cache_bucket *bucket = &bufmgr->local_cache_bucket[i]; 13257ec681f3Smrg 13267ec681f3Smrg list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { 13277ec681f3Smrg if (time - bo->real.free_time <= 1) 13289f464c52Smaya break; 13299f464c52Smaya 13309f464c52Smaya list_del(&bo->head); 13319f464c52Smaya 13329f464c52Smaya bo_free(bo); 13339f464c52Smaya } 13349f464c52Smaya } 13359f464c52Smaya 13367ec681f3Smrg list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) { 13377ec681f3Smrg /* Stop once we reach a busy BO - all others past this point were 13387ec681f3Smrg * freed more recently so are likely also busy. 
13397ec681f3Smrg */ 13407ec681f3Smrg if (!bo->idle && iris_bo_busy(bo)) 13417ec681f3Smrg break; 13427ec681f3Smrg 13437ec681f3Smrg list_del(&bo->head); 13447ec681f3Smrg bo_close(bo); 13457ec681f3Smrg } 13467ec681f3Smrg 13479f464c52Smaya bufmgr->time = time; 13489f464c52Smaya} 13499f464c52Smaya 13509f464c52Smayastatic void 13519f464c52Smayabo_unreference_final(struct iris_bo *bo, time_t time) 13529f464c52Smaya{ 13539f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 13549f464c52Smaya struct bo_cache_bucket *bucket; 13559f464c52Smaya 13569f464c52Smaya DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name); 13579f464c52Smaya 13587ec681f3Smrg assert(iris_bo_is_real(bo)); 13597ec681f3Smrg 13609f464c52Smaya bucket = NULL; 13617ec681f3Smrg if (bo->real.reusable) 13627ec681f3Smrg bucket = bucket_for_size(bufmgr, bo->size, bo->real.local); 13639f464c52Smaya /* Put the buffer into our internal cache for reuse if we can. */ 13649f464c52Smaya if (bucket && iris_bo_madvise(bo, I915_MADV_DONTNEED)) { 13657ec681f3Smrg bo->real.free_time = time; 13669f464c52Smaya bo->name = NULL; 13679f464c52Smaya 13689f464c52Smaya list_addtail(&bo->head, &bucket->head); 13699f464c52Smaya } else { 13709f464c52Smaya bo_free(bo); 13719f464c52Smaya } 13729f464c52Smaya} 13739f464c52Smaya 13749f464c52Smayavoid 13759f464c52Smayairis_bo_unreference(struct iris_bo *bo) 13769f464c52Smaya{ 13779f464c52Smaya if (bo == NULL) 13789f464c52Smaya return; 13799f464c52Smaya 13809f464c52Smaya assert(p_atomic_read(&bo->refcount) > 0); 13819f464c52Smaya 13829f464c52Smaya if (atomic_add_unless(&bo->refcount, -1, 1)) { 13839f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 13849f464c52Smaya struct timespec time; 13859f464c52Smaya 13869f464c52Smaya clock_gettime(CLOCK_MONOTONIC, &time); 13879f464c52Smaya 13887ec681f3Smrg if (bo->gem_handle == 0) { 13897ec681f3Smrg pb_slab_free(get_slabs(bufmgr, bo->size), &bo->slab.entry); 13907ec681f3Smrg } else { 13917ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 
13929f464c52Smaya 13937ec681f3Smrg if (p_atomic_dec_zero(&bo->refcount)) { 13947ec681f3Smrg bo_unreference_final(bo, time.tv_sec); 13957ec681f3Smrg cleanup_bo_cache(bufmgr, time.tv_sec); 13967ec681f3Smrg } 13979f464c52Smaya 13987ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 13997ec681f3Smrg } 14009f464c52Smaya } 14019f464c52Smaya} 14029f464c52Smaya 14039f464c52Smayastatic void 14049f464c52Smayabo_wait_with_stall_warning(struct pipe_debug_callback *dbg, 14059f464c52Smaya struct iris_bo *bo, 14069f464c52Smaya const char *action) 14079f464c52Smaya{ 14089f464c52Smaya bool busy = dbg && !bo->idle; 14099f464c52Smaya double elapsed = unlikely(busy) ? -get_time() : 0.0; 14109f464c52Smaya 14119f464c52Smaya iris_bo_wait_rendering(bo); 14129f464c52Smaya 14139f464c52Smaya if (unlikely(busy)) { 14149f464c52Smaya elapsed += get_time(); 14159f464c52Smaya if (elapsed > 1e-5) /* 0.01ms */ { 14169f464c52Smaya perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n", 14179f464c52Smaya action, bo->name, elapsed * 1000); 14189f464c52Smaya } 14199f464c52Smaya } 14209f464c52Smaya} 14219f464c52Smaya 14229f464c52Smayastatic void 14239f464c52Smayaprint_flags(unsigned flags) 14249f464c52Smaya{ 14259f464c52Smaya if (flags & MAP_READ) 14269f464c52Smaya DBG("READ "); 14279f464c52Smaya if (flags & MAP_WRITE) 14289f464c52Smaya DBG("WRITE "); 14299f464c52Smaya if (flags & MAP_ASYNC) 14309f464c52Smaya DBG("ASYNC "); 14319f464c52Smaya if (flags & MAP_PERSISTENT) 14329f464c52Smaya DBG("PERSISTENT "); 14339f464c52Smaya if (flags & MAP_COHERENT) 14349f464c52Smaya DBG("COHERENT "); 14359f464c52Smaya if (flags & MAP_RAW) 14369f464c52Smaya DBG("RAW "); 14379f464c52Smaya DBG("\n"); 14389f464c52Smaya} 14399f464c52Smaya 14409f464c52Smayastatic void * 14417ec681f3Smrgiris_bo_gem_mmap_legacy(struct pipe_debug_callback *dbg, struct iris_bo *bo) 14429f464c52Smaya{ 14439f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 14449f464c52Smaya 14457ec681f3Smrg assert(bufmgr->vram.size == 0); 
14467ec681f3Smrg assert(iris_bo_is_real(bo)); 14477ec681f3Smrg assert(bo->real.mmap_mode == IRIS_MMAP_WB || 14487ec681f3Smrg bo->real.mmap_mode == IRIS_MMAP_WC); 14499f464c52Smaya 14507ec681f3Smrg struct drm_i915_gem_mmap mmap_arg = { 14517ec681f3Smrg .handle = bo->gem_handle, 14527ec681f3Smrg .size = bo->size, 14537ec681f3Smrg .flags = bo->real.mmap_mode == IRIS_MMAP_WC ? I915_MMAP_WC : 0, 14547ec681f3Smrg }; 14559f464c52Smaya 14567ec681f3Smrg int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); 14577ec681f3Smrg if (ret != 0) { 14587ec681f3Smrg DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 14597ec681f3Smrg __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 14607ec681f3Smrg return NULL; 14619f464c52Smaya } 14627ec681f3Smrg void *map = (void *) (uintptr_t) mmap_arg.addr_ptr; 14639f464c52Smaya 14647ec681f3Smrg return map; 14659f464c52Smaya} 14669f464c52Smaya 14679f464c52Smayastatic void * 14687ec681f3Smrgiris_bo_gem_mmap_offset(struct pipe_debug_callback *dbg, struct iris_bo *bo) 14699f464c52Smaya{ 14709f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 14719f464c52Smaya 14727ec681f3Smrg assert(iris_bo_is_real(bo)); 14739f464c52Smaya 14747ec681f3Smrg struct drm_i915_gem_mmap_offset mmap_arg = { 14757ec681f3Smrg .handle = bo->gem_handle, 14767ec681f3Smrg }; 14779f464c52Smaya 14787ec681f3Smrg if (bufmgr->has_local_mem) { 14797ec681f3Smrg /* On discrete memory platforms, we cannot control the mmap caching mode 14807ec681f3Smrg * at mmap time. Instead, it's fixed when the object is created (this 14817ec681f3Smrg * is a limitation of TTM). 14827ec681f3Smrg * 14837ec681f3Smrg * On DG1, our only currently enabled discrete platform, there is no 14847ec681f3Smrg * control over what mode we get. For SMEM, we always get WB because 14857ec681f3Smrg * it's fast (probably what we want) and when the device views SMEM 14867ec681f3Smrg * across PCIe, it's always snooped. 
The only caching mode allowed by 14877ec681f3Smrg * DG1 hardware for LMEM is WC. 14889f464c52Smaya */ 14897ec681f3Smrg if (bo->real.local) 14907ec681f3Smrg assert(bo->real.mmap_mode == IRIS_MMAP_WC); 14917ec681f3Smrg else 14927ec681f3Smrg assert(bo->real.mmap_mode == IRIS_MMAP_WB); 14939f464c52Smaya 14947ec681f3Smrg mmap_arg.flags = I915_MMAP_OFFSET_FIXED; 14957ec681f3Smrg } else { 14967ec681f3Smrg /* Only integrated platforms get to select a mmap caching mode here */ 14977ec681f3Smrg static const uint32_t mmap_offset_for_mode[] = { 14987ec681f3Smrg [IRIS_MMAP_UC] = I915_MMAP_OFFSET_UC, 14997ec681f3Smrg [IRIS_MMAP_WC] = I915_MMAP_OFFSET_WC, 15007ec681f3Smrg [IRIS_MMAP_WB] = I915_MMAP_OFFSET_WB, 15017ec681f3Smrg }; 15027ec681f3Smrg assert(bo->real.mmap_mode != IRIS_MMAP_NONE); 15037ec681f3Smrg assert(bo->real.mmap_mode < ARRAY_SIZE(mmap_offset_for_mode)); 15047ec681f3Smrg mmap_arg.flags = mmap_offset_for_mode[bo->real.mmap_mode]; 15059f464c52Smaya } 15069f464c52Smaya 15077ec681f3Smrg /* Get the fake offset back */ 15087ec681f3Smrg int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg); 15097ec681f3Smrg if (ret != 0) { 15107ec681f3Smrg DBG("%s:%d: Error preparing buffer %d (%s): %s .\n", 15117ec681f3Smrg __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 15127ec681f3Smrg return NULL; 15139f464c52Smaya } 15149f464c52Smaya 15157ec681f3Smrg /* And map it */ 15167ec681f3Smrg void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED, 15177ec681f3Smrg bufmgr->fd, mmap_arg.offset); 15187ec681f3Smrg if (map == MAP_FAILED) { 15197ec681f3Smrg DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 15207ec681f3Smrg __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 15217ec681f3Smrg return NULL; 15227ec681f3Smrg } 15239f464c52Smaya 15247ec681f3Smrg return map; 15259f464c52Smaya} 15269f464c52Smaya 15279f464c52Smayavoid * 15289f464c52Smayairis_bo_map(struct pipe_debug_callback *dbg, 15299f464c52Smaya struct iris_bo *bo, unsigned 
flags) 15309f464c52Smaya{ 15317ec681f3Smrg struct iris_bufmgr *bufmgr = bo->bufmgr; 15327ec681f3Smrg void *map = NULL; 15339f464c52Smaya 15347ec681f3Smrg if (bo->gem_handle == 0) { 15357ec681f3Smrg struct iris_bo *real = iris_get_backing_bo(bo); 15367ec681f3Smrg uint64_t offset = bo->address - real->address; 15377ec681f3Smrg map = iris_bo_map(dbg, real, flags | MAP_ASYNC) + offset; 15387ec681f3Smrg } else { 15397ec681f3Smrg assert(bo->real.mmap_mode != IRIS_MMAP_NONE); 15407ec681f3Smrg if (bo->real.mmap_mode == IRIS_MMAP_NONE) 15417ec681f3Smrg return NULL; 15429f464c52Smaya 15437ec681f3Smrg if (!bo->real.map) { 15447ec681f3Smrg DBG("iris_bo_map: %d (%s)\n", bo->gem_handle, bo->name); 15457ec681f3Smrg map = bufmgr->has_mmap_offset ? iris_bo_gem_mmap_offset(dbg, bo) 15467ec681f3Smrg : iris_bo_gem_mmap_legacy(dbg, bo); 15477ec681f3Smrg if (!map) { 15487ec681f3Smrg return NULL; 15497ec681f3Smrg } 15509f464c52Smaya 15517ec681f3Smrg VG_DEFINED(map, bo->size); 15527ec681f3Smrg 15537ec681f3Smrg if (p_atomic_cmpxchg(&bo->real.map, NULL, map)) { 15547ec681f3Smrg VG_NOACCESS(map, bo->size); 15557ec681f3Smrg os_munmap(map, bo->size); 15567ec681f3Smrg } 15577ec681f3Smrg } 15587ec681f3Smrg assert(bo->real.map); 15597ec681f3Smrg map = bo->real.map; 15607ec681f3Smrg } 15617ec681f3Smrg 15627ec681f3Smrg DBG("iris_bo_map: %d (%s) -> %p\n", 15637ec681f3Smrg bo->gem_handle, bo->name, bo->real.map); 15647ec681f3Smrg print_flags(flags); 15657ec681f3Smrg 15667ec681f3Smrg if (!(flags & MAP_ASYNC)) { 15677ec681f3Smrg bo_wait_with_stall_warning(dbg, bo, "memory mapping"); 15689f464c52Smaya } 15699f464c52Smaya 15709f464c52Smaya return map; 15719f464c52Smaya} 15729f464c52Smaya 15739f464c52Smaya/** Waits for all GPU rendering with the object to have completed. */ 15749f464c52Smayavoid 15759f464c52Smayairis_bo_wait_rendering(struct iris_bo *bo) 15769f464c52Smaya{ 15779f464c52Smaya /* We require a kernel recent enough for WAIT_IOCTL support. 
15789f464c52Smaya * See intel_init_bufmgr() 15799f464c52Smaya */ 15809f464c52Smaya iris_bo_wait(bo, -1); 15819f464c52Smaya} 15829f464c52Smaya 15837ec681f3Smrgstatic int 15847ec681f3Smrgiris_bo_wait_gem(struct iris_bo *bo, int64_t timeout_ns) 15857ec681f3Smrg{ 15867ec681f3Smrg assert(iris_bo_is_real(bo)); 15877ec681f3Smrg 15887ec681f3Smrg struct iris_bufmgr *bufmgr = bo->bufmgr; 15897ec681f3Smrg struct drm_i915_gem_wait wait = { 15907ec681f3Smrg .bo_handle = bo->gem_handle, 15917ec681f3Smrg .timeout_ns = timeout_ns, 15927ec681f3Smrg }; 15937ec681f3Smrg 15947ec681f3Smrg int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait); 15957ec681f3Smrg if (ret != 0) 15967ec681f3Smrg return -errno; 15977ec681f3Smrg 15987ec681f3Smrg return 0; 15997ec681f3Smrg} 16007ec681f3Smrg 16019f464c52Smaya/** 16029f464c52Smaya * Waits on a BO for the given amount of time. 16039f464c52Smaya * 16049f464c52Smaya * @bo: buffer object to wait for 16059f464c52Smaya * @timeout_ns: amount of time to wait in nanoseconds. 16069f464c52Smaya * If value is less than 0, an infinite wait will occur. 16079f464c52Smaya * 16089f464c52Smaya * Returns 0 if the wait was successful ie. the last batch referencing the 16099f464c52Smaya * object has completed within the allotted time. Otherwise some negative return 16109f464c52Smaya * value describes the error. Of particular interest is -ETIME when the wait has 16119f464c52Smaya * failed to yield the desired result. 16129f464c52Smaya * 16139f464c52Smaya * Similar to iris_bo_wait_rendering except a timeout parameter allows 16149f464c52Smaya * the operation to give up after a certain amount of time. Another subtle 16159f464c52Smaya * difference is the internal locking semantics are different (this variant does 16169f464c52Smaya * not hold the lock for the duration of the wait). This makes the wait subject 16179f464c52Smaya * to a larger userspace race window. 
16189f464c52Smaya * 16199f464c52Smaya * The implementation shall wait until the object is no longer actively 16209f464c52Smaya * referenced within a batch buffer at the time of the call. The wait will 16219f464c52Smaya * not guarantee that the buffer is re-issued via another thread, or an flinked 16229f464c52Smaya * handle. Userspace must make sure this race does not occur if such precision 16239f464c52Smaya * is important. 16249f464c52Smaya * 16257ec681f3Smrg * Note that some kernels have broken the infinite wait for negative values 16269f464c52Smaya * promise, upgrade to latest stable kernels if this is the case. 16279f464c52Smaya */ 16289f464c52Smayaint 16299f464c52Smayairis_bo_wait(struct iris_bo *bo, int64_t timeout_ns) 16309f464c52Smaya{ 16317ec681f3Smrg int ret; 16329f464c52Smaya 16337ec681f3Smrg if (iris_bo_is_external(bo)) 16347ec681f3Smrg ret = iris_bo_wait_gem(bo, timeout_ns); 16357ec681f3Smrg else 16367ec681f3Smrg ret = iris_bo_wait_syncobj(bo, timeout_ns); 16379f464c52Smaya 16389f464c52Smaya if (ret != 0) 16399f464c52Smaya return -errno; 16409f464c52Smaya 16419f464c52Smaya bo->idle = true; 16429f464c52Smaya 16439f464c52Smaya return ret; 16449f464c52Smaya} 16459f464c52Smaya 16467ec681f3Smrgstatic void 16479f464c52Smayairis_bufmgr_destroy(struct iris_bufmgr *bufmgr) 16489f464c52Smaya{ 16497ec681f3Smrg /* Free aux-map buffers */ 16507ec681f3Smrg intel_aux_map_finish(bufmgr->aux_map_ctx); 16517ec681f3Smrg 16527ec681f3Smrg /* bufmgr will no longer try to free VMA entries in the aux-map */ 16537ec681f3Smrg bufmgr->aux_map_ctx = NULL; 16547ec681f3Smrg 16557ec681f3Smrg for (int i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 16567ec681f3Smrg if (bufmgr->bo_slabs[i].groups) 16577ec681f3Smrg pb_slabs_deinit(&bufmgr->bo_slabs[i]); 16587ec681f3Smrg } 16597ec681f3Smrg 16607ec681f3Smrg simple_mtx_destroy(&bufmgr->lock); 16617ec681f3Smrg simple_mtx_destroy(&bufmgr->bo_deps_lock); 16629f464c52Smaya 16639f464c52Smaya /* Free any cached buffer objects we were going to reuse */ 
16649f464c52Smaya for (int i = 0; i < bufmgr->num_buckets; i++) { 16659f464c52Smaya struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; 16669f464c52Smaya 16679f464c52Smaya list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { 16689f464c52Smaya list_del(&bo->head); 16699f464c52Smaya 16709f464c52Smaya bo_free(bo); 16719f464c52Smaya } 16729f464c52Smaya } 16739f464c52Smaya 16747ec681f3Smrg for (int i = 0; i < bufmgr->num_local_buckets; i++) { 16757ec681f3Smrg struct bo_cache_bucket *bucket = &bufmgr->local_cache_bucket[i]; 16767ec681f3Smrg 16777ec681f3Smrg list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { 16787ec681f3Smrg list_del(&bo->head); 16797ec681f3Smrg 16807ec681f3Smrg bo_free(bo); 16817ec681f3Smrg } 16827ec681f3Smrg } 16837ec681f3Smrg 16847ec681f3Smrg /* Close any buffer objects on the dead list. */ 16857ec681f3Smrg list_for_each_entry_safe(struct iris_bo, bo, &bufmgr->zombie_list, head) { 16867ec681f3Smrg list_del(&bo->head); 16877ec681f3Smrg bo_close(bo); 16887ec681f3Smrg } 16897ec681f3Smrg 16909f464c52Smaya _mesa_hash_table_destroy(bufmgr->name_table, NULL); 16919f464c52Smaya _mesa_hash_table_destroy(bufmgr->handle_table, NULL); 16929f464c52Smaya 16939f464c52Smaya for (int z = 0; z < IRIS_MEMZONE_COUNT; z++) { 16949f464c52Smaya if (z != IRIS_MEMZONE_BINDER) 16959f464c52Smaya util_vma_heap_finish(&bufmgr->vma_allocator[z]); 16969f464c52Smaya } 16979f464c52Smaya 16987ec681f3Smrg close(bufmgr->fd); 16997ec681f3Smrg 17009f464c52Smaya free(bufmgr); 17019f464c52Smaya} 17029f464c52Smaya 17037ec681f3Smrgint 17047ec681f3Smrgiris_gem_get_tiling(struct iris_bo *bo, uint32_t *tiling) 17059f464c52Smaya{ 17069f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 17079f464c52Smaya 17087ec681f3Smrg if (!bufmgr->has_tiling_uapi) { 17097ec681f3Smrg *tiling = I915_TILING_NONE; 17109f464c52Smaya return 0; 17117ec681f3Smrg } 17129f464c52Smaya 17137ec681f3Smrg struct drm_i915_gem_get_tiling ti = { .handle = bo->gem_handle }; 17147ec681f3Smrg 
int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &ti); 17159f464c52Smaya 17167ec681f3Smrg if (ret) { 17177ec681f3Smrg DBG("gem_get_tiling failed for BO %u: %s\n", 17187ec681f3Smrg bo->gem_handle, strerror(errno)); 17197ec681f3Smrg } 17209f464c52Smaya 17217ec681f3Smrg *tiling = ti.tiling_mode; 17227ec681f3Smrg 17237ec681f3Smrg return ret; 17249f464c52Smaya} 17259f464c52Smaya 17269f464c52Smayaint 17277ec681f3Smrgiris_gem_set_tiling(struct iris_bo *bo, const struct isl_surf *surf) 17289f464c52Smaya{ 17297ec681f3Smrg struct iris_bufmgr *bufmgr = bo->bufmgr; 17307ec681f3Smrg uint32_t tiling_mode = isl_tiling_to_i915_tiling(surf->tiling); 17317ec681f3Smrg int ret; 17327ec681f3Smrg 17337ec681f3Smrg /* If we can't do map_gtt, the set/get_tiling API isn't useful. And it's 17347ec681f3Smrg * actually not supported by the kernel in those cases. 17357ec681f3Smrg */ 17367ec681f3Smrg if (!bufmgr->has_tiling_uapi) 17377ec681f3Smrg return 0; 17387ec681f3Smrg 17397ec681f3Smrg /* GEM_SET_TILING is slightly broken and overwrites the input on the 17407ec681f3Smrg * error path, so we have to open code intel_ioctl(). 
17417ec681f3Smrg */ 17427ec681f3Smrg do { 17437ec681f3Smrg struct drm_i915_gem_set_tiling set_tiling = { 17447ec681f3Smrg .handle = bo->gem_handle, 17457ec681f3Smrg .tiling_mode = tiling_mode, 17467ec681f3Smrg .stride = surf->row_pitch_B, 17477ec681f3Smrg }; 17487ec681f3Smrg ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); 17497ec681f3Smrg } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 17507ec681f3Smrg 17517ec681f3Smrg if (ret) { 17527ec681f3Smrg DBG("gem_set_tiling failed for BO %u: %s\n", 17537ec681f3Smrg bo->gem_handle, strerror(errno)); 17547ec681f3Smrg } 17557ec681f3Smrg 17567ec681f3Smrg return ret; 17579f464c52Smaya} 17589f464c52Smaya 17599f464c52Smayastruct iris_bo * 17609f464c52Smayairis_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd) 17619f464c52Smaya{ 17629f464c52Smaya uint32_t handle; 17639f464c52Smaya struct iris_bo *bo; 17649f464c52Smaya 17657ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 17669f464c52Smaya int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle); 17679f464c52Smaya if (ret) { 17689f464c52Smaya DBG("import_dmabuf: failed to obtain handle from fd: %s\n", 17699f464c52Smaya strerror(errno)); 17707ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 17719f464c52Smaya return NULL; 17729f464c52Smaya } 17739f464c52Smaya 17749f464c52Smaya /* 17759f464c52Smaya * See if the kernel has already returned this buffer to us. Just as 17769f464c52Smaya * for named buffers, we must not create two bo's pointing at the same 17779f464c52Smaya * kernel object 17789f464c52Smaya */ 17797ec681f3Smrg bo = find_and_ref_external_bo(bufmgr->handle_table, handle); 17807ec681f3Smrg if (bo) 17819f464c52Smaya goto out; 17829f464c52Smaya 17839f464c52Smaya bo = bo_calloc(); 17849f464c52Smaya if (!bo) 17859f464c52Smaya goto out; 17869f464c52Smaya 17879f464c52Smaya p_atomic_set(&bo->refcount, 1); 17889f464c52Smaya 17899f464c52Smaya /* Determine size of bo. 
The fd-to-handle ioctl really should 17909f464c52Smaya * return the size, but it doesn't. If we have kernel 3.12 or 17919f464c52Smaya * later, we can lseek on the prime fd to get the size. Older 17929f464c52Smaya * kernels will just fail, in which case we fall back to the 17939f464c52Smaya * provided (estimated or guess size). */ 17949f464c52Smaya ret = lseek(prime_fd, 0, SEEK_END); 17959f464c52Smaya if (ret != -1) 17969f464c52Smaya bo->size = ret; 17979f464c52Smaya 17989f464c52Smaya bo->bufmgr = bufmgr; 17999f464c52Smaya bo->name = "prime"; 18007ec681f3Smrg bo->real.reusable = false; 18017ec681f3Smrg bo->real.imported = true; 18027ec681f3Smrg bo->real.mmap_mode = IRIS_MMAP_NONE; 18037ec681f3Smrg bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; 18049f464c52Smaya 18057ec681f3Smrg /* From the Bspec, Memory Compression - Gfx12: 18067ec681f3Smrg * 18077ec681f3Smrg * The base address for the surface has to be 64K page aligned and the 18087ec681f3Smrg * surface is expected to be padded in the virtual domain to be 4 4K 18097ec681f3Smrg * pages. 18107ec681f3Smrg * 18117ec681f3Smrg * The dmabuf may contain a compressed surface. Align the BO to 64KB just 18127ec681f3Smrg * in case. We always align to 64KB even on platforms where we don't need 18137ec681f3Smrg * to, because it's a fairly reasonable thing to do anyway. 18147ec681f3Smrg */ 18157ec681f3Smrg bo->address = 18167ec681f3Smrg vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024); 18179f464c52Smaya 18187ec681f3Smrg bo->gem_handle = handle; 18197ec681f3Smrg _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); 18209f464c52Smaya 18219f464c52Smayaout: 18227ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 18239f464c52Smaya return bo; 18249f464c52Smaya} 18259f464c52Smaya 18269f464c52Smayastatic void 18277ec681f3Smrgiris_bo_mark_exported_locked(struct iris_bo *bo) 18289f464c52Smaya{ 18297ec681f3Smrg /* We cannot export suballocated BOs. 
*/ 18307ec681f3Smrg assert(iris_bo_is_real(bo)); 18317ec681f3Smrg 18327ec681f3Smrg if (!iris_bo_is_external(bo)) 18339f464c52Smaya _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo); 18347ec681f3Smrg 18357ec681f3Smrg if (!bo->real.exported) { 18367ec681f3Smrg /* If a BO is going to be used externally, it could be sent to the 18377ec681f3Smrg * display HW. So make sure our CPU mappings don't assume cache 18387ec681f3Smrg * coherency since display is outside that cache. 18397ec681f3Smrg */ 18407ec681f3Smrg bo->real.exported = true; 18417ec681f3Smrg bo->real.reusable = false; 18429f464c52Smaya } 18439f464c52Smaya} 18449f464c52Smaya 18457ec681f3Smrgvoid 18467ec681f3Smrgiris_bo_mark_exported(struct iris_bo *bo) 18479f464c52Smaya{ 18489f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 18499f464c52Smaya 18507ec681f3Smrg /* We cannot export suballocated BOs. */ 18517ec681f3Smrg assert(iris_bo_is_real(bo)); 18527ec681f3Smrg 18537ec681f3Smrg if (bo->real.exported) { 18547ec681f3Smrg assert(!bo->real.reusable); 18559f464c52Smaya return; 18567ec681f3Smrg } 18579f464c52Smaya 18587ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 18597ec681f3Smrg iris_bo_mark_exported_locked(bo); 18607ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 18619f464c52Smaya} 18629f464c52Smaya 18639f464c52Smayaint 18649f464c52Smayairis_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd) 18659f464c52Smaya{ 18669f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 18679f464c52Smaya 18687ec681f3Smrg /* We cannot export suballocated BOs. 
*/ 18697ec681f3Smrg assert(iris_bo_is_real(bo)); 18707ec681f3Smrg 18717ec681f3Smrg iris_bo_mark_exported(bo); 18729f464c52Smaya 18739f464c52Smaya if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle, 18747ec681f3Smrg DRM_CLOEXEC | DRM_RDWR, prime_fd) != 0) 18759f464c52Smaya return -errno; 18769f464c52Smaya 18779f464c52Smaya return 0; 18789f464c52Smaya} 18799f464c52Smaya 18809f464c52Smayauint32_t 18819f464c52Smayairis_bo_export_gem_handle(struct iris_bo *bo) 18829f464c52Smaya{ 18837ec681f3Smrg /* We cannot export suballocated BOs. */ 18847ec681f3Smrg assert(iris_bo_is_real(bo)); 18857ec681f3Smrg 18867ec681f3Smrg iris_bo_mark_exported(bo); 18879f464c52Smaya 18889f464c52Smaya return bo->gem_handle; 18899f464c52Smaya} 18909f464c52Smaya 18919f464c52Smayaint 18929f464c52Smayairis_bo_flink(struct iris_bo *bo, uint32_t *name) 18939f464c52Smaya{ 18949f464c52Smaya struct iris_bufmgr *bufmgr = bo->bufmgr; 18959f464c52Smaya 18967ec681f3Smrg /* We cannot export suballocated BOs. */ 18977ec681f3Smrg assert(iris_bo_is_real(bo)); 18987ec681f3Smrg 18997ec681f3Smrg if (!bo->real.global_name) { 19009f464c52Smaya struct drm_gem_flink flink = { .handle = bo->gem_handle }; 19019f464c52Smaya 19027ec681f3Smrg if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink)) 19039f464c52Smaya return -errno; 19049f464c52Smaya 19057ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 19067ec681f3Smrg if (!bo->real.global_name) { 19077ec681f3Smrg iris_bo_mark_exported_locked(bo); 19087ec681f3Smrg bo->real.global_name = flink.name; 19097ec681f3Smrg _mesa_hash_table_insert(bufmgr->name_table, &bo->real.global_name, bo); 19109f464c52Smaya } 19117ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 19127ec681f3Smrg } 19137ec681f3Smrg 19147ec681f3Smrg *name = bo->real.global_name; 19157ec681f3Smrg return 0; 19167ec681f3Smrg} 19177ec681f3Smrg 19187ec681f3Smrgint 19197ec681f3Smrgiris_bo_export_gem_handle_for_device(struct iris_bo *bo, int drm_fd, 19207ec681f3Smrg uint32_t *out_handle) 19217ec681f3Smrg{ 19227ec681f3Smrg /* We 
cannot export suballocated BOs. */ 19237ec681f3Smrg assert(iris_bo_is_real(bo)); 19247ec681f3Smrg 19257ec681f3Smrg /* Only add the new GEM handle to the list of export if it belongs to a 19267ec681f3Smrg * different GEM device. Otherwise we might close the same buffer multiple 19277ec681f3Smrg * times. 19287ec681f3Smrg */ 19297ec681f3Smrg struct iris_bufmgr *bufmgr = bo->bufmgr; 19307ec681f3Smrg int ret = os_same_file_description(drm_fd, bufmgr->fd); 19317ec681f3Smrg WARN_ONCE(ret < 0, 19327ec681f3Smrg "Kernel has no file descriptor comparison support: %s\n", 19337ec681f3Smrg strerror(errno)); 19347ec681f3Smrg if (ret == 0) { 19357ec681f3Smrg *out_handle = iris_bo_export_gem_handle(bo); 19367ec681f3Smrg return 0; 19377ec681f3Smrg } 19387ec681f3Smrg 19397ec681f3Smrg struct bo_export *export = calloc(1, sizeof(*export)); 19407ec681f3Smrg if (!export) 19417ec681f3Smrg return -ENOMEM; 19427ec681f3Smrg 19437ec681f3Smrg export->drm_fd = drm_fd; 19447ec681f3Smrg 19457ec681f3Smrg int dmabuf_fd = -1; 19467ec681f3Smrg int err = iris_bo_export_dmabuf(bo, &dmabuf_fd); 19477ec681f3Smrg if (err) { 19487ec681f3Smrg free(export); 19497ec681f3Smrg return err; 19507ec681f3Smrg } 19519f464c52Smaya 19527ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 19537ec681f3Smrg err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle); 19547ec681f3Smrg close(dmabuf_fd); 19557ec681f3Smrg if (err) { 19567ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 19577ec681f3Smrg free(export); 19587ec681f3Smrg return err; 19599f464c52Smaya } 19609f464c52Smaya 19617ec681f3Smrg bool found = false; 19627ec681f3Smrg list_for_each_entry(struct bo_export, iter, &bo->real.exports, link) { 19637ec681f3Smrg if (iter->drm_fd != drm_fd) 19647ec681f3Smrg continue; 19657ec681f3Smrg /* Here we assume that for a given DRM fd, we'll always get back the 19667ec681f3Smrg * same GEM handle for a given buffer. 
19677ec681f3Smrg */ 19687ec681f3Smrg assert(iter->gem_handle == export->gem_handle); 19697ec681f3Smrg free(export); 19707ec681f3Smrg export = iter; 19717ec681f3Smrg found = true; 19727ec681f3Smrg break; 19737ec681f3Smrg } 19747ec681f3Smrg if (!found) 19757ec681f3Smrg list_addtail(&export->link, &bo->real.exports); 19767ec681f3Smrg 19777ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 19787ec681f3Smrg 19797ec681f3Smrg *out_handle = export->gem_handle; 19807ec681f3Smrg 19819f464c52Smaya return 0; 19829f464c52Smaya} 19839f464c52Smaya 19849f464c52Smayastatic void 19857ec681f3Smrgadd_bucket(struct iris_bufmgr *bufmgr, int size, bool local) 19869f464c52Smaya{ 19877ec681f3Smrg unsigned int i = local ? 19887ec681f3Smrg bufmgr->num_local_buckets : bufmgr->num_buckets; 19897ec681f3Smrg 19907ec681f3Smrg struct bo_cache_bucket *buckets = local ? 19917ec681f3Smrg bufmgr->local_cache_bucket : bufmgr->cache_bucket; 19929f464c52Smaya 19939f464c52Smaya assert(i < ARRAY_SIZE(bufmgr->cache_bucket)); 19949f464c52Smaya 19957ec681f3Smrg list_inithead(&buckets[i].head); 19967ec681f3Smrg buckets[i].size = size; 19977ec681f3Smrg 19987ec681f3Smrg if (local) 19997ec681f3Smrg bufmgr->num_local_buckets++; 20007ec681f3Smrg else 20017ec681f3Smrg bufmgr->num_buckets++; 20029f464c52Smaya 20037ec681f3Smrg assert(bucket_for_size(bufmgr, size, local) == &buckets[i]); 20047ec681f3Smrg assert(bucket_for_size(bufmgr, size - 2048, local) == &buckets[i]); 20057ec681f3Smrg assert(bucket_for_size(bufmgr, size + 1, local) != &buckets[i]); 20069f464c52Smaya} 20079f464c52Smaya 20089f464c52Smayastatic void 20097ec681f3Smrginit_cache_buckets(struct iris_bufmgr *bufmgr, bool local) 20109f464c52Smaya{ 20119f464c52Smaya uint64_t size, cache_max_size = 64 * 1024 * 1024; 20129f464c52Smaya 20139f464c52Smaya /* OK, so power of two buckets was too wasteful of memory. 20149f464c52Smaya * Give 3 other sizes between each power of two, to hopefully 20159f464c52Smaya * cover things accurately enough. 
(The alternative is 20169f464c52Smaya * probably to just go for exact matching of sizes, and assume 20179f464c52Smaya * that for things like composited window resize the tiled 20189f464c52Smaya * width/height alignment and rounding of sizes to pages will 20199f464c52Smaya * get us useful cache hit rates anyway) 20209f464c52Smaya */ 20217ec681f3Smrg add_bucket(bufmgr, PAGE_SIZE, local); 20227ec681f3Smrg add_bucket(bufmgr, PAGE_SIZE * 2, local); 20237ec681f3Smrg add_bucket(bufmgr, PAGE_SIZE * 3, local); 20249f464c52Smaya 20259f464c52Smaya /* Initialize the linked lists for BO reuse cache. */ 20269f464c52Smaya for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) { 20277ec681f3Smrg add_bucket(bufmgr, size, local); 20289f464c52Smaya 20297ec681f3Smrg add_bucket(bufmgr, size + size * 1 / 4, local); 20307ec681f3Smrg add_bucket(bufmgr, size + size * 2 / 4, local); 20317ec681f3Smrg add_bucket(bufmgr, size + size * 3 / 4, local); 20329f464c52Smaya } 20339f464c52Smaya} 20349f464c52Smaya 20359f464c52Smayauint32_t 20369f464c52Smayairis_create_hw_context(struct iris_bufmgr *bufmgr) 20379f464c52Smaya{ 20389f464c52Smaya struct drm_i915_gem_context_create create = { }; 20397ec681f3Smrg int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 20409f464c52Smaya if (ret != 0) { 20419f464c52Smaya DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno)); 20429f464c52Smaya return 0; 20439f464c52Smaya } 20449f464c52Smaya 20457ec681f3Smrg /* Upon declaring a GPU hang, the kernel will zap the guilty context 20467ec681f3Smrg * back to the default logical HW state and attempt to continue on to 20477ec681f3Smrg * our next submitted batchbuffer. However, our render batches assume 20487ec681f3Smrg * the previous GPU state is preserved, and only emit commands needed 20497ec681f3Smrg * to incrementally change that state. In particular, we inherit the 20507ec681f3Smrg * STATE_BASE_ADDRESS and PIPELINE_SELECT settings, which are critical. 
20517ec681f3Smrg * With default base addresses, our next batches will almost certainly 20527ec681f3Smrg * cause more GPU hangs, leading to repeated hangs until we're banned 20537ec681f3Smrg * or the machine is dead. 20547ec681f3Smrg * 20557ec681f3Smrg * Here we tell the kernel not to attempt to recover our context but 20567ec681f3Smrg * immediately (on the next batchbuffer submission) report that the 20577ec681f3Smrg * context is lost, and we will do the recovery ourselves. Ideally, 20587ec681f3Smrg * we'll have two lost batches instead of a continual stream of hangs. 20597ec681f3Smrg */ 20607ec681f3Smrg struct drm_i915_gem_context_param p = { 20617ec681f3Smrg .ctx_id = create.ctx_id, 20627ec681f3Smrg .param = I915_CONTEXT_PARAM_RECOVERABLE, 20637ec681f3Smrg .value = false, 20647ec681f3Smrg }; 20657ec681f3Smrg intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p); 20667ec681f3Smrg 20679f464c52Smaya return create.ctx_id; 20689f464c52Smaya} 20699f464c52Smaya 20707ec681f3Smrgstatic int 20717ec681f3Smrgiris_hw_context_get_priority(struct iris_bufmgr *bufmgr, uint32_t ctx_id) 20727ec681f3Smrg{ 20737ec681f3Smrg struct drm_i915_gem_context_param p = { 20747ec681f3Smrg .ctx_id = ctx_id, 20757ec681f3Smrg .param = I915_CONTEXT_PARAM_PRIORITY, 20767ec681f3Smrg }; 20777ec681f3Smrg intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p); 20787ec681f3Smrg return p.value; /* on error, return 0 i.e. 
default priority */ 20797ec681f3Smrg} 20807ec681f3Smrg 20819f464c52Smayaint 20829f464c52Smayairis_hw_context_set_priority(struct iris_bufmgr *bufmgr, 20839f464c52Smaya uint32_t ctx_id, 20849f464c52Smaya int priority) 20859f464c52Smaya{ 20869f464c52Smaya struct drm_i915_gem_context_param p = { 20879f464c52Smaya .ctx_id = ctx_id, 20889f464c52Smaya .param = I915_CONTEXT_PARAM_PRIORITY, 20899f464c52Smaya .value = priority, 20909f464c52Smaya }; 20919f464c52Smaya int err; 20929f464c52Smaya 20939f464c52Smaya err = 0; 20947ec681f3Smrg if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p)) 20959f464c52Smaya err = -errno; 20969f464c52Smaya 20979f464c52Smaya return err; 20989f464c52Smaya} 20999f464c52Smaya 21007ec681f3Smrguint32_t 21017ec681f3Smrgiris_clone_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id) 21027ec681f3Smrg{ 21037ec681f3Smrg uint32_t new_ctx = iris_create_hw_context(bufmgr); 21047ec681f3Smrg 21057ec681f3Smrg if (new_ctx) { 21067ec681f3Smrg int priority = iris_hw_context_get_priority(bufmgr, ctx_id); 21077ec681f3Smrg iris_hw_context_set_priority(bufmgr, new_ctx, priority); 21087ec681f3Smrg } 21097ec681f3Smrg 21107ec681f3Smrg return new_ctx; 21117ec681f3Smrg} 21127ec681f3Smrg 21139f464c52Smayavoid 21149f464c52Smayairis_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id) 21159f464c52Smaya{ 21169f464c52Smaya struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id }; 21179f464c52Smaya 21189f464c52Smaya if (ctx_id != 0 && 21197ec681f3Smrg intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) { 21209f464c52Smaya fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 21219f464c52Smaya strerror(errno)); 21229f464c52Smaya } 21239f464c52Smaya} 21249f464c52Smaya 21259f464c52Smayaint 21269f464c52Smayairis_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *result) 21279f464c52Smaya{ 21289f464c52Smaya struct drm_i915_reg_read reg_read = { .offset = offset }; 21297ec681f3Smrg int ret = 
intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, ®_read); 21309f464c52Smaya 21319f464c52Smaya *result = reg_read.val; 21329f464c52Smaya return ret; 21339f464c52Smaya} 21349f464c52Smaya 21359f464c52Smayastatic uint64_t 21369f464c52Smayairis_gtt_size(int fd) 21379f464c52Smaya{ 21389f464c52Smaya /* We use the default (already allocated) context to determine 21399f464c52Smaya * the default configuration of the virtual address space. 21409f464c52Smaya */ 21419f464c52Smaya struct drm_i915_gem_context_param p = { 21429f464c52Smaya .param = I915_CONTEXT_PARAM_GTT_SIZE, 21439f464c52Smaya }; 21447ec681f3Smrg if (!intel_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p)) 21459f464c52Smaya return p.value; 21469f464c52Smaya 21479f464c52Smaya return 0; 21489f464c52Smaya} 21499f464c52Smaya 21507ec681f3Smrgstatic struct intel_buffer * 21517ec681f3Smrgintel_aux_map_buffer_alloc(void *driver_ctx, uint32_t size) 21527ec681f3Smrg{ 21537ec681f3Smrg struct intel_buffer *buf = malloc(sizeof(struct intel_buffer)); 21547ec681f3Smrg if (!buf) 21557ec681f3Smrg return NULL; 21567ec681f3Smrg 21577ec681f3Smrg struct iris_bufmgr *bufmgr = (struct iris_bufmgr *)driver_ctx; 21587ec681f3Smrg 21597ec681f3Smrg bool local = bufmgr->vram.size > 0; 21607ec681f3Smrg unsigned int page_size = getpagesize(); 21617ec681f3Smrg size = MAX2(ALIGN(size, page_size), page_size); 21627ec681f3Smrg 21637ec681f3Smrg struct iris_bo *bo = alloc_fresh_bo(bufmgr, size, local); 21647ec681f3Smrg 21657ec681f3Smrg simple_mtx_lock(&bufmgr->lock); 21667ec681f3Smrg bo->address = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 64 * 1024); 21677ec681f3Smrg assert(bo->address != 0ull); 21687ec681f3Smrg simple_mtx_unlock(&bufmgr->lock); 21697ec681f3Smrg 21707ec681f3Smrg bo->name = "aux-map"; 21717ec681f3Smrg p_atomic_set(&bo->refcount, 1); 21727ec681f3Smrg bo->index = -1; 21737ec681f3Smrg bo->real.kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED | 21747ec681f3Smrg EXEC_OBJECT_CAPTURE; 21757ec681f3Smrg 
bo->real.mmap_mode = local ? IRIS_MMAP_WC : IRIS_MMAP_WB; 21767ec681f3Smrg 21777ec681f3Smrg buf->driver_bo = bo; 21787ec681f3Smrg buf->gpu = bo->address; 21797ec681f3Smrg buf->gpu_end = buf->gpu + bo->size; 21807ec681f3Smrg buf->map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); 21817ec681f3Smrg return buf; 21827ec681f3Smrg} 21837ec681f3Smrg 21847ec681f3Smrgstatic void 21857ec681f3Smrgintel_aux_map_buffer_free(void *driver_ctx, struct intel_buffer *buffer) 21867ec681f3Smrg{ 21877ec681f3Smrg iris_bo_unreference((struct iris_bo*)buffer->driver_bo); 21887ec681f3Smrg free(buffer); 21897ec681f3Smrg} 21907ec681f3Smrg 21917ec681f3Smrgstatic struct intel_mapped_pinned_buffer_alloc aux_map_allocator = { 21927ec681f3Smrg .alloc = intel_aux_map_buffer_alloc, 21937ec681f3Smrg .free = intel_aux_map_buffer_free, 21947ec681f3Smrg}; 21957ec681f3Smrg 21967ec681f3Smrgstatic int 21977ec681f3Smrggem_param(int fd, int name) 21987ec681f3Smrg{ 21997ec681f3Smrg int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */ 22007ec681f3Smrg 22017ec681f3Smrg struct drm_i915_getparam gp = { .param = name, .value = &v }; 22027ec681f3Smrg if (intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) 22037ec681f3Smrg return -1; 22047ec681f3Smrg 22057ec681f3Smrg return v; 22067ec681f3Smrg} 22077ec681f3Smrg 22087ec681f3Smrgstatic bool 22097ec681f3Smrgiris_bufmgr_query_meminfo(struct iris_bufmgr *bufmgr) 22107ec681f3Smrg{ 22117ec681f3Smrg struct drm_i915_query_memory_regions *meminfo = 22127ec681f3Smrg intel_i915_query_alloc(bufmgr->fd, DRM_I915_QUERY_MEMORY_REGIONS); 22137ec681f3Smrg if (meminfo == NULL) 22147ec681f3Smrg return false; 22157ec681f3Smrg 22167ec681f3Smrg for (int i = 0; i < meminfo->num_regions; i++) { 22177ec681f3Smrg const struct drm_i915_memory_region_info *mem = &meminfo->regions[i]; 22187ec681f3Smrg switch (mem->region.memory_class) { 22197ec681f3Smrg case I915_MEMORY_CLASS_SYSTEM: 22207ec681f3Smrg bufmgr->sys.region = mem->region; 22217ec681f3Smrg bufmgr->sys.size = 
mem->probed_size; 22227ec681f3Smrg break; 22237ec681f3Smrg case I915_MEMORY_CLASS_DEVICE: 22247ec681f3Smrg bufmgr->vram.region = mem->region; 22257ec681f3Smrg bufmgr->vram.size = mem->probed_size; 22267ec681f3Smrg break; 22277ec681f3Smrg default: 22287ec681f3Smrg break; 22297ec681f3Smrg } 22307ec681f3Smrg } 22317ec681f3Smrg 22327ec681f3Smrg free(meminfo); 22337ec681f3Smrg 22347ec681f3Smrg return true; 22357ec681f3Smrg} 22367ec681f3Smrg 22379f464c52Smaya/** 22389f464c52Smaya * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 22399f464c52Smaya * and manage map buffer objections. 22409f464c52Smaya * 22419f464c52Smaya * \param fd File descriptor of the opened DRM device. 22429f464c52Smaya */ 22437ec681f3Smrgstatic struct iris_bufmgr * 22447ec681f3Smrgiris_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse) 22459f464c52Smaya{ 22469f464c52Smaya uint64_t gtt_size = iris_gtt_size(fd); 22479f464c52Smaya if (gtt_size <= IRIS_MEMZONE_OTHER_START) 22489f464c52Smaya return NULL; 22499f464c52Smaya 22509f464c52Smaya struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr)); 22519f464c52Smaya if (bufmgr == NULL) 22529f464c52Smaya return NULL; 22539f464c52Smaya 22549f464c52Smaya /* Handles to buffer objects belong to the device fd and are not 22559f464c52Smaya * reference counted by the kernel. If the same fd is used by 22569f464c52Smaya * multiple parties (threads sharing the same screen bufmgr, or 22579f464c52Smaya * even worse the same device fd passed to multiple libraries) 22589f464c52Smaya * ownership of those handles is shared by those independent parties. 22599f464c52Smaya * 22609f464c52Smaya * Don't do this! Ensure that each library/bufmgr has its own device 22619f464c52Smaya * fd so that its namespace does not clash with another. 
22629f464c52Smaya */ 22637ec681f3Smrg bufmgr->fd = os_dupfd_cloexec(fd); 22649f464c52Smaya 22657ec681f3Smrg p_atomic_set(&bufmgr->refcount, 1); 22667ec681f3Smrg 22677ec681f3Smrg simple_mtx_init(&bufmgr->lock, mtx_plain); 22687ec681f3Smrg simple_mtx_init(&bufmgr->bo_deps_lock, mtx_plain); 22697ec681f3Smrg 22707ec681f3Smrg list_inithead(&bufmgr->zombie_list); 22719f464c52Smaya 22729f464c52Smaya bufmgr->has_llc = devinfo->has_llc; 22737ec681f3Smrg bufmgr->has_local_mem = devinfo->has_local_mem; 22747ec681f3Smrg bufmgr->has_tiling_uapi = devinfo->has_tiling_uapi; 22757ec681f3Smrg bufmgr->bo_reuse = bo_reuse; 22767ec681f3Smrg bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4; 22777ec681f3Smrg bufmgr->has_userptr_probe = 22787ec681f3Smrg gem_param(fd, I915_PARAM_HAS_USERPTR_PROBE) >= 1; 22797ec681f3Smrg iris_bufmgr_query_meminfo(bufmgr); 22809f464c52Smaya 22819f464c52Smaya STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull); 22829f464c52Smaya const uint64_t _4GB = 1ull << 32; 22837ec681f3Smrg const uint64_t _2GB = 1ul << 31; 22849f464c52Smaya 22859f464c52Smaya /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */ 22869f464c52Smaya const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE; 22879f464c52Smaya 22889f464c52Smaya util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER], 22899f464c52Smaya PAGE_SIZE, _4GB_minus_1 - PAGE_SIZE); 22907ec681f3Smrg util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_BINDLESS], 22917ec681f3Smrg IRIS_MEMZONE_BINDLESS_START, IRIS_BINDLESS_SIZE); 22929f464c52Smaya util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE], 22939f464c52Smaya IRIS_MEMZONE_SURFACE_START, 22947ec681f3Smrg _4GB_minus_1 - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE - 22957ec681f3Smrg IRIS_BINDLESS_SIZE); 22967ec681f3Smrg /* TODO: Why does limiting to 2GB help some state items on gfx12? 
22977ec681f3Smrg * - CC Viewport Pointer 22987ec681f3Smrg * - Blend State Pointer 22997ec681f3Smrg * - Color Calc State Pointer 23007ec681f3Smrg */ 23017ec681f3Smrg const uint64_t dynamic_pool_size = 23027ec681f3Smrg (devinfo->ver >= 12 ? _2GB : _4GB_minus_1) - IRIS_BORDER_COLOR_POOL_SIZE; 23039f464c52Smaya util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC], 23049f464c52Smaya IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE, 23057ec681f3Smrg dynamic_pool_size); 23069f464c52Smaya 23079f464c52Smaya /* Leave the last 4GB out of the high vma range, so that no state 23089f464c52Smaya * base address + size can overflow 48 bits. 23099f464c52Smaya */ 23109f464c52Smaya util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_OTHER], 23119f464c52Smaya IRIS_MEMZONE_OTHER_START, 23129f464c52Smaya (gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START); 23139f464c52Smaya 23147ec681f3Smrg init_cache_buckets(bufmgr, false); 23157ec681f3Smrg init_cache_buckets(bufmgr, true); 23167ec681f3Smrg 23177ec681f3Smrg unsigned min_slab_order = 8; /* 256 bytes */ 23187ec681f3Smrg unsigned max_slab_order = 20; /* 1 MB (slab size = 2 MB) */ 23197ec681f3Smrg unsigned num_slab_orders_per_allocator = 23207ec681f3Smrg (max_slab_order - min_slab_order) / NUM_SLAB_ALLOCATORS; 23217ec681f3Smrg 23227ec681f3Smrg /* Divide the size order range among slab managers. 
*/ 23237ec681f3Smrg for (unsigned i = 0; i < NUM_SLAB_ALLOCATORS; i++) { 23247ec681f3Smrg unsigned min_order = min_slab_order; 23257ec681f3Smrg unsigned max_order = 23267ec681f3Smrg MIN2(min_order + num_slab_orders_per_allocator, max_slab_order); 23277ec681f3Smrg 23287ec681f3Smrg if (!pb_slabs_init(&bufmgr->bo_slabs[i], min_order, max_order, 23297ec681f3Smrg IRIS_HEAP_MAX, true, bufmgr, 23307ec681f3Smrg iris_can_reclaim_slab, 23317ec681f3Smrg iris_slab_alloc, 23327ec681f3Smrg (void *) iris_slab_free)) { 23337ec681f3Smrg free(bufmgr); 23347ec681f3Smrg return NULL; 23357ec681f3Smrg } 23367ec681f3Smrg min_slab_order = max_order + 1; 23377ec681f3Smrg } 23389f464c52Smaya 23399f464c52Smaya bufmgr->name_table = 23407ec681f3Smrg _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal); 23419f464c52Smaya bufmgr->handle_table = 23427ec681f3Smrg _mesa_hash_table_create(NULL, _mesa_hash_uint, _mesa_key_uint_equal); 23437ec681f3Smrg 23447ec681f3Smrg bufmgr->vma_min_align = devinfo->has_local_mem ? 
64 * 1024 : PAGE_SIZE; 23457ec681f3Smrg 23467ec681f3Smrg if (devinfo->has_aux_map) { 23477ec681f3Smrg bufmgr->aux_map_ctx = intel_aux_map_init(bufmgr, &aux_map_allocator, 23487ec681f3Smrg devinfo); 23497ec681f3Smrg assert(bufmgr->aux_map_ctx); 23507ec681f3Smrg } 23517ec681f3Smrg 23527ec681f3Smrg return bufmgr; 23537ec681f3Smrg} 23547ec681f3Smrg 23557ec681f3Smrgstatic struct iris_bufmgr * 23567ec681f3Smrgiris_bufmgr_ref(struct iris_bufmgr *bufmgr) 23577ec681f3Smrg{ 23587ec681f3Smrg p_atomic_inc(&bufmgr->refcount); 23597ec681f3Smrg return bufmgr; 23607ec681f3Smrg} 23617ec681f3Smrg 23627ec681f3Smrgvoid 23637ec681f3Smrgiris_bufmgr_unref(struct iris_bufmgr *bufmgr) 23647ec681f3Smrg{ 23657ec681f3Smrg simple_mtx_lock(&global_bufmgr_list_mutex); 23667ec681f3Smrg if (p_atomic_dec_zero(&bufmgr->refcount)) { 23677ec681f3Smrg list_del(&bufmgr->link); 23687ec681f3Smrg iris_bufmgr_destroy(bufmgr); 23697ec681f3Smrg } 23707ec681f3Smrg simple_mtx_unlock(&global_bufmgr_list_mutex); 23717ec681f3Smrg} 23727ec681f3Smrg 23737ec681f3Smrg/** Returns a new unique id, to be used by screens. */ 23747ec681f3Smrgint 23757ec681f3Smrgiris_bufmgr_create_screen_id(struct iris_bufmgr *bufmgr) 23767ec681f3Smrg{ 23777ec681f3Smrg return p_atomic_inc_return(&bufmgr->next_screen_id) - 1; 23787ec681f3Smrg} 23797ec681f3Smrg 23807ec681f3Smrg/** 23817ec681f3Smrg * Gets an already existing GEM buffer manager or create a new one. 23827ec681f3Smrg * 23837ec681f3Smrg * \param fd File descriptor of the opened DRM device. 
23847ec681f3Smrg */ 23857ec681f3Smrgstruct iris_bufmgr * 23867ec681f3Smrgiris_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse) 23877ec681f3Smrg{ 23887ec681f3Smrg struct stat st; 23897ec681f3Smrg 23907ec681f3Smrg if (fstat(fd, &st)) 23917ec681f3Smrg return NULL; 23927ec681f3Smrg 23937ec681f3Smrg struct iris_bufmgr *bufmgr = NULL; 23947ec681f3Smrg 23957ec681f3Smrg simple_mtx_lock(&global_bufmgr_list_mutex); 23967ec681f3Smrg list_for_each_entry(struct iris_bufmgr, iter_bufmgr, &global_bufmgr_list, link) { 23977ec681f3Smrg struct stat iter_st; 23987ec681f3Smrg if (fstat(iter_bufmgr->fd, &iter_st)) 23997ec681f3Smrg continue; 24007ec681f3Smrg 24017ec681f3Smrg if (st.st_rdev == iter_st.st_rdev) { 24027ec681f3Smrg assert(iter_bufmgr->bo_reuse == bo_reuse); 24037ec681f3Smrg bufmgr = iris_bufmgr_ref(iter_bufmgr); 24047ec681f3Smrg goto unlock; 24057ec681f3Smrg } 24067ec681f3Smrg } 24077ec681f3Smrg 24087ec681f3Smrg bufmgr = iris_bufmgr_create(devinfo, fd, bo_reuse); 24097ec681f3Smrg if (bufmgr) 24107ec681f3Smrg list_addtail(&bufmgr->link, &global_bufmgr_list); 24117ec681f3Smrg 24127ec681f3Smrg unlock: 24137ec681f3Smrg simple_mtx_unlock(&global_bufmgr_list_mutex); 24149f464c52Smaya 24159f464c52Smaya return bufmgr; 24169f464c52Smaya} 24177ec681f3Smrg 24187ec681f3Smrgint 24197ec681f3Smrgiris_bufmgr_get_fd(struct iris_bufmgr *bufmgr) 24207ec681f3Smrg{ 24217ec681f3Smrg return bufmgr->fd; 24227ec681f3Smrg} 24237ec681f3Smrg 24247ec681f3Smrgvoid* 24257ec681f3Smrgiris_bufmgr_get_aux_map_context(struct iris_bufmgr *bufmgr) 24267ec681f3Smrg{ 24277ec681f3Smrg return bufmgr->aux_map_ctx; 24287ec681f3Smrg} 24297ec681f3Smrg 24307ec681f3Smrgsimple_mtx_t * 24317ec681f3Smrgiris_bufmgr_get_bo_deps_lock(struct iris_bufmgr *bufmgr) 24327ec681f3Smrg{ 24337ec681f3Smrg return &bufmgr->bo_deps_lock; 24347ec681f3Smrg} 2435