1/* 2 * Copyright © 2017 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included 12 * in all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 20 * DEALINGS IN THE SOFTWARE. 21 */ 22 23/** 24 * @file iris_bufmgr.c 25 * 26 * The Iris buffer manager. 
27 * 28 * XXX: write better comments 29 * - BOs 30 * - Explain BO cache 31 * - main interface to GEM in the kernel 32 */ 33 34#ifdef HAVE_CONFIG_H 35#include "config.h" 36#endif 37 38#include <xf86drm.h> 39#include <util/u_atomic.h> 40#include <fcntl.h> 41#include <stdio.h> 42#include <stdlib.h> 43#include <string.h> 44#include <unistd.h> 45#include <assert.h> 46#include <sys/ioctl.h> 47#include <sys/mman.h> 48#include <sys/stat.h> 49#include <sys/types.h> 50#include <stdbool.h> 51#include <time.h> 52 53#include "errno.h" 54#ifndef ETIME 55#define ETIME ETIMEDOUT 56#endif 57#include "common/gen_clflush.h" 58#include "dev/gen_debug.h" 59#include "common/gen_gem.h" 60#include "dev/gen_device_info.h" 61#include "main/macros.h" 62#include "util/debug.h" 63#include "util/macros.h" 64#include "util/hash_table.h" 65#include "util/list.h" 66#include "util/u_dynarray.h" 67#include "util/vma.h" 68#include "iris_bufmgr.h" 69#include "iris_context.h" 70#include "string.h" 71 72#include "drm-uapi/i915_drm.h" 73 74#ifdef HAVE_VALGRIND 75#include <valgrind.h> 76#include <memcheck.h> 77#define VG(x) x 78#else 79#define VG(x) 80#endif 81 82/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier 83 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is 84 * leaked. All because it does not call VG(cli_free) from its 85 * VG_USERREQ__FREELIKE_BLOCK handler. Instead of treating the memory like 86 * and allocation, we mark it available for use upon mmapping and remove 87 * it upon unmapping. 
88 */ 89#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size)) 90#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size)) 91 92#define PAGE_SIZE 4096 93 94#define FILE_DEBUG_FLAG DEBUG_BUFMGR 95 96/** 97 * Call ioctl, restarting if it is interupted 98 */ 99int 100drm_ioctl(int fd, unsigned long request, void *arg) 101{ 102 int ret; 103 104 do { 105 ret = ioctl(fd, request, arg); 106 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 107 return ret; 108} 109 110static inline int 111atomic_add_unless(int *v, int add, int unless) 112{ 113 int c, old; 114 c = p_atomic_read(v); 115 while (c != unless && (old = p_atomic_cmpxchg(v, c, c + add)) != c) 116 c = old; 117 return c == unless; 118} 119 120static const char * 121memzone_name(enum iris_memory_zone memzone) 122{ 123 const char *names[] = { 124 [IRIS_MEMZONE_SHADER] = "shader", 125 [IRIS_MEMZONE_BINDER] = "binder", 126 [IRIS_MEMZONE_SURFACE] = "surface", 127 [IRIS_MEMZONE_DYNAMIC] = "dynamic", 128 [IRIS_MEMZONE_OTHER] = "other", 129 [IRIS_MEMZONE_BORDER_COLOR_POOL] = "bordercolor", 130 }; 131 assert(memzone < ARRAY_SIZE(names)); 132 return names[memzone]; 133} 134 135struct bo_cache_bucket { 136 /** List of cached BOs. */ 137 struct list_head head; 138 139 /** Size of this bucket, in bytes. 
*/ 140 uint64_t size; 141}; 142 143struct iris_bufmgr { 144 int fd; 145 146 mtx_t lock; 147 148 /** Array of lists of cached gem objects of power-of-two sizes */ 149 struct bo_cache_bucket cache_bucket[14 * 4]; 150 int num_buckets; 151 time_t time; 152 153 struct hash_table *name_table; 154 struct hash_table *handle_table; 155 156 struct util_vma_heap vma_allocator[IRIS_MEMZONE_COUNT]; 157 158 bool has_llc:1; 159 bool bo_reuse:1; 160}; 161 162static int bo_set_tiling_internal(struct iris_bo *bo, uint32_t tiling_mode, 163 uint32_t stride); 164 165static void bo_free(struct iris_bo *bo); 166 167static uint64_t vma_alloc(struct iris_bufmgr *bufmgr, 168 enum iris_memory_zone memzone, 169 uint64_t size, uint64_t alignment); 170 171static uint32_t 172key_hash_uint(const void *key) 173{ 174 return _mesa_hash_data(key, 4); 175} 176 177static bool 178key_uint_equal(const void *a, const void *b) 179{ 180 return *((unsigned *) a) == *((unsigned *) b); 181} 182 183static struct iris_bo * 184hash_find_bo(struct hash_table *ht, unsigned int key) 185{ 186 struct hash_entry *entry = _mesa_hash_table_search(ht, &key); 187 return entry ? (struct iris_bo *) entry->data : NULL; 188} 189 190/** 191 * This function finds the correct bucket fit for the input size. 192 * The function works with O(1) complexity when the requested size 193 * was queried instead of iterating the size through all the buckets. 194 */ 195static struct bo_cache_bucket * 196bucket_for_size(struct iris_bufmgr *bufmgr, uint64_t size) 197{ 198 /* Calculating the pages and rounding up to the page size. 
*/ 199 const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE; 200 201 /* Row Bucket sizes clz((x-1) | 3) Row Column 202 * in pages stride size 203 * 0: 1 2 3 4 -> 30 30 30 30 4 1 204 * 1: 5 6 7 8 -> 29 29 29 29 4 1 205 * 2: 10 12 14 16 -> 28 28 28 28 8 2 206 * 3: 20 24 28 32 -> 27 27 27 27 16 4 207 */ 208 const unsigned row = 30 - __builtin_clz((pages - 1) | 3); 209 const unsigned row_max_pages = 4 << row; 210 211 /* The '& ~2' is the special case for row 1. In row 1, max pages / 212 * 2 is 2, but the previous row maximum is zero (because there is 213 * no previous row). All row maximum sizes are power of 2, so that 214 * is the only case where that bit will be set. 215 */ 216 const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2; 217 int col_size_log2 = row - 1; 218 col_size_log2 += (col_size_log2 < 0); 219 220 const unsigned col = (pages - prev_row_max_pages + 221 ((1 << col_size_log2) - 1)) >> col_size_log2; 222 223 /* Calculating the index based on the row and column. */ 224 const unsigned index = (row * 4) + (col - 1); 225 226 return (index < bufmgr->num_buckets) ? 
227 &bufmgr->cache_bucket[index] : NULL; 228} 229 230enum iris_memory_zone 231iris_memzone_for_address(uint64_t address) 232{ 233 STATIC_ASSERT(IRIS_MEMZONE_OTHER_START > IRIS_MEMZONE_DYNAMIC_START); 234 STATIC_ASSERT(IRIS_MEMZONE_DYNAMIC_START > IRIS_MEMZONE_SURFACE_START); 235 STATIC_ASSERT(IRIS_MEMZONE_SURFACE_START > IRIS_MEMZONE_BINDER_START); 236 STATIC_ASSERT(IRIS_MEMZONE_BINDER_START > IRIS_MEMZONE_SHADER_START); 237 STATIC_ASSERT(IRIS_BORDER_COLOR_POOL_ADDRESS == IRIS_MEMZONE_DYNAMIC_START); 238 239 if (address >= IRIS_MEMZONE_OTHER_START) 240 return IRIS_MEMZONE_OTHER; 241 242 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS) 243 return IRIS_MEMZONE_BORDER_COLOR_POOL; 244 245 if (address > IRIS_MEMZONE_DYNAMIC_START) 246 return IRIS_MEMZONE_DYNAMIC; 247 248 if (address >= IRIS_MEMZONE_SURFACE_START) 249 return IRIS_MEMZONE_SURFACE; 250 251 if (address >= IRIS_MEMZONE_BINDER_START) 252 return IRIS_MEMZONE_BINDER; 253 254 return IRIS_MEMZONE_SHADER; 255} 256 257/** 258 * Allocate a section of virtual memory for a buffer, assigning an address. 259 * 260 * This uses either the bucket allocator for the given size, or the large 261 * object allocator (util_vma). 262 */ 263static uint64_t 264vma_alloc(struct iris_bufmgr *bufmgr, 265 enum iris_memory_zone memzone, 266 uint64_t size, 267 uint64_t alignment) 268{ 269 /* Force alignment to be some number of pages */ 270 alignment = ALIGN(alignment, PAGE_SIZE); 271 272 if (memzone == IRIS_MEMZONE_BORDER_COLOR_POOL) 273 return IRIS_BORDER_COLOR_POOL_ADDRESS; 274 275 /* The binder handles its own allocations. Return non-zero here. 
*/ 276 if (memzone == IRIS_MEMZONE_BINDER) 277 return IRIS_MEMZONE_BINDER_START; 278 279 uint64_t addr = 280 util_vma_heap_alloc(&bufmgr->vma_allocator[memzone], size, alignment); 281 282 assert((addr >> 48ull) == 0); 283 assert((addr % alignment) == 0); 284 285 return gen_canonical_address(addr); 286} 287 288static void 289vma_free(struct iris_bufmgr *bufmgr, 290 uint64_t address, 291 uint64_t size) 292{ 293 if (address == IRIS_BORDER_COLOR_POOL_ADDRESS) 294 return; 295 296 /* Un-canonicalize the address. */ 297 address = gen_48b_address(address); 298 299 if (address == 0ull) 300 return; 301 302 enum iris_memory_zone memzone = iris_memzone_for_address(address); 303 304 /* The binder handles its own allocations. */ 305 if (memzone == IRIS_MEMZONE_BINDER) 306 return; 307 308 util_vma_heap_free(&bufmgr->vma_allocator[memzone], address, size); 309} 310 311int 312iris_bo_busy(struct iris_bo *bo) 313{ 314 struct iris_bufmgr *bufmgr = bo->bufmgr; 315 struct drm_i915_gem_busy busy = { .handle = bo->gem_handle }; 316 317 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 318 if (ret == 0) { 319 bo->idle = !busy.busy; 320 return busy.busy; 321 } 322 return false; 323} 324 325int 326iris_bo_madvise(struct iris_bo *bo, int state) 327{ 328 struct drm_i915_gem_madvise madv = { 329 .handle = bo->gem_handle, 330 .madv = state, 331 .retained = 1, 332 }; 333 334 drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 335 336 return madv.retained; 337} 338 339/* drop the oldest entries that have been purged by the kernel */ 340static void 341iris_bo_cache_purge_bucket(struct iris_bufmgr *bufmgr, 342 struct bo_cache_bucket *bucket) 343{ 344 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { 345 if (iris_bo_madvise(bo, I915_MADV_DONTNEED)) 346 break; 347 348 list_del(&bo->head); 349 bo_free(bo); 350 } 351} 352 353static struct iris_bo * 354bo_calloc(void) 355{ 356 struct iris_bo *bo = calloc(1, sizeof(*bo)); 357 if (bo) { 358 bo->hash = 
_mesa_hash_pointer(bo); 359 } 360 return bo; 361} 362 363static struct iris_bo * 364bo_alloc_internal(struct iris_bufmgr *bufmgr, 365 const char *name, 366 uint64_t size, 367 enum iris_memory_zone memzone, 368 unsigned flags, 369 uint32_t tiling_mode, 370 uint32_t stride) 371{ 372 struct iris_bo *bo; 373 unsigned int page_size = getpagesize(); 374 int ret; 375 struct bo_cache_bucket *bucket; 376 bool alloc_from_cache; 377 uint64_t bo_size; 378 bool zeroed = false; 379 380 if (flags & BO_ALLOC_ZEROED) 381 zeroed = true; 382 383 if ((flags & BO_ALLOC_COHERENT) && !bufmgr->has_llc) { 384 bo_size = MAX2(ALIGN(size, page_size), page_size); 385 bucket = NULL; 386 goto skip_cache; 387 } 388 389 /* Round the allocated size up to a power of two number of pages. */ 390 bucket = bucket_for_size(bufmgr, size); 391 392 /* If we don't have caching at this size, don't actually round the 393 * allocation up. 394 */ 395 if (bucket == NULL) { 396 bo_size = MAX2(ALIGN(size, page_size), page_size); 397 } else { 398 bo_size = bucket->size; 399 } 400 401 mtx_lock(&bufmgr->lock); 402 /* Get a buffer out of the cache if available */ 403retry: 404 alloc_from_cache = false; 405 if (bucket != NULL && !list_empty(&bucket->head)) { 406 /* If the last BO in the cache is idle, then reuse it. Otherwise, 407 * allocate a fresh buffer to avoid stalling. 
408 */ 409 bo = LIST_ENTRY(struct iris_bo, bucket->head.next, head); 410 if (!iris_bo_busy(bo)) { 411 alloc_from_cache = true; 412 list_del(&bo->head); 413 } 414 415 if (alloc_from_cache) { 416 if (!iris_bo_madvise(bo, I915_MADV_WILLNEED)) { 417 bo_free(bo); 418 iris_bo_cache_purge_bucket(bufmgr, bucket); 419 goto retry; 420 } 421 422 if (bo_set_tiling_internal(bo, tiling_mode, stride)) { 423 bo_free(bo); 424 goto retry; 425 } 426 427 if (zeroed) { 428 void *map = iris_bo_map(NULL, bo, MAP_WRITE | MAP_RAW); 429 if (!map) { 430 bo_free(bo); 431 goto retry; 432 } 433 memset(map, 0, bo_size); 434 } 435 } 436 } 437 438 if (alloc_from_cache) { 439 /* If the cached BO isn't in the right memory zone, free the old 440 * memory and assign it a new address. 441 */ 442 if (memzone != iris_memzone_for_address(bo->gtt_offset)) { 443 vma_free(bufmgr, bo->gtt_offset, bo->size); 444 bo->gtt_offset = 0ull; 445 } 446 } else { 447skip_cache: 448 bo = bo_calloc(); 449 if (!bo) 450 goto err; 451 452 bo->size = bo_size; 453 bo->idle = true; 454 455 struct drm_i915_gem_create create = { .size = bo_size }; 456 457 /* All new BOs we get from the kernel are zeroed, so we don't need to 458 * worry about that here. 459 */ 460 ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create); 461 if (ret != 0) { 462 free(bo); 463 goto err; 464 } 465 466 bo->gem_handle = create.handle; 467 468 bo->bufmgr = bufmgr; 469 470 bo->tiling_mode = I915_TILING_NONE; 471 bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 472 bo->stride = 0; 473 474 if (bo_set_tiling_internal(bo, tiling_mode, stride)) 475 goto err_free; 476 477 /* Calling set_domain() will allocate pages for the BO outside of the 478 * struct mutex lock in the kernel, which is more efficient than waiting 479 * to create them during the first execbuf that uses the BO. 
480 */ 481 struct drm_i915_gem_set_domain sd = { 482 .handle = bo->gem_handle, 483 .read_domains = I915_GEM_DOMAIN_CPU, 484 .write_domain = 0, 485 }; 486 487 if (drm_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) 488 goto err_free; 489 } 490 491 bo->name = name; 492 p_atomic_set(&bo->refcount, 1); 493 bo->reusable = bucket && bufmgr->bo_reuse; 494 bo->cache_coherent = bufmgr->has_llc; 495 bo->index = -1; 496 bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; 497 498 /* By default, capture all driver-internal buffers like shader kernels, 499 * surface states, dynamic states, border colors, and so on. 500 */ 501 if (memzone < IRIS_MEMZONE_OTHER) 502 bo->kflags |= EXEC_OBJECT_CAPTURE; 503 504 if (bo->gtt_offset == 0ull) { 505 bo->gtt_offset = vma_alloc(bufmgr, memzone, bo->size, 1); 506 507 if (bo->gtt_offset == 0ull) 508 goto err_free; 509 } 510 511 mtx_unlock(&bufmgr->lock); 512 513 if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) { 514 struct drm_i915_gem_caching arg = { 515 .handle = bo->gem_handle, 516 .caching = 1, 517 }; 518 if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0) { 519 bo->cache_coherent = true; 520 bo->reusable = false; 521 } 522 } 523 524 DBG("bo_create: buf %d (%s) (%s memzone) %llub\n", bo->gem_handle, 525 bo->name, memzone_name(memzone), (unsigned long long) size); 526 527 return bo; 528 529err_free: 530 bo_free(bo); 531err: 532 mtx_unlock(&bufmgr->lock); 533 return NULL; 534} 535 536struct iris_bo * 537iris_bo_alloc(struct iris_bufmgr *bufmgr, 538 const char *name, 539 uint64_t size, 540 enum iris_memory_zone memzone) 541{ 542 return bo_alloc_internal(bufmgr, name, size, memzone, 543 0, I915_TILING_NONE, 0); 544} 545 546struct iris_bo * 547iris_bo_alloc_tiled(struct iris_bufmgr *bufmgr, const char *name, 548 uint64_t size, enum iris_memory_zone memzone, 549 uint32_t tiling_mode, uint32_t pitch, unsigned flags) 550{ 551 return bo_alloc_internal(bufmgr, name, size, memzone, 552 flags, 
tiling_mode, pitch); 553} 554 555struct iris_bo * 556iris_bo_create_userptr(struct iris_bufmgr *bufmgr, const char *name, 557 void *ptr, size_t size, 558 enum iris_memory_zone memzone) 559{ 560 struct iris_bo *bo; 561 562 bo = bo_calloc(); 563 if (!bo) 564 return NULL; 565 566 struct drm_i915_gem_userptr arg = { 567 .user_ptr = (uintptr_t)ptr, 568 .user_size = size, 569 }; 570 if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg)) 571 goto err_free; 572 bo->gem_handle = arg.handle; 573 574 /* Check the buffer for validity before we try and use it in a batch */ 575 struct drm_i915_gem_set_domain sd = { 576 .handle = bo->gem_handle, 577 .read_domains = I915_GEM_DOMAIN_CPU, 578 }; 579 if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd)) 580 goto err_close; 581 582 bo->name = name; 583 bo->size = size; 584 bo->map_cpu = ptr; 585 586 bo->bufmgr = bufmgr; 587 bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; 588 bo->gtt_offset = vma_alloc(bufmgr, memzone, size, 1); 589 if (bo->gtt_offset == 0ull) 590 goto err_close; 591 592 p_atomic_set(&bo->refcount, 1); 593 bo->userptr = true; 594 bo->cache_coherent = true; 595 bo->index = -1; 596 bo->idle = true; 597 598 return bo; 599 600err_close: 601 drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &bo->gem_handle); 602err_free: 603 free(bo); 604 return NULL; 605} 606 607/** 608 * Returns a iris_bo wrapping the given buffer object handle. 609 * 610 * This can be used when one application needs to pass a buffer object 611 * to another. 612 */ 613struct iris_bo * 614iris_bo_gem_create_from_name(struct iris_bufmgr *bufmgr, 615 const char *name, unsigned int handle) 616{ 617 struct iris_bo *bo; 618 619 /* At the moment most applications only have a few named bo. 620 * For instance, in a DRI client only the render buffers passed 621 * between X and the client are named. And since X returns the 622 * alternating names for the front/back buffer a linear search 623 * provides a sufficiently fast match. 
624 */ 625 mtx_lock(&bufmgr->lock); 626 bo = hash_find_bo(bufmgr->name_table, handle); 627 if (bo) { 628 iris_bo_reference(bo); 629 goto out; 630 } 631 632 struct drm_gem_open open_arg = { .name = handle }; 633 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg); 634 if (ret != 0) { 635 DBG("Couldn't reference %s handle 0x%08x: %s\n", 636 name, handle, strerror(errno)); 637 bo = NULL; 638 goto out; 639 } 640 /* Now see if someone has used a prime handle to get this 641 * object from the kernel before by looking through the list 642 * again for a matching gem_handle 643 */ 644 bo = hash_find_bo(bufmgr->handle_table, open_arg.handle); 645 if (bo) { 646 iris_bo_reference(bo); 647 goto out; 648 } 649 650 bo = bo_calloc(); 651 if (!bo) 652 goto out; 653 654 p_atomic_set(&bo->refcount, 1); 655 656 bo->size = open_arg.size; 657 bo->gtt_offset = 0; 658 bo->bufmgr = bufmgr; 659 bo->gem_handle = open_arg.handle; 660 bo->name = name; 661 bo->global_name = handle; 662 bo->reusable = false; 663 bo->external = true; 664 bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED; 665 bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1); 666 667 _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo); 668 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); 669 670 struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle }; 671 ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling); 672 if (ret != 0) 673 goto err_unref; 674 675 bo->tiling_mode = get_tiling.tiling_mode; 676 bo->swizzle_mode = get_tiling.swizzle_mode; 677 /* XXX stride is unknown */ 678 DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name); 679 680out: 681 mtx_unlock(&bufmgr->lock); 682 return bo; 683 684err_unref: 685 bo_free(bo); 686 mtx_unlock(&bufmgr->lock); 687 return NULL; 688} 689 690static void 691bo_free(struct iris_bo *bo) 692{ 693 struct iris_bufmgr *bufmgr = bo->bufmgr; 694 695 if (bo->map_cpu && !bo->userptr) { 
696 VG_NOACCESS(bo->map_cpu, bo->size); 697 munmap(bo->map_cpu, bo->size); 698 } 699 if (bo->map_wc) { 700 VG_NOACCESS(bo->map_wc, bo->size); 701 munmap(bo->map_wc, bo->size); 702 } 703 if (bo->map_gtt) { 704 VG_NOACCESS(bo->map_gtt, bo->size); 705 munmap(bo->map_gtt, bo->size); 706 } 707 708 if (bo->external) { 709 struct hash_entry *entry; 710 711 if (bo->global_name) { 712 entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name); 713 _mesa_hash_table_remove(bufmgr->name_table, entry); 714 } 715 716 entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle); 717 _mesa_hash_table_remove(bufmgr->handle_table, entry); 718 } 719 720 /* Close this object */ 721 struct drm_gem_close close = { .handle = bo->gem_handle }; 722 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close); 723 if (ret != 0) { 724 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 725 bo->gem_handle, bo->name, strerror(errno)); 726 } 727 728 vma_free(bo->bufmgr, bo->gtt_offset, bo->size); 729 730 free(bo); 731} 732 733/** Frees all cached buffers significantly older than @time. */ 734static void 735cleanup_bo_cache(struct iris_bufmgr *bufmgr, time_t time) 736{ 737 int i; 738 739 if (bufmgr->time == time) 740 return; 741 742 for (i = 0; i < bufmgr->num_buckets; i++) { 743 struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i]; 744 745 list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) { 746 if (time - bo->free_time <= 1) 747 break; 748 749 list_del(&bo->head); 750 751 bo_free(bo); 752 } 753 } 754 755 bufmgr->time = time; 756} 757 758static void 759bo_unreference_final(struct iris_bo *bo, time_t time) 760{ 761 struct iris_bufmgr *bufmgr = bo->bufmgr; 762 struct bo_cache_bucket *bucket; 763 764 DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name); 765 766 bucket = NULL; 767 if (bo->reusable) 768 bucket = bucket_for_size(bufmgr, bo->size); 769 /* Put the buffer into our internal cache for reuse if we can. 
*/ 770 if (bucket && iris_bo_madvise(bo, I915_MADV_DONTNEED)) { 771 bo->free_time = time; 772 bo->name = NULL; 773 774 list_addtail(&bo->head, &bucket->head); 775 } else { 776 bo_free(bo); 777 } 778} 779 780void 781iris_bo_unreference(struct iris_bo *bo) 782{ 783 if (bo == NULL) 784 return; 785 786 assert(p_atomic_read(&bo->refcount) > 0); 787 788 if (atomic_add_unless(&bo->refcount, -1, 1)) { 789 struct iris_bufmgr *bufmgr = bo->bufmgr; 790 struct timespec time; 791 792 clock_gettime(CLOCK_MONOTONIC, &time); 793 794 mtx_lock(&bufmgr->lock); 795 796 if (p_atomic_dec_zero(&bo->refcount)) { 797 bo_unreference_final(bo, time.tv_sec); 798 cleanup_bo_cache(bufmgr, time.tv_sec); 799 } 800 801 mtx_unlock(&bufmgr->lock); 802 } 803} 804 805static void 806bo_wait_with_stall_warning(struct pipe_debug_callback *dbg, 807 struct iris_bo *bo, 808 const char *action) 809{ 810 bool busy = dbg && !bo->idle; 811 double elapsed = unlikely(busy) ? -get_time() : 0.0; 812 813 iris_bo_wait_rendering(bo); 814 815 if (unlikely(busy)) { 816 elapsed += get_time(); 817 if (elapsed > 1e-5) /* 0.01ms */ { 818 perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n", 819 action, bo->name, elapsed * 1000); 820 } 821 } 822} 823 824static void 825print_flags(unsigned flags) 826{ 827 if (flags & MAP_READ) 828 DBG("READ "); 829 if (flags & MAP_WRITE) 830 DBG("WRITE "); 831 if (flags & MAP_ASYNC) 832 DBG("ASYNC "); 833 if (flags & MAP_PERSISTENT) 834 DBG("PERSISTENT "); 835 if (flags & MAP_COHERENT) 836 DBG("COHERENT "); 837 if (flags & MAP_RAW) 838 DBG("RAW "); 839 DBG("\n"); 840} 841 842static void * 843iris_bo_map_cpu(struct pipe_debug_callback *dbg, 844 struct iris_bo *bo, unsigned flags) 845{ 846 struct iris_bufmgr *bufmgr = bo->bufmgr; 847 848 /* We disallow CPU maps for writing to non-coherent buffers, as the 849 * CPU map can become invalidated when a batch is flushed out, which 850 * can happen at unpredictable times. You should use WC maps instead. 
851 */ 852 assert(bo->cache_coherent || !(flags & MAP_WRITE)); 853 854 if (!bo->map_cpu) { 855 DBG("iris_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name); 856 857 struct drm_i915_gem_mmap mmap_arg = { 858 .handle = bo->gem_handle, 859 .size = bo->size, 860 }; 861 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); 862 if (ret != 0) { 863 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 864 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 865 return NULL; 866 } 867 void *map = (void *) (uintptr_t) mmap_arg.addr_ptr; 868 VG_DEFINED(map, bo->size); 869 870 if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) { 871 VG_NOACCESS(map, bo->size); 872 munmap(map, bo->size); 873 } 874 } 875 assert(bo->map_cpu); 876 877 DBG("iris_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name, 878 bo->map_cpu); 879 print_flags(flags); 880 881 if (!(flags & MAP_ASYNC)) { 882 bo_wait_with_stall_warning(dbg, bo, "CPU mapping"); 883 } 884 885 if (!bo->cache_coherent && !bo->bufmgr->has_llc) { 886 /* If we're reusing an existing CPU mapping, the CPU caches may 887 * contain stale data from the last time we read from that mapping. 888 * (With the BO cache, it might even be data from a previous buffer!) 889 * Even if it's a brand new mapping, the kernel may have zeroed the 890 * buffer via CPU writes. 891 * 892 * We need to invalidate those cachelines so that we see the latest 893 * contents, and so long as we only read from the CPU mmap we do not 894 * need to write those cachelines back afterwards. 895 * 896 * On LLC, the emprical evidence suggests that writes from the GPU 897 * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU 898 * cachelines. (Other reads, such as the display engine, bypass the 899 * LLC entirely requiring us to keep dirty pixels for the scanout 900 * out of any cache.) 
901 */ 902 gen_invalidate_range(bo->map_cpu, bo->size); 903 } 904 905 return bo->map_cpu; 906} 907 908static void * 909iris_bo_map_wc(struct pipe_debug_callback *dbg, 910 struct iris_bo *bo, unsigned flags) 911{ 912 struct iris_bufmgr *bufmgr = bo->bufmgr; 913 914 if (!bo->map_wc) { 915 DBG("iris_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name); 916 917 struct drm_i915_gem_mmap mmap_arg = { 918 .handle = bo->gem_handle, 919 .size = bo->size, 920 .flags = I915_MMAP_WC, 921 }; 922 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg); 923 if (ret != 0) { 924 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 925 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 926 return NULL; 927 } 928 929 void *map = (void *) (uintptr_t) mmap_arg.addr_ptr; 930 VG_DEFINED(map, bo->size); 931 932 if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) { 933 VG_NOACCESS(map, bo->size); 934 munmap(map, bo->size); 935 } 936 } 937 assert(bo->map_wc); 938 939 DBG("iris_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc); 940 print_flags(flags); 941 942 if (!(flags & MAP_ASYNC)) { 943 bo_wait_with_stall_warning(dbg, bo, "WC mapping"); 944 } 945 946 return bo->map_wc; 947} 948 949/** 950 * Perform an uncached mapping via the GTT. 951 * 952 * Write access through the GTT is not quite fully coherent. On low power 953 * systems especially, like modern Atoms, we can observe reads from RAM before 954 * the write via GTT has landed. A write memory barrier that flushes the Write 955 * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later 956 * read after the write as the GTT write suffers a small delay through the GTT 957 * indirection. The kernel uses an uncached mmio read to ensure the GTT write 958 * is ordered with reads (either by the GPU, WB or WC) and unconditionally 959 * flushes prior to execbuf submission. 
However, if we are not informing the 960 * kernel about our GTT writes, it will not flush before earlier access, such 961 * as when using the cmdparser. Similarly, we need to be careful if we should 962 * ever issue a CPU read immediately following a GTT write. 963 * 964 * Telling the kernel about write access also has one more important 965 * side-effect. Upon receiving notification about the write, it cancels any 966 * scanout buffering for FBC/PSR and friends. Later FBC/PSR is then flushed by 967 * either SW_FINISH or DIRTYFB. The presumption is that we never write to the 968 * actual scanout via a mmaping, only to a backbuffer and so all the FBC/PSR 969 * tracking is handled on the buffer exchange instead. 970 */ 971static void * 972iris_bo_map_gtt(struct pipe_debug_callback *dbg, 973 struct iris_bo *bo, unsigned flags) 974{ 975 struct iris_bufmgr *bufmgr = bo->bufmgr; 976 977 /* Get a mapping of the buffer if we haven't before. */ 978 if (bo->map_gtt == NULL) { 979 DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name); 980 981 struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle }; 982 983 /* Get the fake offset back... */ 984 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg); 985 if (ret != 0) { 986 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 987 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 988 return NULL; 989 } 990 991 /* and mmap it. */ 992 void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, 993 MAP_SHARED, bufmgr->fd, mmap_arg.offset); 994 if (map == MAP_FAILED) { 995 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 996 __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno)); 997 return NULL; 998 } 999 1000 /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will 1001 * already intercept this mmap call. However, for consistency between 1002 * all the mmap paths, we mark the pointer as defined now and mark it 1003 * as inaccessible afterwards. 
       */
      VG_DEFINED(map, bo->size);

      /* Another thread may have raced us and published its GTT mapping
       * first; if the cmpxchg reports an existing value, keep theirs and
       * discard our redundant mapping.
       */
      if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         munmap(map, bo->size);
      }
   }
   assert(bo->map_gtt);

   DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(dbg, bo, "GTT mapping");
   }

   return bo->map_gtt;
}

/**
 * Returns true if the BO's contents can be accessed through a direct CPU
 * mmap; false means the caller should use a WC or GTT mapping instead.
 */
static bool
can_map_cpu(struct iris_bo *bo, unsigned flags)
{
   if (bo->cache_coherent)
      return true;

   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
    * an LLC platform reads always are coherent (as they are performed via the
    * central system agent). It is just the writes that we need to take special
    * care to ensure that land in main memory and not stick in the CPU cache.
    */
   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
      return true;

   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
    * across batch flushes where the kernel will change cache domains of the
    * bo, invalidating continued access to the CPU mmap on non-LLC device.
    *
    * Similarly, ASYNC typically means that the buffer will be accessed via
    * both the CPU and the GPU simultaneously.  Batches may be executed that
    * use the BO even while it is mapped.  While OpenGL technically disallows
    * most drawing while non-persistent mappings are active, we may still use
    * the GPU for blits or other operations, causing batches to happen at
    * inconvenient times.
    *
    * If RAW is set, we expect the caller to be able to handle a WC buffer
    * more efficiently than the involuntary clflushes.
    */
   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC | MAP_RAW))
      return false;

   return !(flags & MAP_WRITE);
}

/**
 * Maps a BO for access, picking the fastest viable mapping type:
 * a direct CPU mmap where coherence allows, write-combined otherwise,
 * with the (slow, fence-detiling) GTT path as the fallback.
 *
 * Returns the mapped pointer, or NULL on failure.
 */
void *
iris_bo_map(struct pipe_debug_callback *dbg,
            struct iris_bo *bo, unsigned flags)
{
   /* Tiled buffers go through the GTT so the fences detile for us --
    * unless the caller asked for the raw, untranslated contents.
    */
   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
      return iris_bo_map_gtt(dbg, bo, flags);

   void *map;

   if (can_map_cpu(bo, flags))
      map = iris_bo_map_cpu(dbg, bo, flags);
   else
      map = iris_bo_map_wc(dbg, bo, flags);

   /* Allow the attempt to fail by falling back to the GTT where necessary.
    *
    * Not every buffer can be mmaped directly using the CPU (or WC), for
    * example buffers that wrap stolen memory or are imported from other
    * devices. For those, we have little choice but to use a GTT mmapping.
    * However, if we use a slow GTT mmapping for reads where we expected fast
    * access, that order of magnitude difference in throughput will be clearly
    * expressed by angry users.
    *
    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
    */
   if (!map && !(flags & MAP_RAW)) {
      perf_debug(dbg, "Fallback GTT mapping for %s with access flags %x\n",
                 bo->name, flags);
      map = iris_bo_map_gtt(dbg, bo, flags);
   }

   return map;
}

/** Waits for all GPU rendering with the object to have completed. */
void
iris_bo_wait_rendering(struct iris_bo *bo)
{
   /* We require a kernel recent enough for WAIT_IOCTL support.
    * See intel_init_bufmgr()
    */
   iris_bo_wait(bo, -1);
}

/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful ie. the last batch referencing the
 * object has completed within the allotted time.  Otherwise some negative
 * return value describes the error.  Of particular interest is -ETIME when
 * the wait has failed to yield the desired result.
 *
 * Similar to iris_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time.  Another subtle
 * difference is the internal locking semantics are different (this variant
 * does not hold the lock for the duration of the wait).  This makes the wait
 * subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call.  The wait will
 * not guarantee that the buffer is re-issued via another thread, or a
 * flinked handle.  Userspace must make sure this race does not occur if
 * such precision is important.
 *
 * Note that some kernels have broken the infinite wait for negative values
 * promise; upgrade to the latest stable kernels if this is the case.
 */
int
iris_bo_wait(struct iris_bo *bo, int64_t timeout_ns)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* If we know it's idle, don't bother with the kernel round trip.
    * The shortcut is skipped for external BOs -- presumably because other
    * processes sharing them can submit work we do not track; confirm.
    */
   if (bo->idle && !bo->external)
      return 0;

   struct drm_i915_gem_wait wait = {
      .bo_handle = bo->gem_handle,
      .timeout_ns = timeout_ns,
   };
   int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   if (ret != 0)
      return -errno;

   bo->idle = true;

   return ret;
}

/**
 * Tears down the buffer manager: frees every BO held in the reuse cache,
 * destroys the name/handle lookup tables and the per-zone VMA allocators,
 * then frees the bufmgr itself.  The caller must guarantee no other thread
 * still uses the bufmgr (the lock is destroyed first).
 */
void
iris_bufmgr_destroy(struct iris_bufmgr *bufmgr)
{
   mtx_destroy(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct iris_bo, bo, &bucket->head, head) {
         list_del(&bo->head);

         bo_free(bo);
      }
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   /* The binder memzone has no util_vma_heap here, so it is skipped --
    * NOTE(review): presumably managed elsewhere; verify against the binder
    * allocation code.
    */
   for (int z = 0; z < IRIS_MEMZONE_COUNT; z++) {
      if (z != IRIS_MEMZONE_BINDER)
         util_vma_heap_finish(&bufmgr->vma_allocator[z]);
   }

   free(bufmgr);
}

/**
 * Tells the kernel the BO's tiling mode and stride, if they differ from
 * the cached values (flinked BOs are always re-sent).
 *
 * Returns 0 on success or -errno on failure; on success the BO's cached
 * tiling_mode, swizzle_mode, and stride are updated from the kernel's reply.
 */
static int
bo_set_tiling_internal(struct iris_bo *bo, uint32_t tiling_mode,
                       uint32_t stride)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_tiling set_tiling;
   int ret;

   if (bo->global_name == 0 &&
       tiling_mode == bo->tiling_mode && stride == bo->stride)
      return 0;

   memset(&set_tiling, 0, sizeof(set_tiling));
   do {
      /* set_tiling is slightly broken and overwrites the
       * input on the error path, so we have to open code
       * drm_ioctl (re-filling the struct before each retry).
       */
      set_tiling.handle = bo->gem_handle;
      set_tiling.tiling_mode = tiling_mode;
      set_tiling.stride = stride;

      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   if (ret == -1)
      return -errno;

   bo->tiling_mode = set_tiling.tiling_mode;
   bo->swizzle_mode = set_tiling.swizzle_mode;
   bo->stride = set_tiling.stride;
   return 0;
}

/** Reports the BO's cached tiling and swizzle modes.  Always returns 0. */
int
iris_bo_get_tiling(struct iris_bo *bo, uint32_t *tiling_mode,
                   uint32_t *swizzle_mode)
{
   *tiling_mode = bo->tiling_mode;
   *swizzle_mode = bo->swizzle_mode;
   return 0;
}

/**
 * Imports a dma-buf file descriptor as an iris_bo.
 *
 * If the kernel hands back a GEM handle we already track, the existing BO
 * is returned with an extra reference (two BOs must never alias one handle).
 * Returns NULL on failure.
 */
struct iris_bo *
iris_bo_import_dmabuf(struct iris_bufmgr *bufmgr, int prime_fd)
{
   uint32_t handle;
   struct iris_bo *bo;

   mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
          strerror(errno));
      mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us. Just as
    * for named buffers, we must not create two bo's pointing at the same
    * kernel object
    */
   bo = hash_find_bo(bufmgr->handle_table, handle);
   if (bo) {
      iris_bo_reference(bo);
      goto out;
   }

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine size of bo.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  If we have kernel 3.12 or
    * later, we can lseek on the prime fd to get the size.  Older
    * kernels will just fail, in which case bo->size is left at the
    * zero bo_calloc gave it.
    *
    * NOTE(review): 'ret' is an int, so the lseek result is truncated
    * for dma-bufs >= 2GB; consider an off_t temporary here.
    */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;

   bo->gem_handle = handle;
   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   bo->name = "prime";
   bo->reusable = false;
   bo->external = true;
   bo->kflags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS | EXEC_OBJECT_PINNED;
   bo->gtt_offset = vma_alloc(bufmgr, IRIS_MEMZONE_OTHER, bo->size, 1);

   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
   if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
      goto err;

   bo->tiling_mode = get_tiling.tiling_mode;
   bo->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */

out:
   mtx_unlock(&bufmgr->lock);
   return bo;

err:
   bo_free(bo);
   mtx_unlock(&bufmgr->lock);
   return NULL;
}

/** Marks a BO as shared externally; caller must hold bufmgr->lock. */
static void
iris_bo_make_external_locked(struct iris_bo *bo)
{
   if (!bo->external) {
      _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo);
      bo->external = true;
   }
}

/** Marks a BO as shared externally, taking the bufmgr lock as needed. */
static void
iris_bo_make_external(struct iris_bo *bo)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   /* Unlocked fast-path check; re-checked under the lock inside
    * iris_bo_make_external_locked().
    */
   if (bo->external)
      return;

   mtx_lock(&bufmgr->lock);
   iris_bo_make_external_locked(bo);
   mtx_unlock(&bufmgr->lock);
}

/**
 * Exports a BO as a dma-buf fd (written to *prime_fd).
 *
 * The BO becomes external and non-reusable.  Returns 0 on success or
 * -errno on failure.
 */
int
iris_bo_export_dmabuf(struct iris_bo *bo, int *prime_fd)
{
   struct iris_bufmgr *bufmgr = bo->bufmgr;

   iris_bo_make_external(bo);

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC, prime_fd) != 0)
      return -errno;

   bo->reusable = false;

   return 0;
}

/** Returns the BO's GEM handle, marking the BO external first. */
uint32_t
iris_bo_export_gem_handle(struct iris_bo *bo)
{
   iris_bo_make_external(bo);

   return bo->gem_handle;
}

/**
 * Gets (creating one if needed) a global "flink" name for the BO, so it
 * can be shared across processes.  Writes the name to *name and returns 0,
 * or returns -errno on failure.
 */
int
iris_bo_flink(struct iris_bo *bo, uint32_t *name)
{
   struct iris_bufmgr *bufmgr =
bo->bufmgr; 1340 1341 if (!bo->global_name) { 1342 struct drm_gem_flink flink = { .handle = bo->gem_handle }; 1343 1344 if (drm_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink)) 1345 return -errno; 1346 1347 mtx_lock(&bufmgr->lock); 1348 if (!bo->global_name) { 1349 iris_bo_make_external_locked(bo); 1350 bo->global_name = flink.name; 1351 _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo); 1352 } 1353 mtx_unlock(&bufmgr->lock); 1354 1355 bo->reusable = false; 1356 } 1357 1358 *name = bo->global_name; 1359 return 0; 1360} 1361 1362static void 1363add_bucket(struct iris_bufmgr *bufmgr, int size) 1364{ 1365 unsigned int i = bufmgr->num_buckets; 1366 1367 assert(i < ARRAY_SIZE(bufmgr->cache_bucket)); 1368 1369 list_inithead(&bufmgr->cache_bucket[i].head); 1370 bufmgr->cache_bucket[i].size = size; 1371 bufmgr->num_buckets++; 1372 1373 assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]); 1374 assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]); 1375 assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]); 1376} 1377 1378static void 1379init_cache_buckets(struct iris_bufmgr *bufmgr) 1380{ 1381 uint64_t size, cache_max_size = 64 * 1024 * 1024; 1382 1383 /* OK, so power of two buckets was too wasteful of memory. 1384 * Give 3 other sizes between each power of two, to hopefully 1385 * cover things accurately enough. (The alternative is 1386 * probably to just go for exact matching of sizes, and assume 1387 * that for things like composited window resize the tiled 1388 * width/height alignment and rounding of sizes to pages will 1389 * get us useful cache hit rates anyway) 1390 */ 1391 add_bucket(bufmgr, PAGE_SIZE); 1392 add_bucket(bufmgr, PAGE_SIZE * 2); 1393 add_bucket(bufmgr, PAGE_SIZE * 3); 1394 1395 /* Initialize the linked lists for BO reuse cache. 
*/ 1396 for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) { 1397 add_bucket(bufmgr, size); 1398 1399 add_bucket(bufmgr, size + size * 1 / 4); 1400 add_bucket(bufmgr, size + size * 2 / 4); 1401 add_bucket(bufmgr, size + size * 3 / 4); 1402 } 1403} 1404 1405uint32_t 1406iris_create_hw_context(struct iris_bufmgr *bufmgr) 1407{ 1408 struct drm_i915_gem_context_create create = { }; 1409 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 1410 if (ret != 0) { 1411 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno)); 1412 return 0; 1413 } 1414 1415 return create.ctx_id; 1416} 1417 1418int 1419iris_hw_context_set_priority(struct iris_bufmgr *bufmgr, 1420 uint32_t ctx_id, 1421 int priority) 1422{ 1423 struct drm_i915_gem_context_param p = { 1424 .ctx_id = ctx_id, 1425 .param = I915_CONTEXT_PARAM_PRIORITY, 1426 .value = priority, 1427 }; 1428 int err; 1429 1430 err = 0; 1431 if (drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p)) 1432 err = -errno; 1433 1434 return err; 1435} 1436 1437void 1438iris_destroy_hw_context(struct iris_bufmgr *bufmgr, uint32_t ctx_id) 1439{ 1440 struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id }; 1441 1442 if (ctx_id != 0 && 1443 drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) { 1444 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 1445 strerror(errno)); 1446 } 1447} 1448 1449int 1450iris_reg_read(struct iris_bufmgr *bufmgr, uint32_t offset, uint64_t *result) 1451{ 1452 struct drm_i915_reg_read reg_read = { .offset = offset }; 1453 int ret = drm_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, ®_read); 1454 1455 *result = reg_read.val; 1456 return ret; 1457} 1458 1459static uint64_t 1460iris_gtt_size(int fd) 1461{ 1462 /* We use the default (already allocated) context to determine 1463 * the default configuration of the virtual address space. 
1464 */ 1465 struct drm_i915_gem_context_param p = { 1466 .param = I915_CONTEXT_PARAM_GTT_SIZE, 1467 }; 1468 if (!drm_ioctl(fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p)) 1469 return p.value; 1470 1471 return 0; 1472} 1473 1474/** 1475 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 1476 * and manage map buffer objections. 1477 * 1478 * \param fd File descriptor of the opened DRM device. 1479 */ 1480struct iris_bufmgr * 1481iris_bufmgr_init(struct gen_device_info *devinfo, int fd) 1482{ 1483 uint64_t gtt_size = iris_gtt_size(fd); 1484 if (gtt_size <= IRIS_MEMZONE_OTHER_START) 1485 return NULL; 1486 1487 struct iris_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr)); 1488 if (bufmgr == NULL) 1489 return NULL; 1490 1491 /* Handles to buffer objects belong to the device fd and are not 1492 * reference counted by the kernel. If the same fd is used by 1493 * multiple parties (threads sharing the same screen bufmgr, or 1494 * even worse the same device fd passed to multiple libraries) 1495 * ownership of those handles is shared by those independent parties. 1496 * 1497 * Don't do this! Ensure that each library/bufmgr has its own device 1498 * fd so that its namespace does not clash with another. 
1499 */ 1500 bufmgr->fd = fd; 1501 1502 if (mtx_init(&bufmgr->lock, mtx_plain) != 0) { 1503 free(bufmgr); 1504 return NULL; 1505 } 1506 1507 bufmgr->has_llc = devinfo->has_llc; 1508 1509 STATIC_ASSERT(IRIS_MEMZONE_SHADER_START == 0ull); 1510 const uint64_t _4GB = 1ull << 32; 1511 1512 /* The STATE_BASE_ADDRESS size field can only hold 1 page shy of 4GB */ 1513 const uint64_t _4GB_minus_1 = _4GB - PAGE_SIZE; 1514 1515 util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SHADER], 1516 PAGE_SIZE, _4GB_minus_1 - PAGE_SIZE); 1517 util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_SURFACE], 1518 IRIS_MEMZONE_SURFACE_START, 1519 _4GB_minus_1 - IRIS_MAX_BINDERS * IRIS_BINDER_SIZE); 1520 util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_DYNAMIC], 1521 IRIS_MEMZONE_DYNAMIC_START + IRIS_BORDER_COLOR_POOL_SIZE, 1522 _4GB_minus_1 - IRIS_BORDER_COLOR_POOL_SIZE); 1523 1524 /* Leave the last 4GB out of the high vma range, so that no state 1525 * base address + size can overflow 48 bits. 1526 */ 1527 util_vma_heap_init(&bufmgr->vma_allocator[IRIS_MEMZONE_OTHER], 1528 IRIS_MEMZONE_OTHER_START, 1529 (gtt_size - _4GB) - IRIS_MEMZONE_OTHER_START); 1530 1531 // XXX: driconf 1532 bufmgr->bo_reuse = env_var_as_boolean("bo_reuse", true); 1533 1534 init_cache_buckets(bufmgr); 1535 1536 bufmgr->name_table = 1537 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal); 1538 bufmgr->handle_table = 1539 _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal); 1540 1541 return bufmgr; 1542} 1543