crocus_bufmgr.c revision 7ec681f3
/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file crocus_bufmgr.c
 *
 * The crocus buffer manager.
 *
 * XXX: write better comments
 * - BOs
 * - Explain BO cache
 * - main interface to GEM in the kernel
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <util/u_atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>
#include <time.h>

#include "errno.h"
#include "common/intel_clflush.h"
#include "dev/intel_debug.h"
#include "common/intel_gem.h"
#include "dev/intel_device_info.h"
#include "main/macros.h"
#include "util/debug.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/list.h"
#include "util/os_file.h"
#include "util/u_dynarray.h"
#include "util/vma.h"
#include "crocus_bufmgr.h"
#include "crocus_context.h"
#include "string.h"

#include "drm-uapi/i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

/**
 * For debugging purposes, this returns a time in seconds.
 */
static double
get_time(void)
{
   struct timespec tp;

   clock_gettime(CLOCK_MONOTONIC, &tp);

   return tp.tv_sec + tp.tv_nsec / 1000000000.0;
}

/* VALGRIND_FREELIKE_BLOCK unfortunately does not actually undo the earlier
 * VALGRIND_MALLOCLIKE_BLOCK but instead leaves vg convinced the memory is
 * leaked.  All because it does not call VG(cli_free) from its
 * VG_USERREQ__FREELIKE_BLOCK handler.  Instead of treating the memory like
 * an allocation, we mark it available for use upon mmapping and remove
 * it upon unmapping.
 */
#define VG_DEFINED(ptr, size) VG(VALGRIND_MAKE_MEM_DEFINED(ptr, size))
#define VG_NOACCESS(ptr, size) VG(VALGRIND_MAKE_MEM_NOACCESS(ptr, size))

#define PAGE_SIZE 4096

#define WARN_ONCE(cond, fmt...) do {                            \
   if (unlikely(cond)) {                                        \
      static bool _warned = false;                              \
      if (!_warned) {                                           \
         fprintf(stderr, "WARNING: ");                          \
         fprintf(stderr, fmt);                                  \
         _warned = true;                                        \
      }                                                         \
   }                                                            \
} while (0)

#define FILE_DEBUG_FLAG DEBUG_BUFMGR

struct bo_cache_bucket {
   /** List of cached BOs. */
   struct list_head head;

   /** Size of this bucket, in bytes. */
   uint64_t size;
};

struct bo_export {
   /** File descriptor associated with a handle export. */
   int drm_fd;

   /** GEM handle in drm_fd */
   uint32_t gem_handle;

   struct list_head link;
};

struct crocus_bufmgr {
   /**
    * Link in the global list of buffer managers.
    */
   struct list_head link;

   uint32_t refcount;

   int fd;

   simple_mtx_t lock;

   /** Array of lists of cached gem objects of power-of-two sizes */
   struct bo_cache_bucket cache_bucket[14 * 4];
   int num_buckets;
   time_t time;

   struct hash_table *name_table;
   struct hash_table *handle_table;

   /**
    * List of BOs which we've effectively freed, but are hanging on to
    * until they're idle before closing and returning the VMA.
    */
   struct list_head zombie_list;

   bool has_llc:1;
   bool has_mmap_offset:1;
   bool has_tiling_uapi:1;
   bool bo_reuse:1;
};

static simple_mtx_t global_bufmgr_list_mutex = _SIMPLE_MTX_INITIALIZER_NP;
static struct list_head global_bufmgr_list = {
   .next = &global_bufmgr_list,
   .prev = &global_bufmgr_list,
};

static int bo_set_tiling_internal(struct crocus_bo *bo, uint32_t tiling_mode,
                                  uint32_t stride);

static void bo_free(struct crocus_bo *bo);

static uint32_t
key_hash_uint(const void *key)
{
   return _mesa_hash_data(key, 4);
}

static bool
key_uint_equal(const void *a, const void *b)
{
   return *((unsigned *) a) == *((unsigned *) b);
}

static struct crocus_bo *
find_and_ref_external_bo(struct hash_table *ht, unsigned int key)
{
   struct hash_entry *entry = _mesa_hash_table_search(ht, &key);
   struct crocus_bo *bo = entry ? entry->data : NULL;

   if (bo) {
      assert(bo->external);
      assert(!bo->reusable);

      /* Being non-reusable, the BO cannot be in the cache lists, but it
       * may be in the zombie list if it had reached zero references, but
       * we hadn't yet closed it...and then reimported the same BO.  If it
       * is, then remove it since it's now been resurrected.
       */
      if (bo->head.prev || bo->head.next)
         list_del(&bo->head);

      crocus_bo_reference(bo);
   }

   return bo;
}

/**
 * This function finds the correct bucket fit for the input size.
 * It computes the bucket index for the requested size directly, in O(1),
 * rather than iterating through all the buckets.
 */
static struct bo_cache_bucket *
bucket_for_size(struct crocus_bufmgr *bufmgr, uint64_t size)
{
   /* Calculating the pages and rounding up to the page size. */
   const unsigned pages = (size + PAGE_SIZE - 1) / PAGE_SIZE;

   /* Row  Bucket sizes    clz((x-1) | 3)   Row    Column
    *      in pages                         stride   size
    *   0:   1  2  3  4 -> 30 30 30 30        4       1
    *   1:   5  6  7  8 -> 29 29 29 29        4       1
    *   2:  10 12 14 16 -> 28 28 28 28        8       2
    *   3:  20 24 28 32 -> 27 27 27 27       16       4
    */
   const unsigned row = 30 - __builtin_clz((pages - 1) | 3);
   const unsigned row_max_pages = 4 << row;

   /* The '& ~2' is the special case for row 1.  In row 1, max pages /
    * 2 is 2, but the previous row maximum is zero (because there is
    * no previous row).  All row maximum sizes are power of 2, so that
    * is the only case where that bit will be set.
    */
   const unsigned prev_row_max_pages = (row_max_pages / 2) & ~2;
   int col_size_log2 = row - 1;
   col_size_log2 += (col_size_log2 < 0);

   const unsigned col = (pages - prev_row_max_pages +
                        ((1 << col_size_log2) - 1)) >> col_size_log2;

   /* Calculating the index based on the row and column. */
   const unsigned index = (row * 4) + (col - 1);

   return (index < bufmgr->num_buckets) ?
          &bufmgr->cache_bucket[index] : NULL;
}
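
/* Worked example of the bucket math above (illustrative values only):
 * a 17 KiB request rounds up to 5 pages.  clz((5 - 1) | 3) = clz(7) = 29,
 * so row = 30 - 29 = 1, row_max_pages = 8 and prev_row_max_pages = 4.
 * col_size_log2 = 0 and col = (5 - 4) >> 0 = 1, giving index = 1 * 4 + 0 = 4,
 * which init_cache_buckets() below fills with the 20 KiB (5 page) bucket.
 * Likewise a 70000 byte request is 18 pages and lands at index 12, the
 * 80 KiB bucket; bo_alloc_internal() then rounds the allocation up to that
 * bucket size.
 */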

int
crocus_bo_busy(struct crocus_bo *bo)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_busy busy = { .handle = bo->gem_handle };

   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
   if (ret == 0) {
      bo->idle = !busy.busy;
      return busy.busy;
   }
   return false;
}

int
crocus_bo_madvise(struct crocus_bo *bo, int state)
{
   struct drm_i915_gem_madvise madv = {
      .handle = bo->gem_handle,
      .madv = state,
      .retained = 1,
   };

   intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);

   return madv.retained;
}

static struct crocus_bo *
bo_calloc(void)
{
   struct crocus_bo *bo = calloc(1, sizeof(*bo));
   if (!bo)
      return NULL;

   list_inithead(&bo->exports);
   bo->hash = _mesa_hash_pointer(bo);
   return bo;
}

static struct crocus_bo *
alloc_bo_from_cache(struct crocus_bufmgr *bufmgr,
                    struct bo_cache_bucket *bucket,
                    uint32_t alignment,
                    unsigned flags)
{
   if (!bucket)
      return NULL;

   struct crocus_bo *bo = NULL;

   list_for_each_entry_safe(struct crocus_bo, cur, &bucket->head, head) {
      /* If the last BO in the cache is busy, there are no idle BOs.  Bail,
       * either falling back to a non-matching memzone, or if that fails,
       * allocating a fresh buffer.
       */
      if (crocus_bo_busy(cur))
         return NULL;

      list_del(&cur->head);

      /* Tell the kernel we need this BO.  If it still exists, we're done! */
      if (crocus_bo_madvise(cur, I915_MADV_WILLNEED)) {
         bo = cur;
         break;
      }

      /* This BO was purged, throw it out and keep looking. */
      bo_free(cur);
   }

   if (!bo)
      return NULL;

   /* Zero the contents if necessary.  If this fails, fall back to
    * allocating a fresh BO, which will always be zeroed by the kernel.
    */
   if (flags & BO_ALLOC_ZEROED) {
      void *map = crocus_bo_map(NULL, bo, MAP_WRITE | MAP_RAW);
      if (map) {
         memset(map, 0, bo->size);
      } else {
         bo_free(bo);
         return NULL;
      }
   }

   return bo;
}

static struct crocus_bo *
alloc_fresh_bo(struct crocus_bufmgr *bufmgr, uint64_t bo_size)
{
   struct crocus_bo *bo = bo_calloc();
   if (!bo)
      return NULL;

   struct drm_i915_gem_create create = { .size = bo_size };

   /* All new BOs we get from the kernel are zeroed, so we don't need to
    * worry about that here.
    */
   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CREATE, &create) != 0) {
      free(bo);
      return NULL;
   }

   bo->gem_handle = create.handle;
   bo->bufmgr = bufmgr;
   bo->size = bo_size;
   bo->idle = true;
   bo->tiling_mode = I915_TILING_NONE;
   bo->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
   bo->stride = 0;

   /* Calling set_domain() will allocate pages for the BO outside of the
    * struct mutex lock in the kernel, which is more efficient than waiting
    * to create them during the first execbuf that uses the BO.
    */
   struct drm_i915_gem_set_domain sd = {
      .handle = bo->gem_handle,
      .read_domains = I915_GEM_DOMAIN_CPU,
      .write_domain = 0,
   };

   if (intel_ioctl(bo->bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd) != 0) {
      bo_free(bo);
      return NULL;
   }

   return bo;
}

static struct crocus_bo *
bo_alloc_internal(struct crocus_bufmgr *bufmgr,
                  const char *name,
                  uint64_t size,
                  uint32_t alignment,
                  unsigned flags,
                  uint32_t tiling_mode,
                  uint32_t stride)
{
   struct crocus_bo *bo;
   unsigned int page_size = getpagesize();
   struct bo_cache_bucket *bucket = bucket_for_size(bufmgr, size);

   /* Round the size up to the bucket size, or if we don't have caching
    * at this size, a multiple of the page size.
    */
   uint64_t bo_size =
      bucket ? bucket->size : MAX2(ALIGN(size, page_size), page_size);

   simple_mtx_lock(&bufmgr->lock);

   /* Get a buffer out of the cache if available.  First, we try to find
    * one with a matching memory zone so we can avoid reallocating VMA.
    */
   bo = alloc_bo_from_cache(bufmgr, bucket, alignment, flags);

   simple_mtx_unlock(&bufmgr->lock);

   if (!bo) {
      bo = alloc_fresh_bo(bufmgr, bo_size);
      if (!bo)
         return NULL;
   }

   if (bo_set_tiling_internal(bo, tiling_mode, stride))
      goto err_free;

   bo->name = name;
   p_atomic_set(&bo->refcount, 1);
   bo->reusable = bucket && bufmgr->bo_reuse;
   bo->cache_coherent = bufmgr->has_llc;
   bo->index = -1;
   bo->kflags = 0;

   if ((flags & BO_ALLOC_COHERENT) && !bo->cache_coherent) {
      struct drm_i915_gem_caching arg = {
         .handle = bo->gem_handle,
         .caching = 1,
      };
      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_CACHING, &arg) == 0) {
         bo->cache_coherent = true;
         bo->reusable = false;
      }
   }

   DBG("bo_create: buf %d (%s) %llub\n", bo->gem_handle,
       bo->name, (unsigned long long) size);

   return bo;

err_free:
   bo_free(bo);
   return NULL;
}

struct crocus_bo *
crocus_bo_alloc(struct crocus_bufmgr *bufmgr,
                const char *name,
                uint64_t size)
{
   return bo_alloc_internal(bufmgr, name, size, 1,
                            0, I915_TILING_NONE, 0);
}

struct crocus_bo *
crocus_bo_alloc_tiled(struct crocus_bufmgr *bufmgr, const char *name,
                      uint64_t size, uint32_t alignment,
                      uint32_t tiling_mode, uint32_t pitch, unsigned flags)
{
   return bo_alloc_internal(bufmgr, name, size, alignment,
                            flags, tiling_mode, pitch);
}

struct crocus_bo *
crocus_bo_create_userptr(struct crocus_bufmgr *bufmgr, const char *name,
                         void *ptr, size_t size)
{
   struct crocus_bo *bo;

   bo = bo_calloc();
   if (!bo)
      return NULL;

   struct drm_i915_gem_userptr arg = {
      .user_ptr = (uintptr_t)ptr,
      .user_size = size,
   };
   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_USERPTR, &arg))
      goto err_free;
   bo->gem_handle = arg.handle;

   /* Check the buffer for validity before we try and use it in a batch */
   struct drm_i915_gem_set_domain sd = {
      .handle = bo->gem_handle,
      .read_domains = I915_GEM_DOMAIN_CPU,
   };
   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &sd))
      goto err_close;

   bo->name = name;
   bo->size = size;
   bo->map_cpu = ptr;

   bo->bufmgr = bufmgr;
   bo->kflags = 0;

   p_atomic_set(&bo->refcount, 1);
   bo->userptr = true;
   bo->cache_coherent = true;
   bo->index = -1;
   bo->idle = true;

   return bo;

err_close:
   intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &bo->gem_handle);
err_free:
   free(bo);
   return NULL;
}
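
/* Usage sketch for crocus_bo_create_userptr() (hypothetical caller code;
 * the i915 userptr interface expects both the pointer and the size to be
 * page-aligned):
 *
 *    void *mem = NULL;
 *    if (posix_memalign(&mem, 4096, 2 * 4096) == 0) {
 *       struct crocus_bo *bo =
 *          crocus_bo_create_userptr(bufmgr, "wrapped malloc", mem, 2 * 4096);
 *       // ... use bo; the caller retains ownership of mem and must keep it
 *       // alive for the lifetime of the BO.
 *    }
 */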

/**
 * Returns a crocus_bo wrapping the given buffer object handle.
 *
 * This can be used when one application needs to pass a buffer object
 * to another.
 */
struct crocus_bo *
crocus_bo_gem_create_from_name(struct crocus_bufmgr *bufmgr,
                               const char *name, unsigned int handle)
{
   struct crocus_bo *bo;

   /* At the moment most applications only have a few named BOs.
    * For instance, in a DRI client only the render buffers passed
    * between X and the client are named.  And since X returns the
    * alternating names for the front/back buffer a linear search
    * provides a sufficiently fast match.
    */
   simple_mtx_lock(&bufmgr->lock);
   bo = find_and_ref_external_bo(bufmgr->name_table, handle);
   if (bo)
      goto out;

   struct drm_gem_open open_arg = { .name = handle };
   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_OPEN, &open_arg);
   if (ret != 0) {
      DBG("Couldn't reference %s handle 0x%08x: %s\n",
          name, handle, strerror(errno));
      bo = NULL;
      goto out;
   }
   /* Now see if someone has used a prime handle to get this
    * object from the kernel before by looking through the list
    * again for a matching gem_handle
    */
   bo = find_and_ref_external_bo(bufmgr->handle_table, open_arg.handle);
   if (bo)
      goto out;

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   bo->size = open_arg.size;
   bo->gtt_offset = 0;
   bo->bufmgr = bufmgr;
   bo->gem_handle = open_arg.handle;
   bo->name = name;
   bo->global_name = handle;
   bo->reusable = false;
   bo->external = true;
   bo->kflags = 0;

   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);
   _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);

   struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
   ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
   if (ret != 0)
      goto err_unref;

   bo->tiling_mode = get_tiling.tiling_mode;
   bo->swizzle_mode = get_tiling.swizzle_mode;
   /* XXX stride is unknown */
   DBG("bo_create_from_handle: %d (%s)\n", handle, bo->name);

out:
   simple_mtx_unlock(&bufmgr->lock);
   return bo;

err_unref:
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}
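
/* Cross-process sharing sketch (hypothetical usage): the exporting process
 * publishes a global "flink" name and the importing process wraps it with
 * the function above.  crocus_bo_flink() is defined later in this file;
 * send_name_to_other_process() stands in for an app-defined transport.
 *
 *    // exporter
 *    uint32_t name;
 *    if (crocus_bo_flink(bo, &name) == 0)
 *       send_name_to_other_process(name);
 *
 *    // importer
 *    struct crocus_bo *shared =
 *       crocus_bo_gem_create_from_name(bufmgr, "shared", name);
 *
 * Flink names are global and guessable; dma-buf fds (crocus_bo_export_dmabuf()
 * / crocus_bo_import_dmabuf() below) are the preferred sharing mechanism.
 */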

static void
bo_close(struct crocus_bo *bo)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   if (bo->external) {
      struct hash_entry *entry;

      if (bo->global_name) {
         entry = _mesa_hash_table_search(bufmgr->name_table, &bo->global_name);
         _mesa_hash_table_remove(bufmgr->name_table, entry);
      }

      entry = _mesa_hash_table_search(bufmgr->handle_table, &bo->gem_handle);
      _mesa_hash_table_remove(bufmgr->handle_table, entry);

      list_for_each_entry_safe(struct bo_export, export, &bo->exports, link) {
         struct drm_gem_close close = { .handle = export->gem_handle };
         intel_ioctl(export->drm_fd, DRM_IOCTL_GEM_CLOSE, &close);

         list_del(&export->link);
         free(export);
      }
   } else {
      assert(list_is_empty(&bo->exports));
   }

   /* Close this object */
   struct drm_gem_close close = { .handle = bo->gem_handle };
   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_CLOSE, &close);
   if (ret != 0) {
      DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
          bo->gem_handle, bo->name, strerror(errno));
   }

   free(bo);
}

static void
bo_free(struct crocus_bo *bo)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   if (bo->map_cpu && !bo->userptr) {
      VG_NOACCESS(bo->map_cpu, bo->size);
      munmap(bo->map_cpu, bo->size);
   }
   if (bo->map_wc) {
      VG_NOACCESS(bo->map_wc, bo->size);
      munmap(bo->map_wc, bo->size);
   }
   if (bo->map_gtt) {
      VG_NOACCESS(bo->map_gtt, bo->size);
      munmap(bo->map_gtt, bo->size);
   }

   if (bo->idle) {
      bo_close(bo);
   } else {
      /* Defer closing the GEM BO and returning the VMA for reuse until the
       * BO is idle.  Just move it to the dead list for now.
       */
      list_addtail(&bo->head, &bufmgr->zombie_list);
   }
}

/** Frees all cached buffers significantly older than @time. */
static void
cleanup_bo_cache(struct crocus_bufmgr *bufmgr, time_t time)
{
   int i;

   if (bufmgr->time == time)
      return;

   for (i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct crocus_bo, bo, &bucket->head, head) {
         if (time - bo->free_time <= 1)
            break;

         list_del(&bo->head);

         bo_free(bo);
      }
   }

   list_for_each_entry_safe(struct crocus_bo, bo, &bufmgr->zombie_list, head) {
      /* Stop once we reach a busy BO - all others past this point were
       * freed more recently so are likely also busy.
       */
      if (!bo->idle && crocus_bo_busy(bo))
         break;

      list_del(&bo->head);
      bo_close(bo);
   }

   bufmgr->time = time;
}

static void
bo_unreference_final(struct crocus_bo *bo, time_t time)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;
   struct bo_cache_bucket *bucket;

   DBG("bo_unreference final: %d (%s)\n", bo->gem_handle, bo->name);

   bucket = NULL;
   if (bo->reusable)
      bucket = bucket_for_size(bufmgr, bo->size);
   /* Put the buffer into our internal cache for reuse if we can. */
   if (bucket && crocus_bo_madvise(bo, I915_MADV_DONTNEED)) {
      bo->free_time = time;
      bo->name = NULL;

      list_addtail(&bo->head, &bucket->head);
   } else {
      bo_free(bo);
   }
}

void
__crocus_bo_unreference(struct crocus_bo *bo)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;
   struct timespec time;

   clock_gettime(CLOCK_MONOTONIC, &time);

   simple_mtx_lock(&bufmgr->lock);

   if (p_atomic_dec_zero(&bo->refcount)) {
      bo_unreference_final(bo, time.tv_sec);
      cleanup_bo_cache(bufmgr, time.tv_sec);
   }

   simple_mtx_unlock(&bufmgr->lock);
}

static void
bo_wait_with_stall_warning(struct pipe_debug_callback *dbg,
                           struct crocus_bo *bo,
                           const char *action)
{
   bool busy = dbg && !bo->idle;
   double elapsed = unlikely(busy) ? -get_time() : 0.0;

   crocus_bo_wait_rendering(bo);

   if (unlikely(busy)) {
      elapsed += get_time();
      if (elapsed > 1e-5) /* 0.01ms */ {
         perf_debug(dbg, "%s a busy \"%s\" BO stalled and took %.03f ms.\n",
                    action, bo->name, elapsed * 1000);
      }
   }
}

static void
print_flags(unsigned flags)
{
   if (flags & MAP_READ)
      DBG("READ ");
   if (flags & MAP_WRITE)
      DBG("WRITE ");
   if (flags & MAP_ASYNC)
      DBG("ASYNC ");
   if (flags & MAP_PERSISTENT)
      DBG("PERSISTENT ");
   if (flags & MAP_COHERENT)
      DBG("COHERENT ");
   if (flags & MAP_RAW)
      DBG("RAW ");
   DBG("\n");
}

static void *
crocus_bo_gem_mmap_legacy(struct pipe_debug_callback *dbg,
                          struct crocus_bo *bo, bool wc)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   struct drm_i915_gem_mmap mmap_arg = {
      .handle = bo->gem_handle,
      .size = bo->size,
      .flags = wc ? I915_MMAP_WC : 0,
   };

   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP, &mmap_arg);
   if (ret != 0) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }
   void *map = (void *) (uintptr_t) mmap_arg.addr_ptr;

   return map;
}

static void *
crocus_bo_gem_mmap_offset(struct pipe_debug_callback *dbg, struct crocus_bo *bo,
                          bool wc)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   struct drm_i915_gem_mmap_offset mmap_arg = {
      .handle = bo->gem_handle,
      .flags = wc ? I915_MMAP_OFFSET_WC : I915_MMAP_OFFSET_WB,
   };

   /* Get the fake offset back */
   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_OFFSET, &mmap_arg);
   if (ret != 0) {
      DBG("%s:%d: Error preparing buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   /* And map it */
   void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
                    bufmgr->fd, mmap_arg.offset);
   if (map == MAP_FAILED) {
      DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
          __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
      return NULL;
   }

   return map;
}

static void *
crocus_bo_gem_mmap(struct pipe_debug_callback *dbg, struct crocus_bo *bo, bool wc)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   if (bufmgr->has_mmap_offset)
      return crocus_bo_gem_mmap_offset(dbg, bo, wc);
   else
      return crocus_bo_gem_mmap_legacy(dbg, bo, wc);
}
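
/* The mapping helpers below (CPU, WC and GTT) each cache their mapping on
 * the BO, so repeated crocus_bo_map() calls are cheap.  If two threads race
 * to create the first mapping, p_atomic_cmpxchg() lets exactly one of them
 * install its pointer; the loser munmaps its redundant mapping and uses the
 * winner's instead.  The cached mappings are only torn down in bo_free().
 */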

static void *
crocus_bo_map_cpu(struct pipe_debug_callback *dbg,
                  struct crocus_bo *bo, unsigned flags)
{
   /* We disallow CPU maps for writing to non-coherent buffers, as the
    * CPU map can become invalidated when a batch is flushed out, which
    * can happen at unpredictable times.  You should use WC maps instead.
    */
   assert(bo->cache_coherent || !(flags & MAP_WRITE));

   if (!bo->map_cpu) {
      DBG("crocus_bo_map_cpu: %d (%s)\n", bo->gem_handle, bo->name);

      void *map = crocus_bo_gem_mmap(dbg, bo, false);
      if (!map) {
         return NULL;
      }

      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_cpu, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         munmap(map, bo->size);
      }
   }
   assert(bo->map_cpu);

   DBG("crocus_bo_map_cpu: %d (%s) -> %p, ", bo->gem_handle, bo->name,
       bo->map_cpu);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(dbg, bo, "CPU mapping");
   }

   if (!bo->cache_coherent && !bo->bufmgr->has_llc) {
      /* If we're reusing an existing CPU mapping, the CPU caches may
       * contain stale data from the last time we read from that mapping.
       * (With the BO cache, it might even be data from a previous buffer!)
       * Even if it's a brand new mapping, the kernel may have zeroed the
       * buffer via CPU writes.
       *
       * We need to invalidate those cachelines so that we see the latest
       * contents, and so long as we only read from the CPU mmap we do not
       * need to write those cachelines back afterwards.
       *
       * On LLC, the empirical evidence suggests that writes from the GPU
       * that bypass the LLC (i.e. for scanout) do *invalidate* the CPU
       * cachelines.  (Other reads, such as the display engine, bypass the
       * LLC entirely requiring us to keep dirty pixels for the scanout
       * out of any cache.)
       */
      intel_invalidate_range(bo->map_cpu, bo->size);
   }

   return bo->map_cpu;
}

static void *
crocus_bo_map_wc(struct pipe_debug_callback *dbg,
                 struct crocus_bo *bo, unsigned flags)
{
   if (!bo->map_wc) {
      DBG("crocus_bo_map_wc: %d (%s)\n", bo->gem_handle, bo->name);

      void *map = crocus_bo_gem_mmap(dbg, bo, true);
      if (!map) {
         return NULL;
      }

      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_wc, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         munmap(map, bo->size);
      }
   }
   assert(bo->map_wc);

   DBG("crocus_bo_map_wc: %d (%s) -> %p\n", bo->gem_handle, bo->name, bo->map_wc);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(dbg, bo, "WC mapping");
   }

   return bo->map_wc;
}

/**
 * Perform an uncached mapping via the GTT.
 *
 * Write access through the GTT is not quite fully coherent.  On low power
 * systems especially, like modern Atoms, we can observe reads from RAM before
 * the write via GTT has landed.  A write memory barrier that flushes the Write
 * Combining Buffer (i.e. sfence/mfence) is not sufficient to order the later
 * read after the write as the GTT write suffers a small delay through the GTT
 * indirection.  The kernel uses an uncached mmio read to ensure the GTT write
 * is ordered with reads (either by the GPU, WB or WC) and unconditionally
 * flushes prior to execbuf submission.  However, if we are not informing the
 * kernel about our GTT writes, it will not flush before earlier access, such
 * as when using the cmdparser.  Similarly, we need to be careful if we should
 * ever issue a CPU read immediately following a GTT write.
 *
 * Telling the kernel about write access also has one more important
 * side-effect.  Upon receiving notification about the write, it cancels any
 * scanout buffering for FBC/PSR and friends.  Later FBC/PSR is then flushed by
 * either SW_FINISH or DIRTYFB.  The presumption is that we never write to the
 * actual scanout via a mmaping, only to a backbuffer and so all the FBC/PSR
 * tracking is handled on the buffer exchange instead.
 */
static void *
crocus_bo_map_gtt(struct pipe_debug_callback *dbg,
                  struct crocus_bo *bo, unsigned flags)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   /* If we don't support get/set_tiling, there's no support for GTT mapping
    * either (it won't do any de-tiling for us).
    */
   assert(bufmgr->has_tiling_uapi);

   /* Get a mapping of the buffer if we haven't before. */
   if (bo->map_gtt == NULL) {
      DBG("bo_map_gtt: mmap %d (%s)\n", bo->gem_handle, bo->name);

      struct drm_i915_gem_mmap_gtt mmap_arg = { .handle = bo->gem_handle };

      /* Get the fake offset back... */
      int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg);
      if (ret != 0) {
         DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      /* and mmap it. */
      void *map = mmap(0, bo->size, PROT_READ | PROT_WRITE,
                       MAP_SHARED, bufmgr->fd, mmap_arg.offset);
      if (map == MAP_FAILED) {
         DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
             __FILE__, __LINE__, bo->gem_handle, bo->name, strerror(errno));
         return NULL;
      }

      /* We don't need to use VALGRIND_MALLOCLIKE_BLOCK because Valgrind will
       * already intercept this mmap call.  However, for consistency between
       * all the mmap paths, we mark the pointer as defined now and mark it
       * as inaccessible afterwards.
       */
      VG_DEFINED(map, bo->size);

      if (p_atomic_cmpxchg(&bo->map_gtt, NULL, map)) {
         VG_NOACCESS(map, bo->size);
         munmap(map, bo->size);
      }
   }
   assert(bo->map_gtt);

   DBG("bo_map_gtt: %d (%s) -> %p, ", bo->gem_handle, bo->name, bo->map_gtt);
   print_flags(flags);

   if (!(flags & MAP_ASYNC)) {
      bo_wait_with_stall_warning(dbg, bo, "GTT mapping");
   }

   return bo->map_gtt;
}

static bool
can_map_cpu(struct crocus_bo *bo, unsigned flags)
{
   if (bo->cache_coherent)
      return true;

   /* Even if the buffer itself is not cache-coherent (such as a scanout), on
    * an LLC platform reads always are coherent (as they are performed via the
    * central system agent).  It is just the writes that we need to take
    * special care to ensure that they land in main memory and do not stick
    * in the CPU cache.
    */
   if (!(flags & MAP_WRITE) && bo->bufmgr->has_llc)
      return true;

   /* If PERSISTENT or COHERENT are set, the mmapping needs to remain valid
    * across batch flushes where the kernel will change cache domains of the
    * bo, invalidating continued access to the CPU mmap on non-LLC devices.
    *
    * Similarly, ASYNC typically means that the buffer will be accessed via
    * both the CPU and the GPU simultaneously.  Batches may be executed that
    * use the BO even while it is mapped.  While OpenGL technically disallows
    * most drawing while non-persistent mappings are active, we may still use
    * the GPU for blits or other operations, causing batches to happen at
    * inconvenient times.
    *
    * If RAW is set, we expect the caller to be able to handle a WC buffer
    * more efficiently than the involuntary clflushes.
    */
   if (flags & (MAP_PERSISTENT | MAP_COHERENT | MAP_ASYNC | MAP_RAW))
      return false;

   return !(flags & MAP_WRITE);
}
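
/* A few concrete outcomes of the policy above (sketch):
 *  - MAP_READ of any BO on an LLC platform: CPU map, even for scanout.
 *  - MAP_WRITE of a cache-coherent BO: CPU map.
 *  - MAP_WRITE | MAP_PERSISTENT of a non-coherent BO: WC map, because the
 *    CPU mapping could be invalidated by a cache-domain change at the next
 *    batch flush.
 *  - MAP_RAW of a non-coherent BO: WC map, trusting the caller to cope.
 * Tiled BOs without MAP_RAW never reach this point; crocus_bo_map() below
 * sends them straight to the GTT path.
 */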

void *
crocus_bo_map(struct pipe_debug_callback *dbg,
              struct crocus_bo *bo, unsigned flags)
{
   if (bo->tiling_mode != I915_TILING_NONE && !(flags & MAP_RAW))
      return crocus_bo_map_gtt(dbg, bo, flags);

   void *map;

   if (can_map_cpu(bo, flags))
      map = crocus_bo_map_cpu(dbg, bo, flags);
   else
      map = crocus_bo_map_wc(dbg, bo, flags);

   /* Allow the attempt to fail by falling back to the GTT where necessary.
    *
    * Not every buffer can be mmaped directly using the CPU (or WC), for
    * example buffers that wrap stolen memory or are imported from other
    * devices.  For those, we have little choice but to use a GTT mmapping.
    * However, if we use a slow GTT mmapping for reads where we expected fast
    * access, that order of magnitude difference in throughput will be clearly
    * expressed by angry users.
    *
    * We skip MAP_RAW because we want to avoid map_gtt's fence detiling.
    */
   if (!map && !(flags & MAP_RAW)) {
      perf_debug(dbg, "Fallback GTT mapping for %s with access flags %x\n",
                 bo->name, flags);
      map = crocus_bo_map_gtt(dbg, bo, flags);
   }

   return map;
}
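
/* Typical usage of crocus_bo_map() (sketch; assumes a BO obtained from
 * crocus_bo_alloc() and "data"/"size" provided by the caller):
 *
 *    void *p = crocus_bo_map(NULL, bo, MAP_WRITE);
 *    if (p)
 *       memcpy(p, data, size);
 *
 * There is deliberately no crocus_bo_unmap(): mappings are cached on the BO
 * and released when the BO itself is freed.
 */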

/** Waits for all GPU rendering with the object to have completed. */
void
crocus_bo_wait_rendering(struct crocus_bo *bo)
{
   /* We require a kernel recent enough for WAIT_IOCTL support.
    * See intel_init_bufmgr()
    */
   crocus_bo_wait(bo, -1);
}

/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If the value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time.  Otherwise some negative
 * return value describes the error.  Of particular interest is -ETIME when
 * the wait has failed to yield the desired result.
 *
 * Similar to crocus_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time.  Another subtle
 * difference is the internal locking semantics are different (this variant
 * does not hold the lock for the duration of the wait).  This makes the wait
 * subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call.  The wait will
 * not guarantee that the buffer is re-issued via another thread, or a flinked
 * handle.  Userspace must make sure this race does not occur if such precision
 * is important.
 *
 * Note that some kernels have broken the infinite wait for negative values
 * promise; upgrade to the latest stable kernels if this is the case.
 */
int
crocus_bo_wait(struct crocus_bo *bo, int64_t timeout_ns)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   /* If we know it's idle, don't bother with the kernel round trip */
   if (bo->idle && !bo->external)
      return 0;

   struct drm_i915_gem_wait wait = {
      .bo_handle = bo->gem_handle,
      .timeout_ns = timeout_ns,
   };
   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
   if (ret != 0)
      return -errno;

   bo->idle = true;

   return ret;
}
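
/* Example of a bounded wait (sketch): give the GPU one millisecond to finish
 * with the BO, and go do other work on -ETIME.
 *
 *    int ret = crocus_bo_wait(bo, 1000000);
 *    if (ret == -ETIME) {
 *       // still busy; do something else and retry later
 *    }
 *
 * Passing a negative timeout_ns requests an infinite wait, which is what
 * crocus_bo_wait_rendering() above does.
 */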

static void
crocus_bufmgr_destroy(struct crocus_bufmgr *bufmgr)
{
   simple_mtx_destroy(&bufmgr->lock);

   /* Free any cached buffer objects we were going to reuse */
   for (int i = 0; i < bufmgr->num_buckets; i++) {
      struct bo_cache_bucket *bucket = &bufmgr->cache_bucket[i];

      list_for_each_entry_safe(struct crocus_bo, bo, &bucket->head, head) {
         list_del(&bo->head);

         bo_free(bo);
      }
   }

   /* Close any buffer objects on the dead list. */
   list_for_each_entry_safe(struct crocus_bo, bo, &bufmgr->zombie_list, head) {
      list_del(&bo->head);
      bo_close(bo);
   }

   _mesa_hash_table_destroy(bufmgr->name_table, NULL);
   _mesa_hash_table_destroy(bufmgr->handle_table, NULL);

   close(bufmgr->fd);

   free(bufmgr);
}

static int
bo_set_tiling_internal(struct crocus_bo *bo, uint32_t tiling_mode,
                       uint32_t stride)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;
   struct drm_i915_gem_set_tiling set_tiling;
   int ret;

   if (bo->global_name == 0 &&
       tiling_mode == bo->tiling_mode && stride == bo->stride)
      return 0;

   memset(&set_tiling, 0, sizeof(set_tiling));
   do {
      /* set_tiling is slightly broken and overwrites the
       * input on the error path, so we have to open code
       * drm_ioctl.
       */
      set_tiling.handle = bo->gem_handle;
      set_tiling.tiling_mode = tiling_mode;
      set_tiling.stride = stride;

      ret = ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling);
   } while (ret == -1 && (errno == EINTR || errno == EAGAIN));
   if (ret == -1)
      return -errno;

   bo->tiling_mode = set_tiling.tiling_mode;
   bo->swizzle_mode = set_tiling.swizzle_mode;
   bo->stride = set_tiling.stride;
   return 0;
}

int
crocus_bo_get_tiling(struct crocus_bo *bo, uint32_t *tiling_mode,
                     uint32_t *swizzle_mode)
{
   *tiling_mode = bo->tiling_mode;
   *swizzle_mode = bo->swizzle_mode;
   return 0;
}

struct crocus_bo *
crocus_bo_import_dmabuf(struct crocus_bufmgr *bufmgr, int prime_fd,
                        uint64_t modifier)
{
   uint32_t handle;
   struct crocus_bo *bo;

   simple_mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
          strerror(errno));
      simple_mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us.  Just as
    * for named buffers, we must not create two bo's pointing at the same
    * kernel object
    */
   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
   if (bo)
      goto out;

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine size of bo.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  If we have kernel 3.12 or
    * later, we can lseek on the prime fd to get the size.  Older
    * kernels will just fail, in which case we fall back to the
    * provided size (an estimate or a guess). */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;
   bo->name = "prime";
   bo->reusable = false;
   bo->external = true;
   bo->kflags = 0;
   bo->gem_handle = handle;
   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

   const struct isl_drm_modifier_info *mod_info =
      isl_drm_modifier_get_info(modifier);
   if (mod_info) {
      bo->tiling_mode = isl_tiling_to_i915_tiling(mod_info->tiling);
   } else if (bufmgr->has_tiling_uapi) {
      struct drm_i915_gem_get_tiling get_tiling = { .handle = bo->gem_handle };
      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling))
         goto err;

      bo->tiling_mode = get_tiling.tiling_mode;
   } else {
      bo->tiling_mode = I915_TILING_NONE;
   }

out:
   simple_mtx_unlock(&bufmgr->lock);
   return bo;

err:
   bo_free(bo);
   simple_mtx_unlock(&bufmgr->lock);
   return NULL;
}

struct crocus_bo *
crocus_bo_import_dmabuf_no_mods(struct crocus_bufmgr *bufmgr,
                                int prime_fd)
{
   uint32_t handle;
   struct crocus_bo *bo;

   simple_mtx_lock(&bufmgr->lock);
   int ret = drmPrimeFDToHandle(bufmgr->fd, prime_fd, &handle);
   if (ret) {
      DBG("import_dmabuf: failed to obtain handle from fd: %s\n",
          strerror(errno));
      simple_mtx_unlock(&bufmgr->lock);
      return NULL;
   }

   /*
    * See if the kernel has already returned this buffer to us.  Just as
    * for named buffers, we must not create two bo's pointing at the same
    * kernel object
    */
   bo = find_and_ref_external_bo(bufmgr->handle_table, handle);
   if (bo)
      goto out;

   bo = bo_calloc();
   if (!bo)
      goto out;

   p_atomic_set(&bo->refcount, 1);

   /* Determine size of bo.  The fd-to-handle ioctl really should
    * return the size, but it doesn't.  If we have kernel 3.12 or
    * later, we can lseek on the prime fd to get the size.  Older
    * kernels will just fail, in which case we fall back to the
    * provided size (an estimate or a guess). */
   ret = lseek(prime_fd, 0, SEEK_END);
   if (ret != -1)
      bo->size = ret;

   bo->bufmgr = bufmgr;
   bo->name = "prime";
   bo->reusable = false;
   bo->external = true;
   bo->kflags = 0;
   bo->gem_handle = handle;
   _mesa_hash_table_insert(bufmgr->handle_table, &bo->gem_handle, bo);

out:
   simple_mtx_unlock(&bufmgr->lock);
   return bo;
}

static void
crocus_bo_make_external_locked(struct crocus_bo *bo)
{
   if (!bo->external) {
      _mesa_hash_table_insert(bo->bufmgr->handle_table, &bo->gem_handle, bo);
      bo->external = true;
      bo->reusable = false;
   }
}

static void
crocus_bo_make_external(struct crocus_bo *bo)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   if (bo->external) {
      assert(!bo->reusable);
      return;
   }

   simple_mtx_lock(&bufmgr->lock);
   crocus_bo_make_external_locked(bo);
   simple_mtx_unlock(&bufmgr->lock);
}

int
crocus_bo_export_dmabuf(struct crocus_bo *bo, int *prime_fd)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   crocus_bo_make_external(bo);

   if (drmPrimeHandleToFD(bufmgr->fd, bo->gem_handle,
                          DRM_CLOEXEC, prime_fd) != 0)
      return -errno;

   return 0;
}
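
/* dma-buf sharing sketch (hypothetical usage): export a BO as a prime fd,
 * hand the fd to another process or API, and import it there.  The name
 * "other_bufmgr" is just a placeholder for the importing side's bufmgr.
 *
 *    int fd = -1;
 *    if (crocus_bo_export_dmabuf(bo, &fd) == 0) {
 *       // pass fd over a unix socket, EGL, etc. (app-defined)
 *       struct crocus_bo *imported =
 *          crocus_bo_import_dmabuf_no_mods(other_bufmgr, fd);
 *       close(fd);   // the GEM handle keeps the buffer alive
 *    }
 *
 * Both sides end up referencing the same kernel object; the import path
 * also guarantees a single crocus_bo per GEM handle via
 * find_and_ref_external_bo().
 */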

uint32_t
crocus_bo_export_gem_handle(struct crocus_bo *bo)
{
   crocus_bo_make_external(bo);

   return bo->gem_handle;
}

int
crocus_bo_flink(struct crocus_bo *bo, uint32_t *name)
{
   struct crocus_bufmgr *bufmgr = bo->bufmgr;

   if (!bo->global_name) {
      struct drm_gem_flink flink = { .handle = bo->gem_handle };

      if (intel_ioctl(bufmgr->fd, DRM_IOCTL_GEM_FLINK, &flink))
         return -errno;

      simple_mtx_lock(&bufmgr->lock);
      if (!bo->global_name) {
         crocus_bo_make_external_locked(bo);
         bo->global_name = flink.name;
         _mesa_hash_table_insert(bufmgr->name_table, &bo->global_name, bo);
      }
      simple_mtx_unlock(&bufmgr->lock);
   }

   *name = bo->global_name;
   return 0;
}

int
crocus_bo_export_gem_handle_for_device(struct crocus_bo *bo, int drm_fd,
                                       uint32_t *out_handle)
{
   /* Only add the new GEM handle to the list of exports if it belongs to a
    * different GEM device.  Otherwise we might close the same buffer multiple
    * times.
    */
   struct crocus_bufmgr *bufmgr = bo->bufmgr;
   int ret = os_same_file_description(drm_fd, bufmgr->fd);
   WARN_ONCE(ret < 0,
             "Kernel has no file descriptor comparison support: %s\n",
             strerror(errno));
   if (ret == 0) {
      *out_handle = crocus_bo_export_gem_handle(bo);
      return 0;
   }

   struct bo_export *export = calloc(1, sizeof(*export));
   if (!export)
      return -ENOMEM;

   export->drm_fd = drm_fd;

   int dmabuf_fd = -1;
   int err = crocus_bo_export_dmabuf(bo, &dmabuf_fd);
   if (err) {
      free(export);
      return err;
   }

   simple_mtx_lock(&bufmgr->lock);
   err = drmPrimeFDToHandle(drm_fd, dmabuf_fd, &export->gem_handle);
   close(dmabuf_fd);
   if (err) {
      simple_mtx_unlock(&bufmgr->lock);
      free(export);
      return err;
   }

   bool found = false;
   list_for_each_entry(struct bo_export, iter, &bo->exports, link) {
      if (iter->drm_fd != drm_fd)
         continue;
      /* Here we assume that for a given DRM fd, we'll always get back the
       * same GEM handle for a given buffer.
       */
      assert(iter->gem_handle == export->gem_handle);
      free(export);
      export = iter;
      found = true;
      break;
   }
   if (!found)
      list_addtail(&export->link, &bo->exports);

   simple_mtx_unlock(&bufmgr->lock);

   *out_handle = export->gem_handle;

   return 0;
}

static void
add_bucket(struct crocus_bufmgr *bufmgr, int size)
{
   unsigned int i = bufmgr->num_buckets;

   assert(i < ARRAY_SIZE(bufmgr->cache_bucket));

   list_inithead(&bufmgr->cache_bucket[i].head);
   bufmgr->cache_bucket[i].size = size;
   bufmgr->num_buckets++;

   assert(bucket_for_size(bufmgr, size) == &bufmgr->cache_bucket[i]);
   assert(bucket_for_size(bufmgr, size - 2048) == &bufmgr->cache_bucket[i]);
   assert(bucket_for_size(bufmgr, size + 1) != &bufmgr->cache_bucket[i]);
}

static void
init_cache_buckets(struct crocus_bufmgr *bufmgr)
{
   uint64_t size, cache_max_size = 64 * 1024 * 1024;

   /* OK, so power of two buckets was too wasteful of memory.
    * Give 3 other sizes between each power of two, to hopefully
    * cover things accurately enough.  (The alternative is
    * probably to just go for exact matching of sizes, and assume
    * that for things like composited window resize the tiled
    * width/height alignment and rounding of sizes to pages will
    * get us useful cache hit rates anyway)
    */
   add_bucket(bufmgr, PAGE_SIZE);
   add_bucket(bufmgr, PAGE_SIZE * 2);
   add_bucket(bufmgr, PAGE_SIZE * 3);

   /* Initialize the linked lists for BO reuse cache. */
   for (size = 4 * PAGE_SIZE; size <= cache_max_size; size *= 2) {
      add_bucket(bufmgr, size);

      add_bucket(bufmgr, size + size * 1 / 4);
      add_bucket(bufmgr, size + size * 2 / 4);
      add_bucket(bufmgr, size + size * 3 / 4);
   }
}
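
/* With the 4 KiB PAGE_SIZE above, the bucket sizes work out to (in KiB):
 *
 *    4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, ...
 *
 * i.e. every page count up to 4 pages, then four evenly spaced steps per
 * power of two up to the 64 MiB cache_max_size (whose row also contributes
 * the 80/96/112 MiB buckets), 55 buckets in total.  This is the same layout
 * that bucket_for_size() indexes in O(1).
 */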

uint32_t
crocus_create_hw_context(struct crocus_bufmgr *bufmgr)
{
   struct drm_i915_gem_context_create create = { };
   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
   if (ret != 0) {
      DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", strerror(errno));
      return 0;
   }

   /* Upon declaring a GPU hang, the kernel will zap the guilty context
    * back to the default logical HW state and attempt to continue on to
    * our next submitted batchbuffer.  However, our render batches assume
    * the previous GPU state is preserved, and only emit commands needed
    * to incrementally change that state.  In particular, we inherit the
    * STATE_BASE_ADDRESS and PIPELINE_SELECT settings, which are critical.
    * With default base addresses, our next batches will almost certainly
    * cause more GPU hangs, leading to repeated hangs until we're banned
    * or the machine is dead.
    *
    * Here we tell the kernel not to attempt to recover our context but
    * immediately (on the next batchbuffer submission) report that the
    * context is lost, and we will do the recovery ourselves.  Ideally,
    * we'll have two lost batches instead of a continual stream of hangs.
    */
   struct drm_i915_gem_context_param p = {
      .ctx_id = create.ctx_id,
      .param = I915_CONTEXT_PARAM_RECOVERABLE,
      .value = false,
   };
   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p);

   return create.ctx_id;
}

static int
crocus_hw_context_get_priority(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
{
   struct drm_i915_gem_context_param p = {
      .ctx_id = ctx_id,
      .param = I915_CONTEXT_PARAM_PRIORITY,
   };
   drmIoctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_GETPARAM, &p);
   return p.value; /* on error, return 0 i.e. default priority */
}

int
crocus_hw_context_set_priority(struct crocus_bufmgr *bufmgr,
                               uint32_t ctx_id,
                               int priority)
{
   struct drm_i915_gem_context_param p = {
      .ctx_id = ctx_id,
      .param = I915_CONTEXT_PARAM_PRIORITY,
      .value = priority,
   };
   int err;

   err = 0;
   if (intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_SETPARAM, &p))
      err = -errno;

   return err;
}

uint32_t
crocus_clone_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
{
   uint32_t new_ctx = crocus_create_hw_context(bufmgr);

   if (new_ctx) {
      int priority = crocus_hw_context_get_priority(bufmgr, ctx_id);
      crocus_hw_context_set_priority(bufmgr, new_ctx, priority);
   }

   return new_ctx;
}
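
/* Context setup sketch (hypothetical values): create a context and raise
 * its priority, e.g. for a compositor.  I915_CONTEXT_DEFAULT_PRIORITY is 0;
 * the kernel clamps requests to the MIN/MAX_USER_PRIORITY range and
 * typically requires CAP_SYS_NICE to go above the default.
 *
 *    uint32_t ctx = crocus_create_hw_context(bufmgr);
 *    if (ctx)
 *       crocus_hw_context_set_priority(bufmgr, ctx,
 *                                      I915_CONTEXT_MAX_USER_PRIORITY);
 */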

void
crocus_destroy_hw_context(struct crocus_bufmgr *bufmgr, uint32_t ctx_id)
{
   struct drm_i915_gem_context_destroy d = { .ctx_id = ctx_id };

   if (ctx_id != 0 &&
       intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, &d) != 0) {
      fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
              strerror(errno));
   }
}

int
crocus_reg_read(struct crocus_bufmgr *bufmgr, uint32_t offset, uint64_t *result)
{
   struct drm_i915_reg_read reg_read = { .offset = offset };
   int ret = intel_ioctl(bufmgr->fd, DRM_IOCTL_I915_REG_READ, &reg_read);

   *result = reg_read.val;
   return ret;
}

static int
gem_param(int fd, int name)
{
   int v = -1; /* No param uses (yet) the sign bit, reserve it for errors */

   struct drm_i915_getparam gp = { .param = name, .value = &v };
   if (intel_ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp))
      return -1;

   return v;
}

/**
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
 * and manage buffer objects.
 *
 * \param fd File descriptor of the opened DRM device.
 */
static struct crocus_bufmgr *
crocus_bufmgr_create(struct intel_device_info *devinfo, int fd, bool bo_reuse)
{
   struct crocus_bufmgr *bufmgr = calloc(1, sizeof(*bufmgr));
   if (bufmgr == NULL)
      return NULL;

   /* Handles to buffer objects belong to the device fd and are not
    * reference counted by the kernel.  If the same fd is used by
    * multiple parties (threads sharing the same screen bufmgr, or
    * even worse the same device fd passed to multiple libraries)
    * ownership of those handles is shared by those independent parties.
    *
    * Don't do this!  Ensure that each library/bufmgr has its own device
    * fd so that its namespace does not clash with another.
    */
   bufmgr->fd = os_dupfd_cloexec(fd);

   p_atomic_set(&bufmgr->refcount, 1);

   simple_mtx_init(&bufmgr->lock, mtx_plain);

   list_inithead(&bufmgr->zombie_list);

   bufmgr->has_llc = devinfo->has_llc;
   bufmgr->has_tiling_uapi = devinfo->has_tiling_uapi;
   bufmgr->bo_reuse = bo_reuse;
   bufmgr->has_mmap_offset = gem_param(fd, I915_PARAM_MMAP_GTT_VERSION) >= 4;

   init_cache_buckets(bufmgr);

   bufmgr->name_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);
   bufmgr->handle_table =
      _mesa_hash_table_create(NULL, key_hash_uint, key_uint_equal);

   return bufmgr;
}

static struct crocus_bufmgr *
crocus_bufmgr_ref(struct crocus_bufmgr *bufmgr)
{
   p_atomic_inc(&bufmgr->refcount);
   return bufmgr;
}

void
crocus_bufmgr_unref(struct crocus_bufmgr *bufmgr)
{
   simple_mtx_lock(&global_bufmgr_list_mutex);
   if (p_atomic_dec_zero(&bufmgr->refcount)) {
      list_del(&bufmgr->link);
      crocus_bufmgr_destroy(bufmgr);
   }
   simple_mtx_unlock(&global_bufmgr_list_mutex);
}

/**
 * Gets an already existing GEM buffer manager or creates a new one.
 *
 * \param fd File descriptor of the opened DRM device.
 */
struct crocus_bufmgr *
crocus_bufmgr_get_for_fd(struct intel_device_info *devinfo, int fd, bool bo_reuse)
{
   struct stat st;

   if (fstat(fd, &st))
      return NULL;

   struct crocus_bufmgr *bufmgr = NULL;

   simple_mtx_lock(&global_bufmgr_list_mutex);
   list_for_each_entry(struct crocus_bufmgr, iter_bufmgr, &global_bufmgr_list, link) {
      struct stat iter_st;
      if (fstat(iter_bufmgr->fd, &iter_st))
         continue;

      if (st.st_rdev == iter_st.st_rdev) {
         assert(iter_bufmgr->bo_reuse == bo_reuse);
         bufmgr = crocus_bufmgr_ref(iter_bufmgr);
         goto unlock;
      }
   }

   bufmgr = crocus_bufmgr_create(devinfo, fd, bo_reuse);
   if (bufmgr)
      list_addtail(&bufmgr->link, &global_bufmgr_list);

 unlock:
   simple_mtx_unlock(&global_bufmgr_list_mutex);

   return bufmgr;
}

int
crocus_bufmgr_get_fd(struct crocus_bufmgr *bufmgr)
{
   return bufmgr->fd;
}
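
/* End-to-end lifecycle sketch (hypothetical caller, error handling omitted):
 *
 *    struct crocus_bufmgr *mgr =
 *       crocus_bufmgr_get_for_fd(devinfo, drm_fd, true);
 *    struct crocus_bo *bo = crocus_bo_alloc(mgr, "scratch", 64 * 1024);
 *    void *p = crocus_bo_map(NULL, bo, MAP_WRITE);
 *    // ... fill p, submit batches referencing bo ...
 *    crocus_bo_unreference(bo);
 *    crocus_bufmgr_unref(mgr);
 *
 * crocus_bo_unreference() is assumed to be the inline wrapper declared in
 * crocus_bufmgr.h around __crocus_bo_unreference() above.
 */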