intel_bufmgr_gem.c revision fe517fc9
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_macros.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define memclear(s) memset(&s, 0, sizeof(s))

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define MAX2(A, B) ((A) > (B) ? (A) : (B))

/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity. Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32-bits.
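 *
 * Illustrative example (not part of the original comment): for
 * n = 0x0000000123456789ULL, upper_32_bits(n) evaluates to 0x1 and
 * lower_32_bits(n) to 0x23456789.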
 */
#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))

/**
 * lower_32_bits - return bits 0-31 of a number
 * @n: the number we're accessing
 */
#define lower_32_bits(n) ((__u32)(n))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	atomic_t refcount;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;
	time_t time;

	drmMMListHead managers;

	drmMMListHead named;
	drmMMListHead vma_cache;
	int vma_count, vma_open, vma_max;

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	unsigned int has_bsd : 1;
	unsigned int has_blt : 1;
	unsigned int has_relaxed_fencing : 1;
	unsigned int has_llc : 1;
	unsigned int has_wait_timeout : 1;
	unsigned int bo_reuse : 1;
	unsigned int no_exec : 1;
	unsigned int has_vebox : 1;
	bool fenced_relocs;

	struct {
		void *ptr;
		uint32_t handle;
	} userptr_active;

} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 *
	 * List contains both flink named and prime fd'd objects
	 */
	unsigned int global_name;
	drmMMListHead name_list;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc.
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Array of BOs that are referenced by this buffer and will be softpinned */
	drm_intel_bo **softpin_target;
	/** Number of softpinned BOs that are referenced by this buffer */
	int softpin_target_count;
	/** Maximum number of softpinned BOs that are referenced by this buffer */
	int softpin_target_size;

	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;
	/**
	 * Virtual address of the buffer allocated by user, used for userptr
	 * objects only.
	 */
	void *user_virtual;
	int map_count;
	drmMMListHead vma_list;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	bool included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	bool used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building
	 * the relocation tree.
	 */
	bool has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	bool reusable;

	/**
	 * Boolean of whether the GPU is definitely not accessing the buffer.
	 *
	 * This is only valid when reusable, since non-reusable
	 * buffers are those that have been shared with other
	 * processes, so we don't know their state.
	 */
	bool idle;

	/**
	 * Boolean of whether this buffer was allocated with userptr
	 */
	bool is_userptr;

	/**
	 * Boolean of whether this buffer can be placed in the full 48-bit
	 * address range on gen8+.
	 *
	 * By default, buffers will be kept in a 32-bit range, unless this
	 * flag is explicitly set.
	 */
	bool use_48b_address_range;

	/**
	 * Whether this buffer is softpinned at the offset specified by the user
	 */
	bool is_softpin;

	/**
	 * Size in bytes of this buffer and its relocation descendants.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;

	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
	bool mapped_cpu_write;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);

static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo)
{
	return (drm_intel_bo_gem *)bo;
}

static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
			   uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just need multiples of page size for tiling */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (bufmgr_gem->gen == 3) {
		min_size = 1024*1024;
		max_size = 128*1024*1024;
	} else {
		min_size = 512*1024;
		max_size = 64*1024*1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	/* Do we need to allocate every page for the fence? */
	if (bufmgr_gem->has_relaxed_fencing)
		return ROUND_UP_TO(size, 4096);

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}

/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.
We use 512 as the minimum to allow for a later tiling 363 * change. 364 */ 365static unsigned long 366drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 367 unsigned long pitch, uint32_t *tiling_mode) 368{ 369 unsigned long tile_width; 370 unsigned long i; 371 372 /* If untiled, then just align it so that we can do rendering 373 * to it with the 3D engine. 374 */ 375 if (*tiling_mode == I915_TILING_NONE) 376 return ALIGN(pitch, 64); 377 378 if (*tiling_mode == I915_TILING_X 379 || (IS_915(bufmgr_gem->pci_device) 380 && *tiling_mode == I915_TILING_Y)) 381 tile_width = 512; 382 else 383 tile_width = 128; 384 385 /* 965 is flexible */ 386 if (bufmgr_gem->gen >= 4) 387 return ROUND_UP_TO(pitch, tile_width); 388 389 /* The older hardware has a maximum pitch of 8192 with tiled 390 * surfaces, so fallback to untiled if it's too large. 391 */ 392 if (pitch > 8192) { 393 *tiling_mode = I915_TILING_NONE; 394 return ALIGN(pitch, 64); 395 } 396 397 /* Pre-965 needs power of two tile width */ 398 for (i = tile_width; i < pitch; i <<= 1) 399 ; 400 401 return i; 402} 403 404static struct drm_intel_gem_bo_bucket * 405drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 406 unsigned long size) 407{ 408 int i; 409 410 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 411 struct drm_intel_gem_bo_bucket *bucket = 412 &bufmgr_gem->cache_bucket[i]; 413 if (bucket->size >= size) { 414 return bucket; 415 } 416 } 417 418 return NULL; 419} 420 421static void 422drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 423{ 424 int i, j; 425 426 for (i = 0; i < bufmgr_gem->exec_count; i++) { 427 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 428 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 429 430 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { 431 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, 432 bo_gem->is_softpin ? "*" : "", 433 bo_gem->name); 434 continue; 435 } 436 437 for (j = 0; j < bo_gem->reloc_count; j++) { 438 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 439 drm_intel_bo_gem *target_gem = 440 (drm_intel_bo_gem *) target_bo; 441 442 DBG("%2d: %d %s(%s)@0x%08x %08x -> " 443 "%d (%s)@0x%08x %08x + 0x%08x\n", 444 i, 445 bo_gem->gem_handle, 446 bo_gem->is_softpin ? "*" : "", 447 bo_gem->name, 448 upper_32_bits(bo_gem->relocs[j].offset), 449 lower_32_bits(bo_gem->relocs[j].offset), 450 target_gem->gem_handle, 451 target_gem->name, 452 upper_32_bits(target_bo->offset64), 453 lower_32_bits(target_bo->offset64), 454 bo_gem->relocs[j].delta); 455 } 456 457 for (j = 0; j < bo_gem->softpin_target_count; j++) { 458 drm_intel_bo *target_bo = bo_gem->softpin_target[j]; 459 drm_intel_bo_gem *target_gem = 460 (drm_intel_bo_gem *) target_bo; 461 DBG("%2d: %d %s(%s) -> " 462 "%d *(%s)@0x%08x %08x\n", 463 i, 464 bo_gem->gem_handle, 465 bo_gem->is_softpin ? "*" : "", 466 bo_gem->name, 467 target_gem->gem_handle, 468 target_gem->name, 469 upper_32_bits(target_bo->offset64), 470 lower_32_bits(target_bo->offset64)); 471 } 472 } 473} 474 475static inline void 476drm_intel_gem_bo_reference(drm_intel_bo *bo) 477{ 478 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 479 480 atomic_inc(&bo_gem->refcount); 481} 482 483/** 484 * Adds the given buffer to the list of buffers to be validated (moved into the 485 * appropriate memory type) with the next batch submission. 486 * 487 * If a buffer is validated multiple times in a batch submission, it ends up 488 * with the intersection of the memory type flags and the union of the 489 * access flags. 
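 *
 * (Each validation entry below records the GEM handle, its relocation
 * list and required alignment; the exec_objects/exec_bos arrays grow
 * geometrically as buffers are added.)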
490 */ 491static void 492drm_intel_add_validate_buffer(drm_intel_bo *bo) 493{ 494 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 495 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 496 int index; 497 498 if (bo_gem->validate_index != -1) 499 return; 500 501 /* Extend the array of validation entries as necessary. */ 502 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 503 int new_size = bufmgr_gem->exec_size * 2; 504 505 if (new_size == 0) 506 new_size = 5; 507 508 bufmgr_gem->exec_objects = 509 realloc(bufmgr_gem->exec_objects, 510 sizeof(*bufmgr_gem->exec_objects) * new_size); 511 bufmgr_gem->exec_bos = 512 realloc(bufmgr_gem->exec_bos, 513 sizeof(*bufmgr_gem->exec_bos) * new_size); 514 bufmgr_gem->exec_size = new_size; 515 } 516 517 index = bufmgr_gem->exec_count; 518 bo_gem->validate_index = index; 519 /* Fill in array entry */ 520 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 521 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 522 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 523 bufmgr_gem->exec_objects[index].alignment = bo->align; 524 bufmgr_gem->exec_objects[index].offset = 0; 525 bufmgr_gem->exec_bos[index] = bo; 526 bufmgr_gem->exec_count++; 527} 528 529static void 530drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 531{ 532 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 533 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 534 int index; 535 int flags = 0; 536 537 if (need_fence) 538 flags |= EXEC_OBJECT_NEEDS_FENCE; 539 if (bo_gem->use_48b_address_range) 540 flags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 541 if (bo_gem->is_softpin) 542 flags |= EXEC_OBJECT_PINNED; 543 544 if (bo_gem->validate_index != -1) { 545 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; 546 return; 547 } 548 549 /* Extend the array of validation entries as necessary. */ 550 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 551 int new_size = bufmgr_gem->exec_size * 2; 552 553 if (new_size == 0) 554 new_size = 5; 555 556 bufmgr_gem->exec2_objects = 557 realloc(bufmgr_gem->exec2_objects, 558 sizeof(*bufmgr_gem->exec2_objects) * new_size); 559 bufmgr_gem->exec_bos = 560 realloc(bufmgr_gem->exec_bos, 561 sizeof(*bufmgr_gem->exec_bos) * new_size); 562 bufmgr_gem->exec_size = new_size; 563 } 564 565 index = bufmgr_gem->exec_count; 566 bo_gem->validate_index = index; 567 /* Fill in array entry */ 568 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 569 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 570 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 571 bufmgr_gem->exec2_objects[index].alignment = bo->align; 572 bufmgr_gem->exec2_objects[index].offset = bo_gem->is_softpin ? 573 bo->offset64 : 0; 574 bufmgr_gem->exec_bos[index] = bo; 575 bufmgr_gem->exec2_objects[index].flags = flags; 576 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 577 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 578 bufmgr_gem->exec_count++; 579} 580 581#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 582 sizeof(uint32_t)) 583 584static void 585drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 586 drm_intel_bo_gem *bo_gem, 587 unsigned int alignment) 588{ 589 unsigned int size; 590 591 assert(!bo_gem->used_as_reloc_target); 592 593 /* The older chipsets are far-less flexible in terms of tiling, 594 * and require tiled buffer to be size aligned in the aperture. 
595 * This means that in the worst possible case we will need a hole 596 * twice as large as the object in order for it to fit into the 597 * aperture. Optimal packing is for wimps. 598 */ 599 size = bo_gem->bo.size; 600 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 601 unsigned int min_size; 602 603 if (bufmgr_gem->has_relaxed_fencing) { 604 if (bufmgr_gem->gen == 3) 605 min_size = 1024*1024; 606 else 607 min_size = 512*1024; 608 609 while (min_size < size) 610 min_size *= 2; 611 } else 612 min_size = size; 613 614 /* Account for worst-case alignment. */ 615 alignment = MAX2(alignment, min_size); 616 } 617 618 bo_gem->reloc_tree_size = size + alignment; 619} 620 621static int 622drm_intel_setup_reloc_list(drm_intel_bo *bo) 623{ 624 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 625 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 626 unsigned int max_relocs = bufmgr_gem->max_relocs; 627 628 if (bo->size / 4 < max_relocs) 629 max_relocs = bo->size / 4; 630 631 bo_gem->relocs = malloc(max_relocs * 632 sizeof(struct drm_i915_gem_relocation_entry)); 633 bo_gem->reloc_target_info = malloc(max_relocs * 634 sizeof(drm_intel_reloc_target)); 635 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 636 bo_gem->has_error = true; 637 638 free (bo_gem->relocs); 639 bo_gem->relocs = NULL; 640 641 free (bo_gem->reloc_target_info); 642 bo_gem->reloc_target_info = NULL; 643 644 return 1; 645 } 646 647 return 0; 648} 649 650static int 651drm_intel_gem_bo_busy(drm_intel_bo *bo) 652{ 653 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 654 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 655 struct drm_i915_gem_busy busy; 656 int ret; 657 658 if (bo_gem->reusable && bo_gem->idle) 659 return false; 660 661 memclear(busy); 662 busy.handle = bo_gem->gem_handle; 663 664 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 665 if (ret == 0) { 666 bo_gem->idle = !busy.busy; 667 return busy.busy; 668 } else { 669 return false; 670 } 671 return (ret == 0 && busy.busy); 672} 673 674static int 675drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 676 drm_intel_bo_gem *bo_gem, int state) 677{ 678 struct drm_i915_gem_madvise madv; 679 680 memclear(madv); 681 madv.handle = bo_gem->gem_handle; 682 madv.madv = state; 683 madv.retained = 1; 684 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 685 686 return madv.retained; 687} 688 689static int 690drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 691{ 692 return drm_intel_gem_bo_madvise_internal 693 ((drm_intel_bufmgr_gem *) bo->bufmgr, 694 (drm_intel_bo_gem *) bo, 695 madv); 696} 697 698/* drop the oldest entries that have been purged by the kernel */ 699static void 700drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 701 struct drm_intel_gem_bo_bucket *bucket) 702{ 703 while (!DRMLISTEMPTY(&bucket->head)) { 704 drm_intel_bo_gem *bo_gem; 705 706 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 707 bucket->head.next, head); 708 if (drm_intel_gem_bo_madvise_internal 709 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 710 break; 711 712 DRMLISTDEL(&bo_gem->head); 713 drm_intel_gem_bo_free(&bo_gem->bo); 714 } 715} 716 717static drm_intel_bo * 718drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 719 const char *name, 720 unsigned long size, 721 unsigned long flags, 722 uint32_t tiling_mode, 723 unsigned long stride, 724 unsigned int alignment) 725{ 726 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 727 
drm_intel_bo_gem *bo_gem; 728 unsigned int page_size = getpagesize(); 729 int ret; 730 struct drm_intel_gem_bo_bucket *bucket; 731 bool alloc_from_cache; 732 unsigned long bo_size; 733 bool for_render = false; 734 735 if (flags & BO_ALLOC_FOR_RENDER) 736 for_render = true; 737 738 /* Round the allocated size up to a power of two number of pages. */ 739 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 740 741 /* If we don't have caching at this size, don't actually round the 742 * allocation up. 743 */ 744 if (bucket == NULL) { 745 bo_size = size; 746 if (bo_size < page_size) 747 bo_size = page_size; 748 } else { 749 bo_size = bucket->size; 750 } 751 752 pthread_mutex_lock(&bufmgr_gem->lock); 753 /* Get a buffer out of the cache if available */ 754retry: 755 alloc_from_cache = false; 756 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 757 if (for_render) { 758 /* Allocate new render-target BOs from the tail (MRU) 759 * of the list, as it will likely be hot in the GPU 760 * cache and in the aperture for us. 761 */ 762 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 763 bucket->head.prev, head); 764 DRMLISTDEL(&bo_gem->head); 765 alloc_from_cache = true; 766 bo_gem->bo.align = alignment; 767 } else { 768 assert(alignment == 0); 769 /* For non-render-target BOs (where we're probably 770 * going to map it first thing in order to fill it 771 * with data), check if the last BO in the cache is 772 * unbusy, and only reuse in that case. Otherwise, 773 * allocating a new buffer is probably faster than 774 * waiting for the GPU to finish. 775 */ 776 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 777 bucket->head.next, head); 778 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 779 alloc_from_cache = true; 780 DRMLISTDEL(&bo_gem->head); 781 } 782 } 783 784 if (alloc_from_cache) { 785 if (!drm_intel_gem_bo_madvise_internal 786 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 787 drm_intel_gem_bo_free(&bo_gem->bo); 788 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 789 bucket); 790 goto retry; 791 } 792 793 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 794 tiling_mode, 795 stride)) { 796 drm_intel_gem_bo_free(&bo_gem->bo); 797 goto retry; 798 } 799 } 800 } 801 pthread_mutex_unlock(&bufmgr_gem->lock); 802 803 if (!alloc_from_cache) { 804 struct drm_i915_gem_create create; 805 806 bo_gem = calloc(1, sizeof(*bo_gem)); 807 if (!bo_gem) 808 return NULL; 809 810 bo_gem->bo.size = bo_size; 811 812 memclear(create); 813 create.size = bo_size; 814 815 ret = drmIoctl(bufmgr_gem->fd, 816 DRM_IOCTL_I915_GEM_CREATE, 817 &create); 818 bo_gem->gem_handle = create.handle; 819 bo_gem->bo.handle = bo_gem->gem_handle; 820 if (ret != 0) { 821 free(bo_gem); 822 return NULL; 823 } 824 bo_gem->bo.bufmgr = bufmgr; 825 bo_gem->bo.align = alignment; 826 827 bo_gem->tiling_mode = I915_TILING_NONE; 828 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 829 bo_gem->stride = 0; 830 831 /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized 832 list (vma_list), so better set the list head here */ 833 DRMINITLISTHEAD(&bo_gem->name_list); 834 DRMINITLISTHEAD(&bo_gem->vma_list); 835 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 836 tiling_mode, 837 stride)) { 838 drm_intel_gem_bo_free(&bo_gem->bo); 839 return NULL; 840 } 841 } 842 843 bo_gem->name = name; 844 atomic_set(&bo_gem->refcount, 1); 845 bo_gem->validate_index = -1; 846 bo_gem->reloc_tree_fences = 0; 847 bo_gem->used_as_reloc_target = false; 848 bo_gem->has_error = false; 849 bo_gem->reusable = true; 850 bo_gem->use_48b_address_range = false; 851 852 
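	/* Record the worst-case aperture footprint (object size plus any
	 * tiling alignment) so drm_intel_bufmgr_check_aperture_space() can
	 * use cached totals instead of re-walking the relocation tree.
	 */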
drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); 853 854 DBG("bo_create: buf %d (%s) %ldb\n", 855 bo_gem->gem_handle, bo_gem->name, size); 856 857 return &bo_gem->bo; 858} 859 860static drm_intel_bo * 861drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 862 const char *name, 863 unsigned long size, 864 unsigned int alignment) 865{ 866 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 867 BO_ALLOC_FOR_RENDER, 868 I915_TILING_NONE, 0, 869 alignment); 870} 871 872static drm_intel_bo * 873drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 874 const char *name, 875 unsigned long size, 876 unsigned int alignment) 877{ 878 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 879 I915_TILING_NONE, 0, 0); 880} 881 882static drm_intel_bo * 883drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 884 int x, int y, int cpp, uint32_t *tiling_mode, 885 unsigned long *pitch, unsigned long flags) 886{ 887 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 888 unsigned long size, stride; 889 uint32_t tiling; 890 891 do { 892 unsigned long aligned_y, height_alignment; 893 894 tiling = *tiling_mode; 895 896 /* If we're tiled, our allocations are in 8 or 32-row blocks, 897 * so failure to align our height means that we won't allocate 898 * enough pages. 899 * 900 * If we're untiled, we still have to align to 2 rows high 901 * because the data port accesses 2x2 blocks even if the 902 * bottom row isn't to be rendered, so failure to align means 903 * we could walk off the end of the GTT and fault. This is 904 * documented on 965, and may be the case on older chipsets 905 * too so we try to be careful. 906 */ 907 aligned_y = y; 908 height_alignment = 2; 909 910 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 911 height_alignment = 16; 912 else if (tiling == I915_TILING_X 913 || (IS_915(bufmgr_gem->pci_device) 914 && tiling == I915_TILING_Y)) 915 height_alignment = 8; 916 else if (tiling == I915_TILING_Y) 917 height_alignment = 32; 918 aligned_y = ALIGN(y, height_alignment); 919 920 stride = x * cpp; 921 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 922 size = stride * aligned_y; 923 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 924 } while (*tiling_mode != tiling); 925 *pitch = stride; 926 927 if (tiling == I915_TILING_NONE) 928 stride = 0; 929 930 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 931 tiling, stride, 0); 932} 933 934static drm_intel_bo * 935drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 936 const char *name, 937 void *addr, 938 uint32_t tiling_mode, 939 uint32_t stride, 940 unsigned long size, 941 unsigned long flags) 942{ 943 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 944 drm_intel_bo_gem *bo_gem; 945 int ret; 946 struct drm_i915_gem_userptr userptr; 947 948 /* Tiling with userptr surfaces is not supported 949 * on all hardware so refuse it for time being. 
950 */ 951 if (tiling_mode != I915_TILING_NONE) 952 return NULL; 953 954 bo_gem = calloc(1, sizeof(*bo_gem)); 955 if (!bo_gem) 956 return NULL; 957 958 bo_gem->bo.size = size; 959 960 memclear(userptr); 961 userptr.user_ptr = (__u64)((unsigned long)addr); 962 userptr.user_size = size; 963 userptr.flags = flags; 964 965 ret = drmIoctl(bufmgr_gem->fd, 966 DRM_IOCTL_I915_GEM_USERPTR, 967 &userptr); 968 if (ret != 0) { 969 DBG("bo_create_userptr: " 970 "ioctl failed with user ptr %p size 0x%lx, " 971 "user flags 0x%lx\n", addr, size, flags); 972 free(bo_gem); 973 return NULL; 974 } 975 976 bo_gem->gem_handle = userptr.handle; 977 bo_gem->bo.handle = bo_gem->gem_handle; 978 bo_gem->bo.bufmgr = bufmgr; 979 bo_gem->is_userptr = true; 980 bo_gem->bo.virtual = addr; 981 /* Save the address provided by user */ 982 bo_gem->user_virtual = addr; 983 bo_gem->tiling_mode = I915_TILING_NONE; 984 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 985 bo_gem->stride = 0; 986 987 DRMINITLISTHEAD(&bo_gem->name_list); 988 DRMINITLISTHEAD(&bo_gem->vma_list); 989 990 bo_gem->name = name; 991 atomic_set(&bo_gem->refcount, 1); 992 bo_gem->validate_index = -1; 993 bo_gem->reloc_tree_fences = 0; 994 bo_gem->used_as_reloc_target = false; 995 bo_gem->has_error = false; 996 bo_gem->reusable = false; 997 bo_gem->use_48b_address_range = false; 998 999 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 1000 1001 DBG("bo_create_userptr: " 1002 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", 1003 addr, bo_gem->gem_handle, bo_gem->name, 1004 size, stride, tiling_mode); 1005 1006 return &bo_gem->bo; 1007} 1008 1009static bool 1010has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) 1011{ 1012 int ret; 1013 void *ptr; 1014 long pgsz; 1015 struct drm_i915_gem_userptr userptr; 1016 1017 pgsz = sysconf(_SC_PAGESIZE); 1018 assert(pgsz > 0); 1019 1020 ret = posix_memalign(&ptr, pgsz, pgsz); 1021 if (ret) { 1022 DBG("Failed to get a page (%ld) for userptr detection!\n", 1023 pgsz); 1024 return false; 1025 } 1026 1027 memclear(userptr); 1028 userptr.user_ptr = (__u64)(unsigned long)ptr; 1029 userptr.user_size = pgsz; 1030 1031retry: 1032 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); 1033 if (ret) { 1034 if (errno == ENODEV && userptr.flags == 0) { 1035 userptr.flags = I915_USERPTR_UNSYNCHRONIZED; 1036 goto retry; 1037 } 1038 free(ptr); 1039 return false; 1040 } 1041 1042 /* We don't release the userptr bo here as we want to keep the 1043 * kernel mm tracking alive for our lifetime. The first time we 1044 * create a userptr object the kernel has to install a mmu_notifer 1045 * which is a heavyweight operation (e.g. it requires taking all 1046 * mm_locks and stop_machine()). 1047 */ 1048 1049 bufmgr_gem->userptr_active.ptr = ptr; 1050 bufmgr_gem->userptr_active.handle = userptr.handle; 1051 1052 return true; 1053} 1054 1055static drm_intel_bo * 1056check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 1057 const char *name, 1058 void *addr, 1059 uint32_t tiling_mode, 1060 uint32_t stride, 1061 unsigned long size, 1062 unsigned long flags) 1063{ 1064 if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) 1065 bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; 1066 else 1067 bufmgr->bo_alloc_userptr = NULL; 1068 1069 return drm_intel_bo_alloc_userptr(bufmgr, name, addr, 1070 tiling_mode, stride, size, flags); 1071} 1072 1073/** 1074 * Returns a drm_intel_bo wrapping the given buffer object handle. 1075 * 1076 * This can be used when one application needs to pass a buffer object 1077 * to another. 
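 *
 * Illustrative flow (not part of the original comment): the exporting
 * process calls drm_intel_bo_flink(bo, &name) to obtain a global name and
 * passes that integer to the importer, which then calls
 * drm_intel_bo_gem_create_from_name(bufmgr, "shared", name).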
1078 */ 1079drm_intel_bo * 1080drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 1081 const char *name, 1082 unsigned int handle) 1083{ 1084 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1085 drm_intel_bo_gem *bo_gem; 1086 int ret; 1087 struct drm_gem_open open_arg; 1088 struct drm_i915_gem_get_tiling get_tiling; 1089 drmMMListHead *list; 1090 1091 /* At the moment most applications only have a few named bo. 1092 * For instance, in a DRI client only the render buffers passed 1093 * between X and the client are named. And since X returns the 1094 * alternating names for the front/back buffer a linear search 1095 * provides a sufficiently fast match. 1096 */ 1097 pthread_mutex_lock(&bufmgr_gem->lock); 1098 for (list = bufmgr_gem->named.next; 1099 list != &bufmgr_gem->named; 1100 list = list->next) { 1101 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 1102 if (bo_gem->global_name == handle) { 1103 drm_intel_gem_bo_reference(&bo_gem->bo); 1104 pthread_mutex_unlock(&bufmgr_gem->lock); 1105 return &bo_gem->bo; 1106 } 1107 } 1108 1109 memclear(open_arg); 1110 open_arg.name = handle; 1111 ret = drmIoctl(bufmgr_gem->fd, 1112 DRM_IOCTL_GEM_OPEN, 1113 &open_arg); 1114 if (ret != 0) { 1115 DBG("Couldn't reference %s handle 0x%08x: %s\n", 1116 name, handle, strerror(errno)); 1117 pthread_mutex_unlock(&bufmgr_gem->lock); 1118 return NULL; 1119 } 1120 /* Now see if someone has used a prime handle to get this 1121 * object from the kernel before by looking through the list 1122 * again for a matching gem_handle 1123 */ 1124 for (list = bufmgr_gem->named.next; 1125 list != &bufmgr_gem->named; 1126 list = list->next) { 1127 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 1128 if (bo_gem->gem_handle == open_arg.handle) { 1129 drm_intel_gem_bo_reference(&bo_gem->bo); 1130 pthread_mutex_unlock(&bufmgr_gem->lock); 1131 return &bo_gem->bo; 1132 } 1133 } 1134 1135 bo_gem = calloc(1, sizeof(*bo_gem)); 1136 if (!bo_gem) { 1137 pthread_mutex_unlock(&bufmgr_gem->lock); 1138 return NULL; 1139 } 1140 1141 bo_gem->bo.size = open_arg.size; 1142 bo_gem->bo.offset = 0; 1143 bo_gem->bo.offset64 = 0; 1144 bo_gem->bo.virtual = NULL; 1145 bo_gem->bo.bufmgr = bufmgr; 1146 bo_gem->name = name; 1147 atomic_set(&bo_gem->refcount, 1); 1148 bo_gem->validate_index = -1; 1149 bo_gem->gem_handle = open_arg.handle; 1150 bo_gem->bo.handle = open_arg.handle; 1151 bo_gem->global_name = handle; 1152 bo_gem->reusable = false; 1153 bo_gem->use_48b_address_range = false; 1154 1155 memclear(get_tiling); 1156 get_tiling.handle = bo_gem->gem_handle; 1157 ret = drmIoctl(bufmgr_gem->fd, 1158 DRM_IOCTL_I915_GEM_GET_TILING, 1159 &get_tiling); 1160 if (ret != 0) { 1161 drm_intel_gem_bo_unreference(&bo_gem->bo); 1162 pthread_mutex_unlock(&bufmgr_gem->lock); 1163 return NULL; 1164 } 1165 bo_gem->tiling_mode = get_tiling.tiling_mode; 1166 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 1167 /* XXX stride is unknown */ 1168 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 1169 1170 DRMINITLISTHEAD(&bo_gem->vma_list); 1171 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 1172 pthread_mutex_unlock(&bufmgr_gem->lock); 1173 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 1174 1175 return &bo_gem->bo; 1176} 1177 1178static void 1179drm_intel_gem_bo_free(drm_intel_bo *bo) 1180{ 1181 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1182 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1183 struct drm_gem_close close; 1184 int ret; 1185 1186 
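	/* Tear-down order: drop any cached CPU/GTT mappings (and their
	 * vma-cache accounting) before closing the GEM handle below.
	 */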
DRMLISTDEL(&bo_gem->vma_list); 1187 if (bo_gem->mem_virtual) { 1188 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 1189 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1190 bufmgr_gem->vma_count--; 1191 } 1192 if (bo_gem->gtt_virtual) { 1193 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1194 bufmgr_gem->vma_count--; 1195 } 1196 1197 /* Close this object */ 1198 memclear(close); 1199 close.handle = bo_gem->gem_handle; 1200 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 1201 if (ret != 0) { 1202 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 1203 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 1204 } 1205 free(bo); 1206} 1207 1208static void 1209drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 1210{ 1211#if HAVE_VALGRIND 1212 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1213 1214 if (bo_gem->mem_virtual) 1215 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 1216 1217 if (bo_gem->gtt_virtual) 1218 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 1219#endif 1220} 1221 1222/** Frees all cached buffers significantly older than @time. */ 1223static void 1224drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 1225{ 1226 int i; 1227 1228 if (bufmgr_gem->time == time) 1229 return; 1230 1231 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1232 struct drm_intel_gem_bo_bucket *bucket = 1233 &bufmgr_gem->cache_bucket[i]; 1234 1235 while (!DRMLISTEMPTY(&bucket->head)) { 1236 drm_intel_bo_gem *bo_gem; 1237 1238 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1239 bucket->head.next, head); 1240 if (time - bo_gem->free_time <= 1) 1241 break; 1242 1243 DRMLISTDEL(&bo_gem->head); 1244 1245 drm_intel_gem_bo_free(&bo_gem->bo); 1246 } 1247 } 1248 1249 bufmgr_gem->time = time; 1250} 1251 1252static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 1253{ 1254 int limit; 1255 1256 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 1257 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 1258 1259 if (bufmgr_gem->vma_max < 0) 1260 return; 1261 1262 /* We may need to evict a few entries in order to create new mmaps */ 1263 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1264 if (limit < 0) 1265 limit = 0; 1266 1267 while (bufmgr_gem->vma_count > limit) { 1268 drm_intel_bo_gem *bo_gem; 1269 1270 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1271 bufmgr_gem->vma_cache.next, 1272 vma_list); 1273 assert(bo_gem->map_count == 0); 1274 DRMLISTDELINIT(&bo_gem->vma_list); 1275 1276 if (bo_gem->mem_virtual) { 1277 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1278 bo_gem->mem_virtual = NULL; 1279 bufmgr_gem->vma_count--; 1280 } 1281 if (bo_gem->gtt_virtual) { 1282 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1283 bo_gem->gtt_virtual = NULL; 1284 bufmgr_gem->vma_count--; 1285 } 1286 } 1287} 1288 1289static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1290 drm_intel_bo_gem *bo_gem) 1291{ 1292 bufmgr_gem->vma_open--; 1293 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1294 if (bo_gem->mem_virtual) 1295 bufmgr_gem->vma_count++; 1296 if (bo_gem->gtt_virtual) 1297 bufmgr_gem->vma_count++; 1298 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1299} 1300 1301static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1302 drm_intel_bo_gem *bo_gem) 1303{ 1304 bufmgr_gem->vma_open++; 1305 DRMLISTDEL(&bo_gem->vma_list); 1306 if (bo_gem->mem_virtual) 1307 bufmgr_gem->vma_count--; 1308 if (bo_gem->gtt_virtual) 1309 bufmgr_gem->vma_count--; 1310 
drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1311} 1312 1313static void 1314drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1315{ 1316 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1317 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1318 struct drm_intel_gem_bo_bucket *bucket; 1319 int i; 1320 1321 /* Unreference all the target buffers */ 1322 for (i = 0; i < bo_gem->reloc_count; i++) { 1323 if (bo_gem->reloc_target_info[i].bo != bo) { 1324 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1325 reloc_target_info[i].bo, 1326 time); 1327 } 1328 } 1329 for (i = 0; i < bo_gem->softpin_target_count; i++) 1330 drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], 1331 time); 1332 bo_gem->reloc_count = 0; 1333 bo_gem->used_as_reloc_target = false; 1334 bo_gem->softpin_target_count = 0; 1335 1336 DBG("bo_unreference final: %d (%s)\n", 1337 bo_gem->gem_handle, bo_gem->name); 1338 1339 /* release memory associated with this object */ 1340 if (bo_gem->reloc_target_info) { 1341 free(bo_gem->reloc_target_info); 1342 bo_gem->reloc_target_info = NULL; 1343 } 1344 if (bo_gem->relocs) { 1345 free(bo_gem->relocs); 1346 bo_gem->relocs = NULL; 1347 } 1348 if (bo_gem->softpin_target) { 1349 free(bo_gem->softpin_target); 1350 bo_gem->softpin_target = NULL; 1351 bo_gem->softpin_target_size = 0; 1352 } 1353 1354 /* Clear any left-over mappings */ 1355 if (bo_gem->map_count) { 1356 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); 1357 bo_gem->map_count = 0; 1358 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1359 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1360 } 1361 1362 DRMLISTDEL(&bo_gem->name_list); 1363 1364 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1365 /* Put the buffer into our internal cache for reuse if we can. 
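	 * Marking it I915_MADV_DONTNEED lets the kernel purge the pages under
	 * memory pressure; the allocation path re-marks a cached buffer
	 * WILLNEED and checks that its pages were retained before reusing it.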
*/ 1366 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1367 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1368 I915_MADV_DONTNEED)) { 1369 bo_gem->free_time = time; 1370 1371 bo_gem->name = NULL; 1372 bo_gem->validate_index = -1; 1373 1374 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1375 } else { 1376 drm_intel_gem_bo_free(bo); 1377 } 1378} 1379 1380static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1381 time_t time) 1382{ 1383 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1384 1385 assert(atomic_read(&bo_gem->refcount) > 0); 1386 if (atomic_dec_and_test(&bo_gem->refcount)) 1387 drm_intel_gem_bo_unreference_final(bo, time); 1388} 1389 1390static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1391{ 1392 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1393 1394 assert(atomic_read(&bo_gem->refcount) > 0); 1395 1396 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { 1397 drm_intel_bufmgr_gem *bufmgr_gem = 1398 (drm_intel_bufmgr_gem *) bo->bufmgr; 1399 struct timespec time; 1400 1401 clock_gettime(CLOCK_MONOTONIC, &time); 1402 1403 pthread_mutex_lock(&bufmgr_gem->lock); 1404 1405 if (atomic_dec_and_test(&bo_gem->refcount)) { 1406 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1407 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1408 } 1409 1410 pthread_mutex_unlock(&bufmgr_gem->lock); 1411 } 1412} 1413 1414static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1415{ 1416 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1417 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1418 struct drm_i915_gem_set_domain set_domain; 1419 int ret; 1420 1421 if (bo_gem->is_userptr) { 1422 /* Return the same user ptr */ 1423 bo->virtual = bo_gem->user_virtual; 1424 return 0; 1425 } 1426 1427 pthread_mutex_lock(&bufmgr_gem->lock); 1428 1429 if (bo_gem->map_count++ == 0) 1430 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1431 1432 if (!bo_gem->mem_virtual) { 1433 struct drm_i915_gem_mmap mmap_arg; 1434 1435 DBG("bo_map: %d (%s), map_count=%d\n", 1436 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1437 1438 memclear(mmap_arg); 1439 mmap_arg.handle = bo_gem->gem_handle; 1440 mmap_arg.size = bo->size; 1441 ret = drmIoctl(bufmgr_gem->fd, 1442 DRM_IOCTL_I915_GEM_MMAP, 1443 &mmap_arg); 1444 if (ret != 0) { 1445 ret = -errno; 1446 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1447 __FILE__, __LINE__, bo_gem->gem_handle, 1448 bo_gem->name, strerror(errno)); 1449 if (--bo_gem->map_count == 0) 1450 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1451 pthread_mutex_unlock(&bufmgr_gem->lock); 1452 return ret; 1453 } 1454 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1455 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1456 } 1457 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1458 bo_gem->mem_virtual); 1459 bo->virtual = bo_gem->mem_virtual; 1460 1461 memclear(set_domain); 1462 set_domain.handle = bo_gem->gem_handle; 1463 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1464 if (write_enable) 1465 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1466 else 1467 set_domain.write_domain = 0; 1468 ret = drmIoctl(bufmgr_gem->fd, 1469 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1470 &set_domain); 1471 if (ret != 0) { 1472 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1473 __FILE__, __LINE__, bo_gem->gem_handle, 1474 strerror(errno)); 1475 } 1476 1477 if (write_enable) 1478 bo_gem->mapped_cpu_write = true; 1479 1480 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 
1481 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1482 pthread_mutex_unlock(&bufmgr_gem->lock); 1483 1484 return 0; 1485} 1486 1487static int 1488map_gtt(drm_intel_bo *bo) 1489{ 1490 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1491 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1492 int ret; 1493 1494 if (bo_gem->is_userptr) 1495 return -EINVAL; 1496 1497 if (bo_gem->map_count++ == 0) 1498 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1499 1500 /* Get a mapping of the buffer if we haven't before. */ 1501 if (bo_gem->gtt_virtual == NULL) { 1502 struct drm_i915_gem_mmap_gtt mmap_arg; 1503 1504 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1505 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1506 1507 memclear(mmap_arg); 1508 mmap_arg.handle = bo_gem->gem_handle; 1509 1510 /* Get the fake offset back... */ 1511 ret = drmIoctl(bufmgr_gem->fd, 1512 DRM_IOCTL_I915_GEM_MMAP_GTT, 1513 &mmap_arg); 1514 if (ret != 0) { 1515 ret = -errno; 1516 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1517 __FILE__, __LINE__, 1518 bo_gem->gem_handle, bo_gem->name, 1519 strerror(errno)); 1520 if (--bo_gem->map_count == 0) 1521 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1522 return ret; 1523 } 1524 1525 /* and mmap it */ 1526 ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size, 1527 &bo_gem->gtt_virtual); 1528 if (ret) { 1529 bo_gem->gtt_virtual = NULL; 1530 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1531 __FILE__, __LINE__, 1532 bo_gem->gem_handle, bo_gem->name, 1533 strerror(errno)); 1534 if (--bo_gem->map_count == 0) 1535 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1536 return ret; 1537 } 1538 } 1539 1540 bo->virtual = bo_gem->gtt_virtual; 1541 1542 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1543 bo_gem->gtt_virtual); 1544 1545 return 0; 1546} 1547 1548int 1549drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1550{ 1551 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1552 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1553 struct drm_i915_gem_set_domain set_domain; 1554 int ret; 1555 1556 pthread_mutex_lock(&bufmgr_gem->lock); 1557 1558 ret = map_gtt(bo); 1559 if (ret) { 1560 pthread_mutex_unlock(&bufmgr_gem->lock); 1561 return ret; 1562 } 1563 1564 /* Now move it to the GTT domain so that the GPU and CPU 1565 * caches are flushed and the GPU isn't actively using the 1566 * buffer. 1567 * 1568 * The pagefault handler does this domain change for us when 1569 * it has unbound the BO from the GTT, but it's up to us to 1570 * tell it when we're about to use things if we had done 1571 * rendering and it still happens to be bound to the GTT. 1572 */ 1573 memclear(set_domain); 1574 set_domain.handle = bo_gem->gem_handle; 1575 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1576 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1577 ret = drmIoctl(bufmgr_gem->fd, 1578 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1579 &set_domain); 1580 if (ret != 0) { 1581 DBG("%s:%d: Error setting domain %d: %s\n", 1582 __FILE__, __LINE__, bo_gem->gem_handle, 1583 strerror(errno)); 1584 } 1585 1586 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1587 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1588 pthread_mutex_unlock(&bufmgr_gem->lock); 1589 1590 return 0; 1591} 1592 1593/** 1594 * Performs a mapping of the buffer object like the normal GTT 1595 * mapping, but avoids waiting for the GPU to be done reading from or 1596 * rendering to the buffer. 
 *
 * This is used in the implementation of GL_ARB_map_buffer_range: The
 * user asks to create a buffer, then does a mapping, fills some
 * space, runs a drawing command, then asks to map it again without
 * synchronizing because it guarantees that it won't write over the
 * data that the GPU is busy using (or, more specifically, that if it
 * does write over the data, it acknowledges that rendering is
 * undefined).
 */

int
drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
#ifdef HAVE_VALGRIND
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
#endif
	int ret;

	/* If the CPU cache isn't coherent with the GTT, then use a
	 * regular synchronized mapping. The problem is that we don't
	 * track where the buffer was last used on the CPU side in
	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
	 * we would potentially corrupt the buffer even when the user
	 * does reasonable things.
	 */
	if (!bufmgr_gem->has_llc)
		return drm_intel_gem_bo_map_gtt(bo);

	pthread_mutex_lock(&bufmgr_gem->lock);

	ret = map_gtt(bo);
	if (ret == 0) {
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	if (bo_gem->is_userptr)
		return 0;

	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;

	pthread_mutex_lock(&bufmgr_gem->lock);

	if (bo_gem->map_count <= 0) {
		DBG("attempted to unmap an unmapped bo\n");
		pthread_mutex_unlock(&bufmgr_gem->lock);
		/* Preserve the old behaviour of just treating this as a
		 * no-op rather than reporting the error.
		 */
		return 0;
	}

	if (bo_gem->mapped_cpu_write) {
		struct drm_i915_gem_sw_finish sw_finish;

		/* Cause a flush to happen if the buffer's pinned for
		 * scanout, so the results show up in a timely manner.
		 * Unlike GTT set domains, this only does work if the
		 * buffer should be scanout-related.
		 */
		memclear(sw_finish);
		sw_finish.handle = bo_gem->gem_handle;
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_SW_FINISH,
			       &sw_finish);
		ret = ret == -1 ? -errno : 0;

		bo_gem->mapped_cpu_write = false;
	}

	/* We need to unmap after every invocation as we cannot track
	 * an open vma for every bo as that will exhaust the system
	 * limits and cause later failures.
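	 * (The mapping itself stays cached in mem_virtual/gtt_virtual via the
	 * vma cache, so a later map typically avoids a fresh mmap unless the
	 * cache has been purged.)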
1685 */ 1686 if (--bo_gem->map_count == 0) { 1687 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1688 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1689 bo->virtual = NULL; 1690 } 1691 pthread_mutex_unlock(&bufmgr_gem->lock); 1692 1693 return ret; 1694} 1695 1696int 1697drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1698{ 1699 return drm_intel_gem_bo_unmap(bo); 1700} 1701 1702static int 1703drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1704 unsigned long size, const void *data) 1705{ 1706 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1707 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1708 struct drm_i915_gem_pwrite pwrite; 1709 int ret; 1710 1711 if (bo_gem->is_userptr) 1712 return -EINVAL; 1713 1714 memclear(pwrite); 1715 pwrite.handle = bo_gem->gem_handle; 1716 pwrite.offset = offset; 1717 pwrite.size = size; 1718 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1719 ret = drmIoctl(bufmgr_gem->fd, 1720 DRM_IOCTL_I915_GEM_PWRITE, 1721 &pwrite); 1722 if (ret != 0) { 1723 ret = -errno; 1724 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1725 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1726 (int)size, strerror(errno)); 1727 } 1728 1729 return ret; 1730} 1731 1732static int 1733drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1734{ 1735 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1736 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1737 int ret; 1738 1739 memclear(get_pipe_from_crtc_id); 1740 get_pipe_from_crtc_id.crtc_id = crtc_id; 1741 ret = drmIoctl(bufmgr_gem->fd, 1742 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1743 &get_pipe_from_crtc_id); 1744 if (ret != 0) { 1745 /* We return -1 here to signal that we don't 1746 * know which pipe is associated with this crtc. 1747 * This lets the caller know that this information 1748 * isn't available; using the wrong pipe for 1749 * vblank waiting can cause the chipset to lock up 1750 */ 1751 return -1; 1752 } 1753 1754 return get_pipe_from_crtc_id.pipe; 1755} 1756 1757static int 1758drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1759 unsigned long size, void *data) 1760{ 1761 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1762 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1763 struct drm_i915_gem_pread pread; 1764 int ret; 1765 1766 if (bo_gem->is_userptr) 1767 return -EINVAL; 1768 1769 memclear(pread); 1770 pread.handle = bo_gem->gem_handle; 1771 pread.offset = offset; 1772 pread.size = size; 1773 pread.data_ptr = (uint64_t) (uintptr_t) data; 1774 ret = drmIoctl(bufmgr_gem->fd, 1775 DRM_IOCTL_I915_GEM_PREAD, 1776 &pread); 1777 if (ret != 0) { 1778 ret = -errno; 1779 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1780 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1781 (int)size, strerror(errno)); 1782 } 1783 1784 return ret; 1785} 1786 1787/** Waits for all GPU rendering with the object to have completed. */ 1788static void 1789drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1790{ 1791 drm_intel_gem_bo_start_gtt_access(bo, 1); 1792} 1793 1794/** 1795 * Waits on a BO for the given amount of time. 1796 * 1797 * @bo: buffer object to wait for 1798 * @timeout_ns: amount of time to wait in nanoseconds. 1799 * If value is less than 0, an infinite wait will occur. 1800 * 1801 * Returns 0 if the wait was successful ie. the last batch referencing the 1802 * object has completed within the allotted time. 
 * Otherwise some negative return value describes the error. Of particular
 * interest is -ETIME when the wait has failed to yield the desired result.
 *
 * Similar to drm_intel_gem_bo_wait_rendering except that a timeout parameter
 * allows the operation to give up after a certain amount of time. Another
 * subtle difference is that the internal locking semantics differ (this
 * variant does not hold the lock for the duration of the wait). This makes
 * the wait subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait does
 * not guarantee that the buffer will not be re-issued via another thread,
 * or a flinked handle. Userspace must make sure this race does not occur
 * if such precision is important.
 *
 * Note that some kernels have broken the promise of an infinite wait for
 * negative values; upgrade to the latest stable kernel if this is the case.
 */
int
drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_wait wait;
	int ret;

	if (!bufmgr_gem->has_wait_timeout) {
		DBG("%s:%d: Timed wait is not supported. Falling back to "
		    "infinite wait\n", __FILE__, __LINE__);
		if (timeout_ns) {
			drm_intel_gem_bo_wait_rendering(bo);
			return 0;
		} else {
			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
		}
	}

	memclear(wait);
	wait.bo_handle = bo_gem->gem_handle;
	wait.timeout_ns = timeout_ns;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	if (ret == -1)
		return -errno;

	return ret;
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ?
I915_GEM_DOMAIN_GTT : 0; 1869 ret = drmIoctl(bufmgr_gem->fd, 1870 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1871 &set_domain); 1872 if (ret != 0) { 1873 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1874 __FILE__, __LINE__, bo_gem->gem_handle, 1875 set_domain.read_domains, set_domain.write_domain, 1876 strerror(errno)); 1877 } 1878} 1879 1880static void 1881drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1882{ 1883 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1884 struct drm_gem_close close_bo; 1885 int i, ret; 1886 1887 free(bufmgr_gem->exec2_objects); 1888 free(bufmgr_gem->exec_objects); 1889 free(bufmgr_gem->exec_bos); 1890 1891 pthread_mutex_destroy(&bufmgr_gem->lock); 1892 1893 /* Free any cached buffer objects we were going to reuse */ 1894 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1895 struct drm_intel_gem_bo_bucket *bucket = 1896 &bufmgr_gem->cache_bucket[i]; 1897 drm_intel_bo_gem *bo_gem; 1898 1899 while (!DRMLISTEMPTY(&bucket->head)) { 1900 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1901 bucket->head.next, head); 1902 DRMLISTDEL(&bo_gem->head); 1903 1904 drm_intel_gem_bo_free(&bo_gem->bo); 1905 } 1906 } 1907 1908 /* Release userptr bo kept hanging around for optimisation. */ 1909 if (bufmgr_gem->userptr_active.ptr) { 1910 memclear(close_bo); 1911 close_bo.handle = bufmgr_gem->userptr_active.handle; 1912 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); 1913 free(bufmgr_gem->userptr_active.ptr); 1914 if (ret) 1915 fprintf(stderr, 1916 "Failed to release test userptr object! (%d) " 1917 "i915 kernel driver may not be sane!\n", errno); 1918 } 1919 1920 free(bufmgr); 1921} 1922 1923/** 1924 * Adds the target buffer to the validation list and adds the relocation 1925 * to the reloc_buffer's relocation list. 1926 * 1927 * The relocation entry at the given offset must already contain the 1928 * precomputed relocation value, because the kernel will optimize out 1929 * the relocation entry write when the buffer hasn't moved from the 1930 * last known offset in target_bo. 1931 */ 1932static int 1933do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1934 drm_intel_bo *target_bo, uint32_t target_offset, 1935 uint32_t read_domains, uint32_t write_domain, 1936 bool need_fence) 1937{ 1938 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1939 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1940 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1941 bool fenced_command; 1942 1943 if (bo_gem->has_error) 1944 return -ENOMEM; 1945 1946 if (target_bo_gem->has_error) { 1947 bo_gem->has_error = true; 1948 return -ENOMEM; 1949 } 1950 1951 /* We never use HW fences for rendering on 965+ */ 1952 if (bufmgr_gem->gen >= 4) 1953 need_fence = false; 1954 1955 fenced_command = need_fence; 1956 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1957 need_fence = false; 1958 1959 /* Create a new relocation list if needed */ 1960 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1961 return -ENOMEM; 1962 1963 /* Check overflow */ 1964 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1965 1966 /* Check args */ 1967 assert(offset <= bo->size - 4); 1968 assert((write_domain & (write_domain - 1)) == 0); 1969 1970 /* An object needing a fence is a tiled buffer, so it won't have 1971 * relocs to other buffers. 
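	 * (This path only matters before gen4: on 965+ the code above cleared
	 * need_fence, since HW fences are never used for rendering there.)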
1972 */ 1973 if (need_fence) { 1974 assert(target_bo_gem->reloc_count == 0); 1975 target_bo_gem->reloc_tree_fences = 1; 1976 } 1977 1978 /* Make sure that we're not adding a reloc to something whose size has 1979 * already been accounted for. 1980 */ 1981 assert(!bo_gem->used_as_reloc_target); 1982 if (target_bo_gem != bo_gem) { 1983 target_bo_gem->used_as_reloc_target = true; 1984 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1985 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1986 } 1987 1988 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1989 if (target_bo != bo) 1990 drm_intel_gem_bo_reference(target_bo); 1991 if (fenced_command) 1992 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1993 DRM_INTEL_RELOC_FENCE; 1994 else 1995 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1996 1997 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1998 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1999 bo_gem->relocs[bo_gem->reloc_count].target_handle = 2000 target_bo_gem->gem_handle; 2001 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 2002 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 2003 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 2004 bo_gem->reloc_count++; 2005 2006 return 0; 2007} 2008 2009static void 2010drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) 2011{ 2012 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2013 bo_gem->use_48b_address_range = enable; 2014} 2015 2016static int 2017drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) 2018{ 2019 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2020 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2021 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2022 if (bo_gem->has_error) 2023 return -ENOMEM; 2024 2025 if (target_bo_gem->has_error) { 2026 bo_gem->has_error = true; 2027 return -ENOMEM; 2028 } 2029 2030 if (!target_bo_gem->is_softpin) 2031 return -EINVAL; 2032 if (target_bo_gem == bo_gem) 2033 return -EINVAL; 2034 2035 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { 2036 int new_size = bo_gem->softpin_target_size * 2; 2037 if (new_size == 0) 2038 new_size = bufmgr_gem->max_relocs; 2039 2040 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * 2041 sizeof(drm_intel_bo *)); 2042 if (!bo_gem->softpin_target) 2043 return -ENOMEM; 2044 2045 bo_gem->softpin_target_size = new_size; 2046 } 2047 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; 2048 drm_intel_gem_bo_reference(target_bo); 2049 bo_gem->softpin_target_count++; 2050 2051 return 0; 2052} 2053 2054static int 2055drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 2056 drm_intel_bo *target_bo, uint32_t target_offset, 2057 uint32_t read_domains, uint32_t write_domain) 2058{ 2059 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2060 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; 2061 2062 if (target_bo_gem->is_softpin) 2063 return drm_intel_gem_bo_add_softpin_target(bo, target_bo); 2064 else 2065 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2066 read_domains, write_domain, 2067 !bufmgr_gem->fenced_relocs); 2068} 2069 2070static int 2071drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 2072 drm_intel_bo *target_bo, 2073 uint32_t target_offset, 2074 uint32_t read_domains, uint32_t write_domain) 2075{ 2076 return 
do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2077 read_domains, write_domain, true); 2078} 2079 2080int 2081drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 2082{ 2083 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2084 2085 return bo_gem->reloc_count; 2086} 2087 2088/** 2089 * Removes existing relocation entries in the BO after "start". 2090 * 2091 * This allows a user to avoid a two-step process for state setup with 2092 * counting up all the buffer objects and doing a 2093 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 2094 * relocations for the state setup. Instead, save the state of the 2095 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 2096 * state, and then check if it still fits in the aperture. 2097 * 2098 * Any further drm_intel_bufmgr_check_aperture_space() queries 2099 * involving this buffer in the tree are undefined after this call. 2100 * 2101 * This also removes all softpinned targets being referenced by the BO. 2102 */ 2103void 2104drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 2105{ 2106 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2107 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2108 int i; 2109 struct timespec time; 2110 2111 clock_gettime(CLOCK_MONOTONIC, &time); 2112 2113 assert(bo_gem->reloc_count >= start); 2114 2115 /* Unreference the cleared target buffers */ 2116 pthread_mutex_lock(&bufmgr_gem->lock); 2117 2118 for (i = start; i < bo_gem->reloc_count; i++) { 2119 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 2120 if (&target_bo_gem->bo != bo) { 2121 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 2122 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 2123 time.tv_sec); 2124 } 2125 } 2126 bo_gem->reloc_count = start; 2127 2128 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2129 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; 2130 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); 2131 } 2132 bo_gem->softpin_target_count = 0; 2133 2134 pthread_mutex_unlock(&bufmgr_gem->lock); 2135 2136} 2137 2138/** 2139 * Walk the tree of relocations rooted at BO and accumulate the list of 2140 * validations to be performed and update the relocation buffers with 2141 * index values into the validation list. 2142 */ 2143static void 2144drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 2145{ 2146 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2147 int i; 2148 2149 if (bo_gem->relocs == NULL) 2150 return; 2151 2152 for (i = 0; i < bo_gem->reloc_count; i++) { 2153 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2154 2155 if (target_bo == bo) 2156 continue; 2157 2158 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2159 2160 /* Continue walking the tree depth-first. 
*/ 2161 drm_intel_gem_bo_process_reloc(target_bo); 2162 2163 /* Add the target to the validate list */ 2164 drm_intel_add_validate_buffer(target_bo); 2165 } 2166} 2167 2168static void 2169drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 2170{ 2171 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2172 int i; 2173 2174 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) 2175 return; 2176 2177 for (i = 0; i < bo_gem->reloc_count; i++) { 2178 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2179 int need_fence; 2180 2181 if (target_bo == bo) 2182 continue; 2183 2184 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2185 2186 /* Continue walking the tree depth-first. */ 2187 drm_intel_gem_bo_process_reloc2(target_bo); 2188 2189 need_fence = (bo_gem->reloc_target_info[i].flags & 2190 DRM_INTEL_RELOC_FENCE); 2191 2192 /* Add the target to the validate list */ 2193 drm_intel_add_validate_buffer2(target_bo, need_fence); 2194 } 2195 2196 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2197 drm_intel_bo *target_bo = bo_gem->softpin_target[i]; 2198 2199 if (target_bo == bo) 2200 continue; 2201 2202 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2203 drm_intel_gem_bo_process_reloc2(target_bo); 2204 drm_intel_add_validate_buffer2(target_bo, false); 2205 } 2206} 2207 2208 2209static void 2210drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 2211{ 2212 int i; 2213 2214 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2215 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2216 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2217 2218 /* Update the buffer offset */ 2219 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 2220 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2221 bo_gem->gem_handle, bo_gem->name, 2222 upper_32_bits(bo->offset64), 2223 lower_32_bits(bo->offset64), 2224 upper_32_bits(bufmgr_gem->exec_objects[i].offset), 2225 lower_32_bits(bufmgr_gem->exec_objects[i].offset)); 2226 bo->offset64 = bufmgr_gem->exec_objects[i].offset; 2227 bo->offset = bufmgr_gem->exec_objects[i].offset; 2228 } 2229 } 2230} 2231 2232static void 2233drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 2234{ 2235 int i; 2236 2237 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2238 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2239 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2240 2241 /* Update the buffer offset */ 2242 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 2243 /* If we're seeing softpinned object here it means that the kernel 2244 * has relocated our object... 
Indicating a programming error 2245 */ 2246 assert(!bo_gem->is_softpin); 2247 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2248 bo_gem->gem_handle, bo_gem->name, 2249 upper_32_bits(bo->offset64), 2250 lower_32_bits(bo->offset64), 2251 upper_32_bits(bufmgr_gem->exec2_objects[i].offset), 2252 lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); 2253 bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 2254 bo->offset = bufmgr_gem->exec2_objects[i].offset; 2255 } 2256 } 2257} 2258 2259void 2260drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2261 int x1, int y1, int width, int height, 2262 enum aub_dump_bmp_format format, 2263 int pitch, int offset) 2264{ 2265} 2266 2267static int 2268drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2269 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2270{ 2271 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2272 struct drm_i915_gem_execbuffer execbuf; 2273 int ret, i; 2274 2275 if (to_bo_gem(bo)->has_error) 2276 return -ENOMEM; 2277 2278 pthread_mutex_lock(&bufmgr_gem->lock); 2279 /* Update indices and set up the validate list. */ 2280 drm_intel_gem_bo_process_reloc(bo); 2281 2282 /* Add the batch buffer to the validation list. There are no 2283 * relocations pointing to it. 2284 */ 2285 drm_intel_add_validate_buffer(bo); 2286 2287 memclear(execbuf); 2288 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2289 execbuf.buffer_count = bufmgr_gem->exec_count; 2290 execbuf.batch_start_offset = 0; 2291 execbuf.batch_len = used; 2292 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2293 execbuf.num_cliprects = num_cliprects; 2294 execbuf.DR1 = 0; 2295 execbuf.DR4 = DR4; 2296 2297 ret = drmIoctl(bufmgr_gem->fd, 2298 DRM_IOCTL_I915_GEM_EXECBUFFER, 2299 &execbuf); 2300 if (ret != 0) { 2301 ret = -errno; 2302 if (errno == ENOSPC) { 2303 DBG("Execbuffer fails to pin. " 2304 "Estimate: %u. Actual: %u. 
Available: %u\n", 2305 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2306 bufmgr_gem-> 2307 exec_count), 2308 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2309 bufmgr_gem-> 2310 exec_count), 2311 (unsigned int)bufmgr_gem->gtt_size); 2312 } 2313 } 2314 drm_intel_update_buffer_offsets(bufmgr_gem); 2315 2316 if (bufmgr_gem->bufmgr.debug) 2317 drm_intel_gem_dump_validation_list(bufmgr_gem); 2318 2319 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2320 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2321 2322 bo_gem->idle = false; 2323 2324 /* Disconnect the buffer from the validate list */ 2325 bo_gem->validate_index = -1; 2326 bufmgr_gem->exec_bos[i] = NULL; 2327 } 2328 bufmgr_gem->exec_count = 0; 2329 pthread_mutex_unlock(&bufmgr_gem->lock); 2330 2331 return ret; 2332} 2333 2334static int 2335do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2336 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2337 unsigned int flags) 2338{ 2339 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2340 struct drm_i915_gem_execbuffer2 execbuf; 2341 int ret = 0; 2342 int i; 2343 2344 if (to_bo_gem(bo)->has_error) 2345 return -ENOMEM; 2346 2347 switch (flags & 0x7) { 2348 default: 2349 return -EINVAL; 2350 case I915_EXEC_BLT: 2351 if (!bufmgr_gem->has_blt) 2352 return -EINVAL; 2353 break; 2354 case I915_EXEC_BSD: 2355 if (!bufmgr_gem->has_bsd) 2356 return -EINVAL; 2357 break; 2358 case I915_EXEC_VEBOX: 2359 if (!bufmgr_gem->has_vebox) 2360 return -EINVAL; 2361 break; 2362 case I915_EXEC_RENDER: 2363 case I915_EXEC_DEFAULT: 2364 break; 2365 } 2366 2367 pthread_mutex_lock(&bufmgr_gem->lock); 2368 /* Update indices and set up the validate list. */ 2369 drm_intel_gem_bo_process_reloc2(bo); 2370 2371 /* Add the batch buffer to the validation list. There are no relocations 2372 * pointing to it. 2373 */ 2374 drm_intel_add_validate_buffer2(bo, 0); 2375 2376 memclear(execbuf); 2377 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2378 execbuf.buffer_count = bufmgr_gem->exec_count; 2379 execbuf.batch_start_offset = 0; 2380 execbuf.batch_len = used; 2381 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2382 execbuf.num_cliprects = num_cliprects; 2383 execbuf.DR1 = 0; 2384 execbuf.DR4 = DR4; 2385 execbuf.flags = flags; 2386 if (ctx == NULL) 2387 i915_execbuffer2_set_context_id(execbuf, 0); 2388 else 2389 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2390 execbuf.rsvd2 = 0; 2391 2392 if (bufmgr_gem->no_exec) 2393 goto skip_execution; 2394 2395 ret = drmIoctl(bufmgr_gem->fd, 2396 DRM_IOCTL_I915_GEM_EXECBUFFER2, 2397 &execbuf); 2398 if (ret != 0) { 2399 ret = -errno; 2400 if (ret == -ENOSPC) { 2401 DBG("Execbuffer fails to pin. " 2402 "Estimate: %u. Actual: %u. 
Available: %u\n", 2403 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2404 bufmgr_gem->exec_count), 2405 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2406 bufmgr_gem->exec_count), 2407 (unsigned int) bufmgr_gem->gtt_size); 2408 } 2409 } 2410 drm_intel_update_buffer_offsets2(bufmgr_gem); 2411 2412skip_execution: 2413 if (bufmgr_gem->bufmgr.debug) 2414 drm_intel_gem_dump_validation_list(bufmgr_gem); 2415 2416 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2417 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2418 2419 bo_gem->idle = false; 2420 2421 /* Disconnect the buffer from the validate list */ 2422 bo_gem->validate_index = -1; 2423 bufmgr_gem->exec_bos[i] = NULL; 2424 } 2425 bufmgr_gem->exec_count = 0; 2426 pthread_mutex_unlock(&bufmgr_gem->lock); 2427 2428 return ret; 2429} 2430 2431static int 2432drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2433 drm_clip_rect_t *cliprects, int num_cliprects, 2434 int DR4) 2435{ 2436 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2437 I915_EXEC_RENDER); 2438} 2439 2440static int 2441drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2442 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2443 unsigned int flags) 2444{ 2445 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2446 flags); 2447} 2448 2449int 2450drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2451 int used, unsigned int flags) 2452{ 2453 return do_exec2(bo, used, ctx, NULL, 0, 0, flags); 2454} 2455 2456static int 2457drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2458{ 2459 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2460 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2461 struct drm_i915_gem_pin pin; 2462 int ret; 2463 2464 memclear(pin); 2465 pin.handle = bo_gem->gem_handle; 2466 pin.alignment = alignment; 2467 2468 ret = drmIoctl(bufmgr_gem->fd, 2469 DRM_IOCTL_I915_GEM_PIN, 2470 &pin); 2471 if (ret != 0) 2472 return -errno; 2473 2474 bo->offset64 = pin.offset; 2475 bo->offset = pin.offset; 2476 return 0; 2477} 2478 2479static int 2480drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2481{ 2482 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2483 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2484 struct drm_i915_gem_unpin unpin; 2485 int ret; 2486 2487 memclear(unpin); 2488 unpin.handle = bo_gem->gem_handle; 2489 2490 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2491 if (ret != 0) 2492 return -errno; 2493 2494 return 0; 2495} 2496 2497static int 2498drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2499 uint32_t tiling_mode, 2500 uint32_t stride) 2501{ 2502 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2503 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2504 struct drm_i915_gem_set_tiling set_tiling; 2505 int ret; 2506 2507 if (bo_gem->global_name == 0 && 2508 tiling_mode == bo_gem->tiling_mode && 2509 stride == bo_gem->stride) 2510 return 0; 2511 2512 memset(&set_tiling, 0, sizeof(set_tiling)); 2513 do { 2514 /* set_tiling is slightly broken and overwrites the 2515 * input on the error path, so we have to open code 2516 * rmIoctl. 
2517 */ 2518 set_tiling.handle = bo_gem->gem_handle; 2519 set_tiling.tiling_mode = tiling_mode; 2520 set_tiling.stride = stride; 2521 2522 ret = ioctl(bufmgr_gem->fd, 2523 DRM_IOCTL_I915_GEM_SET_TILING, 2524 &set_tiling); 2525 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 2526 if (ret == -1) 2527 return -errno; 2528 2529 bo_gem->tiling_mode = set_tiling.tiling_mode; 2530 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 2531 bo_gem->stride = set_tiling.stride; 2532 return 0; 2533} 2534 2535static int 2536drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2537 uint32_t stride) 2538{ 2539 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2540 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2541 int ret; 2542 2543 /* Tiling with userptr surfaces is not supported 2544 * on all hardware so refuse it for time being. 2545 */ 2546 if (bo_gem->is_userptr) 2547 return -EINVAL; 2548 2549 /* Linear buffers have no stride. By ensuring that we only ever use 2550 * stride 0 with linear buffers, we simplify our code. 2551 */ 2552 if (*tiling_mode == I915_TILING_NONE) 2553 stride = 0; 2554 2555 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 2556 if (ret == 0) 2557 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2558 2559 *tiling_mode = bo_gem->tiling_mode; 2560 return ret; 2561} 2562 2563static int 2564drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2565 uint32_t * swizzle_mode) 2566{ 2567 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2568 2569 *tiling_mode = bo_gem->tiling_mode; 2570 *swizzle_mode = bo_gem->swizzle_mode; 2571 return 0; 2572} 2573 2574static int 2575drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) 2576{ 2577 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2578 2579 bo_gem->is_softpin = true; 2580 bo->offset64 = offset; 2581 bo->offset = offset; 2582 return 0; 2583} 2584 2585drm_intel_bo * 2586drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 2587{ 2588 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2589 int ret; 2590 uint32_t handle; 2591 drm_intel_bo_gem *bo_gem; 2592 struct drm_i915_gem_get_tiling get_tiling; 2593 drmMMListHead *list; 2594 2595 pthread_mutex_lock(&bufmgr_gem->lock); 2596 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 2597 if (ret) { 2598 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); 2599 pthread_mutex_unlock(&bufmgr_gem->lock); 2600 return NULL; 2601 } 2602 2603 /* 2604 * See if the kernel has already returned this buffer to us. Just as 2605 * for named buffers, we must not create two bo's pointing at the same 2606 * kernel object 2607 */ 2608 for (list = bufmgr_gem->named.next; 2609 list != &bufmgr_gem->named; 2610 list = list->next) { 2611 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 2612 if (bo_gem->gem_handle == handle) { 2613 drm_intel_gem_bo_reference(&bo_gem->bo); 2614 pthread_mutex_unlock(&bufmgr_gem->lock); 2615 return &bo_gem->bo; 2616 } 2617 } 2618 2619 bo_gem = calloc(1, sizeof(*bo_gem)); 2620 if (!bo_gem) { 2621 pthread_mutex_unlock(&bufmgr_gem->lock); 2622 return NULL; 2623 } 2624 /* Determine size of bo. The fd-to-handle ioctl really should 2625 * return the size, but it doesn't. If we have kernel 3.12 or 2626 * later, we can lseek on the prime fd to get the size. Older 2627 * kernels will just fail, in which case we fall back to the 2628 * provided (estimated or guess size). 
*/ 2629 ret = lseek(prime_fd, 0, SEEK_END); 2630 if (ret != -1) 2631 bo_gem->bo.size = ret; 2632 else 2633 bo_gem->bo.size = size; 2634 2635 bo_gem->bo.handle = handle; 2636 bo_gem->bo.bufmgr = bufmgr; 2637 2638 bo_gem->gem_handle = handle; 2639 2640 atomic_set(&bo_gem->refcount, 1); 2641 2642 bo_gem->name = "prime"; 2643 bo_gem->validate_index = -1; 2644 bo_gem->reloc_tree_fences = 0; 2645 bo_gem->used_as_reloc_target = false; 2646 bo_gem->has_error = false; 2647 bo_gem->reusable = false; 2648 bo_gem->use_48b_address_range = false; 2649 2650 DRMINITLISTHEAD(&bo_gem->vma_list); 2651 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2652 pthread_mutex_unlock(&bufmgr_gem->lock); 2653 2654 memclear(get_tiling); 2655 get_tiling.handle = bo_gem->gem_handle; 2656 ret = drmIoctl(bufmgr_gem->fd, 2657 DRM_IOCTL_I915_GEM_GET_TILING, 2658 &get_tiling); 2659 if (ret != 0) { 2660 DBG("create_from_prime: failed to get tiling: %s\n", strerror(errno)); 2661 drm_intel_gem_bo_unreference(&bo_gem->bo); 2662 return NULL; 2663 } 2664 bo_gem->tiling_mode = get_tiling.tiling_mode; 2665 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 2666 /* XXX stride is unknown */ 2667 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2668 2669 return &bo_gem->bo; 2670} 2671 2672int 2673drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 2674{ 2675 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2676 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2677 2678 pthread_mutex_lock(&bufmgr_gem->lock); 2679 if (DRMLISTEMPTY(&bo_gem->name_list)) 2680 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2681 pthread_mutex_unlock(&bufmgr_gem->lock); 2682 2683 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 2684 DRM_CLOEXEC, prime_fd) != 0) 2685 return -errno; 2686 2687 bo_gem->reusable = false; 2688 2689 return 0; 2690} 2691 2692static int 2693drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 2694{ 2695 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2696 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2697 int ret; 2698 2699 if (!bo_gem->global_name) { 2700 struct drm_gem_flink flink; 2701 2702 memclear(flink); 2703 flink.handle = bo_gem->gem_handle; 2704 2705 pthread_mutex_lock(&bufmgr_gem->lock); 2706 2707 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 2708 if (ret != 0) { 2709 pthread_mutex_unlock(&bufmgr_gem->lock); 2710 return -errno; 2711 } 2712 2713 bo_gem->global_name = flink.name; 2714 bo_gem->reusable = false; 2715 2716 if (DRMLISTEMPTY(&bo_gem->name_list)) 2717 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2718 pthread_mutex_unlock(&bufmgr_gem->lock); 2719 } 2720 2721 *name = bo_gem->global_name; 2722 return 0; 2723} 2724 2725/** 2726 * Enables unlimited caching of buffer objects for reuse. 2727 * 2728 * This is potentially very memory expensive, as the cache at each bucket 2729 * size is only bounded by how many buffers of that size we've managed to have 2730 * in flight at once. 2731 */ 2732void 2733drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 2734{ 2735 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2736 2737 bufmgr_gem->bo_reuse = true; 2738} 2739 2740/** 2741 * Enable use of fenced reloc type. 2742 * 2743 * New code should enable this to avoid unnecessary fence register 2744 * allocation. If this option is not enabled, all relocs will have fence 2745 * register allocated. 
2746 */ 2747void 2748drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 2749{ 2750 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2751 2752 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 2753 bufmgr_gem->fenced_relocs = true; 2754} 2755 2756/** 2757 * Return the additional aperture space required by the tree of buffer objects 2758 * rooted at bo. 2759 */ 2760static int 2761drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2762{ 2763 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2764 int i; 2765 int total = 0; 2766 2767 if (bo == NULL || bo_gem->included_in_check_aperture) 2768 return 0; 2769 2770 total += bo->size; 2771 bo_gem->included_in_check_aperture = true; 2772 2773 for (i = 0; i < bo_gem->reloc_count; i++) 2774 total += 2775 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2776 reloc_target_info[i].bo); 2777 2778 return total; 2779} 2780 2781/** 2782 * Count the number of buffers in this list that need a fence reg 2783 * 2784 * If the count is greater than the number of available regs, we'll have 2785 * to ask the caller to resubmit a batch with fewer tiled buffers. 2786 * 2787 * This function over-counts if the same buffer is used multiple times. 2788 */ 2789static unsigned int 2790drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2791{ 2792 int i; 2793 unsigned int total = 0; 2794 2795 for (i = 0; i < count; i++) { 2796 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2797 2798 if (bo_gem == NULL) 2799 continue; 2800 2801 total += bo_gem->reloc_tree_fences; 2802 } 2803 return total; 2804} 2805 2806/** 2807 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2808 * for the next drm_intel_bufmgr_check_aperture_space() call. 2809 */ 2810static void 2811drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2812{ 2813 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2814 int i; 2815 2816 if (bo == NULL || !bo_gem->included_in_check_aperture) 2817 return; 2818 2819 bo_gem->included_in_check_aperture = false; 2820 2821 for (i = 0; i < bo_gem->reloc_count; i++) 2822 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2823 reloc_target_info[i].bo); 2824} 2825 2826/** 2827 * Return a conservative estimate for the amount of aperture required 2828 * for a collection of buffers. This may double-count some buffers. 2829 */ 2830static unsigned int 2831drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2832{ 2833 int i; 2834 unsigned int total = 0; 2835 2836 for (i = 0; i < count; i++) { 2837 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2838 if (bo_gem != NULL) 2839 total += bo_gem->reloc_tree_size; 2840 } 2841 return total; 2842} 2843 2844/** 2845 * Return the amount of aperture needed for a collection of buffers. 2846 * This avoids double counting any buffers, at the cost of looking 2847 * at every buffer in the set. 2848 */ 2849static unsigned int 2850drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2851{ 2852 int i; 2853 unsigned int total = 0; 2854 2855 for (i = 0; i < count; i++) { 2856 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2857 /* For the first buffer object in the array, we get an 2858 * accurate count back for its reloc_tree size (since nothing 2859 * had been flagged as being counted yet). We can save that 2860 * value out as a more conservative reloc_tree_size that 2861 * avoids double-counting target buffers. 
Since the first 2862 * buffer happens to usually be the batch buffer in our 2863 * callers, this can pull us back from doing the tree 2864 * walk on every new batch emit. 2865 */ 2866 if (i == 0) { 2867 drm_intel_bo_gem *bo_gem = 2868 (drm_intel_bo_gem *) bo_array[i]; 2869 bo_gem->reloc_tree_size = total; 2870 } 2871 } 2872 2873 for (i = 0; i < count; i++) 2874 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2875 return total; 2876} 2877 2878/** 2879 * Return -1 if the batchbuffer should be flushed before attempting to 2880 * emit rendering referencing the buffers pointed to by bo_array. 2881 * 2882 * This is required because if we try to emit a batchbuffer with relocations 2883 * to a tree of buffers that won't simultaneously fit in the aperture, 2884 * the rendering will return an error at a point where the software is not 2885 * prepared to recover from it. 2886 * 2887 * However, we also want to emit the batchbuffer significantly before we reach 2888 * the limit, as a series of batchbuffers each of which references buffers 2889 * covering almost all of the aperture means that at each emit we end up 2890 * waiting to evict a buffer from the last rendering, and we get synchronous 2891 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 2892 * get better parallelism. 2893 */ 2894static int 2895drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2896{ 2897 drm_intel_bufmgr_gem *bufmgr_gem = 2898 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2899 unsigned int total = 0; 2900 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2901 int total_fences; 2902 2903 /* Check for fence reg constraints if necessary */ 2904 if (bufmgr_gem->available_fences) { 2905 total_fences = drm_intel_gem_total_fences(bo_array, count); 2906 if (total_fences > bufmgr_gem->available_fences) 2907 return -ENOSPC; 2908 } 2909 2910 total = drm_intel_gem_estimate_batch_space(bo_array, count); 2911 2912 if (total > threshold) 2913 total = drm_intel_gem_compute_batch_space(bo_array, count); 2914 2915 if (total > threshold) { 2916 DBG("check_space: overflowed available aperture, " 2917 "%dkb vs %dkb\n", 2918 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 2919 return -ENOSPC; 2920 } else { 2921 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 2922 (int)bufmgr_gem->gtt_size / 1024); 2923 return 0; 2924 } 2925} 2926 2927/* 2928 * Disable buffer reuse for objects which are shared with the kernel 2929 * as scanout buffers 2930 */ 2931static int 2932drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 2933{ 2934 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2935 2936 bo_gem->reusable = false; 2937 return 0; 2938} 2939 2940static int 2941drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 2942{ 2943 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2944 2945 return bo_gem->reusable; 2946} 2947 2948static int 2949_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2950{ 2951 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2952 int i; 2953 2954 for (i = 0; i < bo_gem->reloc_count; i++) { 2955 if (bo_gem->reloc_target_info[i].bo == target_bo) 2956 return 1; 2957 if (bo == bo_gem->reloc_target_info[i].bo) 2958 continue; 2959 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 2960 target_bo)) 2961 return 1; 2962 } 2963 2964 for (i = 0; i< bo_gem->softpin_target_count; i++) { 2965 if (bo_gem->softpin_target[i] == target_bo) 2966 return 1; 2967 if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) 2968 return 
1; 2969 } 2970 2971 return 0; 2972} 2973 2974/** Return true if target_bo is referenced by bo's relocation tree. */ 2975static int 2976drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2977{ 2978 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2979 2980 if (bo == NULL || target_bo == NULL) 2981 return 0; 2982 if (target_bo_gem->used_as_reloc_target) 2983 return _drm_intel_gem_bo_references(bo, target_bo); 2984 return 0; 2985} 2986 2987static void 2988add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 2989{ 2990 unsigned int i = bufmgr_gem->num_buckets; 2991 2992 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 2993 2994 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 2995 bufmgr_gem->cache_bucket[i].size = size; 2996 bufmgr_gem->num_buckets++; 2997} 2998 2999static void 3000init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 3001{ 3002 unsigned long size, cache_max_size = 64 * 1024 * 1024; 3003 3004 /* OK, so power of two buckets was too wasteful of memory. 3005 * Give 3 other sizes between each power of two, to hopefully 3006 * cover things accurately enough. (The alternative is 3007 * probably to just go for exact matching of sizes, and assume 3008 * that for things like composited window resize the tiled 3009 * width/height alignment and rounding of sizes to pages will 3010 * get us useful cache hit rates anyway) 3011 */ 3012 add_bucket(bufmgr_gem, 4096); 3013 add_bucket(bufmgr_gem, 4096 * 2); 3014 add_bucket(bufmgr_gem, 4096 * 3); 3015 3016 /* Initialize the linked lists for BO reuse cache. */ 3017 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 3018 add_bucket(bufmgr_gem, size); 3019 3020 add_bucket(bufmgr_gem, size + size * 1 / 4); 3021 add_bucket(bufmgr_gem, size + size * 2 / 4); 3022 add_bucket(bufmgr_gem, size + size * 3 / 4); 3023 } 3024} 3025 3026void 3027drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 3028{ 3029 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3030 3031 bufmgr_gem->vma_max = limit; 3032 3033 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 3034} 3035 3036/** 3037 * Get the PCI ID for the device. This can be overridden by setting the 3038 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 3039 */ 3040static int 3041get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 3042{ 3043 char *devid_override; 3044 int devid = 0; 3045 int ret; 3046 drm_i915_getparam_t gp; 3047 3048 if (geteuid() == getuid()) { 3049 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 3050 if (devid_override) { 3051 bufmgr_gem->no_exec = true; 3052 return strtod(devid_override, NULL); 3053 } 3054 } 3055 3056 memclear(gp); 3057 gp.param = I915_PARAM_CHIPSET_ID; 3058 gp.value = &devid; 3059 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3060 if (ret) { 3061 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 3062 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 3063 } 3064 return devid; 3065} 3066 3067int 3068drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 3069{ 3070 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3071 3072 return bufmgr_gem->pci_device; 3073} 3074 3075/** 3076 * Sets the AUB filename. 3077 * 3078 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 3079 * for it to have any effect. 3080 */ 3081void 3082drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 3083 const char *filename) 3084{ 3085} 3086 3087/** 3088 * Sets up AUB dumping. 
3089 * 3090 * This is a trace file format that can be used with the simulator. 3091 * Packets are emitted in a format somewhat like GPU command packets. 3092 * You can set up a GTT and upload your objects into the referenced 3093 * space, then send off batchbuffers and get BMPs out the other end. 3094 */ 3095void 3096drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 3097{ 3098 fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" 3099 "Use intel_aubdump from intel-gpu-tools instead. Install intel-gpu-tools,\n" 3100 "then run (for example)\n\n" 3101 "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" 3102 "See the intel_aubdump man page for more details.\n"); 3103} 3104 3105drm_intel_context * 3106drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 3107{ 3108 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3109 struct drm_i915_gem_context_create create; 3110 drm_intel_context *context = NULL; 3111 int ret; 3112 3113 context = calloc(1, sizeof(*context)); 3114 if (!context) 3115 return NULL; 3116 3117 memclear(create); 3118 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 3119 if (ret != 0) { 3120 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 3121 strerror(errno)); 3122 free(context); 3123 return NULL; 3124 } 3125 3126 context->ctx_id = create.ctx_id; 3127 context->bufmgr = bufmgr; 3128 3129 return context; 3130} 3131 3132void 3133drm_intel_gem_context_destroy(drm_intel_context *ctx) 3134{ 3135 drm_intel_bufmgr_gem *bufmgr_gem; 3136 struct drm_i915_gem_context_destroy destroy; 3137 int ret; 3138 3139 if (ctx == NULL) 3140 return; 3141 3142 memclear(destroy); 3143 3144 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3145 destroy.ctx_id = ctx->ctx_id; 3146 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 3147 &destroy); 3148 if (ret != 0) 3149 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 3150 strerror(errno)); 3151 3152 free(ctx); 3153} 3154 3155int 3156drm_intel_get_reset_stats(drm_intel_context *ctx, 3157 uint32_t *reset_count, 3158 uint32_t *active, 3159 uint32_t *pending) 3160{ 3161 drm_intel_bufmgr_gem *bufmgr_gem; 3162 struct drm_i915_reset_stats stats; 3163 int ret; 3164 3165 if (ctx == NULL) 3166 return -EINVAL; 3167 3168 memclear(stats); 3169 3170 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3171 stats.ctx_id = ctx->ctx_id; 3172 ret = drmIoctl(bufmgr_gem->fd, 3173 DRM_IOCTL_I915_GET_RESET_STATS, 3174 &stats); 3175 if (ret == 0) { 3176 if (reset_count != NULL) 3177 *reset_count = stats.reset_count; 3178 3179 if (active != NULL) 3180 *active = stats.batch_active; 3181 3182 if (pending != NULL) 3183 *pending = stats.batch_pending; 3184 } 3185 3186 return ret; 3187} 3188 3189int 3190drm_intel_reg_read(drm_intel_bufmgr *bufmgr, 3191 uint32_t offset, 3192 uint64_t *result) 3193{ 3194 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3195 struct drm_i915_reg_read reg_read; 3196 int ret; 3197 3198 memclear(reg_read); 3199 reg_read.offset = offset; 3200 3201 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); 3202 3203 *result = reg_read.val; 3204 return ret; 3205} 3206 3207int 3208drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) 3209{ 3210 drm_i915_getparam_t gp; 3211 int ret; 3212 3213 memclear(gp); 3214 gp.value = (int*)subslice_total; 3215 gp.param = I915_PARAM_SUBSLICE_TOTAL; 3216 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3217 if (ret) 3218 return -errno; 3219 3220 return 0; 3221} 3222 
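/*
 * Usage sketch (not from the original source; how "fd" is obtained is an
 * assumption): the GETPARAM wrappers above and below report the GPU's
 * subslice and execution-unit counts for an already-open DRM file
 * descriptor, returning 0 on success and -errno when the kernel does not
 * expose the parameter.
 *
 *	unsigned int subslices = 0, eus = 0;
 *
 *	if (drm_intel_get_subslice_total(fd, &subslices) == 0 &&
 *	    drm_intel_get_eu_total(fd, &eus) == 0)
 *		printf("GPU topology: %u subslices, %u EUs\n", subslices, eus);
 *	else
 *		printf("subslice/EU queries not supported by this kernel\n");
 */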
3223int
3224drm_intel_get_eu_total(int fd, unsigned int *eu_total)
3225{
3226 drm_i915_getparam_t gp;
3227 int ret;
3228
3229 memclear(gp);
3230 gp.value = (int*)eu_total;
3231 gp.param = I915_PARAM_EU_TOTAL;
3232 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp);
3233 if (ret)
3234 return -errno;
3235
3236 return 0;
3237}
3238
3239/**
3240 * Annotate the given bo for use in aub dumping.
3241 *
3242 * \param annotations is an array of drm_intel_aub_annotation objects
3243 * describing the type of data in various sections of the bo. Each
3244 * element of the array specifies the type and subtype of a section of
3245 * the bo, and the past-the-end offset of that section. The elements
3246 * of \c annotations must be sorted so that ending_offset is
3247 * increasing.
3248 *
3249 * \param count is the number of elements in the \c annotations array.
3250 * If \c count is zero, then \c annotations will not be dereferenced.
3251 *
3252 * Annotations are copied into a private data structure, so the caller may
3253 * re-use the memory pointed to by \c annotations after the call
3254 * returns.
3255 *
3256 * Annotations are stored for the lifetime of the bo; to reset to the
3257 * default state (no annotations), call this function with a \c count
3258 * of zero.
3259 */
3260void
3261drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3262 drm_intel_aub_annotation *annotations,
3263 unsigned count)
3264{
3265}
3266
3267static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3268static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3269
3270static drm_intel_bufmgr_gem *
3271drm_intel_bufmgr_gem_find(int fd)
3272{
3273 drm_intel_bufmgr_gem *bufmgr_gem;
3274
3275 DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3276 if (bufmgr_gem->fd == fd) {
3277 atomic_inc(&bufmgr_gem->refcount);
3278 return bufmgr_gem;
3279 }
3280 }
3281
3282 return NULL;
3283}
3284
3285static void
3286drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3287{
3288 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3289
3290 if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3291 pthread_mutex_lock(&bufmgr_list_mutex);
3292
3293 if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3294 DRMLISTDEL(&bufmgr_gem->managers);
3295 drm_intel_bufmgr_gem_destroy(bufmgr);
3296 }
3297
3298 pthread_mutex_unlock(&bufmgr_list_mutex);
3299 }
3300}
3301
3302/**
3303 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3304 * and manage buffer objects.
3305 *
3306 * \param fd File descriptor of the opened DRM device.
3307 */ 3308drm_intel_bufmgr * 3309drm_intel_bufmgr_gem_init(int fd, int batch_size) 3310{ 3311 drm_intel_bufmgr_gem *bufmgr_gem; 3312 struct drm_i915_gem_get_aperture aperture; 3313 drm_i915_getparam_t gp; 3314 int ret, tmp; 3315 bool exec2 = false; 3316 3317 pthread_mutex_lock(&bufmgr_list_mutex); 3318 3319 bufmgr_gem = drm_intel_bufmgr_gem_find(fd); 3320 if (bufmgr_gem) 3321 goto exit; 3322 3323 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 3324 if (bufmgr_gem == NULL) 3325 goto exit; 3326 3327 bufmgr_gem->fd = fd; 3328 atomic_set(&bufmgr_gem->refcount, 1); 3329 3330 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 3331 free(bufmgr_gem); 3332 bufmgr_gem = NULL; 3333 goto exit; 3334 } 3335 3336 memclear(aperture); 3337 ret = drmIoctl(bufmgr_gem->fd, 3338 DRM_IOCTL_I915_GEM_GET_APERTURE, 3339 &aperture); 3340 3341 if (ret == 0) 3342 bufmgr_gem->gtt_size = aperture.aper_available_size; 3343 else { 3344 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 3345 strerror(errno)); 3346 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 3347 fprintf(stderr, "Assuming %dkB available aperture size.\n" 3348 "May lead to reduced performance or incorrect " 3349 "rendering.\n", 3350 (int)bufmgr_gem->gtt_size / 1024); 3351 } 3352 3353 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 3354 3355 if (IS_GEN2(bufmgr_gem->pci_device)) 3356 bufmgr_gem->gen = 2; 3357 else if (IS_GEN3(bufmgr_gem->pci_device)) 3358 bufmgr_gem->gen = 3; 3359 else if (IS_GEN4(bufmgr_gem->pci_device)) 3360 bufmgr_gem->gen = 4; 3361 else if (IS_GEN5(bufmgr_gem->pci_device)) 3362 bufmgr_gem->gen = 5; 3363 else if (IS_GEN6(bufmgr_gem->pci_device)) 3364 bufmgr_gem->gen = 6; 3365 else if (IS_GEN7(bufmgr_gem->pci_device)) 3366 bufmgr_gem->gen = 7; 3367 else if (IS_GEN8(bufmgr_gem->pci_device)) 3368 bufmgr_gem->gen = 8; 3369 else if (IS_GEN9(bufmgr_gem->pci_device)) 3370 bufmgr_gem->gen = 9; 3371 else { 3372 free(bufmgr_gem); 3373 bufmgr_gem = NULL; 3374 goto exit; 3375 } 3376 3377 if (IS_GEN3(bufmgr_gem->pci_device) && 3378 bufmgr_gem->gtt_size > 256*1024*1024) { 3379 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 3380 * be used for tiled blits. To simplify the accounting, just 3381 * subtract the unmappable part (fixed to 256MB on all known 3382 * gen3 devices) if the kernel advertises it. 
*/
3383 bufmgr_gem->gtt_size -= 256*1024*1024;
3384 }
3385
3386 memclear(gp);
3387 gp.value = &tmp;
3388
3389 gp.param = I915_PARAM_HAS_EXECBUF2;
3390 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3391 if (!ret)
3392 exec2 = true;
3393
3394 gp.param = I915_PARAM_HAS_BSD;
3395 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3396 bufmgr_gem->has_bsd = ret == 0;
3397
3398 gp.param = I915_PARAM_HAS_BLT;
3399 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3400 bufmgr_gem->has_blt = ret == 0;
3401
3402 gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3403 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3404 bufmgr_gem->has_relaxed_fencing = ret == 0;
3405
3406 bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr;
3407
3408 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3409 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3410 bufmgr_gem->has_wait_timeout = ret == 0;
3411
3412 gp.param = I915_PARAM_HAS_LLC;
3413 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3414 if (ret != 0) {
3415 /* The kernel does not support the HAS_LLC query; fall back to GPU
3416 * generation detection and assume that we have LLC on GEN6/7.
3417 */
3418 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
3419 IS_GEN7(bufmgr_gem->pci_device));
3420 } else
3421 bufmgr_gem->has_llc = *gp.value;
3422
3423 gp.param = I915_PARAM_HAS_VEBOX;
3424 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3425 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
3426
3427 gp.param = I915_PARAM_HAS_EXEC_SOFTPIN;
3428 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3429 if (ret == 0 && *gp.value > 0)
3430 bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset;
3431
3432 if (bufmgr_gem->gen < 4) {
3433 gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3434 gp.value = &bufmgr_gem->available_fences;
3435 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3436 if (ret) {
3437 fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3438 errno);
3439 fprintf(stderr, "param: %d, val: %d\n", gp.param,
3440 *gp.value);
3441 bufmgr_gem->available_fences = 0;
3442 } else {
3443 /* XXX The kernel reports the total number of fences,
3444 * including any that may be pinned.
3445 *
3446 * We presume that there will be at least one pinned
3447 * fence for the scanout buffer, but there may be more
3448 * than one scanout and the user may be manually
3449 * pinning buffers. Let's move to execbuffer2 and
3450 * thereby forget the insanity of using fences...
3451 */
3452 bufmgr_gem->available_fences -= 2;
3453 if (bufmgr_gem->available_fences < 0)
3454 bufmgr_gem->available_fences = 0;
3455 }
3456 }
3457
3458 if (bufmgr_gem->gen >= 8) {
3459 gp.param = I915_PARAM_HAS_ALIASING_PPGTT;
3460 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3461 if (ret == 0 && *gp.value == 3)
3462 bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range;
3463 }
3464
3465 /* Let's go with one relocation for every 2 dwords (but round down a bit
3466 * since a power of two will mean an extra page allocation for the reloc
3467 * buffer).
3468 *
3469 * Every 4 was too few for the blender benchmark.
3470 */ 3471 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 3472 3473 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 3474 bufmgr_gem->bufmgr.bo_alloc_for_render = 3475 drm_intel_gem_bo_alloc_for_render; 3476 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 3477 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 3478 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 3479 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 3480 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 3481 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 3482 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 3483 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 3484 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 3485 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 3486 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 3487 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 3488 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 3489 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 3490 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 3491 /* Use the new one if available */ 3492 if (exec2) { 3493 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 3494 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3495 } else 3496 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 3497 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 3498 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3499 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; 3500 bufmgr_gem->bufmgr.debug = 0; 3501 bufmgr_gem->bufmgr.check_aperture_space = 3502 drm_intel_gem_check_aperture_space; 3503 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3504 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 3505 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 3506 drm_intel_gem_get_pipe_from_crtc_id; 3507 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 3508 3509 DRMINITLISTHEAD(&bufmgr_gem->named); 3510 init_cache_buckets(bufmgr_gem); 3511 3512 DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 3513 bufmgr_gem->vma_max = -1; /* unlimited by default */ 3514 3515 DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); 3516 3517exit: 3518 pthread_mutex_unlock(&bufmgr_list_mutex); 3519 3520 return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; 3521} 3522
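/*
 * Usage sketch referenced from the drm_intel_gem_bo_wait() comment earlier in
 * this file. It is not part of the original source: the device node path, the
 * 4096-byte batch size, and the buffer name are illustrative assumptions, and
 * error handling is trimmed to keep the flow visible.
 *
 *	int fd = open("/dev/dri/card0", O_RDWR);          // assumed device node
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);        // opt in to the BO cache
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "example", 4096, 4096);
 *
 *	// ... emit and execute a batch that references "bo" ...
 *
 *	// 0 means the BO is idle, -ETIME means it was still busy after 2 ms;
 *	// a negative timeout_ns requests an infinite wait (see the comment on
 *	// drm_intel_gem_bo_wait() above).
 *	int ret = drm_intel_gem_bo_wait(bo, 2 * 1000 * 1000);
 *
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */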