intel_bufmgr_gem.c revision 424e9256
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_macros.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "intel_aub.h"
#include "string.h"

#include "i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define memclear(s) memset(&s, 0, sizeof(s))

#define DBG(...) \
do { \ 81 if (bufmgr_gem->bufmgr.debug) \ 82 fprintf(stderr, __VA_ARGS__); \ 83} while (0) 84 85#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 86 87typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 88 89struct drm_intel_gem_bo_bucket { 90 drmMMListHead head; 91 unsigned long size; 92}; 93 94typedef struct _drm_intel_bufmgr_gem { 95 drm_intel_bufmgr bufmgr; 96 97 atomic_t refcount; 98 99 int fd; 100 101 int max_relocs; 102 103 pthread_mutex_t lock; 104 105 struct drm_i915_gem_exec_object *exec_objects; 106 struct drm_i915_gem_exec_object2 *exec2_objects; 107 drm_intel_bo **exec_bos; 108 int exec_size; 109 int exec_count; 110 111 /** Array of lists of cached gem objects of power-of-two sizes */ 112 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 113 int num_buckets; 114 time_t time; 115 116 drmMMListHead managers; 117 118 drmMMListHead named; 119 drmMMListHead vma_cache; 120 int vma_count, vma_open, vma_max; 121 122 uint64_t gtt_size; 123 int available_fences; 124 int pci_device; 125 int gen; 126 unsigned int has_bsd : 1; 127 unsigned int has_blt : 1; 128 unsigned int has_relaxed_fencing : 1; 129 unsigned int has_llc : 1; 130 unsigned int has_wait_timeout : 1; 131 unsigned int bo_reuse : 1; 132 unsigned int no_exec : 1; 133 unsigned int has_vebox : 1; 134 bool fenced_relocs; 135 136 struct { 137 void *ptr; 138 uint32_t handle; 139 } userptr_active; 140 141 char *aub_filename; 142 FILE *aub_file; 143 uint32_t aub_offset; 144} drm_intel_bufmgr_gem; 145 146#define DRM_INTEL_RELOC_FENCE (1<<0) 147 148typedef struct _drm_intel_reloc_target_info { 149 drm_intel_bo *bo; 150 int flags; 151} drm_intel_reloc_target; 152 153struct _drm_intel_bo_gem { 154 drm_intel_bo bo; 155 156 atomic_t refcount; 157 uint32_t gem_handle; 158 const char *name; 159 160 /** 161 * Kenel-assigned global name for this object 162 * 163 * List contains both flink named and prime fd'd objects 164 */ 165 unsigned int global_name; 166 drmMMListHead name_list; 167 168 /** 169 * Index of the buffer within the validation list while preparing a 170 * batchbuffer execution. 171 */ 172 int validate_index; 173 174 /** 175 * Current tiling mode 176 */ 177 uint32_t tiling_mode; 178 uint32_t swizzle_mode; 179 unsigned long stride; 180 181 time_t free_time; 182 183 /** Array passed to the DRM containing relocation information. */ 184 struct drm_i915_gem_relocation_entry *relocs; 185 /** 186 * Array of info structs corresponding to relocs[i].target_handle etc 187 */ 188 drm_intel_reloc_target *reloc_target_info; 189 /** Number of entries in relocs */ 190 int reloc_count; 191 /** Mapped address for the buffer, saved across map/unmap cycles */ 192 void *mem_virtual; 193 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 194 void *gtt_virtual; 195 /** 196 * Virtual address of the buffer allocated by user, used for userptr 197 * objects only. 198 */ 199 void *user_virtual; 200 int map_count; 201 drmMMListHead vma_list; 202 203 /** BO cache list */ 204 drmMMListHead head; 205 206 /** 207 * Boolean of whether this BO and its children have been included in 208 * the current drm_intel_bufmgr_check_aperture_space() total. 209 */ 210 bool included_in_check_aperture; 211 212 /** 213 * Boolean of whether this buffer has been used as a relocation 214 * target and had its size accounted for, and thus can't have any 215 * further relocations added to it. 216 */ 217 bool used_as_reloc_target; 218 219 /** 220 * Boolean of whether we have encountered an error whilst building the relocation tree. 
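	 * (e.g. the relocation-list allocation in drm_intel_setup_reloc_list
	 * failed, or a relocation target already carried an error)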
221 */ 222 bool has_error; 223 224 /** 225 * Boolean of whether this buffer can be re-used 226 */ 227 bool reusable; 228 229 /** 230 * Boolean of whether the GPU is definitely not accessing the buffer. 231 * 232 * This is only valid when reusable, since non-reusable 233 * buffers are those that have been shared wth other 234 * processes, so we don't know their state. 235 */ 236 bool idle; 237 238 /** 239 * Boolean of whether this buffer was allocated with userptr 240 */ 241 bool is_userptr; 242 243 /** 244 * Size in bytes of this buffer and its relocation descendents. 245 * 246 * Used to avoid costly tree walking in 247 * drm_intel_bufmgr_check_aperture in the common case. 248 */ 249 int reloc_tree_size; 250 251 /** 252 * Number of potential fence registers required by this buffer and its 253 * relocations. 254 */ 255 int reloc_tree_fences; 256 257 /** Flags that we may need to do the SW_FINSIH ioctl on unmap. */ 258 bool mapped_cpu_write; 259 260 uint32_t aub_offset; 261 262 drm_intel_aub_annotation *aub_annotations; 263 unsigned aub_annotation_count; 264}; 265 266static unsigned int 267drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 268 269static unsigned int 270drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 271 272static int 273drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 274 uint32_t * swizzle_mode); 275 276static int 277drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 278 uint32_t tiling_mode, 279 uint32_t stride); 280 281static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 282 time_t time); 283 284static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 285 286static void drm_intel_gem_bo_free(drm_intel_bo *bo); 287 288static unsigned long 289drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 290 uint32_t *tiling_mode) 291{ 292 unsigned long min_size, max_size; 293 unsigned long i; 294 295 if (*tiling_mode == I915_TILING_NONE) 296 return size; 297 298 /* 965+ just need multiples of page size for tiling */ 299 if (bufmgr_gem->gen >= 4) 300 return ROUND_UP_TO(size, 4096); 301 302 /* Older chips need powers of two, of at least 512k or 1M */ 303 if (bufmgr_gem->gen == 3) { 304 min_size = 1024*1024; 305 max_size = 128*1024*1024; 306 } else { 307 min_size = 512*1024; 308 max_size = 64*1024*1024; 309 } 310 311 if (size > max_size) { 312 *tiling_mode = I915_TILING_NONE; 313 return size; 314 } 315 316 /* Do we need to allocate every page for the fence? */ 317 if (bufmgr_gem->has_relaxed_fencing) 318 return ROUND_UP_TO(size, 4096); 319 320 for (i = min_size; i < size; i <<= 1) 321 ; 322 323 return i; 324} 325 326/* 327 * Round a given pitch up to the minimum required for X tiling on a 328 * given chip. We use 512 as the minimum to allow for a later tiling 329 * change. 330 */ 331static unsigned long 332drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 333 unsigned long pitch, uint32_t *tiling_mode) 334{ 335 unsigned long tile_width; 336 unsigned long i; 337 338 /* If untiled, then just align it so that we can do rendering 339 * to it with the 3D engine. 
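	 * A 64-byte pitch alignment is sufficient for that.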
340 */ 341 if (*tiling_mode == I915_TILING_NONE) 342 return ALIGN(pitch, 64); 343 344 if (*tiling_mode == I915_TILING_X 345 || (IS_915(bufmgr_gem->pci_device) 346 && *tiling_mode == I915_TILING_Y)) 347 tile_width = 512; 348 else 349 tile_width = 128; 350 351 /* 965 is flexible */ 352 if (bufmgr_gem->gen >= 4) 353 return ROUND_UP_TO(pitch, tile_width); 354 355 /* The older hardware has a maximum pitch of 8192 with tiled 356 * surfaces, so fallback to untiled if it's too large. 357 */ 358 if (pitch > 8192) { 359 *tiling_mode = I915_TILING_NONE; 360 return ALIGN(pitch, 64); 361 } 362 363 /* Pre-965 needs power of two tile width */ 364 for (i = tile_width; i < pitch; i <<= 1) 365 ; 366 367 return i; 368} 369 370static struct drm_intel_gem_bo_bucket * 371drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 372 unsigned long size) 373{ 374 int i; 375 376 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 377 struct drm_intel_gem_bo_bucket *bucket = 378 &bufmgr_gem->cache_bucket[i]; 379 if (bucket->size >= size) { 380 return bucket; 381 } 382 } 383 384 return NULL; 385} 386 387static void 388drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 389{ 390 int i, j; 391 392 for (i = 0; i < bufmgr_gem->exec_count; i++) { 393 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 394 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 395 396 if (bo_gem->relocs == NULL) { 397 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 398 bo_gem->name); 399 continue; 400 } 401 402 for (j = 0; j < bo_gem->reloc_count; j++) { 403 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 404 drm_intel_bo_gem *target_gem = 405 (drm_intel_bo_gem *) target_bo; 406 407 DBG("%2d: %d (%s)@0x%08llx -> " 408 "%d (%s)@0x%08llx + 0x%08x\n", 409 i, 410 bo_gem->gem_handle, bo_gem->name, 411 (unsigned long long)bo_gem->relocs[j].offset, 412 target_gem->gem_handle, 413 target_gem->name, 414 (unsigned long long)target_bo->offset64, 415 bo_gem->relocs[j].delta); 416 } 417 } 418} 419 420static inline void 421drm_intel_gem_bo_reference(drm_intel_bo *bo) 422{ 423 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 424 425 atomic_inc(&bo_gem->refcount); 426} 427 428/** 429 * Adds the given buffer to the list of buffers to be validated (moved into the 430 * appropriate memory type) with the next batch submission. 431 * 432 * If a buffer is validated multiple times in a batch submission, it ends up 433 * with the intersection of the memory type flags and the union of the 434 * access flags. 435 */ 436static void 437drm_intel_add_validate_buffer(drm_intel_bo *bo) 438{ 439 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 440 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 441 int index; 442 443 if (bo_gem->validate_index != -1) 444 return; 445 446 /* Extend the array of validation entries as necessary. 
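	 * The array grows by doubling (starting at 5 entries) so repeated
	 * additions stay cheap.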
*/ 447 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 448 int new_size = bufmgr_gem->exec_size * 2; 449 450 if (new_size == 0) 451 new_size = 5; 452 453 bufmgr_gem->exec_objects = 454 realloc(bufmgr_gem->exec_objects, 455 sizeof(*bufmgr_gem->exec_objects) * new_size); 456 bufmgr_gem->exec_bos = 457 realloc(bufmgr_gem->exec_bos, 458 sizeof(*bufmgr_gem->exec_bos) * new_size); 459 bufmgr_gem->exec_size = new_size; 460 } 461 462 index = bufmgr_gem->exec_count; 463 bo_gem->validate_index = index; 464 /* Fill in array entry */ 465 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 466 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 467 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 468 bufmgr_gem->exec_objects[index].alignment = 0; 469 bufmgr_gem->exec_objects[index].offset = 0; 470 bufmgr_gem->exec_bos[index] = bo; 471 bufmgr_gem->exec_count++; 472} 473 474static void 475drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 476{ 477 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 478 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 479 int index; 480 481 if (bo_gem->validate_index != -1) { 482 if (need_fence) 483 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 484 EXEC_OBJECT_NEEDS_FENCE; 485 return; 486 } 487 488 /* Extend the array of validation entries as necessary. */ 489 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 490 int new_size = bufmgr_gem->exec_size * 2; 491 492 if (new_size == 0) 493 new_size = 5; 494 495 bufmgr_gem->exec2_objects = 496 realloc(bufmgr_gem->exec2_objects, 497 sizeof(*bufmgr_gem->exec2_objects) * new_size); 498 bufmgr_gem->exec_bos = 499 realloc(bufmgr_gem->exec_bos, 500 sizeof(*bufmgr_gem->exec_bos) * new_size); 501 bufmgr_gem->exec_size = new_size; 502 } 503 504 index = bufmgr_gem->exec_count; 505 bo_gem->validate_index = index; 506 /* Fill in array entry */ 507 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 508 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 509 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 510 bufmgr_gem->exec2_objects[index].alignment = 0; 511 bufmgr_gem->exec2_objects[index].offset = 0; 512 bufmgr_gem->exec_bos[index] = bo; 513 bufmgr_gem->exec2_objects[index].flags = 0; 514 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 515 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 516 if (need_fence) { 517 bufmgr_gem->exec2_objects[index].flags |= 518 EXEC_OBJECT_NEEDS_FENCE; 519 } 520 bufmgr_gem->exec_count++; 521} 522 523#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 524 sizeof(uint32_t)) 525 526static void 527drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 528 drm_intel_bo_gem *bo_gem) 529{ 530 int size; 531 532 assert(!bo_gem->used_as_reloc_target); 533 534 /* The older chipsets are far-less flexible in terms of tiling, 535 * and require tiled buffer to be size aligned in the aperture. 536 * This means that in the worst possible case we will need a hole 537 * twice as large as the object in order for it to fit into the 538 * aperture. Optimal packing is for wimps. 
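	 * reloc_tree_size is therefore a conservative upper bound on the
	 * aperture space this object may need, not its exact footprint.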
539 */ 540 size = bo_gem->bo.size; 541 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 542 int min_size; 543 544 if (bufmgr_gem->has_relaxed_fencing) { 545 if (bufmgr_gem->gen == 3) 546 min_size = 1024*1024; 547 else 548 min_size = 512*1024; 549 550 while (min_size < size) 551 min_size *= 2; 552 } else 553 min_size = size; 554 555 /* Account for worst-case alignment. */ 556 size = 2 * min_size; 557 } 558 559 bo_gem->reloc_tree_size = size; 560} 561 562static int 563drm_intel_setup_reloc_list(drm_intel_bo *bo) 564{ 565 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 566 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 567 unsigned int max_relocs = bufmgr_gem->max_relocs; 568 569 if (bo->size / 4 < max_relocs) 570 max_relocs = bo->size / 4; 571 572 bo_gem->relocs = malloc(max_relocs * 573 sizeof(struct drm_i915_gem_relocation_entry)); 574 bo_gem->reloc_target_info = malloc(max_relocs * 575 sizeof(drm_intel_reloc_target)); 576 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 577 bo_gem->has_error = true; 578 579 free (bo_gem->relocs); 580 bo_gem->relocs = NULL; 581 582 free (bo_gem->reloc_target_info); 583 bo_gem->reloc_target_info = NULL; 584 585 return 1; 586 } 587 588 return 0; 589} 590 591static int 592drm_intel_gem_bo_busy(drm_intel_bo *bo) 593{ 594 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 595 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 596 struct drm_i915_gem_busy busy; 597 int ret; 598 599 if (bo_gem->reusable && bo_gem->idle) 600 return false; 601 602 memclear(busy); 603 busy.handle = bo_gem->gem_handle; 604 605 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 606 if (ret == 0) { 607 bo_gem->idle = !busy.busy; 608 return busy.busy; 609 } else { 610 return false; 611 } 612 return (ret == 0 && busy.busy); 613} 614 615static int 616drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 617 drm_intel_bo_gem *bo_gem, int state) 618{ 619 struct drm_i915_gem_madvise madv; 620 621 memclear(madv); 622 madv.handle = bo_gem->gem_handle; 623 madv.madv = state; 624 madv.retained = 1; 625 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 626 627 return madv.retained; 628} 629 630static int 631drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 632{ 633 return drm_intel_gem_bo_madvise_internal 634 ((drm_intel_bufmgr_gem *) bo->bufmgr, 635 (drm_intel_bo_gem *) bo, 636 madv); 637} 638 639/* drop the oldest entries that have been purged by the kernel */ 640static void 641drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 642 struct drm_intel_gem_bo_bucket *bucket) 643{ 644 while (!DRMLISTEMPTY(&bucket->head)) { 645 drm_intel_bo_gem *bo_gem; 646 647 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 648 bucket->head.next, head); 649 if (drm_intel_gem_bo_madvise_internal 650 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 651 break; 652 653 DRMLISTDEL(&bo_gem->head); 654 drm_intel_gem_bo_free(&bo_gem->bo); 655 } 656} 657 658static drm_intel_bo * 659drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 660 const char *name, 661 unsigned long size, 662 unsigned long flags, 663 uint32_t tiling_mode, 664 unsigned long stride) 665{ 666 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 667 drm_intel_bo_gem *bo_gem; 668 unsigned int page_size = getpagesize(); 669 int ret; 670 struct drm_intel_gem_bo_bucket *bucket; 671 bool alloc_from_cache; 672 unsigned long bo_size; 673 bool for_render = false; 674 675 if (flags & BO_ALLOC_FOR_RENDER) 676 
for_render = true; 677 678 /* Round the allocated size up to a power of two number of pages. */ 679 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 680 681 /* If we don't have caching at this size, don't actually round the 682 * allocation up. 683 */ 684 if (bucket == NULL) { 685 bo_size = size; 686 if (bo_size < page_size) 687 bo_size = page_size; 688 } else { 689 bo_size = bucket->size; 690 } 691 692 pthread_mutex_lock(&bufmgr_gem->lock); 693 /* Get a buffer out of the cache if available */ 694retry: 695 alloc_from_cache = false; 696 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 697 if (for_render) { 698 /* Allocate new render-target BOs from the tail (MRU) 699 * of the list, as it will likely be hot in the GPU 700 * cache and in the aperture for us. 701 */ 702 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 703 bucket->head.prev, head); 704 DRMLISTDEL(&bo_gem->head); 705 alloc_from_cache = true; 706 } else { 707 /* For non-render-target BOs (where we're probably 708 * going to map it first thing in order to fill it 709 * with data), check if the last BO in the cache is 710 * unbusy, and only reuse in that case. Otherwise, 711 * allocating a new buffer is probably faster than 712 * waiting for the GPU to finish. 713 */ 714 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 715 bucket->head.next, head); 716 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 717 alloc_from_cache = true; 718 DRMLISTDEL(&bo_gem->head); 719 } 720 } 721 722 if (alloc_from_cache) { 723 if (!drm_intel_gem_bo_madvise_internal 724 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 725 drm_intel_gem_bo_free(&bo_gem->bo); 726 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 727 bucket); 728 goto retry; 729 } 730 731 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 732 tiling_mode, 733 stride)) { 734 drm_intel_gem_bo_free(&bo_gem->bo); 735 goto retry; 736 } 737 } 738 } 739 pthread_mutex_unlock(&bufmgr_gem->lock); 740 741 if (!alloc_from_cache) { 742 struct drm_i915_gem_create create; 743 744 bo_gem = calloc(1, sizeof(*bo_gem)); 745 if (!bo_gem) 746 return NULL; 747 748 bo_gem->bo.size = bo_size; 749 750 memclear(create); 751 create.size = bo_size; 752 753 ret = drmIoctl(bufmgr_gem->fd, 754 DRM_IOCTL_I915_GEM_CREATE, 755 &create); 756 bo_gem->gem_handle = create.handle; 757 bo_gem->bo.handle = bo_gem->gem_handle; 758 if (ret != 0) { 759 free(bo_gem); 760 return NULL; 761 } 762 bo_gem->bo.bufmgr = bufmgr; 763 764 bo_gem->tiling_mode = I915_TILING_NONE; 765 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 766 bo_gem->stride = 0; 767 768 /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized 769 list (vma_list), so better set the list head here */ 770 DRMINITLISTHEAD(&bo_gem->name_list); 771 DRMINITLISTHEAD(&bo_gem->vma_list); 772 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 773 tiling_mode, 774 stride)) { 775 drm_intel_gem_bo_free(&bo_gem->bo); 776 return NULL; 777 } 778 } 779 780 bo_gem->name = name; 781 atomic_set(&bo_gem->refcount, 1); 782 bo_gem->validate_index = -1; 783 bo_gem->reloc_tree_fences = 0; 784 bo_gem->used_as_reloc_target = false; 785 bo_gem->has_error = false; 786 bo_gem->reusable = true; 787 bo_gem->aub_annotations = NULL; 788 bo_gem->aub_annotation_count = 0; 789 790 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 791 792 DBG("bo_create: buf %d (%s) %ldb\n", 793 bo_gem->gem_handle, bo_gem->name, size); 794 795 return &bo_gem->bo; 796} 797 798static drm_intel_bo * 799drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 800 const char *name, 801 unsigned long size, 802 
unsigned int alignment) 803{ 804 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 805 BO_ALLOC_FOR_RENDER, 806 I915_TILING_NONE, 0); 807} 808 809static drm_intel_bo * 810drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 811 const char *name, 812 unsigned long size, 813 unsigned int alignment) 814{ 815 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 816 I915_TILING_NONE, 0); 817} 818 819static drm_intel_bo * 820drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 821 int x, int y, int cpp, uint32_t *tiling_mode, 822 unsigned long *pitch, unsigned long flags) 823{ 824 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 825 unsigned long size, stride; 826 uint32_t tiling; 827 828 do { 829 unsigned long aligned_y, height_alignment; 830 831 tiling = *tiling_mode; 832 833 /* If we're tiled, our allocations are in 8 or 32-row blocks, 834 * so failure to align our height means that we won't allocate 835 * enough pages. 836 * 837 * If we're untiled, we still have to align to 2 rows high 838 * because the data port accesses 2x2 blocks even if the 839 * bottom row isn't to be rendered, so failure to align means 840 * we could walk off the end of the GTT and fault. This is 841 * documented on 965, and may be the case on older chipsets 842 * too so we try to be careful. 843 */ 844 aligned_y = y; 845 height_alignment = 2; 846 847 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 848 height_alignment = 16; 849 else if (tiling == I915_TILING_X 850 || (IS_915(bufmgr_gem->pci_device) 851 && tiling == I915_TILING_Y)) 852 height_alignment = 8; 853 else if (tiling == I915_TILING_Y) 854 height_alignment = 32; 855 aligned_y = ALIGN(y, height_alignment); 856 857 stride = x * cpp; 858 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 859 size = stride * aligned_y; 860 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 861 } while (*tiling_mode != tiling); 862 *pitch = stride; 863 864 if (tiling == I915_TILING_NONE) 865 stride = 0; 866 867 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 868 tiling, stride); 869} 870 871static drm_intel_bo * 872drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 873 const char *name, 874 void *addr, 875 uint32_t tiling_mode, 876 uint32_t stride, 877 unsigned long size, 878 unsigned long flags) 879{ 880 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 881 drm_intel_bo_gem *bo_gem; 882 int ret; 883 struct drm_i915_gem_userptr userptr; 884 885 /* Tiling with userptr surfaces is not supported 886 * on all hardware so refuse it for time being. 
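	 * Callers get NULL back and must fall back to a regular allocation
	 * if they need a tiled buffer.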
887 */ 888 if (tiling_mode != I915_TILING_NONE) 889 return NULL; 890 891 bo_gem = calloc(1, sizeof(*bo_gem)); 892 if (!bo_gem) 893 return NULL; 894 895 bo_gem->bo.size = size; 896 897 memclear(userptr); 898 userptr.user_ptr = (__u64)((unsigned long)addr); 899 userptr.user_size = size; 900 userptr.flags = flags; 901 902 ret = drmIoctl(bufmgr_gem->fd, 903 DRM_IOCTL_I915_GEM_USERPTR, 904 &userptr); 905 if (ret != 0) { 906 DBG("bo_create_userptr: " 907 "ioctl failed with user ptr %p size 0x%lx, " 908 "user flags 0x%lx\n", addr, size, flags); 909 free(bo_gem); 910 return NULL; 911 } 912 913 bo_gem->gem_handle = userptr.handle; 914 bo_gem->bo.handle = bo_gem->gem_handle; 915 bo_gem->bo.bufmgr = bufmgr; 916 bo_gem->is_userptr = true; 917 bo_gem->bo.virtual = addr; 918 /* Save the address provided by user */ 919 bo_gem->user_virtual = addr; 920 bo_gem->tiling_mode = I915_TILING_NONE; 921 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 922 bo_gem->stride = 0; 923 924 DRMINITLISTHEAD(&bo_gem->name_list); 925 DRMINITLISTHEAD(&bo_gem->vma_list); 926 927 bo_gem->name = name; 928 atomic_set(&bo_gem->refcount, 1); 929 bo_gem->validate_index = -1; 930 bo_gem->reloc_tree_fences = 0; 931 bo_gem->used_as_reloc_target = false; 932 bo_gem->has_error = false; 933 bo_gem->reusable = false; 934 935 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 936 937 DBG("bo_create_userptr: " 938 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", 939 addr, bo_gem->gem_handle, bo_gem->name, 940 size, stride, tiling_mode); 941 942 return &bo_gem->bo; 943} 944 945static bool 946has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) 947{ 948 int ret; 949 void *ptr; 950 long pgsz; 951 struct drm_i915_gem_userptr userptr; 952 953 pgsz = sysconf(_SC_PAGESIZE); 954 assert(pgsz > 0); 955 956 ret = posix_memalign(&ptr, pgsz, pgsz); 957 if (ret) { 958 DBG("Failed to get a page (%ld) for userptr detection!\n", 959 pgsz); 960 return false; 961 } 962 963 memclear(userptr); 964 userptr.user_ptr = (__u64)(unsigned long)ptr; 965 userptr.user_size = pgsz; 966 967retry: 968 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); 969 if (ret) { 970 if (errno == ENODEV && userptr.flags == 0) { 971 userptr.flags = I915_USERPTR_UNSYNCHRONIZED; 972 goto retry; 973 } 974 free(ptr); 975 return false; 976 } 977 978 /* We don't release the userptr bo here as we want to keep the 979 * kernel mm tracking alive for our lifetime. The first time we 980 * create a userptr object the kernel has to install a mmu_notifer 981 * which is a heavyweight operation (e.g. it requires taking all 982 * mm_locks and stop_machine()). 983 */ 984 985 bufmgr_gem->userptr_active.ptr = ptr; 986 bufmgr_gem->userptr_active.handle = userptr.handle; 987 988 return true; 989} 990 991static drm_intel_bo * 992check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 993 const char *name, 994 void *addr, 995 uint32_t tiling_mode, 996 uint32_t stride, 997 unsigned long size, 998 unsigned long flags) 999{ 1000 if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) 1001 bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; 1002 else 1003 bufmgr->bo_alloc_userptr = NULL; 1004 1005 return drm_intel_bo_alloc_userptr(bufmgr, name, addr, 1006 tiling_mode, stride, size, flags); 1007} 1008 1009/** 1010 * Returns a drm_intel_bo wrapping the given buffer object handle. 1011 * 1012 * This can be used when one application needs to pass a buffer object 1013 * to another. 
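 * The handle is expected to be a global "flink" name for the object.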
1014 */ 1015drm_intel_bo * 1016drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 1017 const char *name, 1018 unsigned int handle) 1019{ 1020 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1021 drm_intel_bo_gem *bo_gem; 1022 int ret; 1023 struct drm_gem_open open_arg; 1024 struct drm_i915_gem_get_tiling get_tiling; 1025 drmMMListHead *list; 1026 1027 /* At the moment most applications only have a few named bo. 1028 * For instance, in a DRI client only the render buffers passed 1029 * between X and the client are named. And since X returns the 1030 * alternating names for the front/back buffer a linear search 1031 * provides a sufficiently fast match. 1032 */ 1033 pthread_mutex_lock(&bufmgr_gem->lock); 1034 for (list = bufmgr_gem->named.next; 1035 list != &bufmgr_gem->named; 1036 list = list->next) { 1037 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 1038 if (bo_gem->global_name == handle) { 1039 drm_intel_gem_bo_reference(&bo_gem->bo); 1040 pthread_mutex_unlock(&bufmgr_gem->lock); 1041 return &bo_gem->bo; 1042 } 1043 } 1044 1045 memclear(open_arg); 1046 open_arg.name = handle; 1047 ret = drmIoctl(bufmgr_gem->fd, 1048 DRM_IOCTL_GEM_OPEN, 1049 &open_arg); 1050 if (ret != 0) { 1051 DBG("Couldn't reference %s handle 0x%08x: %s\n", 1052 name, handle, strerror(errno)); 1053 pthread_mutex_unlock(&bufmgr_gem->lock); 1054 return NULL; 1055 } 1056 /* Now see if someone has used a prime handle to get this 1057 * object from the kernel before by looking through the list 1058 * again for a matching gem_handle 1059 */ 1060 for (list = bufmgr_gem->named.next; 1061 list != &bufmgr_gem->named; 1062 list = list->next) { 1063 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 1064 if (bo_gem->gem_handle == open_arg.handle) { 1065 drm_intel_gem_bo_reference(&bo_gem->bo); 1066 pthread_mutex_unlock(&bufmgr_gem->lock); 1067 return &bo_gem->bo; 1068 } 1069 } 1070 1071 bo_gem = calloc(1, sizeof(*bo_gem)); 1072 if (!bo_gem) { 1073 pthread_mutex_unlock(&bufmgr_gem->lock); 1074 return NULL; 1075 } 1076 1077 bo_gem->bo.size = open_arg.size; 1078 bo_gem->bo.offset = 0; 1079 bo_gem->bo.offset64 = 0; 1080 bo_gem->bo.virtual = NULL; 1081 bo_gem->bo.bufmgr = bufmgr; 1082 bo_gem->name = name; 1083 atomic_set(&bo_gem->refcount, 1); 1084 bo_gem->validate_index = -1; 1085 bo_gem->gem_handle = open_arg.handle; 1086 bo_gem->bo.handle = open_arg.handle; 1087 bo_gem->global_name = handle; 1088 bo_gem->reusable = false; 1089 1090 memclear(get_tiling); 1091 get_tiling.handle = bo_gem->gem_handle; 1092 ret = drmIoctl(bufmgr_gem->fd, 1093 DRM_IOCTL_I915_GEM_GET_TILING, 1094 &get_tiling); 1095 if (ret != 0) { 1096 drm_intel_gem_bo_unreference(&bo_gem->bo); 1097 pthread_mutex_unlock(&bufmgr_gem->lock); 1098 return NULL; 1099 } 1100 bo_gem->tiling_mode = get_tiling.tiling_mode; 1101 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 1102 /* XXX stride is unknown */ 1103 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 1104 1105 DRMINITLISTHEAD(&bo_gem->vma_list); 1106 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 1107 pthread_mutex_unlock(&bufmgr_gem->lock); 1108 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 1109 1110 return &bo_gem->bo; 1111} 1112 1113static void 1114drm_intel_gem_bo_free(drm_intel_bo *bo) 1115{ 1116 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1117 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1118 struct drm_gem_close close; 1119 int ret; 1120 1121 DRMLISTDEL(&bo_gem->vma_list); 1122 if 
(bo_gem->mem_virtual) { 1123 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 1124 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1125 bufmgr_gem->vma_count--; 1126 } 1127 if (bo_gem->gtt_virtual) { 1128 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1129 bufmgr_gem->vma_count--; 1130 } 1131 1132 /* Close this object */ 1133 memclear(close); 1134 close.handle = bo_gem->gem_handle; 1135 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 1136 if (ret != 0) { 1137 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 1138 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 1139 } 1140 free(bo_gem->aub_annotations); 1141 free(bo); 1142} 1143 1144static void 1145drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 1146{ 1147#if HAVE_VALGRIND 1148 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1149 1150 if (bo_gem->mem_virtual) 1151 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 1152 1153 if (bo_gem->gtt_virtual) 1154 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 1155#endif 1156} 1157 1158/** Frees all cached buffers significantly older than @time. */ 1159static void 1160drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 1161{ 1162 int i; 1163 1164 if (bufmgr_gem->time == time) 1165 return; 1166 1167 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1168 struct drm_intel_gem_bo_bucket *bucket = 1169 &bufmgr_gem->cache_bucket[i]; 1170 1171 while (!DRMLISTEMPTY(&bucket->head)) { 1172 drm_intel_bo_gem *bo_gem; 1173 1174 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1175 bucket->head.next, head); 1176 if (time - bo_gem->free_time <= 1) 1177 break; 1178 1179 DRMLISTDEL(&bo_gem->head); 1180 1181 drm_intel_gem_bo_free(&bo_gem->bo); 1182 } 1183 } 1184 1185 bufmgr_gem->time = time; 1186} 1187 1188static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 1189{ 1190 int limit; 1191 1192 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 1193 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 1194 1195 if (bufmgr_gem->vma_max < 0) 1196 return; 1197 1198 /* We may need to evict a few entries in order to create new mmaps */ 1199 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1200 if (limit < 0) 1201 limit = 0; 1202 1203 while (bufmgr_gem->vma_count > limit) { 1204 drm_intel_bo_gem *bo_gem; 1205 1206 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1207 bufmgr_gem->vma_cache.next, 1208 vma_list); 1209 assert(bo_gem->map_count == 0); 1210 DRMLISTDELINIT(&bo_gem->vma_list); 1211 1212 if (bo_gem->mem_virtual) { 1213 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1214 bo_gem->mem_virtual = NULL; 1215 bufmgr_gem->vma_count--; 1216 } 1217 if (bo_gem->gtt_virtual) { 1218 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1219 bo_gem->gtt_virtual = NULL; 1220 bufmgr_gem->vma_count--; 1221 } 1222 } 1223} 1224 1225static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1226 drm_intel_bo_gem *bo_gem) 1227{ 1228 bufmgr_gem->vma_open--; 1229 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1230 if (bo_gem->mem_virtual) 1231 bufmgr_gem->vma_count++; 1232 if (bo_gem->gtt_virtual) 1233 bufmgr_gem->vma_count++; 1234 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1235} 1236 1237static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1238 drm_intel_bo_gem *bo_gem) 1239{ 1240 bufmgr_gem->vma_open++; 1241 DRMLISTDEL(&bo_gem->vma_list); 1242 if (bo_gem->mem_virtual) 1243 bufmgr_gem->vma_count--; 1244 if (bo_gem->gtt_virtual) 1245 bufmgr_gem->vma_count--; 1246 
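	/* Opening another mapping shrinks the cached-vma budget, so trim any
	 * excess cached maps now.
	 */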
drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1247} 1248 1249static void 1250drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1251{ 1252 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1253 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1254 struct drm_intel_gem_bo_bucket *bucket; 1255 int i; 1256 1257 /* Unreference all the target buffers */ 1258 for (i = 0; i < bo_gem->reloc_count; i++) { 1259 if (bo_gem->reloc_target_info[i].bo != bo) { 1260 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1261 reloc_target_info[i].bo, 1262 time); 1263 } 1264 } 1265 bo_gem->reloc_count = 0; 1266 bo_gem->used_as_reloc_target = false; 1267 1268 DBG("bo_unreference final: %d (%s)\n", 1269 bo_gem->gem_handle, bo_gem->name); 1270 1271 /* release memory associated with this object */ 1272 if (bo_gem->reloc_target_info) { 1273 free(bo_gem->reloc_target_info); 1274 bo_gem->reloc_target_info = NULL; 1275 } 1276 if (bo_gem->relocs) { 1277 free(bo_gem->relocs); 1278 bo_gem->relocs = NULL; 1279 } 1280 1281 /* Clear any left-over mappings */ 1282 if (bo_gem->map_count) { 1283 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); 1284 bo_gem->map_count = 0; 1285 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1286 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1287 } 1288 1289 DRMLISTDEL(&bo_gem->name_list); 1290 1291 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1292 /* Put the buffer into our internal cache for reuse if we can. */ 1293 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1294 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1295 I915_MADV_DONTNEED)) { 1296 bo_gem->free_time = time; 1297 1298 bo_gem->name = NULL; 1299 bo_gem->validate_index = -1; 1300 1301 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1302 } else { 1303 drm_intel_gem_bo_free(bo); 1304 } 1305} 1306 1307static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1308 time_t time) 1309{ 1310 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1311 1312 assert(atomic_read(&bo_gem->refcount) > 0); 1313 if (atomic_dec_and_test(&bo_gem->refcount)) 1314 drm_intel_gem_bo_unreference_final(bo, time); 1315} 1316 1317static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1318{ 1319 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1320 1321 assert(atomic_read(&bo_gem->refcount) > 0); 1322 1323 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { 1324 drm_intel_bufmgr_gem *bufmgr_gem = 1325 (drm_intel_bufmgr_gem *) bo->bufmgr; 1326 struct timespec time; 1327 1328 clock_gettime(CLOCK_MONOTONIC, &time); 1329 1330 pthread_mutex_lock(&bufmgr_gem->lock); 1331 1332 if (atomic_dec_and_test(&bo_gem->refcount)) { 1333 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1334 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1335 } 1336 1337 pthread_mutex_unlock(&bufmgr_gem->lock); 1338 } 1339} 1340 1341static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1342{ 1343 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1344 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1345 struct drm_i915_gem_set_domain set_domain; 1346 int ret; 1347 1348 if (bo_gem->is_userptr) { 1349 /* Return the same user ptr */ 1350 bo->virtual = bo_gem->user_virtual; 1351 return 0; 1352 } 1353 1354 pthread_mutex_lock(&bufmgr_gem->lock); 1355 1356 if (bo_gem->map_count++ == 0) 1357 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1358 1359 if (!bo_gem->mem_virtual) { 1360 struct drm_i915_gem_mmap mmap_arg; 1361 1362 DBG("bo_map: %d 
(%s), map_count=%d\n", 1363 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1364 1365 memclear(mmap_arg); 1366 mmap_arg.handle = bo_gem->gem_handle; 1367 mmap_arg.size = bo->size; 1368 ret = drmIoctl(bufmgr_gem->fd, 1369 DRM_IOCTL_I915_GEM_MMAP, 1370 &mmap_arg); 1371 if (ret != 0) { 1372 ret = -errno; 1373 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1374 __FILE__, __LINE__, bo_gem->gem_handle, 1375 bo_gem->name, strerror(errno)); 1376 if (--bo_gem->map_count == 0) 1377 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1378 pthread_mutex_unlock(&bufmgr_gem->lock); 1379 return ret; 1380 } 1381 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1382 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1383 } 1384 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1385 bo_gem->mem_virtual); 1386 bo->virtual = bo_gem->mem_virtual; 1387 1388 memclear(set_domain); 1389 set_domain.handle = bo_gem->gem_handle; 1390 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1391 if (write_enable) 1392 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1393 else 1394 set_domain.write_domain = 0; 1395 ret = drmIoctl(bufmgr_gem->fd, 1396 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1397 &set_domain); 1398 if (ret != 0) { 1399 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1400 __FILE__, __LINE__, bo_gem->gem_handle, 1401 strerror(errno)); 1402 } 1403 1404 if (write_enable) 1405 bo_gem->mapped_cpu_write = true; 1406 1407 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1408 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1409 pthread_mutex_unlock(&bufmgr_gem->lock); 1410 1411 return 0; 1412} 1413 1414static int 1415map_gtt(drm_intel_bo *bo) 1416{ 1417 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1418 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1419 int ret; 1420 1421 if (bo_gem->is_userptr) 1422 return -EINVAL; 1423 1424 if (bo_gem->map_count++ == 0) 1425 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1426 1427 /* Get a mapping of the buffer if we haven't before. */ 1428 if (bo_gem->gtt_virtual == NULL) { 1429 struct drm_i915_gem_mmap_gtt mmap_arg; 1430 1431 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1432 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1433 1434 memclear(mmap_arg); 1435 mmap_arg.handle = bo_gem->gem_handle; 1436 1437 /* Get the fake offset back... 
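	 * (the kernel returns a magic offset which is then mmapped through
	 * the DRM fd by drmMap below)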
*/ 1438 ret = drmIoctl(bufmgr_gem->fd, 1439 DRM_IOCTL_I915_GEM_MMAP_GTT, 1440 &mmap_arg); 1441 if (ret != 0) { 1442 ret = -errno; 1443 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1444 __FILE__, __LINE__, 1445 bo_gem->gem_handle, bo_gem->name, 1446 strerror(errno)); 1447 if (--bo_gem->map_count == 0) 1448 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1449 return ret; 1450 } 1451 1452 /* and mmap it */ 1453 ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size, 1454 &bo_gem->gtt_virtual); 1455 if (ret) { 1456 bo_gem->gtt_virtual = NULL; 1457 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1458 __FILE__, __LINE__, 1459 bo_gem->gem_handle, bo_gem->name, 1460 strerror(errno)); 1461 if (--bo_gem->map_count == 0) 1462 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1463 return ret; 1464 } 1465 } 1466 1467 bo->virtual = bo_gem->gtt_virtual; 1468 1469 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1470 bo_gem->gtt_virtual); 1471 1472 return 0; 1473} 1474 1475int 1476drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1477{ 1478 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1479 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1480 struct drm_i915_gem_set_domain set_domain; 1481 int ret; 1482 1483 pthread_mutex_lock(&bufmgr_gem->lock); 1484 1485 ret = map_gtt(bo); 1486 if (ret) { 1487 pthread_mutex_unlock(&bufmgr_gem->lock); 1488 return ret; 1489 } 1490 1491 /* Now move it to the GTT domain so that the GPU and CPU 1492 * caches are flushed and the GPU isn't actively using the 1493 * buffer. 1494 * 1495 * The pagefault handler does this domain change for us when 1496 * it has unbound the BO from the GTT, but it's up to us to 1497 * tell it when we're about to use things if we had done 1498 * rendering and it still happens to be bound to the GTT. 1499 */ 1500 memclear(set_domain); 1501 set_domain.handle = bo_gem->gem_handle; 1502 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1503 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1504 ret = drmIoctl(bufmgr_gem->fd, 1505 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1506 &set_domain); 1507 if (ret != 0) { 1508 DBG("%s:%d: Error setting domain %d: %s\n", 1509 __FILE__, __LINE__, bo_gem->gem_handle, 1510 strerror(errno)); 1511 } 1512 1513 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1514 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1515 pthread_mutex_unlock(&bufmgr_gem->lock); 1516 1517 return 0; 1518} 1519 1520/** 1521 * Performs a mapping of the buffer object like the normal GTT 1522 * mapping, but avoids waiting for the GPU to be done reading from or 1523 * rendering to the buffer. 1524 * 1525 * This is used in the implementation of GL_ARB_map_buffer_range: The 1526 * user asks to create a buffer, then does a mapping, fills some 1527 * space, runs a drawing command, then asks to map it again without 1528 * synchronizing because it guarantees that it won't write over the 1529 * data that the GPU is busy using (or, more specifically, that if it 1530 * does write over the data, it acknowledges that rendering is 1531 * undefined). 1532 */ 1533 1534int 1535drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) 1536{ 1537 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1538#ifdef HAVE_VALGRIND 1539 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1540#endif 1541 int ret; 1542 1543 /* If the CPU cache isn't coherent with the GTT, then use a 1544 * regular synchronized mapping. 
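	 * (Only LLC platforms keep the CPU cache coherent with the GPU's view
	 * of memory, hence the has_llc check below.)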
	 * The problem is that we don't
	 * track where the buffer was last used on the CPU side in
	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
	 * we would potentially corrupt the buffer even when the user
	 * does reasonable things.
	 */
	if (!bufmgr_gem->has_llc)
		return drm_intel_gem_bo_map_gtt(bo);

	pthread_mutex_lock(&bufmgr_gem->lock);

	ret = map_gtt(bo);
	if (ret == 0) {
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	if (bo_gem->is_userptr)
		return 0;

	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;

	pthread_mutex_lock(&bufmgr_gem->lock);

	if (bo_gem->map_count <= 0) {
		DBG("attempted to unmap an unmapped bo\n");
		pthread_mutex_unlock(&bufmgr_gem->lock);
		/* Preserve the old behaviour of just treating this as a
		 * no-op rather than reporting the error.
		 */
		return 0;
	}

	if (bo_gem->mapped_cpu_write) {
		struct drm_i915_gem_sw_finish sw_finish;

		/* Cause a flush to happen if the buffer's pinned for
		 * scanout, so the results show up in a timely manner.
		 * Unlike GTT set domains, this only does work if the
		 * buffer should be scanout-related.
		 */
		memclear(sw_finish);
		sw_finish.handle = bo_gem->gem_handle;
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_SW_FINISH,
			       &sw_finish);
		ret = ret == -1 ? -errno : 0;

		bo_gem->mapped_cpu_write = false;
	}

	/* We need to unmap after every invocation as we cannot track
	 * an open vma for every bo as that will exhaust the system
	 * limits and cause later failures.
1612 */ 1613 if (--bo_gem->map_count == 0) { 1614 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1615 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1616 bo->virtual = NULL; 1617 } 1618 pthread_mutex_unlock(&bufmgr_gem->lock); 1619 1620 return ret; 1621} 1622 1623int 1624drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1625{ 1626 return drm_intel_gem_bo_unmap(bo); 1627} 1628 1629static int 1630drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1631 unsigned long size, const void *data) 1632{ 1633 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1634 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1635 struct drm_i915_gem_pwrite pwrite; 1636 int ret; 1637 1638 if (bo_gem->is_userptr) 1639 return -EINVAL; 1640 1641 memclear(pwrite); 1642 pwrite.handle = bo_gem->gem_handle; 1643 pwrite.offset = offset; 1644 pwrite.size = size; 1645 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1646 ret = drmIoctl(bufmgr_gem->fd, 1647 DRM_IOCTL_I915_GEM_PWRITE, 1648 &pwrite); 1649 if (ret != 0) { 1650 ret = -errno; 1651 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1652 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1653 (int)size, strerror(errno)); 1654 } 1655 1656 return ret; 1657} 1658 1659static int 1660drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1661{ 1662 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1663 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1664 int ret; 1665 1666 memclear(get_pipe_from_crtc_id); 1667 get_pipe_from_crtc_id.crtc_id = crtc_id; 1668 ret = drmIoctl(bufmgr_gem->fd, 1669 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1670 &get_pipe_from_crtc_id); 1671 if (ret != 0) { 1672 /* We return -1 here to signal that we don't 1673 * know which pipe is associated with this crtc. 1674 * This lets the caller know that this information 1675 * isn't available; using the wrong pipe for 1676 * vblank waiting can cause the chipset to lock up 1677 */ 1678 return -1; 1679 } 1680 1681 return get_pipe_from_crtc_id.pipe; 1682} 1683 1684static int 1685drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1686 unsigned long size, void *data) 1687{ 1688 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1689 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1690 struct drm_i915_gem_pread pread; 1691 int ret; 1692 1693 if (bo_gem->is_userptr) 1694 return -EINVAL; 1695 1696 memclear(pread); 1697 pread.handle = bo_gem->gem_handle; 1698 pread.offset = offset; 1699 pread.size = size; 1700 pread.data_ptr = (uint64_t) (uintptr_t) data; 1701 ret = drmIoctl(bufmgr_gem->fd, 1702 DRM_IOCTL_I915_GEM_PREAD, 1703 &pread); 1704 if (ret != 0) { 1705 ret = -errno; 1706 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1707 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1708 (int)size, strerror(errno)); 1709 } 1710 1711 return ret; 1712} 1713 1714/** Waits for all GPU rendering with the object to have completed. */ 1715static void 1716drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1717{ 1718 drm_intel_gem_bo_start_gtt_access(bo, 1); 1719} 1720 1721/** 1722 * Waits on a BO for the given amount of time. 1723 * 1724 * @bo: buffer object to wait for 1725 * @timeout_ns: amount of time to wait in nanoseconds. 1726 * If value is less than 0, an infinite wait will occur. 1727 * 1728 * Returns 0 if the wait was successful ie. the last batch referencing the 1729 * object has completed within the allotted time. 
 * Otherwise some negative return
 * value describes the error. Of particular interest is -ETIME when the wait has
 * failed to yield the desired result.
 *
 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time. Another subtle
 * difference is in the internal locking semantics: this variant does
 * not hold the lock for the duration of the wait. This makes the wait subject
 * to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait does
 * not guarantee that the buffer will not be re-issued via another thread or
 * a flinked handle. Userspace must make sure this race does not occur if such
 * precision is important.
 *
 * Note that some kernels have broken the infinite wait for negative values
 * promise; upgrade to the latest stable kernel if this is the case.
 */
int
drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_wait wait;
	int ret;

	if (!bufmgr_gem->has_wait_timeout) {
		DBG("%s:%d: Timed wait is not supported. Falling back to "
		    "infinite wait\n", __FILE__, __LINE__);
		if (timeout_ns) {
			drm_intel_gem_bo_wait_rendering(bo);
			return 0;
		} else {
			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
		}
	}

	memclear(wait);
	wait.bo_handle = bo_gem->gem_handle;
	wait.timeout_ns = timeout_ns;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	if (ret == -1)
		return -errno;

	return ret;
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ?
I915_GEM_DOMAIN_GTT : 0; 1796 ret = drmIoctl(bufmgr_gem->fd, 1797 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1798 &set_domain); 1799 if (ret != 0) { 1800 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1801 __FILE__, __LINE__, bo_gem->gem_handle, 1802 set_domain.read_domains, set_domain.write_domain, 1803 strerror(errno)); 1804 } 1805} 1806 1807static void 1808drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1809{ 1810 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1811 struct drm_gem_close close_bo; 1812 int i, ret; 1813 1814 free(bufmgr_gem->exec2_objects); 1815 free(bufmgr_gem->exec_objects); 1816 free(bufmgr_gem->exec_bos); 1817 free(bufmgr_gem->aub_filename); 1818 1819 pthread_mutex_destroy(&bufmgr_gem->lock); 1820 1821 /* Free any cached buffer objects we were going to reuse */ 1822 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1823 struct drm_intel_gem_bo_bucket *bucket = 1824 &bufmgr_gem->cache_bucket[i]; 1825 drm_intel_bo_gem *bo_gem; 1826 1827 while (!DRMLISTEMPTY(&bucket->head)) { 1828 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1829 bucket->head.next, head); 1830 DRMLISTDEL(&bo_gem->head); 1831 1832 drm_intel_gem_bo_free(&bo_gem->bo); 1833 } 1834 } 1835 1836 /* Release userptr bo kept hanging around for optimisation. */ 1837 if (bufmgr_gem->userptr_active.ptr) { 1838 memclear(close_bo); 1839 close_bo.handle = bufmgr_gem->userptr_active.handle; 1840 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); 1841 free(bufmgr_gem->userptr_active.ptr); 1842 if (ret) 1843 fprintf(stderr, 1844 "Failed to release test userptr object! (%d) " 1845 "i915 kernel driver may not be sane!\n", errno); 1846 } 1847 1848 free(bufmgr); 1849} 1850 1851/** 1852 * Adds the target buffer to the validation list and adds the relocation 1853 * to the reloc_buffer's relocation list. 1854 * 1855 * The relocation entry at the given offset must already contain the 1856 * precomputed relocation value, because the kernel will optimize out 1857 * the relocation entry write when the buffer hasn't moved from the 1858 * last known offset in target_bo. 1859 */ 1860static int 1861do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1862 drm_intel_bo *target_bo, uint32_t target_offset, 1863 uint32_t read_domains, uint32_t write_domain, 1864 bool need_fence) 1865{ 1866 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1867 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1868 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1869 bool fenced_command; 1870 1871 if (bo_gem->has_error) 1872 return -ENOMEM; 1873 1874 if (target_bo_gem->has_error) { 1875 bo_gem->has_error = true; 1876 return -ENOMEM; 1877 } 1878 1879 /* We never use HW fences for rendering on 965+ */ 1880 if (bufmgr_gem->gen >= 4) 1881 need_fence = false; 1882 1883 fenced_command = need_fence; 1884 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1885 need_fence = false; 1886 1887 /* Create a new relocation list if needed */ 1888 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1889 return -ENOMEM; 1890 1891 /* Check overflow */ 1892 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1893 1894 /* Check args */ 1895 assert(offset <= bo->size - 4); 1896 assert((write_domain & (write_domain - 1)) == 0); 1897 1898 /* An object needing a fence is a tiled buffer, so it won't have 1899 * relocs to other buffers. 
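	 * (hence the assert below that a fenced target has no relocations of
	 * its own)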
1900 */ 1901 if (need_fence) { 1902 assert(target_bo_gem->reloc_count == 0); 1903 target_bo_gem->reloc_tree_fences = 1; 1904 } 1905 1906 /* Make sure that we're not adding a reloc to something whose size has 1907 * already been accounted for. 1908 */ 1909 assert(!bo_gem->used_as_reloc_target); 1910 if (target_bo_gem != bo_gem) { 1911 target_bo_gem->used_as_reloc_target = true; 1912 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1913 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1914 } 1915 1916 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1917 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1918 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1919 target_bo_gem->gem_handle; 1920 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1921 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1922 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 1923 1924 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1925 if (target_bo != bo) 1926 drm_intel_gem_bo_reference(target_bo); 1927 if (fenced_command) 1928 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1929 DRM_INTEL_RELOC_FENCE; 1930 else 1931 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1932 1933 bo_gem->reloc_count++; 1934 1935 return 0; 1936} 1937 1938static int 1939drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1940 drm_intel_bo *target_bo, uint32_t target_offset, 1941 uint32_t read_domains, uint32_t write_domain) 1942{ 1943 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1944 1945 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1946 read_domains, write_domain, 1947 !bufmgr_gem->fenced_relocs); 1948} 1949 1950static int 1951drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1952 drm_intel_bo *target_bo, 1953 uint32_t target_offset, 1954 uint32_t read_domains, uint32_t write_domain) 1955{ 1956 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1957 read_domains, write_domain, true); 1958} 1959 1960int 1961drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 1962{ 1963 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1964 1965 return bo_gem->reloc_count; 1966} 1967 1968/** 1969 * Removes existing relocation entries in the BO after "start". 1970 * 1971 * This allows a user to avoid a two-step process for state setup with 1972 * counting up all the buffer objects and doing a 1973 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 1974 * relocations for the state setup. Instead, save the state of the 1975 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 1976 * state, and then check if it still fits in the aperture. 1977 * 1978 * Any further drm_intel_bufmgr_check_aperture_space() queries 1979 * involving this buffer in the tree are undefined after this call. 
1980 */ 1981void 1982drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 1983{ 1984 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1985 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1986 int i; 1987 struct timespec time; 1988 1989 clock_gettime(CLOCK_MONOTONIC, &time); 1990 1991 assert(bo_gem->reloc_count >= start); 1992 1993 /* Unreference the cleared target buffers */ 1994 pthread_mutex_lock(&bufmgr_gem->lock); 1995 1996 for (i = start; i < bo_gem->reloc_count; i++) { 1997 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 1998 if (&target_bo_gem->bo != bo) { 1999 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 2000 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 2001 time.tv_sec); 2002 } 2003 } 2004 bo_gem->reloc_count = start; 2005 2006 pthread_mutex_unlock(&bufmgr_gem->lock); 2007 2008} 2009 2010/** 2011 * Walk the tree of relocations rooted at BO and accumulate the list of 2012 * validations to be performed and update the relocation buffers with 2013 * index values into the validation list. 2014 */ 2015static void 2016drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 2017{ 2018 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2019 int i; 2020 2021 if (bo_gem->relocs == NULL) 2022 return; 2023 2024 for (i = 0; i < bo_gem->reloc_count; i++) { 2025 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2026 2027 if (target_bo == bo) 2028 continue; 2029 2030 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2031 2032 /* Continue walking the tree depth-first. */ 2033 drm_intel_gem_bo_process_reloc(target_bo); 2034 2035 /* Add the target to the validate list */ 2036 drm_intel_add_validate_buffer(target_bo); 2037 } 2038} 2039 2040static void 2041drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 2042{ 2043 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2044 int i; 2045 2046 if (bo_gem->relocs == NULL) 2047 return; 2048 2049 for (i = 0; i < bo_gem->reloc_count; i++) { 2050 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2051 int need_fence; 2052 2053 if (target_bo == bo) 2054 continue; 2055 2056 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2057 2058 /* Continue walking the tree depth-first. */ 2059 drm_intel_gem_bo_process_reloc2(target_bo); 2060 2061 need_fence = (bo_gem->reloc_target_info[i].flags & 2062 DRM_INTEL_RELOC_FENCE); 2063 2064 /* Add the target to the validate list */ 2065 drm_intel_add_validate_buffer2(target_bo, need_fence); 2066 } 2067} 2068 2069 2070static void 2071drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 2072{ 2073 int i; 2074 2075 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2076 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2077 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2078 2079 /* Update the buffer offset */ 2080 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 2081 DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n", 2082 bo_gem->gem_handle, bo_gem->name, 2083 (unsigned long long)bo->offset64, 2084 (unsigned long long)bufmgr_gem->exec_objects[i]. 
2085 offset); 2086 bo->offset64 = bufmgr_gem->exec_objects[i].offset; 2087 bo->offset = bufmgr_gem->exec_objects[i].offset; 2088 } 2089 } 2090} 2091 2092static void 2093drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 2094{ 2095 int i; 2096 2097 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2098 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2099 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2100 2101 /* Update the buffer offset */ 2102 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 2103 DBG("BO %d (%s) migrated: 0x%08llx -> 0x%08llx\n", 2104 bo_gem->gem_handle, bo_gem->name, 2105 (unsigned long long)bo->offset64, 2106 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 2107 bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 2108 bo->offset = bufmgr_gem->exec2_objects[i].offset; 2109 } 2110 } 2111} 2112 2113static void 2114aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) 2115{ 2116 fwrite(&data, 1, 4, bufmgr_gem->aub_file); 2117} 2118 2119static void 2120aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) 2121{ 2122 fwrite(data, 1, size, bufmgr_gem->aub_file); 2123} 2124 2125static void 2126aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) 2127{ 2128 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2129 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2130 uint32_t *data; 2131 unsigned int i; 2132 2133 data = malloc(bo->size); 2134 drm_intel_bo_get_subdata(bo, offset, size, data); 2135 2136 /* Easy mode: write out bo with no relocations */ 2137 if (!bo_gem->reloc_count) { 2138 aub_out_data(bufmgr_gem, data, size); 2139 free(data); 2140 return; 2141 } 2142 2143 /* Otherwise, handle the relocations while writing. */ 2144 for (i = 0; i < size / 4; i++) { 2145 int r; 2146 for (r = 0; r < bo_gem->reloc_count; r++) { 2147 struct drm_i915_gem_relocation_entry *reloc; 2148 drm_intel_reloc_target *info; 2149 2150 reloc = &bo_gem->relocs[r]; 2151 info = &bo_gem->reloc_target_info[r]; 2152 2153 if (reloc->offset == offset + i * 4) { 2154 drm_intel_bo_gem *target_gem; 2155 uint32_t val; 2156 2157 target_gem = (drm_intel_bo_gem *)info->bo; 2158 2159 val = reloc->delta; 2160 val += target_gem->aub_offset; 2161 2162 aub_out(bufmgr_gem, val); 2163 data[i] = val; 2164 break; 2165 } 2166 } 2167 if (r == bo_gem->reloc_count) { 2168 /* no relocation, just the data */ 2169 aub_out(bufmgr_gem, data[i]); 2170 } 2171 } 2172 2173 free(data); 2174} 2175 2176static void 2177aub_bo_get_address(drm_intel_bo *bo) 2178{ 2179 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2180 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2181 2182 /* Give the object a graphics address in the AUB file. We 2183 * don't just use the GEM object address because we do AUB 2184 * dumping before execution -- we want to successfully log 2185 * when the hardware might hang, and we might even want to aub 2186 * capture for a driver trying to execute on a different 2187 * generation of hardware by disabling the actual kernel exec 2188 * call. 2189 */ 2190 bo_gem->aub_offset = bufmgr_gem->aub_offset; 2191 bufmgr_gem->aub_offset += bo->size; 2192 /* XXX: Handle aperture overflow. 
*/ 2193 assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); 2194} 2195 2196static void 2197aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 2198 uint32_t offset, uint32_t size) 2199{ 2200 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2201 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2202 2203 aub_out(bufmgr_gem, 2204 CMD_AUB_TRACE_HEADER_BLOCK | 2205 ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 2206 aub_out(bufmgr_gem, 2207 AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); 2208 aub_out(bufmgr_gem, subtype); 2209 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 2210 aub_out(bufmgr_gem, size); 2211 if (bufmgr_gem->gen >= 8) 2212 aub_out(bufmgr_gem, 0); 2213 aub_write_bo_data(bo, offset, size); 2214} 2215 2216/** 2217 * Break up large objects into multiple writes. Otherwise a 128kb VBO 2218 * would overflow the 16 bits of size field in the packet header and 2219 * everything goes badly after that. 2220 */ 2221static void 2222aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 2223 uint32_t offset, uint32_t size) 2224{ 2225 uint32_t block_size; 2226 uint32_t sub_offset; 2227 2228 for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { 2229 block_size = size - sub_offset; 2230 2231 if (block_size > 8 * 4096) 2232 block_size = 8 * 4096; 2233 2234 aub_write_trace_block(bo, type, subtype, offset + sub_offset, 2235 block_size); 2236 } 2237} 2238 2239static void 2240aub_write_bo(drm_intel_bo *bo) 2241{ 2242 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2243 uint32_t offset = 0; 2244 unsigned i; 2245 2246 aub_bo_get_address(bo); 2247 2248 /* Write out each annotated section separately. */ 2249 for (i = 0; i < bo_gem->aub_annotation_count; ++i) { 2250 drm_intel_aub_annotation *annotation = 2251 &bo_gem->aub_annotations[i]; 2252 uint32_t ending_offset = annotation->ending_offset; 2253 if (ending_offset > bo->size) 2254 ending_offset = bo->size; 2255 if (ending_offset > offset) { 2256 aub_write_large_trace_block(bo, annotation->type, 2257 annotation->subtype, 2258 offset, 2259 ending_offset - offset); 2260 offset = ending_offset; 2261 } 2262 } 2263 2264 /* Write out any remaining unannotated data */ 2265 if (offset < bo->size) { 2266 aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, 2267 offset, bo->size - offset); 2268 } 2269} 2270 2271/* 2272 * Make a ringbuffer on fly and dump it 2273 */ 2274static void 2275aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, 2276 uint32_t batch_buffer, int ring_flag) 2277{ 2278 uint32_t ringbuffer[4096]; 2279 int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ 2280 int ring_count = 0; 2281 2282 if (ring_flag == I915_EXEC_BSD) 2283 ring = AUB_TRACE_TYPE_RING_PRB1; 2284 else if (ring_flag == I915_EXEC_BLT) 2285 ring = AUB_TRACE_TYPE_RING_PRB2; 2286 2287 /* Make a ring buffer to execute our batchbuffer. */ 2288 memset(ringbuffer, 0, sizeof(ringbuffer)); 2289 if (bufmgr_gem->gen >= 8) { 2290 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); 2291 ringbuffer[ring_count++] = batch_buffer; 2292 ringbuffer[ring_count++] = 0; 2293 } else { 2294 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; 2295 ringbuffer[ring_count++] = batch_buffer; 2296 } 2297 2298 /* Write out the ring. This appears to trigger execution of 2299 * the ring in the simulator. 2300 */ 2301 aub_out(bufmgr_gem, 2302 CMD_AUB_TRACE_HEADER_BLOCK | 2303 ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); 2304 aub_out(bufmgr_gem, 2305 AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); 2306 aub_out(bufmgr_gem, 0); /* general/surface subtype */ 2307 aub_out(bufmgr_gem, bufmgr_gem->aub_offset); 2308 aub_out(bufmgr_gem, ring_count * 4); 2309 if (bufmgr_gem->gen >= 8) 2310 aub_out(bufmgr_gem, 0); 2311 2312 /* FIXME: Need some flush operations here? */ 2313 aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); 2314 2315 /* Update offset pointer */ 2316 bufmgr_gem->aub_offset += 4096; 2317} 2318 2319void 2320drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2321 int x1, int y1, int width, int height, 2322 enum aub_dump_bmp_format format, 2323 int pitch, int offset) 2324{ 2325 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2326 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2327 uint32_t cpp; 2328 2329 switch (format) { 2330 case AUB_DUMP_BMP_FORMAT_8BIT: 2331 cpp = 1; 2332 break; 2333 case AUB_DUMP_BMP_FORMAT_ARGB_4444: 2334 cpp = 2; 2335 break; 2336 case AUB_DUMP_BMP_FORMAT_ARGB_0888: 2337 case AUB_DUMP_BMP_FORMAT_ARGB_8888: 2338 cpp = 4; 2339 break; 2340 default: 2341 printf("Unknown AUB dump format %d\n", format); 2342 return; 2343 } 2344 2345 if (!bufmgr_gem->aub_file) 2346 return; 2347 2348 aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); 2349 aub_out(bufmgr_gem, (y1 << 16) | x1); 2350 aub_out(bufmgr_gem, 2351 (format << 24) | 2352 (cpp << 19) | 2353 pitch / 4); 2354 aub_out(bufmgr_gem, (height << 16) | width); 2355 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 2356 aub_out(bufmgr_gem, 2357 ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | 2358 ((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0)); 2359} 2360 2361static void 2362aub_exec(drm_intel_bo *bo, int ring_flag, int used) 2363{ 2364 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2365 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2366 int i; 2367 bool batch_buffer_needs_annotations; 2368 2369 if (!bufmgr_gem->aub_file) 2370 return; 2371 2372 /* If batch buffer is not annotated, annotate it the best we 2373 * can. 2374 */ 2375 batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; 2376 if (batch_buffer_needs_annotations) { 2377 drm_intel_aub_annotation annotations[2] = { 2378 { AUB_TRACE_TYPE_BATCH, 0, used }, 2379 { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } 2380 }; 2381 drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); 2382 } 2383 2384 /* Write out all buffers to AUB memory */ 2385 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2386 aub_write_bo(bufmgr_gem->exec_bos[i]); 2387 } 2388 2389 /* Remove any annotations we added */ 2390 if (batch_buffer_needs_annotations) 2391 drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); 2392 2393 /* Dump ring buffer */ 2394 aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); 2395 2396 fflush(bufmgr_gem->aub_file); 2397 2398 /* 2399 * One frame has been dumped. So reset the aub_offset for the next frame. 2400 * 2401 * FIXME: Can we do this? 
2402 */ 2403 bufmgr_gem->aub_offset = 0x10000; 2404} 2405 2406static int 2407drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2408 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2409{ 2410 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2411 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2412 struct drm_i915_gem_execbuffer execbuf; 2413 int ret, i; 2414 2415 if (bo_gem->has_error) 2416 return -ENOMEM; 2417 2418 pthread_mutex_lock(&bufmgr_gem->lock); 2419 /* Update indices and set up the validate list. */ 2420 drm_intel_gem_bo_process_reloc(bo); 2421 2422 /* Add the batch buffer to the validation list. There are no 2423 * relocations pointing to it. 2424 */ 2425 drm_intel_add_validate_buffer(bo); 2426 2427 memclear(execbuf); 2428 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2429 execbuf.buffer_count = bufmgr_gem->exec_count; 2430 execbuf.batch_start_offset = 0; 2431 execbuf.batch_len = used; 2432 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2433 execbuf.num_cliprects = num_cliprects; 2434 execbuf.DR1 = 0; 2435 execbuf.DR4 = DR4; 2436 2437 ret = drmIoctl(bufmgr_gem->fd, 2438 DRM_IOCTL_I915_GEM_EXECBUFFER, 2439 &execbuf); 2440 if (ret != 0) { 2441 ret = -errno; 2442 if (errno == ENOSPC) { 2443 DBG("Execbuffer fails to pin. " 2444 "Estimate: %u. Actual: %u. Available: %u\n", 2445 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2446 bufmgr_gem-> 2447 exec_count), 2448 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2449 bufmgr_gem-> 2450 exec_count), 2451 (unsigned int)bufmgr_gem->gtt_size); 2452 } 2453 } 2454 drm_intel_update_buffer_offsets(bufmgr_gem); 2455 2456 if (bufmgr_gem->bufmgr.debug) 2457 drm_intel_gem_dump_validation_list(bufmgr_gem); 2458 2459 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2460 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2461 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2462 2463 bo_gem->idle = false; 2464 2465 /* Disconnect the buffer from the validate list */ 2466 bo_gem->validate_index = -1; 2467 bufmgr_gem->exec_bos[i] = NULL; 2468 } 2469 bufmgr_gem->exec_count = 0; 2470 pthread_mutex_unlock(&bufmgr_gem->lock); 2471 2472 return ret; 2473} 2474 2475static int 2476do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2477 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2478 unsigned int flags) 2479{ 2480 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2481 struct drm_i915_gem_execbuffer2 execbuf; 2482 int ret = 0; 2483 int i; 2484 2485 switch (flags & 0x7) { 2486 default: 2487 return -EINVAL; 2488 case I915_EXEC_BLT: 2489 if (!bufmgr_gem->has_blt) 2490 return -EINVAL; 2491 break; 2492 case I915_EXEC_BSD: 2493 if (!bufmgr_gem->has_bsd) 2494 return -EINVAL; 2495 break; 2496 case I915_EXEC_VEBOX: 2497 if (!bufmgr_gem->has_vebox) 2498 return -EINVAL; 2499 break; 2500 case I915_EXEC_RENDER: 2501 case I915_EXEC_DEFAULT: 2502 break; 2503 } 2504 2505 pthread_mutex_lock(&bufmgr_gem->lock); 2506 /* Update indices and set up the validate list. */ 2507 drm_intel_gem_bo_process_reloc2(bo); 2508 2509 /* Add the batch buffer to the validation list. There are no relocations 2510 * pointing to it. 
2511 */ 2512 drm_intel_add_validate_buffer2(bo, 0); 2513 2514 memclear(execbuf); 2515 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2516 execbuf.buffer_count = bufmgr_gem->exec_count; 2517 execbuf.batch_start_offset = 0; 2518 execbuf.batch_len = used; 2519 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2520 execbuf.num_cliprects = num_cliprects; 2521 execbuf.DR1 = 0; 2522 execbuf.DR4 = DR4; 2523 execbuf.flags = flags; 2524 if (ctx == NULL) 2525 i915_execbuffer2_set_context_id(execbuf, 0); 2526 else 2527 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2528 execbuf.rsvd2 = 0; 2529 2530 aub_exec(bo, flags, used); 2531 2532 if (bufmgr_gem->no_exec) 2533 goto skip_execution; 2534 2535 ret = drmIoctl(bufmgr_gem->fd, 2536 DRM_IOCTL_I915_GEM_EXECBUFFER2, 2537 &execbuf); 2538 if (ret != 0) { 2539 ret = -errno; 2540 if (ret == -ENOSPC) { 2541 DBG("Execbuffer fails to pin. " 2542 "Estimate: %u. Actual: %u. Available: %u\n", 2543 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2544 bufmgr_gem->exec_count), 2545 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2546 bufmgr_gem->exec_count), 2547 (unsigned int) bufmgr_gem->gtt_size); 2548 } 2549 } 2550 drm_intel_update_buffer_offsets2(bufmgr_gem); 2551 2552skip_execution: 2553 if (bufmgr_gem->bufmgr.debug) 2554 drm_intel_gem_dump_validation_list(bufmgr_gem); 2555 2556 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2557 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2558 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2559 2560 bo_gem->idle = false; 2561 2562 /* Disconnect the buffer from the validate list */ 2563 bo_gem->validate_index = -1; 2564 bufmgr_gem->exec_bos[i] = NULL; 2565 } 2566 bufmgr_gem->exec_count = 0; 2567 pthread_mutex_unlock(&bufmgr_gem->lock); 2568 2569 return ret; 2570} 2571 2572static int 2573drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2574 drm_clip_rect_t *cliprects, int num_cliprects, 2575 int DR4) 2576{ 2577 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2578 I915_EXEC_RENDER); 2579} 2580 2581static int 2582drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2583 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2584 unsigned int flags) 2585{ 2586 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2587 flags); 2588} 2589 2590int 2591drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2592 int used, unsigned int flags) 2593{ 2594 return do_exec2(bo, used, ctx, NULL, 0, 0, flags); 2595} 2596 2597static int 2598drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2599{ 2600 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2601 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2602 struct drm_i915_gem_pin pin; 2603 int ret; 2604 2605 memclear(pin); 2606 pin.handle = bo_gem->gem_handle; 2607 pin.alignment = alignment; 2608 2609 ret = drmIoctl(bufmgr_gem->fd, 2610 DRM_IOCTL_I915_GEM_PIN, 2611 &pin); 2612 if (ret != 0) 2613 return -errno; 2614 2615 bo->offset64 = pin.offset; 2616 bo->offset = pin.offset; 2617 return 0; 2618} 2619 2620static int 2621drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2622{ 2623 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2624 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2625 struct drm_i915_gem_unpin unpin; 2626 int ret; 2627 2628 memclear(unpin); 2629 unpin.handle = bo_gem->gem_handle; 2630 2631 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2632 if (ret != 0) 2633 return -errno; 2634 2635 return 0; 2636} 2637 2638static int 
2639drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2640 uint32_t tiling_mode, 2641 uint32_t stride) 2642{ 2643 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2644 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2645 struct drm_i915_gem_set_tiling set_tiling; 2646 int ret; 2647 2648 if (bo_gem->global_name == 0 && 2649 tiling_mode == bo_gem->tiling_mode && 2650 stride == bo_gem->stride) 2651 return 0; 2652 2653 memset(&set_tiling, 0, sizeof(set_tiling)); 2654 do { 2655 /* set_tiling is slightly broken and overwrites the 2656 * input on the error path, so we have to open code 2657 * rmIoctl. 2658 */ 2659 set_tiling.handle = bo_gem->gem_handle; 2660 set_tiling.tiling_mode = tiling_mode; 2661 set_tiling.stride = stride; 2662 2663 ret = ioctl(bufmgr_gem->fd, 2664 DRM_IOCTL_I915_GEM_SET_TILING, 2665 &set_tiling); 2666 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 2667 if (ret == -1) 2668 return -errno; 2669 2670 bo_gem->tiling_mode = set_tiling.tiling_mode; 2671 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 2672 bo_gem->stride = set_tiling.stride; 2673 return 0; 2674} 2675 2676static int 2677drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2678 uint32_t stride) 2679{ 2680 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2681 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2682 int ret; 2683 2684 /* Tiling with userptr surfaces is not supported 2685 * on all hardware so refuse it for time being. 2686 */ 2687 if (bo_gem->is_userptr) 2688 return -EINVAL; 2689 2690 /* Linear buffers have no stride. By ensuring that we only ever use 2691 * stride 0 with linear buffers, we simplify our code. 2692 */ 2693 if (*tiling_mode == I915_TILING_NONE) 2694 stride = 0; 2695 2696 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 2697 if (ret == 0) 2698 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 2699 2700 *tiling_mode = bo_gem->tiling_mode; 2701 return ret; 2702} 2703 2704static int 2705drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2706 uint32_t * swizzle_mode) 2707{ 2708 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2709 2710 *tiling_mode = bo_gem->tiling_mode; 2711 *swizzle_mode = bo_gem->swizzle_mode; 2712 return 0; 2713} 2714 2715drm_intel_bo * 2716drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 2717{ 2718 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2719 int ret; 2720 uint32_t handle; 2721 drm_intel_bo_gem *bo_gem; 2722 struct drm_i915_gem_get_tiling get_tiling; 2723 drmMMListHead *list; 2724 2725 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 2726 2727 /* 2728 * See if the kernel has already returned this buffer to us. 
Just as 2729 * for named buffers, we must not create two bo's pointing at the same 2730 * kernel object 2731 */ 2732 pthread_mutex_lock(&bufmgr_gem->lock); 2733 for (list = bufmgr_gem->named.next; 2734 list != &bufmgr_gem->named; 2735 list = list->next) { 2736 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 2737 if (bo_gem->gem_handle == handle) { 2738 drm_intel_gem_bo_reference(&bo_gem->bo); 2739 pthread_mutex_unlock(&bufmgr_gem->lock); 2740 return &bo_gem->bo; 2741 } 2742 } 2743 2744 if (ret) { 2745 fprintf(stderr,"ret is %d %d\n", ret, errno); 2746 pthread_mutex_unlock(&bufmgr_gem->lock); 2747 return NULL; 2748 } 2749 2750 bo_gem = calloc(1, sizeof(*bo_gem)); 2751 if (!bo_gem) { 2752 pthread_mutex_unlock(&bufmgr_gem->lock); 2753 return NULL; 2754 } 2755 /* Determine size of bo. The fd-to-handle ioctl really should 2756 * return the size, but it doesn't. If we have kernel 3.12 or 2757 * later, we can lseek on the prime fd to get the size. Older 2758 * kernels will just fail, in which case we fall back to the 2759 * provided (estimated or guess size). */ 2760 ret = lseek(prime_fd, 0, SEEK_END); 2761 if (ret != -1) 2762 bo_gem->bo.size = ret; 2763 else 2764 bo_gem->bo.size = size; 2765 2766 bo_gem->bo.handle = handle; 2767 bo_gem->bo.bufmgr = bufmgr; 2768 2769 bo_gem->gem_handle = handle; 2770 2771 atomic_set(&bo_gem->refcount, 1); 2772 2773 bo_gem->name = "prime"; 2774 bo_gem->validate_index = -1; 2775 bo_gem->reloc_tree_fences = 0; 2776 bo_gem->used_as_reloc_target = false; 2777 bo_gem->has_error = false; 2778 bo_gem->reusable = false; 2779 2780 DRMINITLISTHEAD(&bo_gem->vma_list); 2781 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2782 pthread_mutex_unlock(&bufmgr_gem->lock); 2783 2784 memclear(get_tiling); 2785 get_tiling.handle = bo_gem->gem_handle; 2786 ret = drmIoctl(bufmgr_gem->fd, 2787 DRM_IOCTL_I915_GEM_GET_TILING, 2788 &get_tiling); 2789 if (ret != 0) { 2790 drm_intel_gem_bo_unreference(&bo_gem->bo); 2791 return NULL; 2792 } 2793 bo_gem->tiling_mode = get_tiling.tiling_mode; 2794 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 2795 /* XXX stride is unknown */ 2796 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 2797 2798 return &bo_gem->bo; 2799} 2800 2801int 2802drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 2803{ 2804 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2805 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2806 2807 pthread_mutex_lock(&bufmgr_gem->lock); 2808 if (DRMLISTEMPTY(&bo_gem->name_list)) 2809 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2810 pthread_mutex_unlock(&bufmgr_gem->lock); 2811 2812 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 2813 DRM_CLOEXEC, prime_fd) != 0) 2814 return -errno; 2815 2816 bo_gem->reusable = false; 2817 2818 return 0; 2819} 2820 2821static int 2822drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 2823{ 2824 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2825 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2826 int ret; 2827 2828 if (!bo_gem->global_name) { 2829 struct drm_gem_flink flink; 2830 2831 memclear(flink); 2832 flink.handle = bo_gem->gem_handle; 2833 2834 pthread_mutex_lock(&bufmgr_gem->lock); 2835 2836 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 2837 if (ret != 0) { 2838 pthread_mutex_unlock(&bufmgr_gem->lock); 2839 return -errno; 2840 } 2841 2842 bo_gem->global_name = flink.name; 2843 bo_gem->reusable = false; 2844 2845 if (DRMLISTEMPTY(&bo_gem->name_list)) 2846 
DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2847 pthread_mutex_unlock(&bufmgr_gem->lock); 2848 } 2849 2850 *name = bo_gem->global_name; 2851 return 0; 2852} 2853 2854/** 2855 * Enables unlimited caching of buffer objects for reuse. 2856 * 2857 * This is potentially very memory expensive, as the cache at each bucket 2858 * size is only bounded by how many buffers of that size we've managed to have 2859 * in flight at once. 2860 */ 2861void 2862drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 2863{ 2864 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2865 2866 bufmgr_gem->bo_reuse = true; 2867} 2868 2869/** 2870 * Enable use of fenced reloc type. 2871 * 2872 * New code should enable this to avoid unnecessary fence register 2873 * allocation. If this option is not enabled, all relocs will have fence 2874 * register allocated. 2875 */ 2876void 2877drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 2878{ 2879 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2880 2881 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 2882 bufmgr_gem->fenced_relocs = true; 2883} 2884 2885/** 2886 * Return the additional aperture space required by the tree of buffer objects 2887 * rooted at bo. 2888 */ 2889static int 2890drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2891{ 2892 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2893 int i; 2894 int total = 0; 2895 2896 if (bo == NULL || bo_gem->included_in_check_aperture) 2897 return 0; 2898 2899 total += bo->size; 2900 bo_gem->included_in_check_aperture = true; 2901 2902 for (i = 0; i < bo_gem->reloc_count; i++) 2903 total += 2904 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2905 reloc_target_info[i].bo); 2906 2907 return total; 2908} 2909 2910/** 2911 * Count the number of buffers in this list that need a fence reg 2912 * 2913 * If the count is greater than the number of available regs, we'll have 2914 * to ask the caller to resubmit a batch with fewer tiled buffers. 2915 * 2916 * This function over-counts if the same buffer is used multiple times. 2917 */ 2918static unsigned int 2919drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2920{ 2921 int i; 2922 unsigned int total = 0; 2923 2924 for (i = 0; i < count; i++) { 2925 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2926 2927 if (bo_gem == NULL) 2928 continue; 2929 2930 total += bo_gem->reloc_tree_fences; 2931 } 2932 return total; 2933} 2934 2935/** 2936 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2937 * for the next drm_intel_bufmgr_check_aperture_space() call. 2938 */ 2939static void 2940drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2941{ 2942 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2943 int i; 2944 2945 if (bo == NULL || !bo_gem->included_in_check_aperture) 2946 return; 2947 2948 bo_gem->included_in_check_aperture = false; 2949 2950 for (i = 0; i < bo_gem->reloc_count; i++) 2951 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2952 reloc_target_info[i].bo); 2953} 2954 2955/** 2956 * Return a conservative estimate for the amount of aperture required 2957 * for a collection of buffers. This may double-count some buffers. 
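 *
 * For example, if two objects in bo_array each carry a relocation to the
 * same 1 MiB target, that target contributes 2 MiB to this estimate;
 * drm_intel_gem_compute_batch_space() below walks the relocation trees
 * instead and counts it only once.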
2958 */ 2959static unsigned int 2960drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2961{ 2962 int i; 2963 unsigned int total = 0; 2964 2965 for (i = 0; i < count; i++) { 2966 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2967 if (bo_gem != NULL) 2968 total += bo_gem->reloc_tree_size; 2969 } 2970 return total; 2971} 2972 2973/** 2974 * Return the amount of aperture needed for a collection of buffers. 2975 * This avoids double counting any buffers, at the cost of looking 2976 * at every buffer in the set. 2977 */ 2978static unsigned int 2979drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2980{ 2981 int i; 2982 unsigned int total = 0; 2983 2984 for (i = 0; i < count; i++) { 2985 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2986 /* For the first buffer object in the array, we get an 2987 * accurate count back for its reloc_tree size (since nothing 2988 * had been flagged as being counted yet). We can save that 2989 * value out as a more conservative reloc_tree_size that 2990 * avoids double-counting target buffers. Since the first 2991 * buffer happens to usually be the batch buffer in our 2992 * callers, this can pull us back from doing the tree 2993 * walk on every new batch emit. 2994 */ 2995 if (i == 0) { 2996 drm_intel_bo_gem *bo_gem = 2997 (drm_intel_bo_gem *) bo_array[i]; 2998 bo_gem->reloc_tree_size = total; 2999 } 3000 } 3001 3002 for (i = 0; i < count; i++) 3003 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 3004 return total; 3005} 3006 3007/** 3008 * Return -1 if the batchbuffer should be flushed before attempting to 3009 * emit rendering referencing the buffers pointed to by bo_array. 3010 * 3011 * This is required because if we try to emit a batchbuffer with relocations 3012 * to a tree of buffers that won't simultaneously fit in the aperture, 3013 * the rendering will return an error at a point where the software is not 3014 * prepared to recover from it. 3015 * 3016 * However, we also want to emit the batchbuffer significantly before we reach 3017 * the limit, as a series of batchbuffers each of which references buffers 3018 * covering almost all of the aperture means that at each emit we end up 3019 * waiting to evict a buffer from the last rendering, and we get synchronous 3020 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 3021 * get better parallelism. 
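 *
 * A minimal sketch of the intended use; render_bos[], n, flush_batch() and
 * emit_rendering() are illustrative placeholders, not part of this file:
 *
 *	if (drm_intel_bufmgr_check_aperture_space(render_bos, n) != 0)
 *		flush_batch();
 *	emit_rendering(render_bos, n);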
3022 */ 3023static int 3024drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 3025{ 3026 drm_intel_bufmgr_gem *bufmgr_gem = 3027 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 3028 unsigned int total = 0; 3029 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 3030 int total_fences; 3031 3032 /* Check for fence reg constraints if necessary */ 3033 if (bufmgr_gem->available_fences) { 3034 total_fences = drm_intel_gem_total_fences(bo_array, count); 3035 if (total_fences > bufmgr_gem->available_fences) 3036 return -ENOSPC; 3037 } 3038 3039 total = drm_intel_gem_estimate_batch_space(bo_array, count); 3040 3041 if (total > threshold) 3042 total = drm_intel_gem_compute_batch_space(bo_array, count); 3043 3044 if (total > threshold) { 3045 DBG("check_space: overflowed available aperture, " 3046 "%dkb vs %dkb\n", 3047 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 3048 return -ENOSPC; 3049 } else { 3050 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 3051 (int)bufmgr_gem->gtt_size / 1024); 3052 return 0; 3053 } 3054} 3055 3056/* 3057 * Disable buffer reuse for objects which are shared with the kernel 3058 * as scanout buffers 3059 */ 3060static int 3061drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 3062{ 3063 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3064 3065 bo_gem->reusable = false; 3066 return 0; 3067} 3068 3069static int 3070drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 3071{ 3072 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3073 3074 return bo_gem->reusable; 3075} 3076 3077static int 3078_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3079{ 3080 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3081 int i; 3082 3083 for (i = 0; i < bo_gem->reloc_count; i++) { 3084 if (bo_gem->reloc_target_info[i].bo == target_bo) 3085 return 1; 3086 if (bo == bo_gem->reloc_target_info[i].bo) 3087 continue; 3088 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 3089 target_bo)) 3090 return 1; 3091 } 3092 3093 return 0; 3094} 3095 3096/** Return true if target_bo is referenced by bo's relocation tree. */ 3097static int 3098drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3099{ 3100 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 3101 3102 if (bo == NULL || target_bo == NULL) 3103 return 0; 3104 if (target_bo_gem->used_as_reloc_target) 3105 return _drm_intel_gem_bo_references(bo, target_bo); 3106 return 0; 3107} 3108 3109static void 3110add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 3111{ 3112 unsigned int i = bufmgr_gem->num_buckets; 3113 3114 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 3115 3116 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 3117 bufmgr_gem->cache_bucket[i].size = size; 3118 bufmgr_gem->num_buckets++; 3119} 3120 3121static void 3122init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 3123{ 3124 unsigned long size, cache_max_size = 64 * 1024 * 1024; 3125 3126 /* OK, so power of two buckets was too wasteful of memory. 3127 * Give 3 other sizes between each power of two, to hopefully 3128 * cover things accurately enough. 
(The alternative is 3129 * probably to just go for exact matching of sizes, and assume 3130 * that for things like composited window resize the tiled 3131 * width/height alignment and rounding of sizes to pages will 3132 * get us useful cache hit rates anyway) 3133 */ 3134 add_bucket(bufmgr_gem, 4096); 3135 add_bucket(bufmgr_gem, 4096 * 2); 3136 add_bucket(bufmgr_gem, 4096 * 3); 3137 3138 /* Initialize the linked lists for BO reuse cache. */ 3139 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 3140 add_bucket(bufmgr_gem, size); 3141 3142 add_bucket(bufmgr_gem, size + size * 1 / 4); 3143 add_bucket(bufmgr_gem, size + size * 2 / 4); 3144 add_bucket(bufmgr_gem, size + size * 3 / 4); 3145 } 3146} 3147 3148void 3149drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 3150{ 3151 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3152 3153 bufmgr_gem->vma_max = limit; 3154 3155 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 3156} 3157 3158/** 3159 * Get the PCI ID for the device. This can be overridden by setting the 3160 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 3161 */ 3162static int 3163get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 3164{ 3165 char *devid_override; 3166 int devid = 0; 3167 int ret; 3168 drm_i915_getparam_t gp; 3169 3170 if (geteuid() == getuid()) { 3171 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 3172 if (devid_override) { 3173 bufmgr_gem->no_exec = true; 3174 return strtod(devid_override, NULL); 3175 } 3176 } 3177 3178 memclear(gp); 3179 gp.param = I915_PARAM_CHIPSET_ID; 3180 gp.value = &devid; 3181 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3182 if (ret) { 3183 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 3184 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 3185 } 3186 return devid; 3187} 3188 3189int 3190drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 3191{ 3192 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3193 3194 return bufmgr_gem->pci_device; 3195} 3196 3197/** 3198 * Sets the AUB filename. 3199 * 3200 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 3201 * for it to have any effect. 3202 */ 3203void 3204drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 3205 const char *filename) 3206{ 3207 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3208 3209 free(bufmgr_gem->aub_filename); 3210 if (filename) 3211 bufmgr_gem->aub_filename = strdup(filename); 3212} 3213 3214/** 3215 * Sets up AUB dumping. 3216 * 3217 * This is a trace file format that can be used with the simulator. 3218 * Packets are emitted in a format somewhat like GPU command packets. 3219 * You can set up a GTT and upload your objects into the referenced 3220 * space, then send off batchbuffers and get BMPs out the other end. 
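 *
 * A minimal sketch of enabling a dump ("trace.aub" is just an example
 * name; the filename call is optional and defaults to "intel.aub"):
 *
 *	drm_intel_bufmgr_gem_set_aub_filename(bufmgr, "trace.aub");
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);
 *	... submit batches as usual; each exec is appended to the file ...
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0);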
3221 */ 3222void 3223drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 3224{ 3225 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3226 int entry = 0x200003; 3227 int i; 3228 int gtt_size = 0x10000; 3229 const char *filename; 3230 3231 if (!enable) { 3232 if (bufmgr_gem->aub_file) { 3233 fclose(bufmgr_gem->aub_file); 3234 bufmgr_gem->aub_file = NULL; 3235 } 3236 return; 3237 } 3238 3239 if (geteuid() != getuid()) 3240 return; 3241 3242 if (bufmgr_gem->aub_filename) 3243 filename = bufmgr_gem->aub_filename; 3244 else 3245 filename = "intel.aub"; 3246 bufmgr_gem->aub_file = fopen(filename, "w+"); 3247 if (!bufmgr_gem->aub_file) 3248 return; 3249 3250 /* Start allocating objects from just after the GTT. */ 3251 bufmgr_gem->aub_offset = gtt_size; 3252 3253 /* Start with a (required) version packet. */ 3254 aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); 3255 aub_out(bufmgr_gem, 3256 (4 << AUB_HEADER_MAJOR_SHIFT) | 3257 (0 << AUB_HEADER_MINOR_SHIFT)); 3258 for (i = 0; i < 8; i++) { 3259 aub_out(bufmgr_gem, 0); /* app name */ 3260 } 3261 aub_out(bufmgr_gem, 0); /* timestamp */ 3262 aub_out(bufmgr_gem, 0); /* timestamp */ 3263 aub_out(bufmgr_gem, 0); /* comment len */ 3264 3265 /* Set up the GTT. The max we can handle is 256M */ 3266 aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 3267 /* Need to use GTT_ENTRY type for recent emulator */ 3268 aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT_ENTRY | 0 | AUB_TRACE_OP_DATA_WRITE); 3269 aub_out(bufmgr_gem, 0); /* subtype */ 3270 aub_out(bufmgr_gem, 0); /* offset */ 3271 aub_out(bufmgr_gem, gtt_size); /* size */ 3272 if (bufmgr_gem->gen >= 8) 3273 aub_out(bufmgr_gem, 0); 3274 for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { 3275 aub_out(bufmgr_gem, entry); 3276 } 3277} 3278 3279drm_intel_context * 3280drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 3281{ 3282 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3283 struct drm_i915_gem_context_create create; 3284 drm_intel_context *context = NULL; 3285 int ret; 3286 3287 context = calloc(1, sizeof(*context)); 3288 if (!context) 3289 return NULL; 3290 3291 memclear(create); 3292 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 3293 if (ret != 0) { 3294 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 3295 strerror(errno)); 3296 free(context); 3297 return NULL; 3298 } 3299 3300 context->ctx_id = create.ctx_id; 3301 context->bufmgr = bufmgr; 3302 3303 return context; 3304} 3305 3306void 3307drm_intel_gem_context_destroy(drm_intel_context *ctx) 3308{ 3309 drm_intel_bufmgr_gem *bufmgr_gem; 3310 struct drm_i915_gem_context_destroy destroy; 3311 int ret; 3312 3313 if (ctx == NULL) 3314 return; 3315 3316 memclear(destroy); 3317 3318 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3319 destroy.ctx_id = ctx->ctx_id; 3320 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 3321 &destroy); 3322 if (ret != 0) 3323 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 3324 strerror(errno)); 3325 3326 free(ctx); 3327} 3328 3329int 3330drm_intel_get_reset_stats(drm_intel_context *ctx, 3331 uint32_t *reset_count, 3332 uint32_t *active, 3333 uint32_t *pending) 3334{ 3335 drm_intel_bufmgr_gem *bufmgr_gem; 3336 struct drm_i915_reset_stats stats; 3337 int ret; 3338 3339 if (ctx == NULL) 3340 return -EINVAL; 3341 3342 memclear(stats); 3343 3344 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3345 stats.ctx_id = ctx->ctx_id; 3346 ret = drmIoctl(bufmgr_gem->fd, 
3347 DRM_IOCTL_I915_GET_RESET_STATS, 3348 &stats); 3349 if (ret == 0) { 3350 if (reset_count != NULL) 3351 *reset_count = stats.reset_count; 3352 3353 if (active != NULL) 3354 *active = stats.batch_active; 3355 3356 if (pending != NULL) 3357 *pending = stats.batch_pending; 3358 } 3359 3360 return ret; 3361} 3362 3363int 3364drm_intel_reg_read(drm_intel_bufmgr *bufmgr, 3365 uint32_t offset, 3366 uint64_t *result) 3367{ 3368 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3369 struct drm_i915_reg_read reg_read; 3370 int ret; 3371 3372 memclear(reg_read); 3373 reg_read.offset = offset; 3374 3375 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read); 3376 3377 *result = reg_read.val; 3378 return ret; 3379} 3380 3381int 3382drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) 3383{ 3384 drm_i915_getparam_t gp; 3385 int ret; 3386 3387 memclear(gp); 3388 gp.value = (int*)subslice_total; 3389 gp.param = I915_PARAM_SUBSLICE_TOTAL; 3390 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3391 if (ret) 3392 return -errno; 3393 3394 return 0; 3395} 3396 3397int 3398drm_intel_get_eu_total(int fd, unsigned int *eu_total) 3399{ 3400 drm_i915_getparam_t gp; 3401 int ret; 3402 3403 memclear(gp); 3404 gp.value = (int*)eu_total; 3405 gp.param = I915_PARAM_EU_TOTAL; 3406 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3407 if (ret) 3408 return -errno; 3409 3410 return 0; 3411} 3412 3413/** 3414 * Annotate the given bo for use in aub dumping. 3415 * 3416 * \param annotations is an array of drm_intel_aub_annotation objects 3417 * describing the type of data in various sections of the bo. Each 3418 * element of the array specifies the type and subtype of a section of 3419 * the bo, and the past-the-end offset of that section. The elements 3420 * of \c annotations must be sorted so that ending_offset is 3421 * increasing. 3422 * 3423 * \param count is the number of elements in the \c annotations array. 3424 * If \c count is zero, then \c annotations will not be dereferenced. 3425 * 3426 * Annotations are copied into a private data structure, so caller may 3427 * re-use the memory pointed to by \c annotations after the call 3428 * returns. 3429 * 3430 * Annotations are stored for the lifetime of the bo; to reset to the 3431 * default state (no annotations), call this function with a \c count 3432 * of zero. 3433 */ 3434void 3435drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, 3436 drm_intel_aub_annotation *annotations, 3437 unsigned count) 3438{ 3439 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3440 unsigned size = sizeof(*annotations) * count; 3441 drm_intel_aub_annotation *new_annotations = 3442 count > 0 ?
realloc(bo_gem->aub_annotations, size) : NULL; 3443 if (new_annotations == NULL) { 3444 free(bo_gem->aub_annotations); 3445 bo_gem->aub_annotations = NULL; 3446 bo_gem->aub_annotation_count = 0; 3447 return; 3448 } 3449 memcpy(new_annotations, annotations, size); 3450 bo_gem->aub_annotations = new_annotations; 3451 bo_gem->aub_annotation_count = count; 3452} 3453 3454static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; 3455static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; 3456 3457static drm_intel_bufmgr_gem * 3458drm_intel_bufmgr_gem_find(int fd) 3459{ 3460 drm_intel_bufmgr_gem *bufmgr_gem; 3461 3462 DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { 3463 if (bufmgr_gem->fd == fd) { 3464 atomic_inc(&bufmgr_gem->refcount); 3465 return bufmgr_gem; 3466 } 3467 } 3468 3469 return NULL; 3470} 3471 3472static void 3473drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) 3474{ 3475 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3476 3477 if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) { 3478 pthread_mutex_lock(&bufmgr_list_mutex); 3479 3480 if (atomic_dec_and_test(&bufmgr_gem->refcount)) { 3481 DRMLISTDEL(&bufmgr_gem->managers); 3482 drm_intel_bufmgr_gem_destroy(bufmgr); 3483 } 3484 3485 pthread_mutex_unlock(&bufmgr_list_mutex); 3486 } 3487} 3488 3489/** 3490 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 3491 * and manage map buffer objections. 3492 * 3493 * \param fd File descriptor of the opened DRM device. 3494 */ 3495drm_intel_bufmgr * 3496drm_intel_bufmgr_gem_init(int fd, int batch_size) 3497{ 3498 drm_intel_bufmgr_gem *bufmgr_gem; 3499 struct drm_i915_gem_get_aperture aperture; 3500 drm_i915_getparam_t gp; 3501 int ret, tmp; 3502 bool exec2 = false; 3503 3504 pthread_mutex_lock(&bufmgr_list_mutex); 3505 3506 bufmgr_gem = drm_intel_bufmgr_gem_find(fd); 3507 if (bufmgr_gem) 3508 goto exit; 3509 3510 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 3511 if (bufmgr_gem == NULL) 3512 goto exit; 3513 3514 bufmgr_gem->fd = fd; 3515 atomic_set(&bufmgr_gem->refcount, 1); 3516 3517 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 3518 free(bufmgr_gem); 3519 bufmgr_gem = NULL; 3520 goto exit; 3521 } 3522 3523 memclear(aperture); 3524 ret = drmIoctl(bufmgr_gem->fd, 3525 DRM_IOCTL_I915_GEM_GET_APERTURE, 3526 &aperture); 3527 3528 if (ret == 0) 3529 bufmgr_gem->gtt_size = aperture.aper_available_size; 3530 else { 3531 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 3532 strerror(errno)); 3533 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 3534 fprintf(stderr, "Assuming %dkB available aperture size.\n" 3535 "May lead to reduced performance or incorrect " 3536 "rendering.\n", 3537 (int)bufmgr_gem->gtt_size / 1024); 3538 } 3539 3540 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 3541 3542 if (IS_GEN2(bufmgr_gem->pci_device)) 3543 bufmgr_gem->gen = 2; 3544 else if (IS_GEN3(bufmgr_gem->pci_device)) 3545 bufmgr_gem->gen = 3; 3546 else if (IS_GEN4(bufmgr_gem->pci_device)) 3547 bufmgr_gem->gen = 4; 3548 else if (IS_GEN5(bufmgr_gem->pci_device)) 3549 bufmgr_gem->gen = 5; 3550 else if (IS_GEN6(bufmgr_gem->pci_device)) 3551 bufmgr_gem->gen = 6; 3552 else if (IS_GEN7(bufmgr_gem->pci_device)) 3553 bufmgr_gem->gen = 7; 3554 else if (IS_GEN8(bufmgr_gem->pci_device)) 3555 bufmgr_gem->gen = 8; 3556 else if (IS_GEN9(bufmgr_gem->pci_device)) 3557 bufmgr_gem->gen = 9; 3558 else { 3559 free(bufmgr_gem); 3560 bufmgr_gem = NULL; 3561 goto exit; 3562 } 3563 3564 if (IS_GEN3(bufmgr_gem->pci_device) && 3565 
bufmgr_gem->gtt_size > 256*1024*1024) { 3566 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 3567 * be used for tiled blits. To simplify the accounting, just 3568 * subtract the unmappable part (fixed to 256MB on all known 3569 * gen3 devices) if the kernel advertises it. */ 3570 bufmgr_gem->gtt_size -= 256*1024*1024; 3571 } 3572 3573 memclear(gp); 3574 gp.value = &tmp; 3575 3576 gp.param = I915_PARAM_HAS_EXECBUF2; 3577 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3578 if (!ret) 3579 exec2 = true; 3580 3581 gp.param = I915_PARAM_HAS_BSD; 3582 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3583 bufmgr_gem->has_bsd = ret == 0; 3584 3585 gp.param = I915_PARAM_HAS_BLT; 3586 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3587 bufmgr_gem->has_blt = ret == 0; 3588 3589 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 3590 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3591 bufmgr_gem->has_relaxed_fencing = ret == 0; 3592 3593 bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; 3594 3595 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; 3596 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3597 bufmgr_gem->has_wait_timeout = ret == 0; 3598 3599 gp.param = I915_PARAM_HAS_LLC; 3600 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3601 if (ret != 0) { 3602 /* Kernel does not support the HAS_LLC query; fall back to GPU 3603 * generation detection and assume that we have LLC on GEN6/7 3604 */ 3605 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | 3606 IS_GEN7(bufmgr_gem->pci_device)); 3607 } else 3608 bufmgr_gem->has_llc = *gp.value; 3609 3610 gp.param = I915_PARAM_HAS_VEBOX; 3611 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3612 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); 3613 3614 if (bufmgr_gem->gen < 4) { 3615 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 3616 gp.value = &bufmgr_gem->available_fences; 3617 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3618 if (ret) { 3619 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 3620 errno); 3621 fprintf(stderr, "param: %d, val: %d\n", gp.param, 3622 *gp.value); 3623 bufmgr_gem->available_fences = 0; 3624 } else { 3625 /* XXX The kernel reports the total number of fences, 3626 * including any that may be pinned. 3627 * 3628 * We presume that there will be at least one pinned 3629 * fence for the scanout buffer, but there may be more 3630 * than one scanout and the user may be manually 3631 * pinning buffers. Let's move to execbuffer2 and 3632 * thereby forget the insanity of using fences... 3633 */ 3634 bufmgr_gem->available_fences -= 2; 3635 if (bufmgr_gem->available_fences < 0) 3636 bufmgr_gem->available_fences = 0; 3637 } 3638 } 3639 3640 /* Let's go with one relocation for every 2 dwords (but round down a bit 3641 * since a power of two will mean an extra page allocation for the reloc 3642 * buffer). 3643 * 3644 * Every 4 was too few for the blender benchmark.
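 *
 * For example, with a 16 KiB batch this works out to
 * 16384 / sizeof(uint32_t) / 2 - 2 = 2048 - 2 = 2046 relocation entries.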
3645 */ 3646 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 3647 3648 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 3649 bufmgr_gem->bufmgr.bo_alloc_for_render = 3650 drm_intel_gem_bo_alloc_for_render; 3651 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 3652 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 3653 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 3654 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 3655 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 3656 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 3657 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 3658 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 3659 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 3660 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 3661 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 3662 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 3663 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 3664 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 3665 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 3666 /* Use the new one if available */ 3667 if (exec2) { 3668 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 3669 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3670 } else 3671 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 3672 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 3673 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3674 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; 3675 bufmgr_gem->bufmgr.debug = 0; 3676 bufmgr_gem->bufmgr.check_aperture_space = 3677 drm_intel_gem_check_aperture_space; 3678 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3679 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 3680 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 3681 drm_intel_gem_get_pipe_from_crtc_id; 3682 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 3683 3684 DRMINITLISTHEAD(&bufmgr_gem->named); 3685 init_cache_buckets(bufmgr_gem); 3686 3687 DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 3688 bufmgr_gem->vma_max = -1; /* unlimited by default */ 3689 3690 DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); 3691 3692exit: 3693 pthread_mutex_unlock(&bufmgr_list_mutex); 3694 3695 return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; 3696} 3697
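
/*
 * Example usage (a sketch for reference only, not part of the original
 * file): bringing up the GEM buffer manager defined above, allocating
 * buffers, emitting a relocation and submitting a batch. BATCH_SZ,
 * reloc_offset, used_bytes and the omitted batch contents are
 * illustrative assumptions.
 *
 *	#define BATCH_SZ 16384
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, BATCH_SZ);
 *	if (bufmgr == NULL)
 *		return -1;
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *
 *	drm_intel_bo *batch = drm_intel_bo_alloc(bufmgr, "batch", BATCH_SZ, 4096);
 *	drm_intel_bo *target = drm_intel_bo_alloc(bufmgr, "target", 4096, 4096);
 *
 *	drm_intel_bo_map(batch, 1);
 *	... write commands into batch->virtual, leaving the precomputed
 *	    presumed address of target at reloc_offset ...
 *	drm_intel_bo_unmap(batch);
 *
 *	drm_intel_bo_emit_reloc(batch, reloc_offset, target, 0,
 *				I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
 *	drm_intel_bo_exec(batch, used_bytes, NULL, 0, 0);
 *
 *	drm_intel_bo_unreference(target);
 *	drm_intel_bo_unreference(batch);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */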