intel_bufmgr_gem.c revision 0655efef
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <stddef.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_macros.h"
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "string.h"

#include "i915_drm.h"
#include "uthash.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define memclear(s) memset(&s, 0, sizeof(s))

#define DBG(...) do {					\
	if (bufmgr_gem->bufmgr.debug)			\
		fprintf(stderr, __VA_ARGS__);		\
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#define MAX2(A, B) ((A) > (B) ? (A) : (B))

/**
 * upper_32_bits - return bits 32-63 of a number
 * @n: the number we're accessing
 *
 * A basic shift-right of a 64- or 32-bit quantity.  Use this to suppress
 * the "right shift count >= width of type" warning when that quantity is
 * 32-bits.
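 *
 * For example, upper_32_bits(0x1234567880000000ull) evaluates to 0x12345678
 * and lower_32_bits() of the same value to 0x80000000; this is how the
 * 64-bit presumed offsets are split up for the debug output further below.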
 */
#define upper_32_bits(n) ((__u32)(((n) >> 16) >> 16))

/**
 * lower_32_bits - return bits 0-31 of a number
 * @n: the number we're accessing
 */
#define lower_32_bits(n) ((__u32)(n))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	atomic_t refcount;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;
	time_t time;

	drmMMListHead managers;

	drm_intel_bo_gem *name_table;
	drm_intel_bo_gem *handle_table;

	drmMMListHead vma_cache;
	int vma_count, vma_open, vma_max;

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	unsigned int has_bsd : 1;
	unsigned int has_blt : 1;
	unsigned int has_relaxed_fencing : 1;
	unsigned int has_llc : 1;
	unsigned int has_wait_timeout : 1;
	unsigned int bo_reuse : 1;
	unsigned int no_exec : 1;
	unsigned int has_vebox : 1;
	unsigned int has_exec_async : 1;
	bool fenced_relocs;

	struct {
		void *ptr;
		uint32_t handle;
	} userptr_active;

} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 *
	 * List contains both flink named and prime fd'd objects
	 */
	unsigned int global_name;

	UT_hash_handle handle_hh;
	UT_hash_handle name_hh;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	unsigned long kflags;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc.
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Array of BOs that are referenced by this buffer and will be softpinned */
	drm_intel_bo **softpin_target;
	/** Number of softpinned BOs that are referenced by this buffer */
	int softpin_target_count;
	/** Maximum number of softpinned BOs that are referenced by this buffer */
	int softpin_target_size;

	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;
	/** WC CPU address for the buffer, saved across map/unmap cycles */
	void *wc_virtual;
	/**
	 * Virtual address of the buffer allocated by user, used for userptr
	 * objects only.
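	 *
	 * For such objects drm_intel_bo_map() simply hands this pointer back
	 * instead of creating a separate GEM mapping.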
228 */ 229 void *user_virtual; 230 int map_count; 231 drmMMListHead vma_list; 232 233 /** BO cache list */ 234 drmMMListHead head; 235 236 /** 237 * Boolean of whether this BO and its children have been included in 238 * the current drm_intel_bufmgr_check_aperture_space() total. 239 */ 240 bool included_in_check_aperture; 241 242 /** 243 * Boolean of whether this buffer has been used as a relocation 244 * target and had its size accounted for, and thus can't have any 245 * further relocations added to it. 246 */ 247 bool used_as_reloc_target; 248 249 /** 250 * Boolean of whether we have encountered an error whilst building the relocation tree. 251 */ 252 bool has_error; 253 254 /** 255 * Boolean of whether this buffer can be re-used 256 */ 257 bool reusable; 258 259 /** 260 * Boolean of whether the GPU is definitely not accessing the buffer. 261 * 262 * This is only valid when reusable, since non-reusable 263 * buffers are those that have been shared with other 264 * processes, so we don't know their state. 265 */ 266 bool idle; 267 268 /** 269 * Boolean of whether this buffer was allocated with userptr 270 */ 271 bool is_userptr; 272 273 /** 274 * Size in bytes of this buffer and its relocation descendents. 275 * 276 * Used to avoid costly tree walking in 277 * drm_intel_bufmgr_check_aperture in the common case. 278 */ 279 int reloc_tree_size; 280 281 /** 282 * Number of potential fence registers required by this buffer and its 283 * relocations. 284 */ 285 int reloc_tree_fences; 286 287 /** Flags that we may need to do the SW_FINISH ioctl on unmap. */ 288 bool mapped_cpu_write; 289}; 290 291static unsigned int 292drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 293 294static unsigned int 295drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 296 297static int 298drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 299 uint32_t * swizzle_mode); 300 301static int 302drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 303 uint32_t tiling_mode, 304 uint32_t stride); 305 306static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 307 time_t time); 308 309static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 310 311static void drm_intel_gem_bo_free(drm_intel_bo *bo); 312 313static inline drm_intel_bo_gem *to_bo_gem(drm_intel_bo *bo) 314{ 315 return (drm_intel_bo_gem *)bo; 316} 317 318static unsigned long 319drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 320 uint32_t *tiling_mode) 321{ 322 unsigned long min_size, max_size; 323 unsigned long i; 324 325 if (*tiling_mode == I915_TILING_NONE) 326 return size; 327 328 /* 965+ just need multiples of page size for tiling */ 329 if (bufmgr_gem->gen >= 4) 330 return ROUND_UP_TO(size, 4096); 331 332 /* Older chips need powers of two, of at least 512k or 1M */ 333 if (bufmgr_gem->gen == 3) { 334 min_size = 1024*1024; 335 max_size = 128*1024*1024; 336 } else { 337 min_size = 512*1024; 338 max_size = 64*1024*1024; 339 } 340 341 if (size > max_size) { 342 *tiling_mode = I915_TILING_NONE; 343 return size; 344 } 345 346 /* Do we need to allocate every page for the fence? */ 347 if (bufmgr_gem->has_relaxed_fencing) 348 return ROUND_UP_TO(size, 4096); 349 350 for (i = min_size; i < size; i <<= 1) 351 ; 352 353 return i; 354} 355 356/* 357 * Round a given pitch up to the minimum required for X tiling on a 358 * given chip. We use 512 as the minimum to allow for a later tiling 359 * change. 
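 *
 * As a worked example of the rounding below (assuming ROUND_UP_TO rounds up
 * to a multiple): an X-tiled pitch of 1300 bytes becomes 1536 on gen4+
 * (the next multiple of the 512 byte tile width) but 2048 on older parts
 * (the next power of two), while an untiled pitch is merely aligned to
 * 64 bytes (1300 -> 1344).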
360 */ 361static unsigned long 362drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 363 unsigned long pitch, uint32_t *tiling_mode) 364{ 365 unsigned long tile_width; 366 unsigned long i; 367 368 /* If untiled, then just align it so that we can do rendering 369 * to it with the 3D engine. 370 */ 371 if (*tiling_mode == I915_TILING_NONE) 372 return ALIGN(pitch, 64); 373 374 if (*tiling_mode == I915_TILING_X 375 || (IS_915(bufmgr_gem->pci_device) 376 && *tiling_mode == I915_TILING_Y)) 377 tile_width = 512; 378 else 379 tile_width = 128; 380 381 /* 965 is flexible */ 382 if (bufmgr_gem->gen >= 4) 383 return ROUND_UP_TO(pitch, tile_width); 384 385 /* The older hardware has a maximum pitch of 8192 with tiled 386 * surfaces, so fallback to untiled if it's too large. 387 */ 388 if (pitch > 8192) { 389 *tiling_mode = I915_TILING_NONE; 390 return ALIGN(pitch, 64); 391 } 392 393 /* Pre-965 needs power of two tile width */ 394 for (i = tile_width; i < pitch; i <<= 1) 395 ; 396 397 return i; 398} 399 400static struct drm_intel_gem_bo_bucket * 401drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 402 unsigned long size) 403{ 404 int i; 405 406 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 407 struct drm_intel_gem_bo_bucket *bucket = 408 &bufmgr_gem->cache_bucket[i]; 409 if (bucket->size >= size) { 410 return bucket; 411 } 412 } 413 414 return NULL; 415} 416 417static void 418drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 419{ 420 int i, j; 421 422 for (i = 0; i < bufmgr_gem->exec_count; i++) { 423 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 424 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 425 426 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) { 427 DBG("%2d: %d %s(%s)\n", i, bo_gem->gem_handle, 428 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 429 bo_gem->name); 430 continue; 431 } 432 433 for (j = 0; j < bo_gem->reloc_count; j++) { 434 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 435 drm_intel_bo_gem *target_gem = 436 (drm_intel_bo_gem *) target_bo; 437 438 DBG("%2d: %d %s(%s)@0x%08x %08x -> " 439 "%d (%s)@0x%08x %08x + 0x%08x\n", 440 i, 441 bo_gem->gem_handle, 442 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 443 bo_gem->name, 444 upper_32_bits(bo_gem->relocs[j].offset), 445 lower_32_bits(bo_gem->relocs[j].offset), 446 target_gem->gem_handle, 447 target_gem->name, 448 upper_32_bits(target_bo->offset64), 449 lower_32_bits(target_bo->offset64), 450 bo_gem->relocs[j].delta); 451 } 452 453 for (j = 0; j < bo_gem->softpin_target_count; j++) { 454 drm_intel_bo *target_bo = bo_gem->softpin_target[j]; 455 drm_intel_bo_gem *target_gem = 456 (drm_intel_bo_gem *) target_bo; 457 DBG("%2d: %d %s(%s) -> " 458 "%d *(%s)@0x%08x %08x\n", 459 i, 460 bo_gem->gem_handle, 461 bo_gem->kflags & EXEC_OBJECT_PINNED ? "*" : "", 462 bo_gem->name, 463 target_gem->gem_handle, 464 target_gem->name, 465 upper_32_bits(target_bo->offset64), 466 lower_32_bits(target_bo->offset64)); 467 } 468 } 469} 470 471static inline void 472drm_intel_gem_bo_reference(drm_intel_bo *bo) 473{ 474 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 475 476 atomic_inc(&bo_gem->refcount); 477} 478 479/** 480 * Adds the given buffer to the list of buffers to be validated (moved into the 481 * appropriate memory type) with the next batch submission. 482 * 483 * If a buffer is validated multiple times in a batch submission, it ends up 484 * with the intersection of the memory type flags and the union of the 485 * access flags. 
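 *
 * A buffer is only ever added once per batch: validate_index doubles as the
 * "already on the list" marker, and the exec_objects/exec_bos arrays below
 * simply grow by doubling (5, 10, 20, ...) as more buffers are referenced.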
486 */ 487static void 488drm_intel_add_validate_buffer(drm_intel_bo *bo) 489{ 490 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 491 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 492 int index; 493 494 if (bo_gem->validate_index != -1) 495 return; 496 497 /* Extend the array of validation entries as necessary. */ 498 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 499 int new_size = bufmgr_gem->exec_size * 2; 500 501 if (new_size == 0) 502 new_size = 5; 503 504 bufmgr_gem->exec_objects = 505 realloc(bufmgr_gem->exec_objects, 506 sizeof(*bufmgr_gem->exec_objects) * new_size); 507 bufmgr_gem->exec_bos = 508 realloc(bufmgr_gem->exec_bos, 509 sizeof(*bufmgr_gem->exec_bos) * new_size); 510 bufmgr_gem->exec_size = new_size; 511 } 512 513 index = bufmgr_gem->exec_count; 514 bo_gem->validate_index = index; 515 /* Fill in array entry */ 516 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 517 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 518 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 519 bufmgr_gem->exec_objects[index].alignment = bo->align; 520 bufmgr_gem->exec_objects[index].offset = 0; 521 bufmgr_gem->exec_bos[index] = bo; 522 bufmgr_gem->exec_count++; 523} 524 525static void 526drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 527{ 528 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 529 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 530 int index; 531 unsigned long flags; 532 533 flags = 0; 534 if (need_fence) 535 flags |= EXEC_OBJECT_NEEDS_FENCE; 536 537 if (bo_gem->validate_index != -1) { 538 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= flags; 539 return; 540 } 541 542 /* Extend the array of validation entries as necessary. */ 543 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 544 int new_size = bufmgr_gem->exec_size * 2; 545 546 if (new_size == 0) 547 new_size = 5; 548 549 bufmgr_gem->exec2_objects = 550 realloc(bufmgr_gem->exec2_objects, 551 sizeof(*bufmgr_gem->exec2_objects) * new_size); 552 bufmgr_gem->exec_bos = 553 realloc(bufmgr_gem->exec_bos, 554 sizeof(*bufmgr_gem->exec_bos) * new_size); 555 bufmgr_gem->exec_size = new_size; 556 } 557 558 index = bufmgr_gem->exec_count; 559 bo_gem->validate_index = index; 560 /* Fill in array entry */ 561 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 562 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 563 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 564 bufmgr_gem->exec2_objects[index].alignment = bo->align; 565 bufmgr_gem->exec2_objects[index].offset = bo->offset64; 566 bufmgr_gem->exec2_objects[index].flags = bo_gem->kflags | flags; 567 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 568 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 569 bufmgr_gem->exec_bos[index] = bo; 570 bufmgr_gem->exec_count++; 571} 572 573#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 574 sizeof(uint32_t)) 575 576static void 577drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 578 drm_intel_bo_gem *bo_gem, 579 unsigned int alignment) 580{ 581 unsigned int size; 582 583 assert(!bo_gem->used_as_reloc_target); 584 585 /* The older chipsets are far-less flexible in terms of tiling, 586 * and require tiled buffer to be size aligned in the aperture. 587 * This means that in the worst possible case we will need a hole 588 * twice as large as the object in order for it to fit into the 589 * aperture. 
Optimal packing is for wimps. 590 */ 591 size = bo_gem->bo.size; 592 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 593 unsigned int min_size; 594 595 if (bufmgr_gem->has_relaxed_fencing) { 596 if (bufmgr_gem->gen == 3) 597 min_size = 1024*1024; 598 else 599 min_size = 512*1024; 600 601 while (min_size < size) 602 min_size *= 2; 603 } else 604 min_size = size; 605 606 /* Account for worst-case alignment. */ 607 alignment = MAX2(alignment, min_size); 608 } 609 610 bo_gem->reloc_tree_size = size + alignment; 611} 612 613static int 614drm_intel_setup_reloc_list(drm_intel_bo *bo) 615{ 616 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 617 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 618 unsigned int max_relocs = bufmgr_gem->max_relocs; 619 620 if (bo->size / 4 < max_relocs) 621 max_relocs = bo->size / 4; 622 623 bo_gem->relocs = malloc(max_relocs * 624 sizeof(struct drm_i915_gem_relocation_entry)); 625 bo_gem->reloc_target_info = malloc(max_relocs * 626 sizeof(drm_intel_reloc_target)); 627 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 628 bo_gem->has_error = true; 629 630 free (bo_gem->relocs); 631 bo_gem->relocs = NULL; 632 633 free (bo_gem->reloc_target_info); 634 bo_gem->reloc_target_info = NULL; 635 636 return 1; 637 } 638 639 return 0; 640} 641 642static int 643drm_intel_gem_bo_busy(drm_intel_bo *bo) 644{ 645 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 646 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 647 struct drm_i915_gem_busy busy; 648 int ret; 649 650 if (bo_gem->reusable && bo_gem->idle) 651 return false; 652 653 memclear(busy); 654 busy.handle = bo_gem->gem_handle; 655 656 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 657 if (ret == 0) { 658 bo_gem->idle = !busy.busy; 659 return busy.busy; 660 } else { 661 return false; 662 } 663} 664 665static int 666drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 667 drm_intel_bo_gem *bo_gem, int state) 668{ 669 struct drm_i915_gem_madvise madv; 670 671 memclear(madv); 672 madv.handle = bo_gem->gem_handle; 673 madv.madv = state; 674 madv.retained = 1; 675 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 676 677 return madv.retained; 678} 679 680static int 681drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 682{ 683 return drm_intel_gem_bo_madvise_internal 684 ((drm_intel_bufmgr_gem *) bo->bufmgr, 685 (drm_intel_bo_gem *) bo, 686 madv); 687} 688 689/* drop the oldest entries that have been purged by the kernel */ 690static void 691drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 692 struct drm_intel_gem_bo_bucket *bucket) 693{ 694 while (!DRMLISTEMPTY(&bucket->head)) { 695 drm_intel_bo_gem *bo_gem; 696 697 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 698 bucket->head.next, head); 699 if (drm_intel_gem_bo_madvise_internal 700 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 701 break; 702 703 DRMLISTDEL(&bo_gem->head); 704 drm_intel_gem_bo_free(&bo_gem->bo); 705 } 706} 707 708static drm_intel_bo * 709drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 710 const char *name, 711 unsigned long size, 712 unsigned long flags, 713 uint32_t tiling_mode, 714 unsigned long stride, 715 unsigned int alignment) 716{ 717 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 718 drm_intel_bo_gem *bo_gem; 719 unsigned int page_size = getpagesize(); 720 int ret; 721 struct drm_intel_gem_bo_bucket *bucket; 722 bool alloc_from_cache; 723 unsigned long bo_size; 724 bool 
for_render = false; 725 726 if (flags & BO_ALLOC_FOR_RENDER) 727 for_render = true; 728 729 /* Round the allocated size up to a power of two number of pages. */ 730 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 731 732 /* If we don't have caching at this size, don't actually round the 733 * allocation up. 734 */ 735 if (bucket == NULL) { 736 bo_size = size; 737 if (bo_size < page_size) 738 bo_size = page_size; 739 } else { 740 bo_size = bucket->size; 741 } 742 743 pthread_mutex_lock(&bufmgr_gem->lock); 744 /* Get a buffer out of the cache if available */ 745retry: 746 alloc_from_cache = false; 747 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 748 if (for_render) { 749 /* Allocate new render-target BOs from the tail (MRU) 750 * of the list, as it will likely be hot in the GPU 751 * cache and in the aperture for us. 752 */ 753 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 754 bucket->head.prev, head); 755 DRMLISTDEL(&bo_gem->head); 756 alloc_from_cache = true; 757 bo_gem->bo.align = alignment; 758 } else { 759 assert(alignment == 0); 760 /* For non-render-target BOs (where we're probably 761 * going to map it first thing in order to fill it 762 * with data), check if the last BO in the cache is 763 * unbusy, and only reuse in that case. Otherwise, 764 * allocating a new buffer is probably faster than 765 * waiting for the GPU to finish. 766 */ 767 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 768 bucket->head.next, head); 769 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 770 alloc_from_cache = true; 771 DRMLISTDEL(&bo_gem->head); 772 } 773 } 774 775 if (alloc_from_cache) { 776 if (!drm_intel_gem_bo_madvise_internal 777 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 778 drm_intel_gem_bo_free(&bo_gem->bo); 779 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 780 bucket); 781 goto retry; 782 } 783 784 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 785 tiling_mode, 786 stride)) { 787 drm_intel_gem_bo_free(&bo_gem->bo); 788 goto retry; 789 } 790 } 791 } 792 793 if (!alloc_from_cache) { 794 struct drm_i915_gem_create create; 795 796 bo_gem = calloc(1, sizeof(*bo_gem)); 797 if (!bo_gem) 798 goto err; 799 800 /* drm_intel_gem_bo_free calls DRMLISTDEL() for an uninitialized 801 list (vma_list), so better set the list head here */ 802 DRMINITLISTHEAD(&bo_gem->vma_list); 803 804 bo_gem->bo.size = bo_size; 805 806 memclear(create); 807 create.size = bo_size; 808 809 ret = drmIoctl(bufmgr_gem->fd, 810 DRM_IOCTL_I915_GEM_CREATE, 811 &create); 812 if (ret != 0) { 813 free(bo_gem); 814 goto err; 815 } 816 817 bo_gem->gem_handle = create.handle; 818 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 819 gem_handle, sizeof(bo_gem->gem_handle), 820 bo_gem); 821 822 bo_gem->bo.handle = bo_gem->gem_handle; 823 bo_gem->bo.bufmgr = bufmgr; 824 bo_gem->bo.align = alignment; 825 826 bo_gem->tiling_mode = I915_TILING_NONE; 827 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 828 bo_gem->stride = 0; 829 830 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 831 tiling_mode, 832 stride)) 833 goto err_free; 834 } 835 836 bo_gem->name = name; 837 atomic_set(&bo_gem->refcount, 1); 838 bo_gem->validate_index = -1; 839 bo_gem->reloc_tree_fences = 0; 840 bo_gem->used_as_reloc_target = false; 841 bo_gem->has_error = false; 842 bo_gem->reusable = true; 843 844 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, alignment); 845 pthread_mutex_unlock(&bufmgr_gem->lock); 846 847 DBG("bo_create: buf %d (%s) %ldb\n", 848 bo_gem->gem_handle, bo_gem->name, size); 849 850 return &bo_gem->bo; 851 852err_free: 853 
drm_intel_gem_bo_free(&bo_gem->bo); 854err: 855 pthread_mutex_unlock(&bufmgr_gem->lock); 856 return NULL; 857} 858 859static drm_intel_bo * 860drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 861 const char *name, 862 unsigned long size, 863 unsigned int alignment) 864{ 865 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 866 BO_ALLOC_FOR_RENDER, 867 I915_TILING_NONE, 0, 868 alignment); 869} 870 871static drm_intel_bo * 872drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 873 const char *name, 874 unsigned long size, 875 unsigned int alignment) 876{ 877 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 878 I915_TILING_NONE, 0, 0); 879} 880 881static drm_intel_bo * 882drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 883 int x, int y, int cpp, uint32_t *tiling_mode, 884 unsigned long *pitch, unsigned long flags) 885{ 886 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 887 unsigned long size, stride; 888 uint32_t tiling; 889 890 do { 891 unsigned long aligned_y, height_alignment; 892 893 tiling = *tiling_mode; 894 895 /* If we're tiled, our allocations are in 8 or 32-row blocks, 896 * so failure to align our height means that we won't allocate 897 * enough pages. 898 * 899 * If we're untiled, we still have to align to 2 rows high 900 * because the data port accesses 2x2 blocks even if the 901 * bottom row isn't to be rendered, so failure to align means 902 * we could walk off the end of the GTT and fault. This is 903 * documented on 965, and may be the case on older chipsets 904 * too so we try to be careful. 905 */ 906 aligned_y = y; 907 height_alignment = 2; 908 909 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 910 height_alignment = 16; 911 else if (tiling == I915_TILING_X 912 || (IS_915(bufmgr_gem->pci_device) 913 && tiling == I915_TILING_Y)) 914 height_alignment = 8; 915 else if (tiling == I915_TILING_Y) 916 height_alignment = 32; 917 aligned_y = ALIGN(y, height_alignment); 918 919 stride = x * cpp; 920 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 921 size = stride * aligned_y; 922 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 923 } while (*tiling_mode != tiling); 924 *pitch = stride; 925 926 if (tiling == I915_TILING_NONE) 927 stride = 0; 928 929 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 930 tiling, stride, 0); 931} 932 933static drm_intel_bo * 934drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 935 const char *name, 936 void *addr, 937 uint32_t tiling_mode, 938 uint32_t stride, 939 unsigned long size, 940 unsigned long flags) 941{ 942 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 943 drm_intel_bo_gem *bo_gem; 944 int ret; 945 struct drm_i915_gem_userptr userptr; 946 947 /* Tiling with userptr surfaces is not supported 948 * on all hardware so refuse it for time being. 
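	 *
	 * A rough usage sketch (illustrative only; "ptr" and "sz" are the
	 * caller's own page-aligned pointer and size, not names used here):
	 *
	 *   posix_memalign(&ptr, getpagesize(), sz);
	 *   bo = drm_intel_bo_alloc_userptr(bufmgr, "wrapped", ptr,
	 *                                   I915_TILING_NONE, 0, sz, 0);
	 *
	 * The wrapped memory must stay valid for the lifetime of the bo, and
	 * the resulting bo is never marked reusable.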
949 */ 950 if (tiling_mode != I915_TILING_NONE) 951 return NULL; 952 953 bo_gem = calloc(1, sizeof(*bo_gem)); 954 if (!bo_gem) 955 return NULL; 956 957 atomic_set(&bo_gem->refcount, 1); 958 DRMINITLISTHEAD(&bo_gem->vma_list); 959 960 bo_gem->bo.size = size; 961 962 memclear(userptr); 963 userptr.user_ptr = (__u64)((unsigned long)addr); 964 userptr.user_size = size; 965 userptr.flags = flags; 966 967 ret = drmIoctl(bufmgr_gem->fd, 968 DRM_IOCTL_I915_GEM_USERPTR, 969 &userptr); 970 if (ret != 0) { 971 DBG("bo_create_userptr: " 972 "ioctl failed with user ptr %p size 0x%lx, " 973 "user flags 0x%lx\n", addr, size, flags); 974 free(bo_gem); 975 return NULL; 976 } 977 978 pthread_mutex_lock(&bufmgr_gem->lock); 979 980 bo_gem->gem_handle = userptr.handle; 981 bo_gem->bo.handle = bo_gem->gem_handle; 982 bo_gem->bo.bufmgr = bufmgr; 983 bo_gem->is_userptr = true; 984 bo_gem->bo.virtual = addr; 985 /* Save the address provided by user */ 986 bo_gem->user_virtual = addr; 987 bo_gem->tiling_mode = I915_TILING_NONE; 988 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 989 bo_gem->stride = 0; 990 991 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 992 gem_handle, sizeof(bo_gem->gem_handle), 993 bo_gem); 994 995 bo_gem->name = name; 996 bo_gem->validate_index = -1; 997 bo_gem->reloc_tree_fences = 0; 998 bo_gem->used_as_reloc_target = false; 999 bo_gem->has_error = false; 1000 bo_gem->reusable = false; 1001 1002 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 1003 pthread_mutex_unlock(&bufmgr_gem->lock); 1004 1005 DBG("bo_create_userptr: " 1006 "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n", 1007 addr, bo_gem->gem_handle, bo_gem->name, 1008 size, stride, tiling_mode); 1009 1010 return &bo_gem->bo; 1011} 1012 1013static bool 1014has_userptr(drm_intel_bufmgr_gem *bufmgr_gem) 1015{ 1016 int ret; 1017 void *ptr; 1018 long pgsz; 1019 struct drm_i915_gem_userptr userptr; 1020 1021 pgsz = sysconf(_SC_PAGESIZE); 1022 assert(pgsz > 0); 1023 1024 ret = posix_memalign(&ptr, pgsz, pgsz); 1025 if (ret) { 1026 DBG("Failed to get a page (%ld) for userptr detection!\n", 1027 pgsz); 1028 return false; 1029 } 1030 1031 memclear(userptr); 1032 userptr.user_ptr = (__u64)(unsigned long)ptr; 1033 userptr.user_size = pgsz; 1034 1035retry: 1036 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr); 1037 if (ret) { 1038 if (errno == ENODEV && userptr.flags == 0) { 1039 userptr.flags = I915_USERPTR_UNSYNCHRONIZED; 1040 goto retry; 1041 } 1042 free(ptr); 1043 return false; 1044 } 1045 1046 /* We don't release the userptr bo here as we want to keep the 1047 * kernel mm tracking alive for our lifetime. The first time we 1048 * create a userptr object the kernel has to install a mmu_notifer 1049 * which is a heavyweight operation (e.g. it requires taking all 1050 * mm_locks and stop_machine()). 
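	 *
	 * (Note that the probe above retries with I915_USERPTR_UNSYNCHRONIZED
	 * when the plain request fails with ENODEV, so success here does not
	 * by itself say which flavour the kernel accepted.)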
1051 */ 1052 1053 bufmgr_gem->userptr_active.ptr = ptr; 1054 bufmgr_gem->userptr_active.handle = userptr.handle; 1055 1056 return true; 1057} 1058 1059static drm_intel_bo * 1060check_bo_alloc_userptr(drm_intel_bufmgr *bufmgr, 1061 const char *name, 1062 void *addr, 1063 uint32_t tiling_mode, 1064 uint32_t stride, 1065 unsigned long size, 1066 unsigned long flags) 1067{ 1068 if (has_userptr((drm_intel_bufmgr_gem *)bufmgr)) 1069 bufmgr->bo_alloc_userptr = drm_intel_gem_bo_alloc_userptr; 1070 else 1071 bufmgr->bo_alloc_userptr = NULL; 1072 1073 return drm_intel_bo_alloc_userptr(bufmgr, name, addr, 1074 tiling_mode, stride, size, flags); 1075} 1076 1077/** 1078 * Returns a drm_intel_bo wrapping the given buffer object handle. 1079 * 1080 * This can be used when one application needs to pass a buffer object 1081 * to another. 1082 */ 1083drm_intel_bo * 1084drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 1085 const char *name, 1086 unsigned int handle) 1087{ 1088 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1089 drm_intel_bo_gem *bo_gem; 1090 int ret; 1091 struct drm_gem_open open_arg; 1092 struct drm_i915_gem_get_tiling get_tiling; 1093 1094 /* At the moment most applications only have a few named bo. 1095 * For instance, in a DRI client only the render buffers passed 1096 * between X and the client are named. And since X returns the 1097 * alternating names for the front/back buffer a linear search 1098 * provides a sufficiently fast match. 1099 */ 1100 pthread_mutex_lock(&bufmgr_gem->lock); 1101 HASH_FIND(name_hh, bufmgr_gem->name_table, 1102 &handle, sizeof(handle), bo_gem); 1103 if (bo_gem) { 1104 drm_intel_gem_bo_reference(&bo_gem->bo); 1105 goto out; 1106 } 1107 1108 memclear(open_arg); 1109 open_arg.name = handle; 1110 ret = drmIoctl(bufmgr_gem->fd, 1111 DRM_IOCTL_GEM_OPEN, 1112 &open_arg); 1113 if (ret != 0) { 1114 DBG("Couldn't reference %s handle 0x%08x: %s\n", 1115 name, handle, strerror(errno)); 1116 bo_gem = NULL; 1117 goto out; 1118 } 1119 /* Now see if someone has used a prime handle to get this 1120 * object from the kernel before by looking through the list 1121 * again for a matching gem_handle 1122 */ 1123 HASH_FIND(handle_hh, bufmgr_gem->handle_table, 1124 &open_arg.handle, sizeof(open_arg.handle), bo_gem); 1125 if (bo_gem) { 1126 drm_intel_gem_bo_reference(&bo_gem->bo); 1127 goto out; 1128 } 1129 1130 bo_gem = calloc(1, sizeof(*bo_gem)); 1131 if (!bo_gem) 1132 goto out; 1133 1134 atomic_set(&bo_gem->refcount, 1); 1135 DRMINITLISTHEAD(&bo_gem->vma_list); 1136 1137 bo_gem->bo.size = open_arg.size; 1138 bo_gem->bo.offset = 0; 1139 bo_gem->bo.offset64 = 0; 1140 bo_gem->bo.virtual = NULL; 1141 bo_gem->bo.bufmgr = bufmgr; 1142 bo_gem->name = name; 1143 bo_gem->validate_index = -1; 1144 bo_gem->gem_handle = open_arg.handle; 1145 bo_gem->bo.handle = open_arg.handle; 1146 bo_gem->global_name = handle; 1147 bo_gem->reusable = false; 1148 1149 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 1150 gem_handle, sizeof(bo_gem->gem_handle), bo_gem); 1151 HASH_ADD(name_hh, bufmgr_gem->name_table, 1152 global_name, sizeof(bo_gem->global_name), bo_gem); 1153 1154 memclear(get_tiling); 1155 get_tiling.handle = bo_gem->gem_handle; 1156 ret = drmIoctl(bufmgr_gem->fd, 1157 DRM_IOCTL_I915_GEM_GET_TILING, 1158 &get_tiling); 1159 if (ret != 0) 1160 goto err_unref; 1161 1162 bo_gem->tiling_mode = get_tiling.tiling_mode; 1163 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 1164 /* XXX stride is unknown */ 1165 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 
0); 1166 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 1167 1168out: 1169 pthread_mutex_unlock(&bufmgr_gem->lock); 1170 return &bo_gem->bo; 1171 1172err_unref: 1173 drm_intel_gem_bo_free(&bo_gem->bo); 1174 pthread_mutex_unlock(&bufmgr_gem->lock); 1175 return NULL; 1176} 1177 1178static void 1179drm_intel_gem_bo_free(drm_intel_bo *bo) 1180{ 1181 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1182 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1183 struct drm_gem_close close; 1184 int ret; 1185 1186 DRMLISTDEL(&bo_gem->vma_list); 1187 if (bo_gem->mem_virtual) { 1188 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 1189 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1190 bufmgr_gem->vma_count--; 1191 } 1192 if (bo_gem->wc_virtual) { 1193 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->wc_virtual, 0)); 1194 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); 1195 bufmgr_gem->vma_count--; 1196 } 1197 if (bo_gem->gtt_virtual) { 1198 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1199 bufmgr_gem->vma_count--; 1200 } 1201 1202 if (bo_gem->global_name) 1203 HASH_DELETE(name_hh, bufmgr_gem->name_table, bo_gem); 1204 HASH_DELETE(handle_hh, bufmgr_gem->handle_table, bo_gem); 1205 1206 /* Close this object */ 1207 memclear(close); 1208 close.handle = bo_gem->gem_handle; 1209 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 1210 if (ret != 0) { 1211 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 1212 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 1213 } 1214 free(bo); 1215} 1216 1217static void 1218drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 1219{ 1220#if HAVE_VALGRIND 1221 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1222 1223 if (bo_gem->mem_virtual) 1224 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 1225 1226 if (bo_gem->wc_virtual) 1227 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->wc_virtual, bo->size); 1228 1229 if (bo_gem->gtt_virtual) 1230 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 1231#endif 1232} 1233 1234/** Frees all cached buffers significantly older than @time. 
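 *
 * "Significantly older" means more than one second here: a cached buffer
 * freed at time t is only released by a cleanup running at t+2 or later, so
 * idle buffers linger in the cache for roughly one to two seconds at most.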
*/ 1235static void 1236drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 1237{ 1238 int i; 1239 1240 if (bufmgr_gem->time == time) 1241 return; 1242 1243 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1244 struct drm_intel_gem_bo_bucket *bucket = 1245 &bufmgr_gem->cache_bucket[i]; 1246 1247 while (!DRMLISTEMPTY(&bucket->head)) { 1248 drm_intel_bo_gem *bo_gem; 1249 1250 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1251 bucket->head.next, head); 1252 if (time - bo_gem->free_time <= 1) 1253 break; 1254 1255 DRMLISTDEL(&bo_gem->head); 1256 1257 drm_intel_gem_bo_free(&bo_gem->bo); 1258 } 1259 } 1260 1261 bufmgr_gem->time = time; 1262} 1263 1264static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 1265{ 1266 int limit; 1267 1268 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 1269 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 1270 1271 if (bufmgr_gem->vma_max < 0) 1272 return; 1273 1274 /* We may need to evict a few entries in order to create new mmaps */ 1275 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1276 if (limit < 0) 1277 limit = 0; 1278 1279 while (bufmgr_gem->vma_count > limit) { 1280 drm_intel_bo_gem *bo_gem; 1281 1282 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1283 bufmgr_gem->vma_cache.next, 1284 vma_list); 1285 assert(bo_gem->map_count == 0); 1286 DRMLISTDELINIT(&bo_gem->vma_list); 1287 1288 if (bo_gem->mem_virtual) { 1289 drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1290 bo_gem->mem_virtual = NULL; 1291 bufmgr_gem->vma_count--; 1292 } 1293 if (bo_gem->wc_virtual) { 1294 drm_munmap(bo_gem->wc_virtual, bo_gem->bo.size); 1295 bo_gem->wc_virtual = NULL; 1296 bufmgr_gem->vma_count--; 1297 } 1298 if (bo_gem->gtt_virtual) { 1299 drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1300 bo_gem->gtt_virtual = NULL; 1301 bufmgr_gem->vma_count--; 1302 } 1303 } 1304} 1305 1306static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1307 drm_intel_bo_gem *bo_gem) 1308{ 1309 bufmgr_gem->vma_open--; 1310 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1311 if (bo_gem->mem_virtual) 1312 bufmgr_gem->vma_count++; 1313 if (bo_gem->wc_virtual) 1314 bufmgr_gem->vma_count++; 1315 if (bo_gem->gtt_virtual) 1316 bufmgr_gem->vma_count++; 1317 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1318} 1319 1320static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1321 drm_intel_bo_gem *bo_gem) 1322{ 1323 bufmgr_gem->vma_open++; 1324 DRMLISTDEL(&bo_gem->vma_list); 1325 if (bo_gem->mem_virtual) 1326 bufmgr_gem->vma_count--; 1327 if (bo_gem->wc_virtual) 1328 bufmgr_gem->vma_count--; 1329 if (bo_gem->gtt_virtual) 1330 bufmgr_gem->vma_count--; 1331 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1332} 1333 1334static void 1335drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1336{ 1337 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1338 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1339 struct drm_intel_gem_bo_bucket *bucket; 1340 int i; 1341 1342 /* Unreference all the target buffers */ 1343 for (i = 0; i < bo_gem->reloc_count; i++) { 1344 if (bo_gem->reloc_target_info[i].bo != bo) { 1345 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1346 reloc_target_info[i].bo, 1347 time); 1348 } 1349 } 1350 for (i = 0; i < bo_gem->softpin_target_count; i++) 1351 drm_intel_gem_bo_unreference_locked_timed(bo_gem->softpin_target[i], 1352 time); 1353 bo_gem->kflags = 0; 1354 bo_gem->reloc_count = 0; 1355 bo_gem->used_as_reloc_target = false; 1356 
bo_gem->softpin_target_count = 0; 1357 1358 DBG("bo_unreference final: %d (%s)\n", 1359 bo_gem->gem_handle, bo_gem->name); 1360 1361 /* release memory associated with this object */ 1362 if (bo_gem->reloc_target_info) { 1363 free(bo_gem->reloc_target_info); 1364 bo_gem->reloc_target_info = NULL; 1365 } 1366 if (bo_gem->relocs) { 1367 free(bo_gem->relocs); 1368 bo_gem->relocs = NULL; 1369 } 1370 if (bo_gem->softpin_target) { 1371 free(bo_gem->softpin_target); 1372 bo_gem->softpin_target = NULL; 1373 bo_gem->softpin_target_size = 0; 1374 } 1375 1376 /* Clear any left-over mappings */ 1377 if (bo_gem->map_count) { 1378 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); 1379 bo_gem->map_count = 0; 1380 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1381 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1382 } 1383 1384 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1385 /* Put the buffer into our internal cache for reuse if we can. */ 1386 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1387 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1388 I915_MADV_DONTNEED)) { 1389 bo_gem->free_time = time; 1390 1391 bo_gem->name = NULL; 1392 bo_gem->validate_index = -1; 1393 1394 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1395 } else { 1396 drm_intel_gem_bo_free(bo); 1397 } 1398} 1399 1400static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1401 time_t time) 1402{ 1403 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1404 1405 assert(atomic_read(&bo_gem->refcount) > 0); 1406 if (atomic_dec_and_test(&bo_gem->refcount)) 1407 drm_intel_gem_bo_unreference_final(bo, time); 1408} 1409 1410static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1411{ 1412 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1413 1414 assert(atomic_read(&bo_gem->refcount) > 0); 1415 1416 if (atomic_add_unless(&bo_gem->refcount, -1, 1)) { 1417 drm_intel_bufmgr_gem *bufmgr_gem = 1418 (drm_intel_bufmgr_gem *) bo->bufmgr; 1419 struct timespec time; 1420 1421 clock_gettime(CLOCK_MONOTONIC, &time); 1422 1423 pthread_mutex_lock(&bufmgr_gem->lock); 1424 1425 if (atomic_dec_and_test(&bo_gem->refcount)) { 1426 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1427 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1428 } 1429 1430 pthread_mutex_unlock(&bufmgr_gem->lock); 1431 } 1432} 1433 1434static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1435{ 1436 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1437 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1438 struct drm_i915_gem_set_domain set_domain; 1439 int ret; 1440 1441 if (bo_gem->is_userptr) { 1442 /* Return the same user ptr */ 1443 bo->virtual = bo_gem->user_virtual; 1444 return 0; 1445 } 1446 1447 pthread_mutex_lock(&bufmgr_gem->lock); 1448 1449 if (bo_gem->map_count++ == 0) 1450 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1451 1452 if (!bo_gem->mem_virtual) { 1453 struct drm_i915_gem_mmap mmap_arg; 1454 1455 DBG("bo_map: %d (%s), map_count=%d\n", 1456 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1457 1458 memclear(mmap_arg); 1459 mmap_arg.handle = bo_gem->gem_handle; 1460 mmap_arg.size = bo->size; 1461 ret = drmIoctl(bufmgr_gem->fd, 1462 DRM_IOCTL_I915_GEM_MMAP, 1463 &mmap_arg); 1464 if (ret != 0) { 1465 ret = -errno; 1466 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1467 __FILE__, __LINE__, bo_gem->gem_handle, 1468 bo_gem->name, strerror(errno)); 1469 if (--bo_gem->map_count == 0) 1470 drm_intel_gem_bo_close_vma(bufmgr_gem, 
bo_gem); 1471 pthread_mutex_unlock(&bufmgr_gem->lock); 1472 return ret; 1473 } 1474 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1475 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1476 } 1477 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1478 bo_gem->mem_virtual); 1479 bo->virtual = bo_gem->mem_virtual; 1480 1481 memclear(set_domain); 1482 set_domain.handle = bo_gem->gem_handle; 1483 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1484 if (write_enable) 1485 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1486 else 1487 set_domain.write_domain = 0; 1488 ret = drmIoctl(bufmgr_gem->fd, 1489 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1490 &set_domain); 1491 if (ret != 0) { 1492 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1493 __FILE__, __LINE__, bo_gem->gem_handle, 1494 strerror(errno)); 1495 } 1496 1497 if (write_enable) 1498 bo_gem->mapped_cpu_write = true; 1499 1500 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1501 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1502 pthread_mutex_unlock(&bufmgr_gem->lock); 1503 1504 return 0; 1505} 1506 1507static int 1508map_gtt(drm_intel_bo *bo) 1509{ 1510 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1511 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1512 int ret; 1513 1514 if (bo_gem->is_userptr) 1515 return -EINVAL; 1516 1517 if (bo_gem->map_count++ == 0) 1518 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1519 1520 /* Get a mapping of the buffer if we haven't before. */ 1521 if (bo_gem->gtt_virtual == NULL) { 1522 struct drm_i915_gem_mmap_gtt mmap_arg; 1523 1524 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1525 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1526 1527 memclear(mmap_arg); 1528 mmap_arg.handle = bo_gem->gem_handle; 1529 1530 /* Get the fake offset back... */ 1531 ret = drmIoctl(bufmgr_gem->fd, 1532 DRM_IOCTL_I915_GEM_MMAP_GTT, 1533 &mmap_arg); 1534 if (ret != 0) { 1535 ret = -errno; 1536 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1537 __FILE__, __LINE__, 1538 bo_gem->gem_handle, bo_gem->name, 1539 strerror(errno)); 1540 if (--bo_gem->map_count == 0) 1541 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1542 return ret; 1543 } 1544 1545 /* and mmap it */ 1546 ret = drmMap(bufmgr_gem->fd, mmap_arg.offset, bo->size, 1547 &bo_gem->gtt_virtual); 1548 if (ret) { 1549 bo_gem->gtt_virtual = NULL; 1550 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1551 __FILE__, __LINE__, 1552 bo_gem->gem_handle, bo_gem->name, 1553 strerror(errno)); 1554 if (--bo_gem->map_count == 0) 1555 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1556 return ret; 1557 } 1558 } 1559 1560 bo->virtual = bo_gem->gtt_virtual; 1561 1562 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1563 bo_gem->gtt_virtual); 1564 1565 return 0; 1566} 1567 1568int 1569drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1570{ 1571 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1572 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1573 struct drm_i915_gem_set_domain set_domain; 1574 int ret; 1575 1576 pthread_mutex_lock(&bufmgr_gem->lock); 1577 1578 ret = map_gtt(bo); 1579 if (ret) { 1580 pthread_mutex_unlock(&bufmgr_gem->lock); 1581 return ret; 1582 } 1583 1584 /* Now move it to the GTT domain so that the GPU and CPU 1585 * caches are flushed and the GPU isn't actively using the 1586 * buffer. 
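	 *
	 * (A typical write path through this entry point, for illustration:
	 *
	 *   drm_intel_gem_bo_map_gtt(bo);
	 *   memcpy(bo->virtual, data, len);
	 *   drm_intel_gem_bo_unmap_gtt(bo);
	 *
	 * where "data" and "len" are the caller's own; the set-domain below
	 * is what makes such a copy safe against concurrent GPU access.)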
1587 * 1588 * The pagefault handler does this domain change for us when 1589 * it has unbound the BO from the GTT, but it's up to us to 1590 * tell it when we're about to use things if we had done 1591 * rendering and it still happens to be bound to the GTT. 1592 */ 1593 memclear(set_domain); 1594 set_domain.handle = bo_gem->gem_handle; 1595 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1596 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1597 ret = drmIoctl(bufmgr_gem->fd, 1598 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1599 &set_domain); 1600 if (ret != 0) { 1601 DBG("%s:%d: Error setting domain %d: %s\n", 1602 __FILE__, __LINE__, bo_gem->gem_handle, 1603 strerror(errno)); 1604 } 1605 1606 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1607 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1608 pthread_mutex_unlock(&bufmgr_gem->lock); 1609 1610 return 0; 1611} 1612 1613/** 1614 * Performs a mapping of the buffer object like the normal GTT 1615 * mapping, but avoids waiting for the GPU to be done reading from or 1616 * rendering to the buffer. 1617 * 1618 * This is used in the implementation of GL_ARB_map_buffer_range: The 1619 * user asks to create a buffer, then does a mapping, fills some 1620 * space, runs a drawing command, then asks to map it again without 1621 * synchronizing because it guarantees that it won't write over the 1622 * data that the GPU is busy using (or, more specifically, that if it 1623 * does write over the data, it acknowledges that rendering is 1624 * undefined). 1625 */ 1626 1627int 1628drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo) 1629{ 1630 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1631#ifdef HAVE_VALGRIND 1632 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1633#endif 1634 int ret; 1635 1636 /* If the CPU cache isn't coherent with the GTT, then use a 1637 * regular synchronized mapping. The problem is that we don't 1638 * track where the buffer was last used on the CPU side in 1639 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so 1640 * we would potentially corrupt the buffer even when the user 1641 * does reasonable things. 1642 */ 1643 if (!bufmgr_gem->has_llc) 1644 return drm_intel_gem_bo_map_gtt(bo); 1645 1646 pthread_mutex_lock(&bufmgr_gem->lock); 1647 1648 ret = map_gtt(bo); 1649 if (ret == 0) { 1650 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1651 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size)); 1652 } 1653 1654 pthread_mutex_unlock(&bufmgr_gem->lock); 1655 1656 return ret; 1657} 1658 1659static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1660{ 1661 drm_intel_bufmgr_gem *bufmgr_gem; 1662 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1663 int ret = 0; 1664 1665 if (bo == NULL) 1666 return 0; 1667 1668 if (bo_gem->is_userptr) 1669 return 0; 1670 1671 bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1672 1673 pthread_mutex_lock(&bufmgr_gem->lock); 1674 1675 if (bo_gem->map_count <= 0) { 1676 DBG("attempted to unmap an unmapped bo\n"); 1677 pthread_mutex_unlock(&bufmgr_gem->lock); 1678 /* Preserve the old behaviour of just treating this as a 1679 * no-op rather than reporting the error. 1680 */ 1681 return 0; 1682 } 1683 1684 if (bo_gem->mapped_cpu_write) { 1685 struct drm_i915_gem_sw_finish sw_finish; 1686 1687 /* Cause a flush to happen if the buffer's pinned for 1688 * scanout, so the results show up in a timely manner. 1689 * Unlike GTT set domains, this only does work if the 1690 * buffer should be scanout-related. 
1691 */ 1692 memclear(sw_finish); 1693 sw_finish.handle = bo_gem->gem_handle; 1694 ret = drmIoctl(bufmgr_gem->fd, 1695 DRM_IOCTL_I915_GEM_SW_FINISH, 1696 &sw_finish); 1697 ret = ret == -1 ? -errno : 0; 1698 1699 bo_gem->mapped_cpu_write = false; 1700 } 1701 1702 /* We need to unmap after every innovation as we cannot track 1703 * an open vma for every bo as that will exhaust the system 1704 * limits and cause later failures. 1705 */ 1706 if (--bo_gem->map_count == 0) { 1707 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1708 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1709 bo->virtual = NULL; 1710 } 1711 pthread_mutex_unlock(&bufmgr_gem->lock); 1712 1713 return ret; 1714} 1715 1716int 1717drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1718{ 1719 return drm_intel_gem_bo_unmap(bo); 1720} 1721 1722static int 1723drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1724 unsigned long size, const void *data) 1725{ 1726 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1727 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1728 struct drm_i915_gem_pwrite pwrite; 1729 int ret; 1730 1731 if (bo_gem->is_userptr) 1732 return -EINVAL; 1733 1734 memclear(pwrite); 1735 pwrite.handle = bo_gem->gem_handle; 1736 pwrite.offset = offset; 1737 pwrite.size = size; 1738 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1739 ret = drmIoctl(bufmgr_gem->fd, 1740 DRM_IOCTL_I915_GEM_PWRITE, 1741 &pwrite); 1742 if (ret != 0) { 1743 ret = -errno; 1744 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1745 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1746 (int)size, strerror(errno)); 1747 } 1748 1749 return ret; 1750} 1751 1752static int 1753drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1754{ 1755 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1756 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1757 int ret; 1758 1759 memclear(get_pipe_from_crtc_id); 1760 get_pipe_from_crtc_id.crtc_id = crtc_id; 1761 ret = drmIoctl(bufmgr_gem->fd, 1762 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1763 &get_pipe_from_crtc_id); 1764 if (ret != 0) { 1765 /* We return -1 here to signal that we don't 1766 * know which pipe is associated with this crtc. 1767 * This lets the caller know that this information 1768 * isn't available; using the wrong pipe for 1769 * vblank waiting can cause the chipset to lock up 1770 */ 1771 return -1; 1772 } 1773 1774 return get_pipe_from_crtc_id.pipe; 1775} 1776 1777static int 1778drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1779 unsigned long size, void *data) 1780{ 1781 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1782 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1783 struct drm_i915_gem_pread pread; 1784 int ret; 1785 1786 if (bo_gem->is_userptr) 1787 return -EINVAL; 1788 1789 memclear(pread); 1790 pread.handle = bo_gem->gem_handle; 1791 pread.offset = offset; 1792 pread.size = size; 1793 pread.data_ptr = (uint64_t) (uintptr_t) data; 1794 ret = drmIoctl(bufmgr_gem->fd, 1795 DRM_IOCTL_I915_GEM_PREAD, 1796 &pread); 1797 if (ret != 0) { 1798 ret = -errno; 1799 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1800 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1801 (int)size, strerror(errno)); 1802 } 1803 1804 return ret; 1805} 1806 1807/** Waits for all GPU rendering with the object to have completed. 
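 *
 * This works by forcing a switch to the GTT domain, which blocks until any
 * outstanding rendering has been flushed.  Callers that need a timeout can
 * use drm_intel_gem_bo_wait() below instead; a negative timeout such as
 * drm_intel_gem_bo_wait(bo, -1) requests an effectively infinite wait.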
 */
static void
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
{
	drm_intel_gem_bo_start_gtt_access(bo, 1);
}

/**
 * Waits on a BO for the given amount of time.
 *
 * @bo: buffer object to wait for
 * @timeout_ns: amount of time to wait in nanoseconds.
 *   If the value is less than 0, an infinite wait will occur.
 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
 * object has completed within the allotted time.  Otherwise a negative return
 * value describes the error.  Of particular interest is -ETIME when the wait
 * has failed to yield the desired result.
 *
 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time.  Another subtle
 * difference is that the internal locking semantics differ (this variant does
 * not hold the lock for the duration of the wait), which makes the wait
 * subject to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call.  The wait does
 * not guard against the buffer being re-issued by another thread, or via a
 * flinked handle.  Userspace must make sure this race does not occur if such
 * precision is important.
 *
 * Note that some kernels have broken the promise of an infinite wait for
 * negative values; upgrade to the latest stable kernel if this is the case.
 */
int
drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_wait wait;
	int ret;

	if (!bufmgr_gem->has_wait_timeout) {
		DBG("%s:%d: Timed wait is not supported. Falling back to "
		    "infinite wait\n", __FILE__, __LINE__);
		if (timeout_ns) {
			drm_intel_gem_bo_wait_rendering(bo);
			return 0;
		} else {
			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
		}
	}

	memclear(wait);
	wait.bo_handle = bo_gem->gem_handle;
	wait.timeout_ns = timeout_ns;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	if (ret == -1)
		return -errno;

	return ret;
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	memclear(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ?
I915_GEM_DOMAIN_GTT : 0; 1889 ret = drmIoctl(bufmgr_gem->fd, 1890 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1891 &set_domain); 1892 if (ret != 0) { 1893 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1894 __FILE__, __LINE__, bo_gem->gem_handle, 1895 set_domain.read_domains, set_domain.write_domain, 1896 strerror(errno)); 1897 } 1898} 1899 1900static void 1901drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1902{ 1903 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1904 struct drm_gem_close close_bo; 1905 int i, ret; 1906 1907 free(bufmgr_gem->exec2_objects); 1908 free(bufmgr_gem->exec_objects); 1909 free(bufmgr_gem->exec_bos); 1910 1911 pthread_mutex_destroy(&bufmgr_gem->lock); 1912 1913 /* Free any cached buffer objects we were going to reuse */ 1914 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1915 struct drm_intel_gem_bo_bucket *bucket = 1916 &bufmgr_gem->cache_bucket[i]; 1917 drm_intel_bo_gem *bo_gem; 1918 1919 while (!DRMLISTEMPTY(&bucket->head)) { 1920 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1921 bucket->head.next, head); 1922 DRMLISTDEL(&bo_gem->head); 1923 1924 drm_intel_gem_bo_free(&bo_gem->bo); 1925 } 1926 } 1927 1928 /* Release userptr bo kept hanging around for optimisation. */ 1929 if (bufmgr_gem->userptr_active.ptr) { 1930 memclear(close_bo); 1931 close_bo.handle = bufmgr_gem->userptr_active.handle; 1932 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo); 1933 free(bufmgr_gem->userptr_active.ptr); 1934 if (ret) 1935 fprintf(stderr, 1936 "Failed to release test userptr object! (%d) " 1937 "i915 kernel driver may not be sane!\n", errno); 1938 } 1939 1940 free(bufmgr); 1941} 1942 1943/** 1944 * Adds the target buffer to the validation list and adds the relocation 1945 * to the reloc_buffer's relocation list. 1946 * 1947 * The relocation entry at the given offset must already contain the 1948 * precomputed relocation value, because the kernel will optimize out 1949 * the relocation entry write when the buffer hasn't moved from the 1950 * last known offset in target_bo. 1951 */ 1952static int 1953do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1954 drm_intel_bo *target_bo, uint32_t target_offset, 1955 uint32_t read_domains, uint32_t write_domain, 1956 bool need_fence) 1957{ 1958 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1959 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1960 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1961 bool fenced_command; 1962 1963 if (bo_gem->has_error) 1964 return -ENOMEM; 1965 1966 if (target_bo_gem->has_error) { 1967 bo_gem->has_error = true; 1968 return -ENOMEM; 1969 } 1970 1971 /* We never use HW fences for rendering on 965+ */ 1972 if (bufmgr_gem->gen >= 4) 1973 need_fence = false; 1974 1975 fenced_command = need_fence; 1976 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1977 need_fence = false; 1978 1979 /* Create a new relocation list if needed */ 1980 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1981 return -ENOMEM; 1982 1983 /* Check overflow */ 1984 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1985 1986 /* Check args */ 1987 assert(offset <= bo->size - 4); 1988 assert((write_domain & (write_domain - 1)) == 0); 1989 1990 /* An object needing a fence is a tiled buffer, so it won't have 1991 * relocs to other buffers. 
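	 *
	 * (For reference, relocations normally reach this function through the
	 * public drm_intel_bo_emit_reloc() entry point, e.g.
	 *
	 *   drm_intel_bo_emit_reloc(batch_bo, cur_offset, target_bo, 0,
	 *                           I915_GEM_DOMAIN_RENDER,
	 *                           I915_GEM_DOMAIN_RENDER);
	 *
	 * where "batch_bo", "cur_offset" and "target_bo" are the caller's own
	 * names, not anything defined in this file.)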
1992 */ 1993 if (need_fence) { 1994 assert(target_bo_gem->reloc_count == 0); 1995 target_bo_gem->reloc_tree_fences = 1; 1996 } 1997 1998 /* Make sure that we're not adding a reloc to something whose size has 1999 * already been accounted for. 2000 */ 2001 assert(!bo_gem->used_as_reloc_target); 2002 if (target_bo_gem != bo_gem) { 2003 target_bo_gem->used_as_reloc_target = true; 2004 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 2005 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 2006 } 2007 2008 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 2009 if (target_bo != bo) 2010 drm_intel_gem_bo_reference(target_bo); 2011 if (fenced_command) 2012 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 2013 DRM_INTEL_RELOC_FENCE; 2014 else 2015 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 2016 2017 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 2018 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 2019 bo_gem->relocs[bo_gem->reloc_count].target_handle = 2020 target_bo_gem->gem_handle; 2021 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 2022 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 2023 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64; 2024 bo_gem->reloc_count++; 2025 2026 return 0; 2027} 2028 2029static void 2030drm_intel_gem_bo_use_48b_address_range(drm_intel_bo *bo, uint32_t enable) 2031{ 2032 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2033 2034 if (enable) 2035 bo_gem->kflags |= EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 2036 else 2037 bo_gem->kflags &= ~EXEC_OBJECT_SUPPORTS_48B_ADDRESS; 2038} 2039 2040static int 2041drm_intel_gem_bo_add_softpin_target(drm_intel_bo *bo, drm_intel_bo *target_bo) 2042{ 2043 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2044 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2045 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2046 if (bo_gem->has_error) 2047 return -ENOMEM; 2048 2049 if (target_bo_gem->has_error) { 2050 bo_gem->has_error = true; 2051 return -ENOMEM; 2052 } 2053 2054 if (!(target_bo_gem->kflags & EXEC_OBJECT_PINNED)) 2055 return -EINVAL; 2056 if (target_bo_gem == bo_gem) 2057 return -EINVAL; 2058 2059 if (bo_gem->softpin_target_count == bo_gem->softpin_target_size) { 2060 int new_size = bo_gem->softpin_target_size * 2; 2061 if (new_size == 0) 2062 new_size = bufmgr_gem->max_relocs; 2063 2064 bo_gem->softpin_target = realloc(bo_gem->softpin_target, new_size * 2065 sizeof(drm_intel_bo *)); 2066 if (!bo_gem->softpin_target) 2067 return -ENOMEM; 2068 2069 bo_gem->softpin_target_size = new_size; 2070 } 2071 bo_gem->softpin_target[bo_gem->softpin_target_count] = target_bo; 2072 drm_intel_gem_bo_reference(target_bo); 2073 bo_gem->softpin_target_count++; 2074 2075 return 0; 2076} 2077 2078static int 2079drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 2080 drm_intel_bo *target_bo, uint32_t target_offset, 2081 uint32_t read_domains, uint32_t write_domain) 2082{ 2083 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2084 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *)target_bo; 2085 2086 if (target_bo_gem->kflags & EXEC_OBJECT_PINNED) 2087 return drm_intel_gem_bo_add_softpin_target(bo, target_bo); 2088 else 2089 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2090 read_domains, write_domain, 2091 !bufmgr_gem->fenced_relocs); 2092} 2093 2094static int 2095drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 
2096 drm_intel_bo *target_bo, 2097 uint32_t target_offset, 2098 uint32_t read_domains, uint32_t write_domain) 2099{ 2100 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 2101 read_domains, write_domain, true); 2102} 2103 2104int 2105drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 2106{ 2107 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2108 2109 return bo_gem->reloc_count; 2110} 2111 2112/** 2113 * Removes existing relocation entries in the BO after "start". 2114 * 2115 * This allows a user to avoid a two-step process for state setup with 2116 * counting up all the buffer objects and doing a 2117 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 2118 * relocations for the state setup. Instead, save the state of the 2119 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 2120 * state, and then check if it still fits in the aperture. 2121 * 2122 * Any further drm_intel_bufmgr_check_aperture_space() queries 2123 * involving this buffer in the tree are undefined after this call. 2124 * 2125 * This also removes all softpinned targets being referenced by the BO. 2126 */ 2127void 2128drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 2129{ 2130 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2131 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2132 int i; 2133 struct timespec time; 2134 2135 clock_gettime(CLOCK_MONOTONIC, &time); 2136 2137 assert(bo_gem->reloc_count >= start); 2138 2139 /* Unreference the cleared target buffers */ 2140 pthread_mutex_lock(&bufmgr_gem->lock); 2141 2142 for (i = start; i < bo_gem->reloc_count; i++) { 2143 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 2144 if (&target_bo_gem->bo != bo) { 2145 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 2146 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 2147 time.tv_sec); 2148 } 2149 } 2150 bo_gem->reloc_count = start; 2151 2152 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2153 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->softpin_target[i]; 2154 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, time.tv_sec); 2155 } 2156 bo_gem->softpin_target_count = 0; 2157 2158 pthread_mutex_unlock(&bufmgr_gem->lock); 2159 2160} 2161 2162/** 2163 * Walk the tree of relocations rooted at BO and accumulate the list of 2164 * validations to be performed and update the relocation buffers with 2165 * index values into the validation list. 2166 */ 2167static void 2168drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 2169{ 2170 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2171 int i; 2172 2173 if (bo_gem->relocs == NULL) 2174 return; 2175 2176 for (i = 0; i < bo_gem->reloc_count; i++) { 2177 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2178 2179 if (target_bo == bo) 2180 continue; 2181 2182 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2183 2184 /* Continue walking the tree depth-first. 
*/ 2185 drm_intel_gem_bo_process_reloc(target_bo); 2186 2187 /* Add the target to the validate list */ 2188 drm_intel_add_validate_buffer(target_bo); 2189 } 2190} 2191 2192static void 2193drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 2194{ 2195 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2196 int i; 2197 2198 if (bo_gem->relocs == NULL && bo_gem->softpin_target == NULL) 2199 return; 2200 2201 for (i = 0; i < bo_gem->reloc_count; i++) { 2202 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 2203 int need_fence; 2204 2205 if (target_bo == bo) 2206 continue; 2207 2208 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2209 2210 /* Continue walking the tree depth-first. */ 2211 drm_intel_gem_bo_process_reloc2(target_bo); 2212 2213 need_fence = (bo_gem->reloc_target_info[i].flags & 2214 DRM_INTEL_RELOC_FENCE); 2215 2216 /* Add the target to the validate list */ 2217 drm_intel_add_validate_buffer2(target_bo, need_fence); 2218 } 2219 2220 for (i = 0; i < bo_gem->softpin_target_count; i++) { 2221 drm_intel_bo *target_bo = bo_gem->softpin_target[i]; 2222 2223 if (target_bo == bo) 2224 continue; 2225 2226 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 2227 drm_intel_gem_bo_process_reloc2(target_bo); 2228 drm_intel_add_validate_buffer2(target_bo, false); 2229 } 2230} 2231 2232 2233static void 2234drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 2235{ 2236 int i; 2237 2238 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2239 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2240 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2241 2242 /* Update the buffer offset */ 2243 if (bufmgr_gem->exec_objects[i].offset != bo->offset64) { 2244 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2245 bo_gem->gem_handle, bo_gem->name, 2246 upper_32_bits(bo->offset64), 2247 lower_32_bits(bo->offset64), 2248 upper_32_bits(bufmgr_gem->exec_objects[i].offset), 2249 lower_32_bits(bufmgr_gem->exec_objects[i].offset)); 2250 bo->offset64 = bufmgr_gem->exec_objects[i].offset; 2251 bo->offset = bufmgr_gem->exec_objects[i].offset; 2252 } 2253 } 2254} 2255 2256static void 2257drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 2258{ 2259 int i; 2260 2261 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2262 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2263 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2264 2265 /* Update the buffer offset */ 2266 if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) { 2267 /* If we're seeing softpinned object here it means that the kernel 2268 * has relocated our object... 
Indicating a programming error 2269 */ 2270 assert(!(bo_gem->kflags & EXEC_OBJECT_PINNED)); 2271 DBG("BO %d (%s) migrated: 0x%08x %08x -> 0x%08x %08x\n", 2272 bo_gem->gem_handle, bo_gem->name, 2273 upper_32_bits(bo->offset64), 2274 lower_32_bits(bo->offset64), 2275 upper_32_bits(bufmgr_gem->exec2_objects[i].offset), 2276 lower_32_bits(bufmgr_gem->exec2_objects[i].offset)); 2277 bo->offset64 = bufmgr_gem->exec2_objects[i].offset; 2278 bo->offset = bufmgr_gem->exec2_objects[i].offset; 2279 } 2280 } 2281} 2282 2283void 2284drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2285 int x1, int y1, int width, int height, 2286 enum aub_dump_bmp_format format, 2287 int pitch, int offset) 2288{ 2289} 2290 2291static int 2292drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2293 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2294{ 2295 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2296 struct drm_i915_gem_execbuffer execbuf; 2297 int ret, i; 2298 2299 if (to_bo_gem(bo)->has_error) 2300 return -ENOMEM; 2301 2302 pthread_mutex_lock(&bufmgr_gem->lock); 2303 /* Update indices and set up the validate list. */ 2304 drm_intel_gem_bo_process_reloc(bo); 2305 2306 /* Add the batch buffer to the validation list. There are no 2307 * relocations pointing to it. 2308 */ 2309 drm_intel_add_validate_buffer(bo); 2310 2311 memclear(execbuf); 2312 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2313 execbuf.buffer_count = bufmgr_gem->exec_count; 2314 execbuf.batch_start_offset = 0; 2315 execbuf.batch_len = used; 2316 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2317 execbuf.num_cliprects = num_cliprects; 2318 execbuf.DR1 = 0; 2319 execbuf.DR4 = DR4; 2320 2321 ret = drmIoctl(bufmgr_gem->fd, 2322 DRM_IOCTL_I915_GEM_EXECBUFFER, 2323 &execbuf); 2324 if (ret != 0) { 2325 ret = -errno; 2326 if (errno == ENOSPC) { 2327 DBG("Execbuffer fails to pin. " 2328 "Estimate: %u. Actual: %u. 
Available: %u\n", 2329 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2330 bufmgr_gem-> 2331 exec_count), 2332 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2333 bufmgr_gem-> 2334 exec_count), 2335 (unsigned int)bufmgr_gem->gtt_size); 2336 } 2337 } 2338 drm_intel_update_buffer_offsets(bufmgr_gem); 2339 2340 if (bufmgr_gem->bufmgr.debug) 2341 drm_intel_gem_dump_validation_list(bufmgr_gem); 2342 2343 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2344 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2345 2346 bo_gem->idle = false; 2347 2348 /* Disconnect the buffer from the validate list */ 2349 bo_gem->validate_index = -1; 2350 bufmgr_gem->exec_bos[i] = NULL; 2351 } 2352 bufmgr_gem->exec_count = 0; 2353 pthread_mutex_unlock(&bufmgr_gem->lock); 2354 2355 return ret; 2356} 2357 2358static int 2359do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2360 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2361 int in_fence, int *out_fence, 2362 unsigned int flags) 2363{ 2364 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2365 struct drm_i915_gem_execbuffer2 execbuf; 2366 int ret = 0; 2367 int i; 2368 2369 if (to_bo_gem(bo)->has_error) 2370 return -ENOMEM; 2371 2372 switch (flags & 0x7) { 2373 default: 2374 return -EINVAL; 2375 case I915_EXEC_BLT: 2376 if (!bufmgr_gem->has_blt) 2377 return -EINVAL; 2378 break; 2379 case I915_EXEC_BSD: 2380 if (!bufmgr_gem->has_bsd) 2381 return -EINVAL; 2382 break; 2383 case I915_EXEC_VEBOX: 2384 if (!bufmgr_gem->has_vebox) 2385 return -EINVAL; 2386 break; 2387 case I915_EXEC_RENDER: 2388 case I915_EXEC_DEFAULT: 2389 break; 2390 } 2391 2392 pthread_mutex_lock(&bufmgr_gem->lock); 2393 /* Update indices and set up the validate list. */ 2394 drm_intel_gem_bo_process_reloc2(bo); 2395 2396 /* Add the batch buffer to the validation list. There are no relocations 2397 * pointing to it. 2398 */ 2399 drm_intel_add_validate_buffer2(bo, 0); 2400 2401 memclear(execbuf); 2402 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2403 execbuf.buffer_count = bufmgr_gem->exec_count; 2404 execbuf.batch_start_offset = 0; 2405 execbuf.batch_len = used; 2406 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2407 execbuf.num_cliprects = num_cliprects; 2408 execbuf.DR1 = 0; 2409 execbuf.DR4 = DR4; 2410 execbuf.flags = flags; 2411 if (ctx == NULL) 2412 i915_execbuffer2_set_context_id(execbuf, 0); 2413 else 2414 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2415 execbuf.rsvd2 = 0; 2416 if (in_fence != -1) { 2417 execbuf.rsvd2 = in_fence; 2418 execbuf.flags |= I915_EXEC_FENCE_IN; 2419 } 2420 if (out_fence != NULL) { 2421 *out_fence = -1; 2422 execbuf.flags |= I915_EXEC_FENCE_OUT; 2423 } 2424 2425 if (bufmgr_gem->no_exec) 2426 goto skip_execution; 2427 2428 ret = drmIoctl(bufmgr_gem->fd, 2429 DRM_IOCTL_I915_GEM_EXECBUFFER2_WR, 2430 &execbuf); 2431 if (ret != 0) { 2432 ret = -errno; 2433 if (ret == -ENOSPC) { 2434 DBG("Execbuffer fails to pin. " 2435 "Estimate: %u. Actual: %u. 
Available: %u\n", 2436 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2437 bufmgr_gem->exec_count), 2438 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2439 bufmgr_gem->exec_count), 2440 (unsigned int) bufmgr_gem->gtt_size); 2441 } 2442 } 2443 drm_intel_update_buffer_offsets2(bufmgr_gem); 2444 2445 if (ret == 0 && out_fence != NULL) 2446 *out_fence = execbuf.rsvd2 >> 32; 2447 2448skip_execution: 2449 if (bufmgr_gem->bufmgr.debug) 2450 drm_intel_gem_dump_validation_list(bufmgr_gem); 2451 2452 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2453 drm_intel_bo_gem *bo_gem = to_bo_gem(bufmgr_gem->exec_bos[i]); 2454 2455 bo_gem->idle = false; 2456 2457 /* Disconnect the buffer from the validate list */ 2458 bo_gem->validate_index = -1; 2459 bufmgr_gem->exec_bos[i] = NULL; 2460 } 2461 bufmgr_gem->exec_count = 0; 2462 pthread_mutex_unlock(&bufmgr_gem->lock); 2463 2464 return ret; 2465} 2466 2467static int 2468drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2469 drm_clip_rect_t *cliprects, int num_cliprects, 2470 int DR4) 2471{ 2472 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2473 -1, NULL, I915_EXEC_RENDER); 2474} 2475 2476static int 2477drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2478 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2479 unsigned int flags) 2480{ 2481 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2482 -1, NULL, flags); 2483} 2484 2485int 2486drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2487 int used, unsigned int flags) 2488{ 2489 return do_exec2(bo, used, ctx, NULL, 0, 0, -1, NULL, flags); 2490} 2491 2492int 2493drm_intel_gem_bo_fence_exec(drm_intel_bo *bo, 2494 drm_intel_context *ctx, 2495 int used, 2496 int in_fence, 2497 int *out_fence, 2498 unsigned int flags) 2499{ 2500 return do_exec2(bo, used, ctx, NULL, 0, 0, in_fence, out_fence, flags); 2501} 2502 2503static int 2504drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2505{ 2506 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2507 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2508 struct drm_i915_gem_pin pin; 2509 int ret; 2510 2511 memclear(pin); 2512 pin.handle = bo_gem->gem_handle; 2513 pin.alignment = alignment; 2514 2515 ret = drmIoctl(bufmgr_gem->fd, 2516 DRM_IOCTL_I915_GEM_PIN, 2517 &pin); 2518 if (ret != 0) 2519 return -errno; 2520 2521 bo->offset64 = pin.offset; 2522 bo->offset = pin.offset; 2523 return 0; 2524} 2525 2526static int 2527drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2528{ 2529 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2530 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2531 struct drm_i915_gem_unpin unpin; 2532 int ret; 2533 2534 memclear(unpin); 2535 unpin.handle = bo_gem->gem_handle; 2536 2537 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2538 if (ret != 0) 2539 return -errno; 2540 2541 return 0; 2542} 2543 2544static int 2545drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2546 uint32_t tiling_mode, 2547 uint32_t stride) 2548{ 2549 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2550 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2551 struct drm_i915_gem_set_tiling set_tiling; 2552 int ret; 2553 2554 if (bo_gem->global_name == 0 && 2555 tiling_mode == bo_gem->tiling_mode && 2556 stride == bo_gem->stride) 2557 return 0; 2558 2559 memset(&set_tiling, 0, sizeof(set_tiling)); 2560 do { 2561 /* set_tiling is slightly broken and overwrites the 2562 * input on the error path, so we 
have to open code 2563 * rmIoctl. 2564 */ 2565 set_tiling.handle = bo_gem->gem_handle; 2566 set_tiling.tiling_mode = tiling_mode; 2567 set_tiling.stride = stride; 2568 2569 ret = ioctl(bufmgr_gem->fd, 2570 DRM_IOCTL_I915_GEM_SET_TILING, 2571 &set_tiling); 2572 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 2573 if (ret == -1) 2574 return -errno; 2575 2576 bo_gem->tiling_mode = set_tiling.tiling_mode; 2577 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 2578 bo_gem->stride = set_tiling.stride; 2579 return 0; 2580} 2581 2582static int 2583drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2584 uint32_t stride) 2585{ 2586 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2587 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2588 int ret; 2589 2590 /* Tiling with userptr surfaces is not supported 2591 * on all hardware so refuse it for time being. 2592 */ 2593 if (bo_gem->is_userptr) 2594 return -EINVAL; 2595 2596 /* Linear buffers have no stride. By ensuring that we only ever use 2597 * stride 0 with linear buffers, we simplify our code. 2598 */ 2599 if (*tiling_mode == I915_TILING_NONE) 2600 stride = 0; 2601 2602 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 2603 if (ret == 0) 2604 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2605 2606 *tiling_mode = bo_gem->tiling_mode; 2607 return ret; 2608} 2609 2610static int 2611drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2612 uint32_t * swizzle_mode) 2613{ 2614 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2615 2616 *tiling_mode = bo_gem->tiling_mode; 2617 *swizzle_mode = bo_gem->swizzle_mode; 2618 return 0; 2619} 2620 2621static int 2622drm_intel_gem_bo_set_softpin_offset(drm_intel_bo *bo, uint64_t offset) 2623{ 2624 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2625 2626 bo->offset64 = offset; 2627 bo->offset = offset; 2628 bo_gem->kflags |= EXEC_OBJECT_PINNED; 2629 2630 return 0; 2631} 2632 2633drm_intel_bo * 2634drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 2635{ 2636 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2637 int ret; 2638 uint32_t handle; 2639 drm_intel_bo_gem *bo_gem; 2640 struct drm_i915_gem_get_tiling get_tiling; 2641 2642 pthread_mutex_lock(&bufmgr_gem->lock); 2643 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 2644 if (ret) { 2645 DBG("create_from_prime: failed to obtain handle from fd: %s\n", strerror(errno)); 2646 pthread_mutex_unlock(&bufmgr_gem->lock); 2647 return NULL; 2648 } 2649 2650 /* 2651 * See if the kernel has already returned this buffer to us. Just as 2652 * for named buffers, we must not create two bo's pointing at the same 2653 * kernel object 2654 */ 2655 HASH_FIND(handle_hh, bufmgr_gem->handle_table, 2656 &handle, sizeof(handle), bo_gem); 2657 if (bo_gem) { 2658 drm_intel_gem_bo_reference(&bo_gem->bo); 2659 goto out; 2660 } 2661 2662 bo_gem = calloc(1, sizeof(*bo_gem)); 2663 if (!bo_gem) 2664 goto out; 2665 2666 atomic_set(&bo_gem->refcount, 1); 2667 DRMINITLISTHEAD(&bo_gem->vma_list); 2668 2669 /* Determine size of bo. The fd-to-handle ioctl really should 2670 * return the size, but it doesn't. If we have kernel 3.12 or 2671 * later, we can lseek on the prime fd to get the size. Older 2672 * kernels will just fail, in which case we fall back to the 2673 * provided (estimated or guess size). 
*/ 2674 ret = lseek(prime_fd, 0, SEEK_END); 2675 if (ret != -1) 2676 bo_gem->bo.size = ret; 2677 else 2678 bo_gem->bo.size = size; 2679 2680 bo_gem->bo.handle = handle; 2681 bo_gem->bo.bufmgr = bufmgr; 2682 2683 bo_gem->gem_handle = handle; 2684 HASH_ADD(handle_hh, bufmgr_gem->handle_table, 2685 gem_handle, sizeof(bo_gem->gem_handle), bo_gem); 2686 2687 bo_gem->name = "prime"; 2688 bo_gem->validate_index = -1; 2689 bo_gem->reloc_tree_fences = 0; 2690 bo_gem->used_as_reloc_target = false; 2691 bo_gem->has_error = false; 2692 bo_gem->reusable = false; 2693 2694 memclear(get_tiling); 2695 get_tiling.handle = bo_gem->gem_handle; 2696 if (drmIoctl(bufmgr_gem->fd, 2697 DRM_IOCTL_I915_GEM_GET_TILING, 2698 &get_tiling)) 2699 goto err; 2700 2701 bo_gem->tiling_mode = get_tiling.tiling_mode; 2702 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 2703 /* XXX stride is unknown */ 2704 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem, 0); 2705 2706out: 2707 pthread_mutex_unlock(&bufmgr_gem->lock); 2708 return &bo_gem->bo; 2709 2710err: 2711 drm_intel_gem_bo_free(&bo_gem->bo); 2712 pthread_mutex_unlock(&bufmgr_gem->lock); 2713 return NULL; 2714} 2715 2716int 2717drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 2718{ 2719 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2720 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2721 2722 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 2723 DRM_CLOEXEC, prime_fd) != 0) 2724 return -errno; 2725 2726 bo_gem->reusable = false; 2727 2728 return 0; 2729} 2730 2731static int 2732drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 2733{ 2734 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2735 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2736 2737 if (!bo_gem->global_name) { 2738 struct drm_gem_flink flink; 2739 2740 memclear(flink); 2741 flink.handle = bo_gem->gem_handle; 2742 if (drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink)) 2743 return -errno; 2744 2745 pthread_mutex_lock(&bufmgr_gem->lock); 2746 if (!bo_gem->global_name) { 2747 bo_gem->global_name = flink.name; 2748 bo_gem->reusable = false; 2749 2750 HASH_ADD(name_hh, bufmgr_gem->name_table, 2751 global_name, sizeof(bo_gem->global_name), 2752 bo_gem); 2753 } 2754 pthread_mutex_unlock(&bufmgr_gem->lock); 2755 } 2756 2757 *name = bo_gem->global_name; 2758 return 0; 2759} 2760 2761/** 2762 * Enables unlimited caching of buffer objects for reuse. 2763 * 2764 * This is potentially very memory expensive, as the cache at each bucket 2765 * size is only bounded by how many buffers of that size we've managed to have 2766 * in flight at once. 2767 */ 2768void 2769drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 2770{ 2771 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2772 2773 bufmgr_gem->bo_reuse = true; 2774} 2775 2776/** 2777 * Disables implicit synchronisation before executing the bo 2778 * 2779 * This will cause rendering corruption unless you correctly manage explicit 2780 * fences for all rendering involving this buffer - including use by others. 2781 * Disabling the implicit serialisation is only required if that serialisation 2782 * is too coarse (for example, you have split the buffer into many 2783 * non-overlapping regions and are sharing the whole buffer between concurrent 2784 * independent command streams). 
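 *
 * A minimal sketch of explicit-fence use, assuming the caller already has a
 * context and a built batch (bufmgr, shared_bo, batch, ctx, used, in_fence
 * and out_fence below are illustrative caller-side names, not part of this
 * API):
 *
 *	if (drm_intel_bufmgr_gem_can_disable_implicit_sync(bufmgr)) {
 *		drm_intel_gem_bo_disable_implicit_sync(shared_bo);
 *		drm_intel_gem_bo_fence_exec(batch, ctx, used,
 *					    in_fence, &out_fence,
 *					    I915_EXEC_RENDER);
 *	}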
2785 * 2786 * Note the kernel must advertise support via I915_PARAM_HAS_EXEC_ASYNC, 2787 * which can be checked using drm_intel_bufmgr_can_disable_implicit_sync, 2788 * or subsequent execbufs involving the bo will generate EINVAL. 2789 */ 2790void 2791drm_intel_gem_bo_disable_implicit_sync(drm_intel_bo *bo) 2792{ 2793 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2794 2795 bo_gem->kflags |= EXEC_OBJECT_ASYNC; 2796} 2797 2798/** 2799 * Enables implicit synchronisation before executing the bo 2800 * 2801 * This is the default behaviour of the kernel, to wait upon prior writes 2802 * completing on the object before rendering with it, or to wait for prior 2803 * reads to complete before writing into the object. 2804 * drm_intel_gem_bo_disable_implicit_sync() can stop this behaviour, telling 2805 * the kernel never to insert a stall before using the object. Then this 2806 * function can be used to restore the implicit sync before subsequent 2807 * rendering. 2808 */ 2809void 2810drm_intel_gem_bo_enable_implicit_sync(drm_intel_bo *bo) 2811{ 2812 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2813 2814 bo_gem->kflags &= ~EXEC_OBJECT_ASYNC; 2815} 2816 2817/** 2818 * Query whether the kernel supports disabling of its implicit synchronisation 2819 * before execbuf. See drm_intel_gem_bo_disable_implicit_sync() 2820 */ 2821int 2822drm_intel_bufmgr_gem_can_disable_implicit_sync(drm_intel_bufmgr *bufmgr) 2823{ 2824 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2825 2826 return bufmgr_gem->has_exec_async; 2827} 2828 2829/** 2830 * Enable use of fenced reloc type. 2831 * 2832 * New code should enable this to avoid unnecessary fence register 2833 * allocation. If this option is not enabled, all relocs will have fence 2834 * register allocated. 2835 */ 2836void 2837drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 2838{ 2839 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2840 2841 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 2842 bufmgr_gem->fenced_relocs = true; 2843} 2844 2845/** 2846 * Return the additional aperture space required by the tree of buffer objects 2847 * rooted at bo. 2848 */ 2849static int 2850drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2851{ 2852 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2853 int i; 2854 int total = 0; 2855 2856 if (bo == NULL || bo_gem->included_in_check_aperture) 2857 return 0; 2858 2859 total += bo->size; 2860 bo_gem->included_in_check_aperture = true; 2861 2862 for (i = 0; i < bo_gem->reloc_count; i++) 2863 total += 2864 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2865 reloc_target_info[i].bo); 2866 2867 return total; 2868} 2869 2870/** 2871 * Count the number of buffers in this list that need a fence reg 2872 * 2873 * If the count is greater than the number of available regs, we'll have 2874 * to ask the caller to resubmit a batch with fewer tiled buffers. 2875 * 2876 * This function over-counts if the same buffer is used multiple times. 2877 */ 2878static unsigned int 2879drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2880{ 2881 int i; 2882 unsigned int total = 0; 2883 2884 for (i = 0; i < count; i++) { 2885 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2886 2887 if (bo_gem == NULL) 2888 continue; 2889 2890 total += bo_gem->reloc_tree_fences; 2891 } 2892 return total; 2893} 2894 2895/** 2896 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2897 * for the next drm_intel_bufmgr_check_aperture_space() call. 
2898 */ 2899static void 2900drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2901{ 2902 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2903 int i; 2904 2905 if (bo == NULL || !bo_gem->included_in_check_aperture) 2906 return; 2907 2908 bo_gem->included_in_check_aperture = false; 2909 2910 for (i = 0; i < bo_gem->reloc_count; i++) 2911 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2912 reloc_target_info[i].bo); 2913} 2914 2915/** 2916 * Return a conservative estimate for the amount of aperture required 2917 * for a collection of buffers. This may double-count some buffers. 2918 */ 2919static unsigned int 2920drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2921{ 2922 int i; 2923 unsigned int total = 0; 2924 2925 for (i = 0; i < count; i++) { 2926 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2927 if (bo_gem != NULL) 2928 total += bo_gem->reloc_tree_size; 2929 } 2930 return total; 2931} 2932 2933/** 2934 * Return the amount of aperture needed for a collection of buffers. 2935 * This avoids double counting any buffers, at the cost of looking 2936 * at every buffer in the set. 2937 */ 2938static unsigned int 2939drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2940{ 2941 int i; 2942 unsigned int total = 0; 2943 2944 for (i = 0; i < count; i++) { 2945 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2946 /* For the first buffer object in the array, we get an 2947 * accurate count back for its reloc_tree size (since nothing 2948 * had been flagged as being counted yet). We can save that 2949 * value out as a more conservative reloc_tree_size that 2950 * avoids double-counting target buffers. Since the first 2951 * buffer happens to usually be the batch buffer in our 2952 * callers, this can pull us back from doing the tree 2953 * walk on every new batch emit. 2954 */ 2955 if (i == 0) { 2956 drm_intel_bo_gem *bo_gem = 2957 (drm_intel_bo_gem *) bo_array[i]; 2958 bo_gem->reloc_tree_size = total; 2959 } 2960 } 2961 2962 for (i = 0; i < count; i++) 2963 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2964 return total; 2965} 2966 2967/** 2968 * Return -1 if the batchbuffer should be flushed before attempting to 2969 * emit rendering referencing the buffers pointed to by bo_array. 2970 * 2971 * This is required because if we try to emit a batchbuffer with relocations 2972 * to a tree of buffers that won't simultaneously fit in the aperture, 2973 * the rendering will return an error at a point where the software is not 2974 * prepared to recover from it. 2975 * 2976 * However, we also want to emit the batchbuffer significantly before we reach 2977 * the limit, as a series of batchbuffers each of which references buffers 2978 * covering almost all of the aperture means that at each emit we end up 2979 * waiting to evict a buffer from the last rendering, and we get synchronous 2980 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 2981 * get better parallelism. 
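 *
 * A minimal sketch of the intended calling pattern (batch_bo, bo_array,
 * count and flush_batch() are names assumed to exist in the caller, not
 * part of this library):
 *
 *	bo_array[0] = batch_bo;
 *	... gather every buffer the next state emission will reference ...
 *	if (drm_intel_bufmgr_check_aperture_space(bo_array, count) != 0) {
 *		flush_batch();	 submit what is already queued, then re-emit
 *				 the state into a fresh, smaller batch
 *	}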
2982 */ 2983static int 2984drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2985{ 2986 drm_intel_bufmgr_gem *bufmgr_gem = 2987 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2988 unsigned int total = 0; 2989 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2990 int total_fences; 2991 2992 /* Check for fence reg constraints if necessary */ 2993 if (bufmgr_gem->available_fences) { 2994 total_fences = drm_intel_gem_total_fences(bo_array, count); 2995 if (total_fences > bufmgr_gem->available_fences) 2996 return -ENOSPC; 2997 } 2998 2999 total = drm_intel_gem_estimate_batch_space(bo_array, count); 3000 3001 if (total > threshold) 3002 total = drm_intel_gem_compute_batch_space(bo_array, count); 3003 3004 if (total > threshold) { 3005 DBG("check_space: overflowed available aperture, " 3006 "%dkb vs %dkb\n", 3007 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 3008 return -ENOSPC; 3009 } else { 3010 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 3011 (int)bufmgr_gem->gtt_size / 1024); 3012 return 0; 3013 } 3014} 3015 3016/* 3017 * Disable buffer reuse for objects which are shared with the kernel 3018 * as scanout buffers 3019 */ 3020static int 3021drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 3022{ 3023 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3024 3025 bo_gem->reusable = false; 3026 return 0; 3027} 3028 3029static int 3030drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 3031{ 3032 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3033 3034 return bo_gem->reusable; 3035} 3036 3037static int 3038_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3039{ 3040 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3041 int i; 3042 3043 for (i = 0; i < bo_gem->reloc_count; i++) { 3044 if (bo_gem->reloc_target_info[i].bo == target_bo) 3045 return 1; 3046 if (bo == bo_gem->reloc_target_info[i].bo) 3047 continue; 3048 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 3049 target_bo)) 3050 return 1; 3051 } 3052 3053 for (i = 0; i< bo_gem->softpin_target_count; i++) { 3054 if (bo_gem->softpin_target[i] == target_bo) 3055 return 1; 3056 if (_drm_intel_gem_bo_references(bo_gem->softpin_target[i], target_bo)) 3057 return 1; 3058 } 3059 3060 return 0; 3061} 3062 3063/** Return true if target_bo is referenced by bo's relocation tree. */ 3064static int 3065drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 3066{ 3067 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 3068 3069 if (bo == NULL || target_bo == NULL) 3070 return 0; 3071 if (target_bo_gem->used_as_reloc_target) 3072 return _drm_intel_gem_bo_references(bo, target_bo); 3073 return 0; 3074} 3075 3076static void 3077add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 3078{ 3079 unsigned int i = bufmgr_gem->num_buckets; 3080 3081 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 3082 3083 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 3084 bufmgr_gem->cache_bucket[i].size = size; 3085 bufmgr_gem->num_buckets++; 3086} 3087 3088static void 3089init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 3090{ 3091 unsigned long size, cache_max_size = 64 * 1024 * 1024; 3092 3093 /* OK, so power of two buckets was too wasteful of memory. 3094 * Give 3 other sizes between each power of two, to hopefully 3095 * cover things accurately enough. 
(The alternative is 3096 * probably to just go for exact matching of sizes, and assume 3097 * that for things like composited window resize the tiled 3098 * width/height alignment and rounding of sizes to pages will 3099 * get us useful cache hit rates anyway) 3100 */ 3101 add_bucket(bufmgr_gem, 4096); 3102 add_bucket(bufmgr_gem, 4096 * 2); 3103 add_bucket(bufmgr_gem, 4096 * 3); 3104 3105 /* Initialize the linked lists for BO reuse cache. */ 3106 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 3107 add_bucket(bufmgr_gem, size); 3108 3109 add_bucket(bufmgr_gem, size + size * 1 / 4); 3110 add_bucket(bufmgr_gem, size + size * 2 / 4); 3111 add_bucket(bufmgr_gem, size + size * 3 / 4); 3112 } 3113} 3114 3115void 3116drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 3117{ 3118 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3119 3120 bufmgr_gem->vma_max = limit; 3121 3122 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 3123} 3124 3125static int 3126parse_devid_override(const char *devid_override) 3127{ 3128 static const struct { 3129 const char *name; 3130 int pci_id; 3131 } name_map[] = { 3132 { "brw", PCI_CHIP_I965_GM }, 3133 { "g4x", PCI_CHIP_GM45_GM }, 3134 { "ilk", PCI_CHIP_ILD_G }, 3135 { "snb", PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS }, 3136 { "ivb", PCI_CHIP_IVYBRIDGE_S_GT2 }, 3137 { "hsw", PCI_CHIP_HASWELL_CRW_E_GT3 }, 3138 { "byt", PCI_CHIP_VALLEYVIEW_3 }, 3139 { "bdw", 0x1620 | BDW_ULX }, 3140 { "skl", PCI_CHIP_SKYLAKE_DT_GT2 }, 3141 { "kbl", PCI_CHIP_KABYLAKE_DT_GT2 }, 3142 }; 3143 unsigned int i; 3144 3145 for (i = 0; i < ARRAY_SIZE(name_map); i++) { 3146 if (!strcmp(name_map[i].name, devid_override)) 3147 return name_map[i].pci_id; 3148 } 3149 3150 return strtod(devid_override, NULL); 3151} 3152 3153/** 3154 * Get the PCI ID for the device. This can be overridden by setting the 3155 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 3156 */ 3157static int 3158get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 3159{ 3160 char *devid_override; 3161 int devid = 0; 3162 int ret; 3163 drm_i915_getparam_t gp; 3164 3165 if (geteuid() == getuid()) { 3166 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 3167 if (devid_override) { 3168 bufmgr_gem->no_exec = true; 3169 return parse_devid_override(devid_override); 3170 } 3171 } 3172 3173 memclear(gp); 3174 gp.param = I915_PARAM_CHIPSET_ID; 3175 gp.value = &devid; 3176 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3177 if (ret) { 3178 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 3179 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 3180 } 3181 return devid; 3182} 3183 3184int 3185drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 3186{ 3187 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3188 3189 return bufmgr_gem->pci_device; 3190} 3191 3192/** 3193 * Sets the AUB filename. 3194 * 3195 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 3196 * for it to have any effect. 3197 */ 3198void 3199drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 3200 const char *filename) 3201{ 3202} 3203 3204/** 3205 * Sets up AUB dumping. 3206 * 3207 * This is a trace file format that can be used with the simulator. 3208 * Packets are emitted in a format somewhat like GPU command packets. 3209 * You can set up a GTT and upload your objects into the referenced 3210 * space, then send off batchbuffers and get BMPs out the other end. 
3211 */ 3212void 3213drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 3214{ 3215 fprintf(stderr, "libdrm aub dumping is deprecated.\n\n" 3216 "Use intel_aubdump from intel-gpu-tools instead. Install intel-gpu-tools,\n" 3217 "then run (for example)\n\n" 3218 "\t$ intel_aubdump --output=trace.aub glxgears -geometry 500x500\n\n" 3219 "See the intel_aubdump man page for more details.\n"); 3220} 3221 3222drm_intel_context * 3223drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 3224{ 3225 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3226 struct drm_i915_gem_context_create create; 3227 drm_intel_context *context = NULL; 3228 int ret; 3229 3230 context = calloc(1, sizeof(*context)); 3231 if (!context) 3232 return NULL; 3233 3234 memclear(create); 3235 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 3236 if (ret != 0) { 3237 DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 3238 strerror(errno)); 3239 free(context); 3240 return NULL; 3241 } 3242 3243 context->ctx_id = create.ctx_id; 3244 context->bufmgr = bufmgr; 3245 3246 return context; 3247} 3248 3249int 3250drm_intel_gem_context_get_id(drm_intel_context *ctx, uint32_t *ctx_id) 3251{ 3252 if (ctx == NULL) 3253 return -EINVAL; 3254 3255 *ctx_id = ctx->ctx_id; 3256 3257 return 0; 3258} 3259 3260void 3261drm_intel_gem_context_destroy(drm_intel_context *ctx) 3262{ 3263 drm_intel_bufmgr_gem *bufmgr_gem; 3264 struct drm_i915_gem_context_destroy destroy; 3265 int ret; 3266 3267 if (ctx == NULL) 3268 return; 3269 3270 memclear(destroy); 3271 3272 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3273 destroy.ctx_id = ctx->ctx_id; 3274 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 3275 &destroy); 3276 if (ret != 0) 3277 fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 3278 strerror(errno)); 3279 3280 free(ctx); 3281} 3282 3283int 3284drm_intel_get_reset_stats(drm_intel_context *ctx, 3285 uint32_t *reset_count, 3286 uint32_t *active, 3287 uint32_t *pending) 3288{ 3289 drm_intel_bufmgr_gem *bufmgr_gem; 3290 struct drm_i915_reset_stats stats; 3291 int ret; 3292 3293 if (ctx == NULL) 3294 return -EINVAL; 3295 3296 memclear(stats); 3297 3298 bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3299 stats.ctx_id = ctx->ctx_id; 3300 ret = drmIoctl(bufmgr_gem->fd, 3301 DRM_IOCTL_I915_GET_RESET_STATS, 3302 &stats); 3303 if (ret == 0) { 3304 if (reset_count != NULL) 3305 *reset_count = stats.reset_count; 3306 3307 if (active != NULL) 3308 *active = stats.batch_active; 3309 3310 if (pending != NULL) 3311 *pending = stats.batch_pending; 3312 } 3313 3314 return ret; 3315} 3316 3317int 3318drm_intel_reg_read(drm_intel_bufmgr *bufmgr, 3319 uint32_t offset, 3320 uint64_t *result) 3321{ 3322 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3323 struct drm_i915_reg_read reg_read; 3324 int ret; 3325 3326 memclear(reg_read); 3327 reg_read.offset = offset; 3328 3329 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, ®_read); 3330 3331 *result = reg_read.val; 3332 return ret; 3333} 3334 3335int 3336drm_intel_get_subslice_total(int fd, unsigned int *subslice_total) 3337{ 3338 drm_i915_getparam_t gp; 3339 int ret; 3340 3341 memclear(gp); 3342 gp.value = (int*)subslice_total; 3343 gp.param = I915_PARAM_SUBSLICE_TOTAL; 3344 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3345 if (ret) 3346 return -errno; 3347 3348 return 0; 3349} 3350 3351int 3352drm_intel_get_eu_total(int fd, unsigned int *eu_total) 3353{ 3354 drm_i915_getparam_t gp; 3355 
int ret; 3356 3357 memclear(gp); 3358 gp.value = (int*)eu_total; 3359 gp.param = I915_PARAM_EU_TOTAL; 3360 ret = drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); 3361 if (ret) 3362 return -errno; 3363 3364 return 0; 3365} 3366 3367int 3368drm_intel_get_pooled_eu(int fd) 3369{ 3370 drm_i915_getparam_t gp; 3371 int ret = -1; 3372 3373 memclear(gp); 3374 gp.param = I915_PARAM_HAS_POOLED_EU; 3375 gp.value = &ret; 3376 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) 3377 return -errno; 3378 3379 return ret; 3380} 3381 3382int 3383drm_intel_get_min_eu_in_pool(int fd) 3384{ 3385 drm_i915_getparam_t gp; 3386 int ret = -1; 3387 3388 memclear(gp); 3389 gp.param = I915_PARAM_MIN_EU_IN_POOL; 3390 gp.value = &ret; 3391 if (drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) 3392 return -errno; 3393 3394 return ret; 3395} 3396 3397/** 3398 * Annotate the given bo for use in aub dumping. 3399 * 3400 * \param annotations is an array of drm_intel_aub_annotation objects 3401 * describing the type of data in various sections of the bo. Each 3402 * element of the array specifies the type and subtype of a section of 3403 * the bo, and the past-the-end offset of that section. The elements 3404 * of \c annotations must be sorted so that ending_offset is 3405 * increasing. 3406 * 3407 * \param count is the number of elements in the \c annotations array. 3408 * If \c count is zero, then \c annotations will not be dereferenced. 3409 * 3410 * Annotations are copied into a private data structure, so caller may 3411 * re-use the memory pointed to by \c annotations after the call 3412 * returns. 3413 * 3414 * Annotations are stored for the lifetime of the bo; to reset to the 3415 * default state (no annotations), call this function with a \c count 3416 * of zero. 3417 */ 3418void 3419drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, 3420 drm_intel_aub_annotation *annotations, 3421 unsigned count) 3422{ 3423} 3424 3425static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER; 3426static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list }; 3427 3428static drm_intel_bufmgr_gem * 3429drm_intel_bufmgr_gem_find(int fd) 3430{ 3431 drm_intel_bufmgr_gem *bufmgr_gem; 3432 3433 DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) { 3434 if (bufmgr_gem->fd == fd) { 3435 atomic_inc(&bufmgr_gem->refcount); 3436 return bufmgr_gem; 3437 } 3438 } 3439 3440 return NULL; 3441} 3442 3443static void 3444drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr) 3445{ 3446 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3447 3448 if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) { 3449 pthread_mutex_lock(&bufmgr_list_mutex); 3450 3451 if (atomic_dec_and_test(&bufmgr_gem->refcount)) { 3452 DRMLISTDEL(&bufmgr_gem->managers); 3453 drm_intel_bufmgr_gem_destroy(bufmgr); 3454 } 3455 3456 pthread_mutex_unlock(&bufmgr_list_mutex); 3457 } 3458} 3459 3460void *drm_intel_gem_bo_map__gtt(drm_intel_bo *bo) 3461{ 3462 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3463 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3464 3465 if (bo_gem->gtt_virtual) 3466 return bo_gem->gtt_virtual; 3467 3468 if (bo_gem->is_userptr) 3469 return NULL; 3470 3471 pthread_mutex_lock(&bufmgr_gem->lock); 3472 if (bo_gem->gtt_virtual == NULL) { 3473 struct drm_i915_gem_mmap_gtt mmap_arg; 3474 void *ptr; 3475 3476 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 3477 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3478 3479 if (bo_gem->map_count++ == 0) 3480 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3481 
3482 memclear(mmap_arg); 3483 mmap_arg.handle = bo_gem->gem_handle; 3484 3485 /* Get the fake offset back... */ 3486 ptr = MAP_FAILED; 3487 if (drmIoctl(bufmgr_gem->fd, 3488 DRM_IOCTL_I915_GEM_MMAP_GTT, 3489 &mmap_arg) == 0) { 3490 /* and mmap it */ 3491 ptr = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE, 3492 MAP_SHARED, bufmgr_gem->fd, 3493 mmap_arg.offset); 3494 } 3495 if (ptr == MAP_FAILED) { 3496 if (--bo_gem->map_count == 0) 3497 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3498 ptr = NULL; 3499 } 3500 3501 bo_gem->gtt_virtual = ptr; 3502 } 3503 pthread_mutex_unlock(&bufmgr_gem->lock); 3504 3505 return bo_gem->gtt_virtual; 3506} 3507 3508void *drm_intel_gem_bo_map__cpu(drm_intel_bo *bo) 3509{ 3510 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3511 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3512 3513 if (bo_gem->mem_virtual) 3514 return bo_gem->mem_virtual; 3515 3516 if (bo_gem->is_userptr) { 3517 /* Return the same user ptr */ 3518 return bo_gem->user_virtual; 3519 } 3520 3521 pthread_mutex_lock(&bufmgr_gem->lock); 3522 if (!bo_gem->mem_virtual) { 3523 struct drm_i915_gem_mmap mmap_arg; 3524 3525 if (bo_gem->map_count++ == 0) 3526 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3527 3528 DBG("bo_map: %d (%s), map_count=%d\n", 3529 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3530 3531 memclear(mmap_arg); 3532 mmap_arg.handle = bo_gem->gem_handle; 3533 mmap_arg.size = bo->size; 3534 if (drmIoctl(bufmgr_gem->fd, 3535 DRM_IOCTL_I915_GEM_MMAP, 3536 &mmap_arg)) { 3537 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 3538 __FILE__, __LINE__, bo_gem->gem_handle, 3539 bo_gem->name, strerror(errno)); 3540 if (--bo_gem->map_count == 0) 3541 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3542 } else { 3543 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 3544 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 3545 } 3546 } 3547 pthread_mutex_unlock(&bufmgr_gem->lock); 3548 3549 return bo_gem->mem_virtual; 3550} 3551 3552void *drm_intel_gem_bo_map__wc(drm_intel_bo *bo) 3553{ 3554 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 3555 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3556 3557 if (bo_gem->wc_virtual) 3558 return bo_gem->wc_virtual; 3559 3560 if (bo_gem->is_userptr) 3561 return NULL; 3562 3563 pthread_mutex_lock(&bufmgr_gem->lock); 3564 if (!bo_gem->wc_virtual) { 3565 struct drm_i915_gem_mmap mmap_arg; 3566 3567 if (bo_gem->map_count++ == 0) 3568 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 3569 3570 DBG("bo_map: %d (%s), map_count=%d\n", 3571 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 3572 3573 memclear(mmap_arg); 3574 mmap_arg.handle = bo_gem->gem_handle; 3575 mmap_arg.size = bo->size; 3576 mmap_arg.flags = I915_MMAP_WC; 3577 if (drmIoctl(bufmgr_gem->fd, 3578 DRM_IOCTL_I915_GEM_MMAP, 3579 &mmap_arg)) { 3580 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 3581 __FILE__, __LINE__, bo_gem->gem_handle, 3582 bo_gem->name, strerror(errno)); 3583 if (--bo_gem->map_count == 0) 3584 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 3585 } else { 3586 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 3587 bo_gem->wc_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 3588 } 3589 } 3590 pthread_mutex_unlock(&bufmgr_gem->lock); 3591 3592 return bo_gem->wc_virtual; 3593} 3594 3595/** 3596 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 3597 * and manage map buffer objections. 
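 *
 * A minimal bring-up sketch; the device path, the 16384-byte batch size and
 * the immediate enable_reuse() call are illustrative choices, not
 * requirements of this function:
 *
 *	int fd = open("/dev/dri/renderD128", O_RDWR);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16384);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);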
3598 * 3599 * \param fd File descriptor of the opened DRM device. 3600 */ 3601drm_intel_bufmgr * 3602drm_intel_bufmgr_gem_init(int fd, int batch_size) 3603{ 3604 drm_intel_bufmgr_gem *bufmgr_gem; 3605 struct drm_i915_gem_get_aperture aperture; 3606 drm_i915_getparam_t gp; 3607 int ret, tmp; 3608 bool exec2 = false; 3609 3610 pthread_mutex_lock(&bufmgr_list_mutex); 3611 3612 bufmgr_gem = drm_intel_bufmgr_gem_find(fd); 3613 if (bufmgr_gem) 3614 goto exit; 3615 3616 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 3617 if (bufmgr_gem == NULL) 3618 goto exit; 3619 3620 bufmgr_gem->fd = fd; 3621 atomic_set(&bufmgr_gem->refcount, 1); 3622 3623 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 3624 free(bufmgr_gem); 3625 bufmgr_gem = NULL; 3626 goto exit; 3627 } 3628 3629 memclear(aperture); 3630 ret = drmIoctl(bufmgr_gem->fd, 3631 DRM_IOCTL_I915_GEM_GET_APERTURE, 3632 &aperture); 3633 3634 if (ret == 0) 3635 bufmgr_gem->gtt_size = aperture.aper_available_size; 3636 else { 3637 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 3638 strerror(errno)); 3639 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 3640 fprintf(stderr, "Assuming %dkB available aperture size.\n" 3641 "May lead to reduced performance or incorrect " 3642 "rendering.\n", 3643 (int)bufmgr_gem->gtt_size / 1024); 3644 } 3645 3646 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 3647 3648 if (IS_GEN2(bufmgr_gem->pci_device)) 3649 bufmgr_gem->gen = 2; 3650 else if (IS_GEN3(bufmgr_gem->pci_device)) 3651 bufmgr_gem->gen = 3; 3652 else if (IS_GEN4(bufmgr_gem->pci_device)) 3653 bufmgr_gem->gen = 4; 3654 else if (IS_GEN5(bufmgr_gem->pci_device)) 3655 bufmgr_gem->gen = 5; 3656 else if (IS_GEN6(bufmgr_gem->pci_device)) 3657 bufmgr_gem->gen = 6; 3658 else if (IS_GEN7(bufmgr_gem->pci_device)) 3659 bufmgr_gem->gen = 7; 3660 else if (IS_GEN8(bufmgr_gem->pci_device)) 3661 bufmgr_gem->gen = 8; 3662 else if (IS_GEN9(bufmgr_gem->pci_device)) 3663 bufmgr_gem->gen = 9; 3664 else if (IS_GEN10(bufmgr_gem->pci_device)) 3665 bufmgr_gem->gen = 10; 3666 else { 3667 free(bufmgr_gem); 3668 bufmgr_gem = NULL; 3669 goto exit; 3670 } 3671 3672 if (IS_GEN3(bufmgr_gem->pci_device) && 3673 bufmgr_gem->gtt_size > 256*1024*1024) { 3674 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 3675 * be used for tiled blits. To simplify the accounting, just 3676 * subtract the unmappable part (fixed to 256MB on all known 3677 * gen3 devices) if the kernel advertises it. 
*/ 3678 bufmgr_gem->gtt_size -= 256*1024*1024; 3679 } 3680 3681 memclear(gp); 3682 gp.value = &tmp; 3683 3684 gp.param = I915_PARAM_HAS_EXECBUF2; 3685 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3686 if (!ret) 3687 exec2 = true; 3688 3689 gp.param = I915_PARAM_HAS_BSD; 3690 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3691 bufmgr_gem->has_bsd = ret == 0; 3692 3693 gp.param = I915_PARAM_HAS_BLT; 3694 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3695 bufmgr_gem->has_blt = ret == 0; 3696 3697 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 3698 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3699 bufmgr_gem->has_relaxed_fencing = ret == 0; 3700 3701 gp.param = I915_PARAM_HAS_EXEC_ASYNC; 3702 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3703 bufmgr_gem->has_exec_async = ret == 0; 3704 3705 bufmgr_gem->bufmgr.bo_alloc_userptr = check_bo_alloc_userptr; 3706 3707 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; 3708 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3709 bufmgr_gem->has_wait_timeout = ret == 0; 3710 3711 gp.param = I915_PARAM_HAS_LLC; 3712 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3713 if (ret != 0) { 3714 /* Kernel does not supports HAS_LLC query, fallback to GPU 3715 * generation detection and assume that we have LLC on GEN6/7 3716 */ 3717 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | 3718 IS_GEN7(bufmgr_gem->pci_device)); 3719 } else 3720 bufmgr_gem->has_llc = *gp.value; 3721 3722 gp.param = I915_PARAM_HAS_VEBOX; 3723 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3724 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); 3725 3726 gp.param = I915_PARAM_HAS_EXEC_SOFTPIN; 3727 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3728 if (ret == 0 && *gp.value > 0) 3729 bufmgr_gem->bufmgr.bo_set_softpin_offset = drm_intel_gem_bo_set_softpin_offset; 3730 3731 if (bufmgr_gem->gen < 4) { 3732 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 3733 gp.value = &bufmgr_gem->available_fences; 3734 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3735 if (ret) { 3736 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 3737 errno); 3738 fprintf(stderr, "param: %d, val: %d\n", gp.param, 3739 *gp.value); 3740 bufmgr_gem->available_fences = 0; 3741 } else { 3742 /* XXX The kernel reports the total number of fences, 3743 * including any that may be pinned. 3744 * 3745 * We presume that there will be at least one pinned 3746 * fence for the scanout buffer, but there may be more 3747 * than one scanout and the user may be manually 3748 * pinning buffers. Let's move to execbuffer2 and 3749 * thereby forget the insanity of using fences... 3750 */ 3751 bufmgr_gem->available_fences -= 2; 3752 if (bufmgr_gem->available_fences < 0) 3753 bufmgr_gem->available_fences = 0; 3754 } 3755 } 3756 3757 if (bufmgr_gem->gen >= 8) { 3758 gp.param = I915_PARAM_HAS_ALIASING_PPGTT; 3759 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3760 if (ret == 0 && *gp.value == 3) 3761 bufmgr_gem->bufmgr.bo_use_48b_address_range = drm_intel_gem_bo_use_48b_address_range; 3762 } 3763 3764 /* Let's go with one relocation per every 2 dwords (but round down a bit 3765 * since a power of two will mean an extra page allocation for the reloc 3766 * buffer). 3767 * 3768 * Every 4 was too few for the blender benchmark. 
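 *
 * With a 16384-byte batch, for instance, this works out to
 * 16384 / sizeof(uint32_t) / 2 - 2 = 2046 relocations per batch.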
3769 */ 3770 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 3771 3772 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 3773 bufmgr_gem->bufmgr.bo_alloc_for_render = 3774 drm_intel_gem_bo_alloc_for_render; 3775 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 3776 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 3777 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 3778 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 3779 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 3780 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 3781 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 3782 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 3783 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 3784 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 3785 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 3786 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 3787 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 3788 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 3789 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 3790 /* Use the new one if available */ 3791 if (exec2) { 3792 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 3793 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3794 } else 3795 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 3796 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 3797 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3798 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref; 3799 bufmgr_gem->bufmgr.debug = 0; 3800 bufmgr_gem->bufmgr.check_aperture_space = 3801 drm_intel_gem_check_aperture_space; 3802 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3803 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 3804 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 3805 drm_intel_gem_get_pipe_from_crtc_id; 3806 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 3807 3808 init_cache_buckets(bufmgr_gem); 3809 3810 DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 3811 bufmgr_gem->vma_max = -1; /* unlimited by default */ 3812 3813 DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list); 3814 3815exit: 3816 pthread_mutex_unlock(&bufmgr_list_mutex); 3817 3818 return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL; 3819} 3820